Exemple #1
0
func itemsetCommonAncestor(patterns []lattice.Pattern) (_ lattice.Pattern, err error) {
	var items types.Set
	for i, pat := range patterns {
		p := pat.(*itemset.Pattern)
		if i == 0 {
			items = p.Items
		} else {
			items, err = items.Intersect(p.Items)
			if err != nil {
				return nil, err
			}
		}
	}
	return &itemset.Pattern{items.(*set.SortedSet)}, nil
}
Exemple #2
0
func jaccardSetSimilarity(a, b types.Set) float64 {
	i, err := a.Intersect(b)
	exc.ThrowOnError(err)
	inter := float64(i.Size())
	return 1.0 - (inter / (float64(a.Size()) + float64(b.Size()) - inter))
}
Exemple #3
0
func (n *Node) Parents() ([]lattice.Node, error) {
	if n.pat.Items.Size() == 0 {
		return []lattice.Node{}, nil
	} else if n.pat.Items.Size() == 1 {
		return []lattice.Node{n.dt.empty}, nil
	}
	i := setToInt32s(n.pat.Items)
	if has, err := n.dt.ParentCount.Has(i); err != nil {
		return nil, err
	} else if has {
		return n.cached(n.dt.Parents, i)
	}
	parents := make([]*set.SortedSet, 0, n.pat.Items.Size())
	for item, next := n.pat.Items.Items()(); next != nil; item, next = next() {
		parent := n.pat.Items.Copy()
		parent.Delete(item)
		parents = append(parents, parent)
	}
	nodes := make([]lattice.Node, 0, 10)
	for _, items := range parents {
		if node, err := TryLoadNode(setToInt32s(items), n.dt); err != nil {
			return nil, err
		} else if node != nil {
			nodes = append(nodes, node)
			continue
		}
		ctxs := int32sToSet(n.txs)
		var txs types.Set
		for item, next := items.Items()(); next != nil; item, next = next() {
			mytxs := set.NewSortedSet(len(n.txs) + 10)
			for _, tx := range n.dt.InvertedIndex[item.(types.Int32)] {
				if !ctxs.Has(types.Int32(tx)) {
					mytxs.Add(types.Int32(tx))
				}
			}
			var err error
			if txs == nil {
				txs = mytxs
			} else {
				txs, err = txs.Intersect(mytxs)
				if err != nil {
					return nil, err
				}
			}
		}
		txs, err := txs.Union(ctxs)
		if err != nil {
			return nil, err
		}
		stxs := make([]int32, 0, txs.Size())
		for item, next := txs.Items()(); next != nil; item, next = next() {
			stxs = append(stxs, int32(item.(types.Int32)))
		}
		node := &Node{Pattern{items}, n.dt, stxs}
		err = node.Save()
		if err != nil {
			return nil, err
		}
		nodes = append(nodes, node)
	}
	err := n.cache(n.dt.ParentCount, n.dt.Parents, i, nodes)
	if err != nil {
		return nil, err
	}
	return nodes, nil
}
Exemple #4
0
// unique extensions and supported embeddings
func ExtsAndEmbs(dt *Digraph, pattern *subgraph.SubGraph, patternOverlap []map[int]bool, unsupExts types.Set, unsupEmbs map[subgraph.VrtEmb]bool, mode Mode, debug bool) (int, []*subgraph.Extension, []*subgraph.Embedding, []map[int]bool, subgraph.VertexEmbeddings, error) {
	if !debug {
		if has, support, exts, embs, overlap, unsupEmbs, err := loadCachedExtsEmbs(dt, pattern); err != nil {
			return 0, nil, nil, nil, nil, err
		} else if has {
			if false {
				errors.Logf("LOAD-DEBUG", "Loaded cached %v exts %v embs %v", pattern, len(exts), len(embs))
			}
			return support, exts, embs, overlap, unsupEmbs, nil
		}
	}
	if CACHE_DEBUG || debug {
		errors.Logf("CACHE-DEBUG", "ExtsAndEmbs %v", pattern.Pretty(dt.Labels))
	}

	// compute the embeddings
	var seen map[int]bool = nil
	var ei subgraph.EmbIterator
	var dropped *subgraph.VertexEmbeddings
	switch {
	case mode&(MNI|FIS) != 0:
		ei, dropped = pattern.IterEmbeddings(
			dt.EmbSearchStartPoint, dt.Indices, unsupEmbs, patternOverlap, nil)
	case mode&(GIS) == GIS:
		seen = make(map[int]bool)
		ei, dropped = pattern.IterEmbeddings(
			dt.EmbSearchStartPoint,
			dt.Indices,
			unsupEmbs,
			patternOverlap,
			func(ids *subgraph.IdNode) bool {
				for c := ids; c != nil; c = c.Prev {
					if _, has := seen[c.Id]; has {
						for c := ids; c != nil; c = c.Prev {
							seen[c.Id] = true
						}
						return true
					}
				}
				return false
			})
	default:
		return 0, nil, nil, nil, nil, errors.Errorf("Unknown support counting strategy %v", mode)
	}

	// find the actual embeddings and compute the extensions
	// the extensions are stored in exts
	// the embeddings are stored in sets
	var exts types.Set
	var fisEmbs []*subgraph.Embedding
	var sets []*hashtable.LinearHash
	var overlap []map[int]bool
	var total int
	if mode&ExtFromEmb == ExtFromEmb && len(pattern.E) > 0 {
		// add the supported embeddings to the vertex sets
		// add the extensions to the extensions set
		total, overlap, fisEmbs, sets, exts = extensionsFromEmbeddings(dt, pattern, ei, seen)
		if total == 0 {
			// return 0, nil, nil, nil, nil, errors.Errorf("could not find any embedding of %v", pattern)
			// because we are extending from frequent edges for vertices this
			// is ok.
			return 0, nil, nil, nil, nil, nil
		}
	} else if mode&ExtFromFreqEdges == ExtFromFreqEdges || len(pattern.E) <= 0 {
		total, overlap, fisEmbs, sets, exts = extensionsFromFreqEdges(dt, pattern, ei, seen)
		if total < dt.Support() {
			return 0, nil, nil, nil, nil, nil
		}
	} else {
		return 0, nil, nil, nil, nil, errors.Errorf("Unknown extension strategy %v", mode)
	}

	// construct the extensions output slice
	extensions := make([]*subgraph.Extension, 0, exts.Size())
	for i, next := exts.Items()(); next != nil; i, next = next() {
		ext := i.(*subgraph.Extension)
		if unsupExts != nil && unsupExts.Has(ext) {
			continue
		}
		extensions = append(extensions, ext)
	}

	if mode&EmbeddingPruning == EmbeddingPruning && unsupEmbs != nil {
		// for i, next := unsupEmbs.Items()(); next != nil; i, next = next() {
		for emb, ok := range unsupEmbs {
			if ok {
				*dropped = append(*dropped, &emb)
			}
		}
	}

	var embeddings []*subgraph.Embedding
	if mode&(MNI|GIS) != 0 {
		// compute the minimally supported vertex
		arg, size := stats.Min(stats.RandomPermutation(len(sets)), func(i int) float64 {
			return float64(sets[i].Size())
		})
		// construct the embeddings output slice
		embeddings = make([]*subgraph.Embedding, 0, int(size)+1)
		for i, next := sets[arg].Values()(); next != nil; i, next = next() {
			emb := i.(*subgraph.Embedding)
			embeddings = append(embeddings, emb)
		}
	} else if mode&(FIS) == FIS {
		embeddings = fisEmbs
	} else {
		return 0, nil, nil, nil, nil, errors.Errorf("Unknown support counting strategy %v", mode)
	}

	if CACHE_DEBUG || debug {
		errors.Logf("CACHE-DEBUG", "Caching exts %v embs %v total-embs %v : %v", len(extensions), len(embeddings), total, pattern.Pretty(dt.Labels))
	}
	if !debug {
		err := cacheExtsEmbs(dt, pattern, len(embeddings), extensions, embeddings, overlap, *dropped)
		if err != nil {
			return 0, nil, nil, nil, nil, err
		}
	}
	return len(embeddings), extensions, embeddings, overlap, *dropped, nil
}