Ejemplo n.º 1
0
// extensionsFromEmbeddings visits every embedding yielded by ei, records
// per-position bookkeeping for pattern, and offers each graph edge touching an
// embedded vertex to the checker built by validExtChecker.
//
// What is recorded depends on dt.Mode:
//   - FIS set: seen is replaced by a fresh map and fisEmbs collects each
//     embedding none of whose ids had been marked in seen when it was visited.
//     sets stays nil.
//   - FIS not set: sets gets one hash table per pattern vertex, keyed by graph
//     id and holding the first embedding that placed that id at that position.
//     The caller's seen map, when non-nil, is still updated with every id.
//   - OverlapPruning set: overlap[idx] holds the graph ids observed at
//     embedding position idx.
//
// total is the number of embeddings visited. exts holds every extension that
// the checker handed to its callback.
func extensionsFromEmbeddings(dt *Digraph, pattern *subgraph.SubGraph, ei subgraph.EmbIterator, seen map[int]bool) (total int, overlap []map[int]bool, fisEmbs []*subgraph.Embedding, sets []*hashtable.LinearHash, exts types.Set) {
	if dt.Mode&FIS != FIS {
		sets = make([]*hashtable.LinearHash, len(pattern.V))
	} else {
		seen = make(map[int]bool)
		fisEmbs = make([]*subgraph.Embedding, 0, 10)
	}
	if dt.Mode&OverlapPruning == OverlapPruning {
		overlap = make([]map[int]bool, len(pattern.V))
	}
	exts = set.NewSetMap(hashtable.NewLinearHash())
	offer := validExtChecker(dt, func(_ *subgraph.Embedding, ext *subgraph.Extension) {
		exts.Add(ext)
	})
	for emb, next := ei(false); next != nil; emb, next = next(false) {
		// fresh stays true while no id of this embedding was already in seen.
		fresh := true
		for idx, id := range emb.Ids {
			// The seen lookup must precede the seen update below.
			if fisEmbs != nil && seen[id] {
				fresh = false
			}
			if overlap != nil {
				ids := overlap[idx]
				if ids == nil {
					ids = make(map[int]bool)
					overlap[idx] = ids
				}
				ids[id] = true
			}
			if seen != nil {
				seen[id] = true
			}
			if sets != nil {
				table := sets[idx]
				if table == nil {
					table = hashtable.NewLinearHash()
					sets[idx] = table
				}
				key := types.Int(id)
				if !table.Has(key) {
					table.Put(key, emb)
				}
			}
			// Outgoing edges: the embedded vertex is the source side.
			for _, e := range dt.G.Kids[id] {
				offer(emb, &dt.G.E[e], idx, -1)
			}
			// Incoming edges: the embedded vertex is the target side.
			for _, e := range dt.G.Parents[id] {
				offer(emb, &dt.G.E[e], -1, idx)
			}
		}
		if fisEmbs != nil && fresh {
			fisEmbs = append(fisEmbs, emb)
		}
		total++
	}
	return total, overlap, fisEmbs, sets, exts
}
Ejemplo n.º 2
0
// extendNode builds the candidate child patterns of n.
//
// Each extension point reported by n.Extensions() is applied to a copy of n's
// subgraph. Candidates with more than dt.MaxVertices vertices are skipped. The
// rest are rebuilt from their canonical permutation and stored in the returned
// hash table, keyed by the rebuilt subgraph. The value is an *extInfo holding
// the extension point and the vertex ordering. Only the first extension point
// producing a given key is kept.
//
// An error from n.Extensions() is returned unchanged with a nil table.
func extendNode(dt *Digraph, n Node, debug bool) (*hashtable.LinearHash, error) {
	if debug {
		errors.Logf("DEBUG", "n.SubGraph %v", n.SubGraph())
	}
	parent := n.SubGraph()
	base := subgraph.Build(len(parent.V), len(parent.E)).From(parent)
	points, err := n.Extensions()
	if err != nil {
		return nil, err
	}
	found := hashtable.NewLinearHash()
	for _, point := range points {
		candidate := base.Copy()
		candidate.Extend(point)
		if len(candidate.V) > dt.MaxVertices {
			continue
		}
		vord, eord := candidate.CanonicalPermutation()
		canon := candidate.BuildFromPermutation(vord, eord)
		if found.Has(canon) {
			// An earlier extension point already produced this pattern.
			continue
		}
		found.Put(canon, &extInfo{point, vord})
	}

	return found, nil
}
Ejemplo n.º 3
0
// FilterAutomorphs wraps it in an iterator that skips any embedding whose
// sorted collection of ids has already been yielded. Embeddings that cover
// the same ids in a different order are therefore reported only once.
//
// dropped is returned unchanged. The returned iterator advances the wrapped
// one in place, and once that is exhausted every later call yields (nil, nil).
func FilterAutomorphs(it EmbIterator, dropped *VertexEmbeddings) (ei EmbIterator, _ *VertexEmbeddings) {
	visited := hashtable.NewLinearHash()
	// sortedIds builds the lookup key for an embedding: its ids in sorted order.
	sortedIds := func(emb *Embedding) *list.Sorted {
		out := list.NewSorted(len(emb.Ids), true)
		for i := range emb.Ids {
			out.Add(types.Int(emb.Ids[i]))
		}
		return out
	}
	ei = func(stop bool) (emb *Embedding, _ EmbIterator) {
		for it != nil {
			emb, it = it(stop)
			if it == nil {
				break
			}
			key := sortedIds(emb)
			// errors.Logf("AUTOMORPH-DEBUG", "emb %v ids %v has %v", emb, key, visited.Has(key))
			if visited.Has(key) {
				continue
			}
			visited.Put(key, nil)
			return emb, ei
		}
		return nil, nil
	}
	return ei, dropped
}
Ejemplo n.º 4
0
// TestLinearHashtableCast is a compile-time check that the value returned by
// hashtable.NewLinearHash can be used as each of the types interfaces listed
// below. It makes no runtime assertions.
func TestLinearHashtableCast(t *testing.T) {
	table := hashtable.NewLinearHash()
	var (
		_ types.Sized       = table
		_ types.MapIterable = table
		_ types.MapOperable = table
		_ types.Map         = table
	)
}
Ejemplo n.º 5
0
// LoadGraph reads a graph from the input supplied by getInput.
//
// getInput is called twice: once so graphSize can size the graph, and once
// for the actual load. Each call returns a reader and a function that
// releases it. Lines without a tab character (including empty lines) are
// ignored. Every other line goes through parseLine and is dispatched to
// LoadVertex or LoadEdge. Any other line type is recorded as an error.
//
// The graph is always returned. err is nil when no line failed, otherwise it
// is a ParseErrors holding every failure in the order encountered.
func LoadGraph(getInput func() (io.Reader, func()), supportAttr string, nodeAttrs *bptree.BpTree, supportAttrs map[int]string) (graph *goiso.Graph, err error) {
	// First pass: only used to size the graph.
	sizing, release := getInput()
	G := goiso.NewGraph(graphSize(sizing))
	release()
	graph = &G
	vids := hashtable.NewLinearHash() // int64 ==> *goiso.Vertex

	// Second pass: load vertices and edges.
	input, closeInput := getInput()
	defer closeInput()

	var problems ParseErrors
	ProcessLines(input, func(line []byte) {
		// An empty line has no tab either, so this single check covers both.
		if bytes.IndexByte(line, '\t') < 0 {
			return
		}
		kind, data := parseLine(line)
		if kind == "vertex" {
			if loadErr := LoadVertex(graph, supportAttr, vids, nodeAttrs, supportAttrs, data); loadErr != nil {
				problems = append(problems, loadErr)
			}
		} else if kind == "edge" {
			if loadErr := LoadEdge(graph, vids, data); loadErr != nil {
				problems = append(problems, loadErr)
			}
		} else {
			problems = append(problems, fmt.Errorf("Unknown line type %v", kind))
		}
	})
	if len(problems) > 0 {
		return graph, problems
	}
	return graph, nil
}
Ejemplo n.º 6
0
// NewQueue constructs an empty Queue. Its index is a fresh linear hash table
// and its lock is a newly allocated mutex. allowDups is stored on the queue
// as given. head, tail and length keep their zero values.
func NewQueue(allowDups bool) *Queue {
	q := new(Queue)
	q.index = hashtable.NewLinearHash()
	q.lock = new(sync.Mutex)
	q.allowDups = allowDups
	return q
}
Ejemplo n.º 7
0
// findChildren computes the child lattice nodes of n.
//
// Candidate patterns come from extendNode. For each candidate a job is
// submitted to dt.pool which:
//  1. consults allow (when non-nil) and skips the candidate if it is rejected;
//  2. translates n's unsupported extensions, unsupported embeddings and
//     overlap into the candidate's vertex ordering (vord);
//  3. calls ExtsAndEmbs to compute support, extensions and embeddings;
//  4. emits a new node when support >= dt.Support(), otherwise reports the
//     extension point as unsupported.
//
// Results are funneled through three channels, each drained by its own
// collector goroutine, so that nodes/vords, newUnsupportedExts and errs are
// each only mutated by a single goroutine. Once all jobs have been accounted
// for, any collected errors are chained into one error; otherwise every new
// node is handed the accumulated set of unsupported extensions.
//
// Returns the new nodes, or nil and an error if any step failed.
func findChildren(n Node, allow func(*subgraph.SubGraph) (bool, error), debug bool) (nodes []lattice.Node, err error) {
	if debug {
		errors.Logf("CHILDREN-DEBUG", "node %v", n)
	}
	dt := n.dt()
	sg := n.SubGraph()
	patterns, err := extendNode(dt, n, debug)
	if err != nil {
		return nil, err
	}
	unsupEmbs, err := n.UnsupportedEmbs()
	if err != nil {
		return nil, err
	}
	unsupExts, err := n.UnsupportedExts()
	if err != nil {
		return nil, err
	}
	// Work on a copy so the extensions found unsupported below do not alter
	// the set obtained from n.
	newUnsupportedExts := unsupExts.Copy()
	nOverlap, err := n.Overlap()
	if err != nil {
		return nil, err
	}
	// wg counts outstanding jobs. Each job is released exactly once: either
	// directly by the job (candidate not allowed) or by the collector that
	// receives the job's single message.
	var wg sync.WaitGroup
	type nodeEp struct {
		n    lattice.Node
		vord []int
	}
	nodeCh := make(chan nodeEp)
	// vords[i] is the vertex ordering that produced nodes[i].
	vords := make([][]int, 0, 10)
	go func() {
		for nep := range nodeCh {
			nodes = append(nodes, nep.n)
			vords = append(vords, nep.vord)
			wg.Done()
		}
	}()
	epCh := make(chan *subgraph.Extension)
	go func() {
		for ep := range epCh {
			newUnsupportedExts.Add(ep)
			wg.Done()
		}
	}()
	errorCh := make(chan error)
	errs := make([]error, 0, 10)
	go func() {
		for err := range errorCh {
			errs = append(errs, err)
			wg.Done()
		}
	}()
	for k, v, next := patterns.Iterate()(); next != nil; k, v, next = next() {
		// The outer function literal runs immediately: it binds this
		// iteration's pattern and extInfo and registers the job with wg
		// before the job itself is handed to the pool.
		err := dt.pool.Do(func(pattern *subgraph.SubGraph, i *extInfo) func() {
			wg.Add(1)
			return func() {
				if allow != nil {
					if allowed, err := allow(pattern); err != nil {
						// The error collector calls wg.Done for this job.
						errorCh <- err
						return
					} else if !allowed {
						// No message is sent, so release the job here.
						wg.Done()
						return
					}
				}
				ep := i.ep
				vord := i.vord
				// tu: n's unsupported extensions expressed in the
				// candidate's vertex ordering.
				tu := set.NewSetMap(hashtable.NewLinearHash())
				// Note: this loop's i shadows the *extInfo parameter, which
				// has already been read above.
				for i, next := unsupExts.Items()(); next != nil; i, next = next() {
					tu.Add(i.(*subgraph.Extension).Translate(len(sg.V), vord))
				}
				pOverlap := translateOverlap(nOverlap, vord)
				tUnsupEmbs := unsupEmbs.Translate(len(sg.V), vord).Set()
				support, exts, embs, overlap, dropped, err := ExtsAndEmbs(dt, pattern, pOverlap, tu, tUnsupEmbs, dt.Mode, debug)
				if err != nil {
					errorCh <- err
					return
				}
				if debug {
					errors.Logf("CHILDREN-DEBUG", "pattern %v support %v exts %v", pattern.Pretty(dt.Labels), len(embs), len(exts))
				}
				if support >= dt.Support() {
					nodeCh <- nodeEp{n.New(pattern, exts, embs, overlap, dropped), vord}
				} else {
					epCh <- ep
				}
			}
		}(k.(*subgraph.SubGraph), v.(*extInfo)))
		if err != nil {
			// NOTE(review): this return skips wg.Wait and the channel closes
			// below, so the three collector goroutines appear to be left
			// blocked, and wg.Add(1) has already run for this job. Confirm
			// against dt.pool.Do's error semantics before changing.
			return nil, err
		}
	}
	// Every job has been released once wg.Wait returns, so the collectors are
	// idle and the channels can be closed to let them exit.
	wg.Wait()
	close(nodeCh)
	close(epCh)
	close(errorCh)
	if len(errs) > 0 {
		e := errors.Errorf("findChildren error").(*errors.Error)
		for _, err := range errs {
			e.Chain(err)
		}
		return nil, e
	}
	// Each child is given the full set of unsupported extensions together
	// with the parent's vertex count and the child's own vertex ordering.
	for i, newNode := range nodes {
		err := newNode.(Node).SaveUnsupportedExts(len(sg.V), vords[i], newUnsupportedExts)
		if err != nil {
			return nil, err
		}
	}
	return nodes, nil
}
Ejemplo n.º 8
0
// main parses the command line, loads the graph, runs the random-walk miner
// and writes its results under the output directory.
//
// Required options: a support greater than 0 (-s/--support), a sample size
// greater than 0 (--sample-size), an output directory (-o/--output) and a
// cache directory (-c/--cache), plus exactly one positional argument: the
// path to the graph file. Optional: -m/--min-vertices, --cpu-profile,
// --mem-profile and --probabilities.
//
// Output written to the output directory:
//   - "node-attrs.bptree": the node attribute tree filled while loading;
//   - "tries": the value of m.Tries;
//   - whatever writeMaximalPatterns emits for the mined pattern labels;
//   - with --probabilities, per pattern directory (named by the pattern's
//     index in key order): a "duplicates" file with how many times the label
//     was reported, and either "matrices.json" or an "error" file for the
//     first embedding found for that label.
func main() {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		"hs:m:o:c:",
		[]string{
			"help",
			"support=",
			"cache=",
			"min-vertices=",
			"sample-size=",
			"mem-profile=",
			"cpu-profile=",
			"output=",
			"probabilities",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		Usage(ErrorCodes["opts"])
	}
	log.Printf("Number of goroutines = %v", runtime.NumGoroutine())

	// -1 / "" mark options that were not supplied; they are validated below.
	support := -1
	minVertices := -1
	sampleSize := -1
	memProfile := ""
	cpuProfile := ""
	outputDir := ""
	cache := ""
	computePrs := false
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			Usage(0)
		case "-o", "--output":
			outputDir = EmptyDir(AssertDir(oa.Arg()))
		case "-s", "--support":
			support = ParseInt(oa.Arg())
		case "-m", "--min-vertices":
			minVertices = ParseInt(oa.Arg())
		case "-c", "--cache":
			cache = AssertDir(oa.Arg())
		case "--probabilities":
			computePrs = true
		case "--sample-size":
			sampleSize = ParseInt(oa.Arg())
		case "--mem-profile":
			memProfile = AssertFile(oa.Arg())
		case "--cpu-profile":
			cpuProfile = AssertFile(oa.Arg())
		}
	}

	if support < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support)
		Usage(ErrorCodes["opts"])
	}

	if sampleSize < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize)
		Usage(ErrorCodes["opts"])
	}

	if outputDir == "" {
		fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n")
		Usage(ErrorCodes["opts"])
	}

	if cache == "" {
		fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>")
		Usage(ErrorCodes["opts"])
	}

	if len(args) != 1 {
		fmt.Fprintln(os.Stderr, "Expected a path to the graph file")
		Usage(ErrorCodes["opts"])
	}

	// The graph loader asks for the input more than once, so hand it a
	// factory rather than a single reader.
	getReader := func() (io.Reader, func()) { return Input(args[0]) }

	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		err = pprof.StartCPUProfile(f)
		if err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	// memProfFile stays a nil interface when no memory profile was requested.
	var memProfFile io.WriteCloser
	if memProfile != "" {
		f, err := os.Create(memProfile)
		if err != nil {
			log.Fatal(err)
		}
		memProfFile = f
		defer f.Close()
	}

	nodePath := path.Join(outputDir, "node-attrs.bptree")

	nodeBf, err := fmap.CreateBlockFile(nodePath)
	if err != nil {
		log.Fatal(err)
	}
	defer nodeBf.Close()
	nodeAttrs, err := bptree.New(nodeBf, 4, -1)
	if err != nil {
		log.Fatal(err)
	}

	G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil)
	if err != nil {
		log.Println("Error loading the graph")
		log.Panic(err)
	}
	log.Print("Loaded graph, about to start mining")

	// Each maker hands out a store backed by a uniquely numbered file inside
	// the cache directory.
	sgCount := 0
	sgMaker := func() store.SubGraphs {
		name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount)
		sgCount++
		file := path.Join(cache, name)
		s := store.NewFs2BpTree(G, file)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	idxCount := 0
	idxMaker := func() store.UniqueIndex {
		name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount)
		idxCount++
		file := path.Join(cache, name)
		s := store.NewFs2UniqueIndex(G, file)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	setsCount := 0
	setsMaker := func() store.SetsMap {
		name := fmt.Sprintf("sets-%d.b+tree", setsCount)
		setsCount++
		file := path.Join(cache, name)
		s := store.NewFs2Sets(file)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	// memFsMaker := func() store.SubGraphs {
	// 	return store.AnonFs2BpTree(G)
	// }

	m := mine.RandomWalk(
		G,
		support,
		minVertices,
		sampleSize,
		memProfFile,
		sgMaker,
		idxMaker,
		setsMaker,
	)
	// Tally how many times each label was reported while collecting the
	// labels into keys.
	keys := list.NewSorted(10, false)
	counts := hashtable.NewLinearHash()
	for label := range m.Report {
		key := types.ByteSlice(label)
		count := 0
		if counts.Has(key) {
			c, err := counts.Get(key)
			if err != nil {
				log.Panic(err)
			}
			count = c.(int)
		}
		counts.Put(key, count+1)
		keys.Add(key)
	}
	log.Println("Tries", m.Tries)
	triesPath := path.Join(outputDir, "tries")
	if f, e := os.Create(triesPath); e != nil {
		// Report the os.Create failure itself; the outer err is nil here.
		log.Fatal(e)
	} else {
		fmt.Fprintln(f, m.Tries)
		f.Close()
	}
	{
		log.Println("Finished mining! Writing output...")
		keyCh := make(chan []byte)
		go func() {
			for k, next := keys.Items()(); next != nil; k, next = next() {
				keyCh <- []byte(k.(types.ByteSlice))
			}
			close(keyCh)
		}()
		writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir)
	}

	if !computePrs {
		log.Println("Done!")
		return
	}

	log.Println("Finished writing patterns. Computing probabilities...")
	count := 0
	for k, next := keys.Items()(); next != nil; k, next = next() {
		patDir := path.Join(outputDir, fmt.Sprintf("%d", count))
		log.Println("-----------------------------------")
		c, err := counts.Get(k)
		if err != nil {
			log.Fatal(err)
		}
		key := []byte(k.(types.ByteSlice))
		dupCount := c.(int)
		// if max.Count(key) < support {
		// 	log.Println("wat not enough subgraphs", max.Count(key))
		// 	continue
		// }
		if dupFile, err := os.Create(path.Join(patDir, "duplicates")); err != nil {
			log.Fatal(err)
		} else {
			fmt.Fprintln(dupFile, dupCount)
			dupFile.Close()
		}
		// Only the first embedding found for the key is used (note the
		// unconditional break at the end of the loop body).
		for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() {
			vp, Q, R, u, err := m.PrMatrices(sg)
			if err != nil {
				// A matrix failure is recorded next to the pattern, not fatal.
				log.Println(err)
				errPath := path.Join(patDir, "error")
				if f, e := os.Create(errPath); e != nil {
					// Failing to create the error file is fatal; report that
					// failure (e), not the matrix error already logged above.
					log.Fatal(e)
				} else {
					fmt.Fprintln(f, err)
					f.Close()
				}
			} else {
				data, err := json.Marshal(map[string]interface{}{
					"Q":              Q,
					"R":              R,
					"u":              u,
					"startingPoints": vp,
				})
				if err != nil {
					log.Fatal(err)
				}
				matPath := path.Join(patDir, "matrices.json")
				if matFile, err := os.Create(matPath); err != nil {
					log.Fatal(err)
				} else {
					_, err := matFile.Write(data)
					if err != nil {
						matFile.Close()
						log.Fatal(err)
					}
					matFile.Close()
				}
			}
			break
		}
		count++
	}
	log.Println("Done!")
}
Ejemplo n.º 9
0
// extensionsFromFreqEdges derives candidate extensions for pattern from the
// color-indexed edges in dt.Indices, while concurrently walking the
// embeddings yielded by ei to collect support bookkeeping.
//
// Extension generation runs in background goroutines. For every pattern
// vertex u it proposes, for each indexed edge leaving u's color, an edge to
// every pattern vertex whose color matches the edge target plus an edge to a
// new vertex, and for each indexed edge entering u's color, an edge from a
// new vertex. Extensions the pattern already has are filtered out.
//
// The embedding walk records, depending on dt.Mode:
//   - FIS set: seen is replaced by a fresh map and fisEmbs collects each
//     embedding none of whose ids had been marked in seen when visited.
//   - FIS not set: sets gets one hash table per pattern vertex, keyed by graph
//     id and holding the first embedding that placed that id at that position.
//   - OverlapPruning set: overlap[idx] holds the ids observed at position idx.
//
// The walk asks the iterator to stop once the tracked minimum reaches
// dt.Support(). That minimum is the smallest per-position set size seen for
// the current embedding, or len(fisEmbs) in FIS mode.
//
// total is the number of embeddings visited. exts is nil when total is below
// the support threshold; otherwise it is the generated extension set.
func extensionsFromFreqEdges(dt *Digraph, pattern *subgraph.SubGraph, ei subgraph.EmbIterator, seen map[int]bool) (total int, overlap []map[int]bool, fisEmbs []*subgraph.Embedding, sets []*hashtable.LinearHash, exts types.Set) {
	if dt.Mode&FIS == FIS {
		seen = make(map[int]bool)
		fisEmbs = make([]*subgraph.Embedding, 0, 10)
	} else {
		sets = make([]*hashtable.LinearHash, len(pattern.V))
	}
	if dt.Mode&OverlapPruning == OverlapPruning {
		overlap = make([]map[int]bool, len(pattern.V))
	}
	support := dt.Support()
	// Buffered with capacity 1 so the collector can always deliver its result
	// and exit, even when this function returns early without reading it
	// (total < support). With an unbuffered channel that goroutine would
	// block forever on the send.
	done := make(chan types.Set, 1)
	go func(done chan types.Set) {
		extCh := make(chan *subgraph.Extension, len(pattern.V))
		// Collector: keeps only extensions the pattern does not already have.
		go func() {
			hash := set.NewSetMap(hashtable.NewLinearHash())
			for ext := range extCh {
				if !pattern.HasExtension(ext) {
					hash.Add(ext)
				}
			}
			done <- hash
			close(done)
		}()
		for i := range pattern.V {
			u := &pattern.V[i]
			for _, e := range dt.Indices.EdgesFromColor[u.Color] {
				// Edges from u to existing pattern vertices of the right color.
				for j := range pattern.V {
					v := &pattern.V[j]
					if v.Color == e.TargColor {
						ep := subgraph.NewExt(
							subgraph.Vertex{Idx: i, Color: e.SrcColor},
							subgraph.Vertex{Idx: j, Color: e.TargColor},
							e.EdgeColor)
						extCh <- ep
					}
				}
				// Edge from u to a new vertex (index len(pattern.V)).
				ep := subgraph.NewExt(
					subgraph.Vertex{Idx: i, Color: u.Color},
					subgraph.Vertex{Idx: len(pattern.V), Color: e.TargColor},
					e.EdgeColor)
				extCh <- ep
			}
			// Edge from a new vertex into u.
			for _, e := range dt.Indices.EdgesToColor[u.Color] {
				ep := subgraph.NewExt(
					subgraph.Vertex{Idx: len(pattern.V), Color: e.SrcColor},
					subgraph.Vertex{Idx: i, Color: u.Color},
					e.EdgeColor)
				extCh <- ep
			}
		}
		close(extCh)
	}(done)
	stop := false
	for emb, next := ei(stop); next != nil; emb, next = next(stop) {
		// minSize is -1 until a size has been observed for this embedding.
		minSize := -1
		seenIt := false
		for idx, id := range emb.Ids {
			// The seen lookup must precede the seen update below.
			if fisEmbs != nil {
				if seen[id] {
					seenIt = true
				}
			}
			if overlap != nil {
				if overlap[idx] == nil {
					overlap[idx] = make(map[int]bool)
				}
				overlap[idx][id] = true
			}
			if seen != nil {
				seen[id] = true
			}
			if sets != nil {
				if sets[idx] == nil {
					sets[idx] = hashtable.NewLinearHash()
				}
				table := sets[idx]
				if !table.Has(types.Int(id)) {
					table.Put(types.Int(id), emb)
				}
				size := table.Size()
				if minSize == -1 || size < minSize {
					minSize = size
				}
			}
		}
		if fisEmbs != nil && !seenIt {
			fisEmbs = append(fisEmbs, emb)
			minSize = len(fisEmbs)
		}
		total++
		if minSize >= support {
			// Passed to the iterator on its next call to request termination.
			stop = true
		}
	}
	if total < support {
		return total, overlap, fisEmbs, sets, nil
	}
	return total, overlap, fisEmbs, sets, <-done
}