// extensionsFromEmbeddings walks every embedding produced by ei, recording the
// total embedding count, per-vertex overlap, embeddings whose vertices were all
// previously unseen (FIS mode), per-vertex embedding sets, and the set of valid
// extensions.
func extensionsFromEmbeddings(dt *Digraph, pattern *subgraph.SubGraph, ei subgraph.EmbIterator, seen map[int]bool) (total int, overlap []map[int]bool, fisEmbs []*subgraph.Embedding, sets []*hashtable.LinearHash, exts types.Set) {
	if dt.Mode&FIS == FIS {
		seen = make(map[int]bool)
		fisEmbs = make([]*subgraph.Embedding, 0, 10)
	} else {
		sets = make([]*hashtable.LinearHash, len(pattern.V))
	}
	if dt.Mode&OverlapPruning == OverlapPruning {
		overlap = make([]map[int]bool, len(pattern.V))
	}
	exts = set.NewSetMap(hashtable.NewLinearHash())
	add := validExtChecker(dt, func(emb *subgraph.Embedding, ext *subgraph.Extension) {
		exts.Add(ext)
	})
	for emb, next := ei(false); next != nil; emb, next = next(false) {
		seenIt := false
		for idx, id := range emb.Ids {
			if fisEmbs != nil {
				if seen[id] {
					seenIt = true
				}
			}
			if overlap != nil {
				if overlap[idx] == nil {
					overlap[idx] = make(map[int]bool)
				}
				overlap[idx][id] = true
			}
			if seen != nil {
				seen[id] = true
			}
			if sets != nil {
				if sets[idx] == nil {
					sets[idx] = hashtable.NewLinearHash()
				}
				set := sets[idx]
				if !set.Has(types.Int(id)) {
					set.Put(types.Int(id), emb)
				}
			}
			for _, e := range dt.G.Kids[id] {
				add(emb, &dt.G.E[e], idx, -1)
			}
			for _, e := range dt.G.Parents[id] {
				add(emb, &dt.G.E[e], -1, idx)
			}
		}
		if fisEmbs != nil && !seenIt {
			fisEmbs = append(fisEmbs, emb)
		}
		total++
	}
	return total, overlap, fisEmbs, sets, exts
}
// extendNode applies each extension point of n's subgraph, canonicalizes the
// result, and returns the distinct extended patterns keyed in a LinearHash,
// skipping any extension that would exceed dt.MaxVertices.
func extendNode(dt *Digraph, n Node, debug bool) (*hashtable.LinearHash, error) {
	if debug {
		errors.Logf("DEBUG", "n.SubGraph %v", n.SubGraph())
	}
	sg := n.SubGraph()
	b := subgraph.Build(len(sg.V), len(sg.E)).From(sg)
	extPoints, err := n.Extensions()
	if err != nil {
		return nil, err
	}
	patterns := hashtable.NewLinearHash()
	for _, ep := range extPoints {
		bc := b.Copy()
		bc.Extend(ep)
		if len(bc.V) > dt.MaxVertices {
			continue
		}
		vord, eord := bc.CanonicalPermutation()
		ext := bc.BuildFromPermutation(vord, eord)
		if !patterns.Has(ext) {
			patterns.Put(ext, &extInfo{ep, vord})
		}
	}
	return patterns, nil
}
// FilterAutomorphs wraps it with an iterator that yields only one embedding per
// distinct vertex-id set, dropping automorphic duplicates.
func FilterAutomorphs(it EmbIterator, dropped *VertexEmbeddings) (ei EmbIterator, _ *VertexEmbeddings) {
	idSet := func(emb *Embedding) *list.Sorted {
		ids := list.NewSorted(len(emb.Ids), true)
		for _, id := range emb.Ids {
			ids.Add(types.Int(id))
		}
		return ids
	}
	seen := hashtable.NewLinearHash()
	ei = func(stop bool) (emb *Embedding, _ EmbIterator) {
		if it == nil {
			return nil, nil
		}
		for emb, it = it(stop); it != nil; emb, it = it(stop) {
			ids := idSet(emb)
			// errors.Logf("AUTOMORPH-DEBUG", "emb %v ids %v has %v", emb, ids, seen.Has(ids))
			if !seen.Has(ids) {
				seen.Put(ids, nil)
				return emb, ei
			}
		}
		return nil, nil
	}
	return ei, dropped
}
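// Hedged usage sketch (not part of the original source): drainEmbeddings is a
// hypothetical helper showing how an EmbIterator, such as the one returned by
// FilterAutomorphs above, is consumed. It assumes only the iterator protocol
// visible in this section: calling the iterator with stop=false yields the next
// embedding and a continuation, and a nil continuation ends the iteration.
func drainEmbeddings(it EmbIterator) []*Embedding {
	if it == nil {
		return nil
	}
	embs := make([]*Embedding, 0, 10)
	for emb, next := it(false); next != nil; emb, next = next(false) {
		embs = append(embs, emb)
	}
	return embs
}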
// TestLinearHashtableCast verifies that *hashtable.LinearHash can be converted
// to the map interfaces from the types package; the conversions fail to compile
// if the type does not satisfy them.
func TestLinearHashtableCast(t *testing.T) {
	hash := hashtable.NewLinearHash()
	_ = types.Sized(hash)
	_ = types.MapIterable(hash)
	_ = types.MapOperable(hash)
	_ = types.Map(hash)
}
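// Hedged sketch (not part of the original source): a companion test exercising
// the LinearHash operations already used elsewhere in this section (Put, Has,
// Get). The test name and the stored values are illustrative assumptions.
func TestLinearHashBasicOps(t *testing.T) {
	hash := hashtable.NewLinearHash()
	hash.Put(types.Int(1), "one")
	if !hash.Has(types.Int(1)) {
		t.Fatal("expected key 1 to be present")
	}
	v, err := hash.Get(types.Int(1))
	if err != nil {
		t.Fatal(err)
	}
	if v.(string) != "one" {
		t.Fatalf("expected \"one\", got %v", v)
	}
}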
func LoadGraph(getInput func() (io.Reader, func()), supportAttr string, nodeAttrs *bptree.BpTree, supportAttrs map[int]string) (graph *goiso.Graph, err error) {
	var errors ParseErrors
	reader, closer := getInput()
	G := goiso.NewGraph(graphSize(reader))
	closer()
	graph = &G
	vids := hashtable.NewLinearHash() // int64 ==> *goiso.Vertex
	reader, closer = getInput()
	defer closer()
	ProcessLines(reader, func(line []byte) {
		if len(line) == 0 || !bytes.Contains(line, []byte("\t")) {
			return
		}
		line_type, data := parseLine(line)
		switch line_type {
		case "vertex":
			if err := LoadVertex(graph, supportAttr, vids, nodeAttrs, supportAttrs, data); err != nil {
				errors = append(errors, err)
			}
		case "edge":
			if err := LoadEdge(graph, vids, data); err != nil {
				errors = append(errors, err)
			}
		default:
			errors = append(errors, fmt.Errorf("Unknown line type %v", line_type))
			return
		}
	})
	if len(errors) == 0 {
		return graph, nil
	}
	return graph, errors
}
// NewQueue constructs a new, empty queue.
func NewQueue(allowDups bool) *Queue {
	return &Queue{
		head:      nil,
		tail:      nil,
		length:    0,
		index:     hashtable.NewLinearHash(),
		lock:      new(sync.Mutex),
		allowDups: allowDups,
	}
}
// findChildren extends node n by every allowed canonical pattern, computing
// support for each candidate in parallel on dt.pool, and records extensions
// that remain unsupported so descendant nodes can skip them.
func findChildren(n Node, allow func(*subgraph.SubGraph) (bool, error), debug bool) (nodes []lattice.Node, err error) {
	if debug {
		errors.Logf("CHILDREN-DEBUG", "node %v", n)
	}
	dt := n.dt()
	sg := n.SubGraph()
	patterns, err := extendNode(dt, n, debug)
	if err != nil {
		return nil, err
	}
	unsupEmbs, err := n.UnsupportedEmbs()
	if err != nil {
		return nil, err
	}
	unsupExts, err := n.UnsupportedExts()
	if err != nil {
		return nil, err
	}
	newUnsupportedExts := unsupExts.Copy()
	nOverlap, err := n.Overlap()
	if err != nil {
		return nil, err
	}
	var wg sync.WaitGroup
	type nodeEp struct {
		n    lattice.Node
		vord []int
	}
	nodeCh := make(chan nodeEp)
	vords := make([][]int, 0, 10)
	go func() {
		for nep := range nodeCh {
			nodes = append(nodes, nep.n)
			vords = append(vords, nep.vord)
			wg.Done()
		}
	}()
	epCh := make(chan *subgraph.Extension)
	go func() {
		for ep := range epCh {
			newUnsupportedExts.Add(ep)
			wg.Done()
		}
	}()
	errorCh := make(chan error)
	errs := make([]error, 0, 10)
	go func() {
		for err := range errorCh {
			errs = append(errs, err)
			wg.Done()
		}
	}()
	for k, v, next := patterns.Iterate()(); next != nil; k, v, next = next() {
		err := dt.pool.Do(func(pattern *subgraph.SubGraph, i *extInfo) func() {
			wg.Add(1)
			return func() {
				if allow != nil {
					if allowed, err := allow(pattern); err != nil {
						errorCh <- err
						return
					} else if !allowed {
						wg.Done()
						return
					}
				}
				ep := i.ep
				vord := i.vord
				tu := set.NewSetMap(hashtable.NewLinearHash())
				for i, next := unsupExts.Items()(); next != nil; i, next = next() {
					tu.Add(i.(*subgraph.Extension).Translate(len(sg.V), vord))
				}
				pOverlap := translateOverlap(nOverlap, vord)
				tUnsupEmbs := unsupEmbs.Translate(len(sg.V), vord).Set()
				support, exts, embs, overlap, dropped, err := ExtsAndEmbs(dt, pattern, pOverlap, tu, tUnsupEmbs, dt.Mode, debug)
				if err != nil {
					errorCh <- err
					return
				}
				if debug {
					errors.Logf("CHILDREN-DEBUG", "pattern %v support %v exts %v", pattern.Pretty(dt.Labels), len(embs), len(exts))
				}
				if support >= dt.Support() {
					nodeCh <- nodeEp{n.New(pattern, exts, embs, overlap, dropped), vord}
				} else {
					epCh <- ep
				}
			}
		}(k.(*subgraph.SubGraph), v.(*extInfo)))
		if err != nil {
			return nil, err
		}
	}
	wg.Wait()
	close(nodeCh)
	close(epCh)
	close(errorCh)
	if len(errs) > 0 {
		e := errors.Errorf("findChildren error").(*errors.Error)
		for _, err := range errs {
			e.Chain(err)
		}
		return nil, e
	}
	for i, newNode := range nodes {
		err := newNode.(Node).SaveUnsupportedExts(len(sg.V), vords[i], newUnsupportedExts)
		if err != nil {
			return nil, err
		}
	}
	return nodes, nil
}
func main() {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		"hs:m:o:c:",
		[]string{
			"help", "support=", "cache=", "min-vertices=", "sample-size=",
			"mem-profile=", "cpu-profile=", "output=", "probabilities",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		Usage(ErrorCodes["opts"])
	}
	log.Printf("Number of goroutines = %v", runtime.NumGoroutine())
	support := -1
	minVertices := -1
	sampleSize := -1
	memProfile := ""
	cpuProfile := ""
	outputDir := ""
	cache := ""
	compute_prs := false
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			Usage(0)
		case "-o", "--output":
			outputDir = EmptyDir(AssertDir(oa.Arg()))
		case "-s", "--support":
			support = ParseInt(oa.Arg())
		case "-m", "--min-vertices":
			minVertices = ParseInt(oa.Arg())
		case "-c", "--cache":
			cache = AssertDir(oa.Arg())
		case "--probabilities":
			compute_prs = true
		case "--sample-size":
			sampleSize = ParseInt(oa.Arg())
		case "--mem-profile":
			memProfile = AssertFile(oa.Arg())
		case "--cpu-profile":
			cpuProfile = AssertFile(oa.Arg())
		}
	}
	if support < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support)
		Usage(ErrorCodes["opts"])
	}
	if sampleSize < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize)
		Usage(ErrorCodes["opts"])
	}
	if outputDir == "" {
		fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n")
		Usage(ErrorCodes["opts"])
	}
	if cache == "" {
		fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>")
		Usage(ErrorCodes["opts"])
	}
	if len(args) != 1 {
		fmt.Fprintln(os.Stderr, "Expected a path to the graph file")
		Usage(ErrorCodes["opts"])
	}
	getReader := func() (io.Reader, func()) { return Input(args[0]) }
	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		err = pprof.StartCPUProfile(f)
		if err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}
	var memProfFile io.WriteCloser
	if memProfile != "" {
		f, err := os.Create(memProfile)
		if err != nil {
			log.Fatal(err)
		}
		memProfFile = f
		defer f.Close()
	}
	nodePath := path.Join(outputDir, "node-attrs.bptree")
	nodeBf, err := fmap.CreateBlockFile(nodePath)
	if err != nil {
		log.Fatal(err)
	}
	defer nodeBf.Close()
	nodeAttrs, err := bptree.New(nodeBf, 4, -1)
	if err != nil {
		log.Fatal(err)
	}
	G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil)
	if err != nil {
		log.Println("Error loading the graph")
		log.Panic(err)
	}
	log.Print("Loaded graph, about to start mining")
	sgCount := 0
	sgMaker := func() store.SubGraphs {
		name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount)
		sgCount++
		path := path.Join(cache, name)
		s := store.NewFs2BpTree(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}
	idxCount := 0
	idxMaker := func() store.UniqueIndex {
		name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount)
		idxCount++
		path := path.Join(cache, name)
		s := store.NewFs2UniqueIndex(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}
	setsCount := 0
	setsMaker := func() store.SetsMap {
		name := fmt.Sprintf("sets-%d.b+tree", setsCount)
		setsCount++
		path := path.Join(cache, name)
		s := store.NewFs2Sets(path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}
	// memFsMaker := func() store.SubGraphs {
	// 	return store.AnonFs2BpTree(G)
	// }
	m := mine.RandomWalk(
		G,
		support,
		minVertices,
		sampleSize,
		memProfFile,
		sgMaker,
		idxMaker,
		setsMaker,
	)
	keys := list.NewSorted(10, false)
	counts := hashtable.NewLinearHash()
	for label := range m.Report {
		key := types.ByteSlice(label)
		count := 0
		if counts.Has(key) {
			c, err := counts.Get(key)
			if err != nil {
				log.Panic(err)
			}
			count = c.(int)
		}
		counts.Put(key, count+1)
		keys.Add(key)
	}
	log.Println("Tries", m.Tries)
	triesPath := path.Join(outputDir, "tries")
	if f, e := os.Create(triesPath); e != nil {
		log.Fatal(e)
	} else {
		fmt.Fprintln(f, m.Tries)
		f.Close()
	}
	{
		log.Println("Finished mining! Writing output...")
		keyCh := make(chan []byte)
		go func() {
			for k, next := keys.Items()(); next != nil; k, next = next() {
				keyCh <- []byte(k.(types.ByteSlice))
			}
			close(keyCh)
		}()
		writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir)
	}
	if !compute_prs {
		log.Println("Done!")
		return
	}
	log.Println("Finished writing patterns. Computing probabilities...")
	count := 0
	for k, next := keys.Items()(); next != nil; k, next = next() {
		patDir := path.Join(outputDir, fmt.Sprintf("%d", count))
		log.Println("-----------------------------------")
		c, err := counts.Get(k)
		if err != nil {
			log.Fatal(err)
		}
		key := []byte(k.(types.ByteSlice))
		dupCount := c.(int)
		// if max.Count(key) < support {
		// 	log.Println("wat not enough subgraphs", max.Count(key))
		// 	continue
		// }
		if c, err := os.Create(path.Join(patDir, "duplicates")); err != nil {
			log.Fatal(err)
		} else {
			fmt.Fprintln(c, dupCount)
			c.Close()
		}
		for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() {
			vp, Q, R, u, err := m.PrMatrices(sg)
			if err != nil {
				log.Println(err)
				errPath := path.Join(patDir, "error")
				if f, e := os.Create(errPath); e != nil {
					log.Fatal(e)
				} else {
					fmt.Fprintln(f, err)
					f.Close()
				}
			} else {
				bytes, err := json.Marshal(map[string]interface{}{
					"Q":              Q,
					"R":              R,
					"u":              u,
					"startingPoints": vp,
				})
				if err != nil {
					log.Fatal(err)
				}
				matPath := path.Join(patDir, "matrices.json")
				if m, err := os.Create(matPath); err != nil {
					log.Fatal(err)
				} else {
					_, err := m.Write(bytes)
					if err != nil {
						m.Close()
						log.Fatal(err)
					}
					m.Close()
				}
			}
			break
		}
		count++
	}
	log.Println("Done!")
}
// extensionsFromFreqEdges generates candidate extensions from the frequent-edge
// indices concurrently while counting embeddings from ei; it signals the
// iterator to stop once enough embeddings have been seen to establish support,
// and returns a nil extension set when total falls below the support threshold.
func extensionsFromFreqEdges(dt *Digraph, pattern *subgraph.SubGraph, ei subgraph.EmbIterator, seen map[int]bool) (total int, overlap []map[int]bool, fisEmbs []*subgraph.Embedding, sets []*hashtable.LinearHash, exts types.Set) {
	if dt.Mode&FIS == FIS {
		seen = make(map[int]bool)
		fisEmbs = make([]*subgraph.Embedding, 0, 10)
	} else {
		sets = make([]*hashtable.LinearHash, len(pattern.V))
	}
	if dt.Mode&OverlapPruning == OverlapPruning {
		overlap = make([]map[int]bool, len(pattern.V))
	}
	support := dt.Support()
	// buffered so the collector goroutine can finish even when we return early
	// on an unsupported pattern without receiving from done
	done := make(chan types.Set, 1)
	go func(done chan types.Set) {
		exts := make(chan *subgraph.Extension, len(pattern.V))
		go func() {
			hash := set.NewSetMap(hashtable.NewLinearHash())
			for ext := range exts {
				if !pattern.HasExtension(ext) {
					hash.Add(ext)
				}
			}
			done <- hash
			close(done)
		}()
		for i := range pattern.V {
			u := &pattern.V[i]
			for _, e := range dt.Indices.EdgesFromColor[u.Color] {
				for j := range pattern.V {
					v := &pattern.V[j]
					if v.Color == e.TargColor {
						ep := subgraph.NewExt(
							subgraph.Vertex{Idx: i, Color: e.SrcColor},
							subgraph.Vertex{Idx: j, Color: e.TargColor},
							e.EdgeColor)
						exts <- ep
					}
				}
				ep := subgraph.NewExt(
					subgraph.Vertex{Idx: i, Color: u.Color},
					subgraph.Vertex{Idx: len(pattern.V), Color: e.TargColor},
					e.EdgeColor)
				exts <- ep
			}
			for _, e := range dt.Indices.EdgesToColor[u.Color] {
				ep := subgraph.NewExt(
					subgraph.Vertex{Idx: len(pattern.V), Color: e.SrcColor},
					subgraph.Vertex{Idx: i, Color: u.Color},
					e.EdgeColor)
				exts <- ep
			}
		}
		close(exts)
	}(done)
	stop := false
	for emb, next := ei(stop); next != nil; emb, next = next(stop) {
		min := -1
		seenIt := false
		for idx, id := range emb.Ids {
			if fisEmbs != nil {
				if seen[id] {
					seenIt = true
				}
			}
			if overlap != nil {
				if overlap[idx] == nil {
					overlap[idx] = make(map[int]bool)
				}
				overlap[idx][id] = true
			}
			if seen != nil {
				seen[id] = true
			}
			if sets != nil {
				if sets[idx] == nil {
					sets[idx] = hashtable.NewLinearHash()
				}
				set := sets[idx]
				if !set.Has(types.Int(id)) {
					set.Put(types.Int(id), emb)
				}
				size := set.Size()
				if min == -1 || size < min {
					min = size
				}
			}
		}
		if fisEmbs != nil && !seenIt {
			fisEmbs = append(fisEmbs, emb)
			min = len(fisEmbs)
		}
		total++
		if min >= support {
			stop = true
		}
	}
	if total < support {
		return total, overlap, fisEmbs, sets, nil
	}
	return total, overlap, fisEmbs, sets, <-done
}