func FilterAutomorphs(it EmbIterator, dropped *VertexEmbeddings) (ei EmbIterator, _ *VertexEmbeddings) { idSet := func(emb *Embedding) *list.Sorted { ids := list.NewSorted(len(emb.Ids), true) for _, id := range emb.Ids { ids.Add(types.Int(id)) } return ids } seen := hashtable.NewLinearHash() ei = func(stop bool) (emb *Embedding, _ EmbIterator) { if it == nil { return nil, nil } for emb, it = it(stop); it != nil; emb, it = it(stop) { ids := idSet(emb) // errors.Logf("AUTOMORPH-DEBUG", "emb %v ids %v has %v", emb, ids, seen.Has(ids)) if !seen.Has(ids) { seen.Put(ids, nil) return emb, ei } } return nil, nil } return ei, dropped }
// NewSortedSet returns a SortedSet backed by a sorted list of the given
// initial capacity that rejects duplicate entries.
func NewSortedSet(initialSize int) *SortedSet {
	backing := list.NewSorted(initialSize, false)
	return &SortedSet{*backing}
}
func TestSortedCast(t *testing.T) { s := list.NewSorted(17, false) _ = types.Hashable(s) _ = types.OrderedList(s) }
func main() { args, optargs, err := getopt.GetOpt( os.Args[1:], "hs:m:o:c:", []string{ "help", "support=", "cache=", "min-vertices=", "sample-size=", "mem-profile=", "cpu-profile=", "output=", "probabilities", }, ) if err != nil { fmt.Fprintln(os.Stderr, err) Usage(ErrorCodes["opts"]) } log.Printf("Number of goroutines = %v", runtime.NumGoroutine()) support := -1 minVertices := -1 sampleSize := -1 memProfile := "" cpuProfile := "" outputDir := "" cache := "" compute_prs := false for _, oa := range optargs { switch oa.Opt() { case "-h", "--help": Usage(0) case "-o", "--output": outputDir = EmptyDir(AssertDir(oa.Arg())) case "-s", "--support": support = ParseInt(oa.Arg()) case "-m", "--min-vertices": minVertices = ParseInt(oa.Arg()) case "-c", "--cache": cache = AssertDir(oa.Arg()) case "--probabilities": compute_prs = true case "--sample-size": sampleSize = ParseInt(oa.Arg()) case "--mem-profile": memProfile = AssertFile(oa.Arg()) case "--cpu-profile": cpuProfile = AssertFile(oa.Arg()) } } if support < 1 { fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support) Usage(ErrorCodes["opts"]) } if sampleSize < 1 { fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize) Usage(ErrorCodes["opts"]) } if outputDir == "" { fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n") Usage(ErrorCodes["opts"]) } if cache == "" { fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>") Usage(ErrorCodes["opts"]) } if len(args) != 1 { fmt.Fprintln(os.Stderr, "Expected a path to the graph file") Usage(ErrorCodes["opts"]) } getReader := func() (io.Reader, func()) { return Input(args[0]) } if cpuProfile != "" { f, err := os.Create(cpuProfile) if err != nil { log.Fatal(err) } defer f.Close() err = pprof.StartCPUProfile(f) if err != nil { log.Fatal(err) } defer pprof.StopCPUProfile() } var memProfFile io.WriteCloser if memProfile != "" { f, err := os.Create(memProfile) if err != nil { log.Fatal(err) } 
memProfFile = f defer f.Close() } nodePath := path.Join(outputDir, "node-attrs.bptree") nodeBf, err := fmap.CreateBlockFile(nodePath) if err != nil { log.Fatal(err) } defer nodeBf.Close() nodeAttrs, err := bptree.New(nodeBf, 4, -1) if err != nil { log.Fatal(err) } G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil) if err != nil { log.Println("Error loading the graph") log.Panic(err) } log.Print("Loaded graph, about to start mining") sgCount := 0 sgMaker := func() store.SubGraphs { name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount) sgCount++ path := path.Join(cache, name) s := store.NewFs2BpTree(G, path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } idxCount := 0 idxMaker := func() store.UniqueIndex { name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount) idxCount++ path := path.Join(cache, name) s := store.NewFs2UniqueIndex(G, path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } setsCount := 0 setsMaker := func() store.SetsMap { name := fmt.Sprintf("sets-%d.b+tree", setsCount) setsCount++ path := path.Join(cache, name) s := store.NewFs2Sets(path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } // memFsMaker := func() store.SubGraphs { // return store.AnonFs2BpTree(G) // } m := mine.RandomWalk( G, support, minVertices, sampleSize, memProfFile, sgMaker, idxMaker, setsMaker, ) keys := list.NewSorted(10, false) counts := hashtable.NewLinearHash() for label := range m.Report { key := types.ByteSlice(label) count := 0 if counts.Has(key) { c, err := counts.Get(key) if err != nil { log.Panic(err) } count = c.(int) } counts.Put(key, count+1) keys.Add(key) } log.Println("Tries", m.Tries) triesPath := path.Join(outputDir, "tries") if f, e := os.Create(triesPath); e != nil { log.Fatal(err) } else { fmt.Fprintln(f, m.Tries) f.Close() } { log.Println("Finished mining! 
Writing output...") keyCh := make(chan []byte) go func() { for k, next := keys.Items()(); next != nil; k, next = next() { keyCh <- []byte(k.(types.ByteSlice)) } close(keyCh) }() writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir) } if !compute_prs { log.Println("Done!") return } log.Println("Finished writing patterns. Computing probabilities...") count := 0 for k, next := keys.Items()(); next != nil; k, next = next() { patDir := path.Join(outputDir, fmt.Sprintf("%d", count)) log.Println("-----------------------------------") c, err := counts.Get(k) if err != nil { log.Fatal(err) } key := []byte(k.(types.ByteSlice)) dupCount := c.(int) // if max.Count(key) < support { // log.Println("wat not enough subgraphs", max.Count(key)) // continue // } if c, err := os.Create(path.Join(patDir, "duplicates")); err != nil { log.Fatal(err) } else { fmt.Fprintln(c, dupCount) c.Close() } for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() { vp, Q, R, u, err := m.PrMatrices(sg) if err != nil { log.Println(err) errPath := path.Join(patDir, "error") if f, e := os.Create(errPath); e != nil { log.Fatal(err) } else { fmt.Fprintln(f, err) f.Close() } } else { bytes, err := json.Marshal(map[string]interface{}{ "Q": Q, "R": R, "u": u, "startingPoints": vp, }) if err != nil { log.Fatal(err) } matPath := path.Join(patDir, "matrices.json") if m, err := os.Create(matPath); err != nil { log.Fatal(err) } else { _, err := m.Write(bytes) if err != nil { m.Close() log.Fatal(err) } m.Close() } } break } count++ } log.Println("Done!") }