func GetUserStore(dir string) (*UserFileStore, error) { var bf *fmap.BlockFile var users *bptree.BpTree path := filepath.Join(dir, "users.bptree") err := createOrOpen(path, func(path string) (err error) { bf, err = fmap.CreateBlockFile(path) if err != nil { return err } users, err = bptree.New(bf, -1, -1) return err }, func(path string) (err error) { bf, err = fmap.OpenBlockFile(path) if err != nil { return err } users, err = bptree.Open(bf) return err }, ) if err != nil { return nil, err } s := &UserFileStore{ path: path, bf: bf, users: users, } return s, bf.Sync() }
func newFs2Sets(bf *fmap.BlockFile) *Fs2Sets { bpt, err := bptree.New(bf, -1, -1) assert_ok(err) return &Fs2Sets{ bf: bf, bpt: bpt, } }
func newFs2BpTree(g *goiso.Graph, bf *fmap.BlockFile) *Fs2BpTree { bpt, err := bptree.New(bf, -1, -1) assert_ok(err) return &Fs2BpTree{ g: g, bf: bf, bpt: bpt, } }
func newFs2UniqueIndex(g *goiso.Graph, bf *fmap.BlockFile) *Fs2UniqueIndex { bpt, err := bptree.New(bf, -1, 0) assert_ok(err) return &Fs2UniqueIndex{ g: g, bf: bf, bpt: bpt, } }
func newBpTree(bf *fmap.BlockFile) (*BpTree, error) { bpt, err := bptree.New(bf, -1, -1) if err != nil { return nil, err } b := &BpTree{ bf: bf, bpt: bpt, } return b, nil }
// MaximalSubGraphs streams the keys of subgraphs in `all` that have no
// frequent superset, using a temporary on-disk B+ tree (in tempDir) to
// record the labels of every (potential) parent seen so far. Keys are
// delivered on the returned channel by a background goroutine, which
// closes the channel when the backward scan of `all` completes.
//
// NOTE(review): nodeAttrs is accepted but never used in this body —
// possibly kept for interface symmetry; confirm against callers.
func MaximalSubGraphs(all store.SubGraphs, nodeAttrs *bptree.BpTree, tempDir string) (<-chan []byte, error) {
	labelsBf, err := fmap.CreateBlockFile(path.Join(tempDir, "labels.bptree"))
	if err != nil {
		return nil, err
	}
	// labels: variable-length keys, 1-byte values — used purely as a set.
	labels, err := bptree.New(labelsBf, -1, 1)
	if err != nil {
		return nil, err
	}
	keys := make(chan []byte)
	go func() {
		defer labelsBf.Close()
		// cur is the key group currently being scanned; had records
		// whether any key in the group appeared in labels (i.e. the
		// group is a parent of something already seen — not maximal).
		var cur []byte
		var had bool = false
		for key, sg, next := all.Backward()(); next != nil; key, sg, next = next() {
			// Group boundary: emit the finished group iff it was
			// never marked as a parent, then reset the marker.
			if cur != nil && !bytes.Equal(key, cur) {
				if !had {
					keys <- cur
				}
				had = false
			}
			has, err := labels.Has(key)
			if err != nil {
				log.Fatal(err)
			}
			if has {
				had = true
			}
			// First embedding of a new key group: register every
			// one-edge-removed variant so smaller graphs are later
			// recognized as non-maximal.
			if !bytes.Equal(cur, key) {
				// add all of the (potential) parents of this node
				for eIdx := range sg.E {
					nsg, _ := sg.RemoveEdge(eIdx)
					addToLabels(labels, nsg.ShortLabel())
				}
			}
			cur = key
		}
		// Flush the final group (the loop only emits on boundaries).
		if !had && cur != nil {
			keys <- cur
		}
		close(keys)
	}()
	return keys, nil
}
func newBpTree( bf *fmap.BlockFile, serializeKey func([]byte) []byte, serializeValue func(*goiso.SubGraph) []byte, deserializeKey func([]byte) []byte, deserializeValue func([]byte) *goiso.SubGraph, ) (*BpTree, error) { bpt, err := bptree.New(bf, -1, -1) if err != nil { return nil, err } b := &BpTree{ bf: bf, bpt: bpt, serializeKey: serializeKey, serializeValue: serializeValue, deserializeKey: deserializeKey, deserializeValue: deserializeValue, } return b, nil }
func main() { args, optargs, err := getopt.GetOpt( os.Args[1:], "hs:m:o:c:", []string{ "help", "support=", "cache=", "min-vertices=", "sample-size=", "mem-profile=", "cpu-profile=", "output=", "probabilities", }, ) if err != nil { fmt.Fprintln(os.Stderr, err) Usage(ErrorCodes["opts"]) } log.Printf("Number of goroutines = %v", runtime.NumGoroutine()) support := -1 minVertices := -1 sampleSize := -1 memProfile := "" cpuProfile := "" outputDir := "" cache := "" compute_prs := false for _, oa := range optargs { switch oa.Opt() { case "-h", "--help": Usage(0) case "-o", "--output": outputDir = EmptyDir(AssertDir(oa.Arg())) case "-s", "--support": support = ParseInt(oa.Arg()) case "-m", "--min-vertices": minVertices = ParseInt(oa.Arg()) case "-c", "--cache": cache = AssertDir(oa.Arg()) case "--probabilities": compute_prs = true case "--sample-size": sampleSize = ParseInt(oa.Arg()) case "--mem-profile": memProfile = AssertFile(oa.Arg()) case "--cpu-profile": cpuProfile = AssertFile(oa.Arg()) } } if support < 1 { fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support) Usage(ErrorCodes["opts"]) } if sampleSize < 1 { fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize) Usage(ErrorCodes["opts"]) } if outputDir == "" { fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n") Usage(ErrorCodes["opts"]) } if cache == "" { fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>") Usage(ErrorCodes["opts"]) } if len(args) != 1 { fmt.Fprintln(os.Stderr, "Expected a path to the graph file") Usage(ErrorCodes["opts"]) } getReader := func() (io.Reader, func()) { return Input(args[0]) } if cpuProfile != "" { f, err := os.Create(cpuProfile) if err != nil { log.Fatal(err) } defer f.Close() err = pprof.StartCPUProfile(f) if err != nil { log.Fatal(err) } defer pprof.StopCPUProfile() } var memProfFile io.WriteCloser if memProfile != "" { f, err := os.Create(memProfile) if err != nil { log.Fatal(err) } 
memProfFile = f defer f.Close() } nodePath := path.Join(outputDir, "node-attrs.bptree") nodeBf, err := fmap.CreateBlockFile(nodePath) if err != nil { log.Fatal(err) } defer nodeBf.Close() nodeAttrs, err := bptree.New(nodeBf, 4, -1) if err != nil { log.Fatal(err) } G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil) if err != nil { log.Println("Error loading the graph") log.Panic(err) } log.Print("Loaded graph, about to start mining") sgCount := 0 sgMaker := func() store.SubGraphs { name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount) sgCount++ path := path.Join(cache, name) s := store.NewFs2BpTree(G, path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } idxCount := 0 idxMaker := func() store.UniqueIndex { name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount) idxCount++ path := path.Join(cache, name) s := store.NewFs2UniqueIndex(G, path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } setsCount := 0 setsMaker := func() store.SetsMap { name := fmt.Sprintf("sets-%d.b+tree", setsCount) setsCount++ path := path.Join(cache, name) s := store.NewFs2Sets(path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } // memFsMaker := func() store.SubGraphs { // return store.AnonFs2BpTree(G) // } m := mine.RandomWalk( G, support, minVertices, sampleSize, memProfFile, sgMaker, idxMaker, setsMaker, ) keys := list.NewSorted(10, false) counts := hashtable.NewLinearHash() for label := range m.Report { key := types.ByteSlice(label) count := 0 if counts.Has(key) { c, err := counts.Get(key) if err != nil { log.Panic(err) } count = c.(int) } counts.Put(key, count+1) keys.Add(key) } log.Println("Tries", m.Tries) triesPath := path.Join(outputDir, "tries") if f, e := os.Create(triesPath); e != nil { log.Fatal(err) } else { fmt.Fprintln(f, m.Tries) f.Close() } { log.Println("Finished mining! 
Writing output...") keyCh := make(chan []byte) go func() { for k, next := keys.Items()(); next != nil; k, next = next() { keyCh <- []byte(k.(types.ByteSlice)) } close(keyCh) }() writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir) } if !compute_prs { log.Println("Done!") return } log.Println("Finished writing patterns. Computing probabilities...") count := 0 for k, next := keys.Items()(); next != nil; k, next = next() { patDir := path.Join(outputDir, fmt.Sprintf("%d", count)) log.Println("-----------------------------------") c, err := counts.Get(k) if err != nil { log.Fatal(err) } key := []byte(k.(types.ByteSlice)) dupCount := c.(int) // if max.Count(key) < support { // log.Println("wat not enough subgraphs", max.Count(key)) // continue // } if c, err := os.Create(path.Join(patDir, "duplicates")); err != nil { log.Fatal(err) } else { fmt.Fprintln(c, dupCount) c.Close() } for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() { vp, Q, R, u, err := m.PrMatrices(sg) if err != nil { log.Println(err) errPath := path.Join(patDir, "error") if f, e := os.Create(errPath); e != nil { log.Fatal(err) } else { fmt.Fprintln(f, err) f.Close() } } else { bytes, err := json.Marshal(map[string]interface{}{ "Q": Q, "R": R, "u": u, "startingPoints": vp, }) if err != nil { log.Fatal(err) } matPath := path.Join(patDir, "matrices.json") if m, err := os.Create(matPath); err != nil { log.Fatal(err) } else { _, err := m.Write(bytes) if err != nil { m.Close() log.Fatal(err) } m.Close() } } break } count++ } log.Println("Done!") }