func (emb *Embedding) Less(o types.Sortable) bool { a := types.ByteSlice(emb.Serialize()) switch b := o.(type) { case *Embedding: return a.Less(types.ByteSlice(b.Serialize())) default: return false } }
func (sg *SubGraph) Less(o types.Sortable) bool { a := types.ByteSlice(sg.Label()) switch b := o.(type) { case *SubGraph: return a.Less(types.ByteSlice(b.Label())) default: return false } }
func (n *SubgraphPattern) Equals(o types.Equatable) bool { a := types.ByteSlice(n.Label()) switch b := o.(type) { case Labeled: return a.Equals(types.ByteSlice(b.Label())) default: return false } }
func (emb *Embedding) Equals(o types.Equatable) bool { a := types.ByteSlice(emb.Serialize()) switch b := o.(type) { case *Embedding: return a.Equals(types.ByteSlice(b.Serialize())) case *SubGraph: return emb.SG.equals(b) default: return false } }
func (self *MemBpTree) Add(key []byte, psg *goiso.SubGraph) { bpt := (*bptree.BpTree)(self) err := bpt.Add(types.ByteSlice(key), psg) if err != nil { panic(err) } }
func (w *Walker) RejectingWalk(samples chan lattice.Node, terminate chan bool) chan lattice.Node { accepted := make(chan lattice.Node) go func() { i := 0 seen := set.NewSortedSet(w.Config.Samples) for sampled := range samples { accept := false if !w.Reject || w.Dt.Acceptable(sampled) { label := types.ByteSlice(sampled.Pattern().Label()) if !w.Config.Unique || !seen.Has(label) { if w.Config.Unique { seen.Add(label) } accept = true i++ } else { errors.Logf("DEBUG", "duplicate %v", sampled) } } else { errors.Logf("DEBUG", "rejected %v", sampled) } if i >= w.Config.Samples { terminate <- true } else { terminate <- false } if accept { accepted <- sampled } } close(accepted) close(terminate) }() return accepted }
func (self *MemBpTree) Remove(key []byte, where func(*goiso.SubGraph) bool) error { bpt := (*bptree.BpTree)(self) return bpt.RemoveWhere(types.ByteSlice(key), func(val interface{}) bool { v := val.(*goiso.SubGraph) return where(v) }) }
func (m *RandomWalkMiner) extensions(sgs []*goiso.SubGraph) *set.SortedSet { if len(sgs) == 0 { return set.NewSortedSet(10) } label := types.ByteSlice(sgs[0].ShortLabel()) if m.extended.Has(label) { keys := m.extended.Get(label) return keys } keys := set.NewSortedSet(10) m.extend(sgs, func(sg *goiso.SubGraph) { m.AllEmbeddings.send(sg) keys.Add(types.ByteSlice(sg.ShortLabel())) }) m.extended.Put(label, keys) return keys }
/* Read data off the queue in FIFO order */ func (self *Queue) Deque() (data []byte, err error) { self.lock.Lock() defer self.lock.Unlock() if self.length == 0 { return nil, fmt.Errorf("List is empty") } if self.length < 0 { return nil, fmt.Errorf("List length is less than zero") } if self.head == nil { return nil, fmt.Errorf("head is nil") } node := self.head if node.next == nil { if node != self.tail { return nil, fmt.Errorf("Expected tail to equal head") } if self.length != 1 { return nil, fmt.Errorf("Expected list length to equal 1") } self.tail = nil } self.head = node.next self.length -= 1 h := types.ByteSlice(Hash(node.data)) if self.index.Has(h) { i, err := self.index.Get(h) if err != nil { return nil, err } j := int(i.(types.Int)) - 1 if j <= 0 { _, err = self.index.Remove(h) if err != nil { return nil, err } } else { err = self.index.Put(h, types.Int(j)) if err != nil { return nil, err } } } else { return nil, fmt.Errorf("integrity error, index did not have data") } return node.data, nil }
func (t *T) randslice(length int) types.ByteSlice { if urandom, err := os.Open("/dev/urandom"); err != nil { panic(err) } else { slice := make([]byte, length) if _, err := urandom.Read(slice); err != nil { t.Fatal(err) } urandom.Close() return types.ByteSlice(slice) } panic("unreachable") }
func (m *RandomWalkMiner) supportedKeys(from []byte, keys *set.SortedSet) *set.SortedSet { key := types.ByteSlice(from) if m.supportedExtensions.Has(key) { supKeys := m.supportedExtensions.Get(key) return supKeys } keysCh := make(chan []byte) partKeys := make(chan []byte) done := make(chan bool) for i := 0; i < m.PLevel; i++ { go func() { for key := range keysCh { if len(m.partition(key)) >= m.Support { partKeys <- key } } done <- true }() } go func() { for k, next := keys.Items()(); next != nil; k, next = next() { keysCh <- []byte(k.(types.ByteSlice)) } close(keysCh) }() go func() { for i := 0; i < m.PLevel; i++ { <-done } close(partKeys) close(done) }() supKeys := set.NewSortedSet(10) for partKey := range partKeys { supKeys.Add(types.ByteSlice(partKey)) } m.supportedExtensions.Put(key, supKeys) return supKeys }
func (m *RandomWalkMiner) initial() (Collectors, *set.SortedSet) { groups := m.makeCollectors(m.PLevel) for i := range m.Graph.V { v := &m.Graph.V[i] if m.Graph.ColorFrequency(v.Color) >= m.Support { sg, _ := m.Graph.VertexSubGraph(v.Idx) groups.send(sg) } } startingPoints := set.NewSortedSet(10) for key, next := groups.keys()(); next != nil; key, next = next() { startingPoints.Add(types.ByteSlice(key)) } return groups, startingPoints }
/* Put data on the queue */ func (self *Queue) Enque(data []byte) error { self.lock.Lock() defer self.lock.Unlock() h := types.ByteSlice(Hash(data)) has := self.index.Has(h) if !self.allowDups && has { return nil } else if has { i, err := self.index.Get(h) if err != nil { return err } err = self.index.Put(h, types.Int(int(i.(types.Int))+1)) if err != nil { return err } } else { err := self.index.Put(h, types.Int(1)) if err != nil { return err } } node := &node{next: nil, data: data} if self.tail == nil { if self.head != nil { return fmt.Errorf("List has head but no tail...") } self.head = node self.tail = node } else { self.tail.next = node self.tail = node } self.length += 1 return nil }
// Hash returns the hash of the embedding's serialized bytes, as computed
// by types.ByteSlice.
func (emb *Embedding) Hash() int {
	serialized := types.ByteSlice(emb.Serialize())
	return serialized.Hash()
}
// Hash returns the SHA-256 digest of data as a freshly allocated slice of
// sha256.Size (32) bytes. A nil or empty input yields the digest of the
// empty message.
func Hash(data []byte) []byte {
	sum := sha256.Sum256(data)
	// The result type is []byte, so slicing the array is sufficient; no
	// intermediate conversion to another slice type is needed.
	return sum[:]
}
func (self *Queue) Has(hash []byte) bool { if len(hash) != sha256.Size { return false } return self.index.Has(types.ByteSlice(hash)) }
func TestIteratorPrefixFindDotty(t *testing.T) { items := ByteSlices{ types.ByteSlice("0:java.io.File;"), types.ByteSlice("cat"), types.ByteSlice("catty"), types.ByteSlice("car"), types.ByteSlice("cow"), types.ByteSlice("candy"), types.ByteSlice("coo"), types.ByteSlice("coon"), types.ByteSlice("0:java.io.File;1:new,0:java.util.ArrayList;"), types.ByteSlice("andy"), types.ByteSlice("alex"), types.ByteSlice("andrie"), types.ByteSlice("alexander"), types.ByteSlice("alexi"), types.ByteSlice("bob"), types.ByteSlice("0:java.io.File;"), types.ByteSlice("bobcat"), types.ByteSlice("barnaby"), types.ByteSlice("baskin"), types.ByteSlice("balm"), } table := new(TST) for _, key := range items { if err := table.Put(key, nil); err != nil { t.Error(table, err) } if has := table.Has(key); !has { t.Error(table, "Missing key") } } write("TestDotty.dot", table.Dotty()) sort.Sort(items) i := 0 for k, _, next := table.Iterate()(); next != nil; k, _, next = next() { if !k.Equals(types.ByteSlice(items[i])) { t.Error(string(k.(types.ByteSlice)), "!=", string(items[i])) } i++ for i < len(items) && items[i].Equals(items[i-1]) { i++ } } co_items := ByteSlices{ types.ByteSlice("coo"), types.ByteSlice("coon"), types.ByteSlice("cow"), } i = 0 for k, _, next := table.PrefixFind([]byte("co"))(); next != nil; k, _, next = next() { if !k.Equals(types.ByteSlice(co_items[i])) { t.Error(string(k.(types.ByteSlice)), "!=", string(co_items[i])) } i++ } }
func (self *MemBpTree) Has(key []byte) bool { bpt := (*bptree.BpTree)(self) return bpt.Has(types.ByteSlice(key)) }
func TestComplete4(t *testing.T) { items := ByteSlices{ types.ByteSlice("abaa"), types.ByteSlice("abab"), types.ByteSlice("abac"), types.ByteSlice("abad"), types.ByteSlice("abba"), types.ByteSlice("abbb"), types.ByteSlice("abbc"), types.ByteSlice("abbd"), types.ByteSlice("abca"), types.ByteSlice("abcb"), types.ByteSlice("abcc"), types.ByteSlice("abcd"), types.ByteSlice("abda"), types.ByteSlice("abdb"), types.ByteSlice("abdc"), types.ByteSlice("abdd"), types.ByteSlice("aaaa"), types.ByteSlice("aaab"), types.ByteSlice("aaac"), types.ByteSlice("aaad"), types.ByteSlice("aaba"), types.ByteSlice("aabb"), types.ByteSlice("aabc"), types.ByteSlice("aabd"), types.ByteSlice("aaca"), types.ByteSlice("aacb"), types.ByteSlice("aacc"), types.ByteSlice("aacd"), types.ByteSlice("aada"), types.ByteSlice("aadb"), types.ByteSlice("aadc"), types.ByteSlice("aadd"), types.ByteSlice("adaa"), types.ByteSlice("adab"), types.ByteSlice("adac"), types.ByteSlice("adad"), types.ByteSlice("adba"), types.ByteSlice("adbb"), types.ByteSlice("adbc"), types.ByteSlice("adbd"), types.ByteSlice("adca"), types.ByteSlice("adcb"), types.ByteSlice("adcc"), types.ByteSlice("adcd"), types.ByteSlice("adda"), types.ByteSlice("addb"), types.ByteSlice("addc"), types.ByteSlice("addd"), types.ByteSlice("acaa"), types.ByteSlice("acab"), types.ByteSlice("acac"), types.ByteSlice("acad"), types.ByteSlice("acba"), types.ByteSlice("acbb"), types.ByteSlice("acbc"), types.ByteSlice("acbd"), types.ByteSlice("acca"), types.ByteSlice("accb"), types.ByteSlice("accc"), types.ByteSlice("accd"), types.ByteSlice("acda"), types.ByteSlice("acdb"), types.ByteSlice("acdc"), types.ByteSlice("addd"), } table := new(TST) for _, key := range items { if err := table.Put(key, nil); err != nil { t.Error(table, err) } if has := table.Has(key); !has { t.Error(table, "Missing key") } } write("TestComplete4.dot", table.Dotty()) sort.Sort(items) i := 0 for k, _, next := table.Iterate()(); next != nil; k, _, next = next() { if 
!k.Equals(types.ByteSlice(items[i])) { t.Error(string(k.(types.ByteSlice)), "!=", string(items[i])) } i++ for i+1 < len(items) && items[i].Equals(items[i-1]) { i++ } } }
// Hash returns the hash of the sub-graph's label bytes, as computed by
// types.ByteSlice.
func (sg *SubGraph) Hash() int {
	label := types.ByteSlice(sg.Label())
	return label.Hash()
}
// Hash returns the hash of the pattern's label bytes, as computed by
// types.ByteSlice.
func (n *SubgraphPattern) Hash() int {
	label := types.ByteSlice(n.Label())
	return label.Hash()
}
func (self *MemBpTree) Count(key []byte) int { bpt := (*bptree.BpTree)(self) return bpt.Count(types.ByteSlice(key)) }
func (self *MemBpTree) Find(key []byte) Iterator { bpt := (*bptree.BpTree)(self) return self.kvIter(bpt.Find(types.ByteSlice(key))) }
func main() { args, optargs, err := getopt.GetOpt( os.Args[1:], "hs:m:o:c:", []string{ "help", "support=", "cache=", "min-vertices=", "sample-size=", "mem-profile=", "cpu-profile=", "output=", "probabilities", }, ) if err != nil { fmt.Fprintln(os.Stderr, err) Usage(ErrorCodes["opts"]) } log.Printf("Number of goroutines = %v", runtime.NumGoroutine()) support := -1 minVertices := -1 sampleSize := -1 memProfile := "" cpuProfile := "" outputDir := "" cache := "" compute_prs := false for _, oa := range optargs { switch oa.Opt() { case "-h", "--help": Usage(0) case "-o", "--output": outputDir = EmptyDir(AssertDir(oa.Arg())) case "-s", "--support": support = ParseInt(oa.Arg()) case "-m", "--min-vertices": minVertices = ParseInt(oa.Arg()) case "-c", "--cache": cache = AssertDir(oa.Arg()) case "--probabilities": compute_prs = true case "--sample-size": sampleSize = ParseInt(oa.Arg()) case "--mem-profile": memProfile = AssertFile(oa.Arg()) case "--cpu-profile": cpuProfile = AssertFile(oa.Arg()) } } if support < 1 { fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support) Usage(ErrorCodes["opts"]) } if sampleSize < 1 { fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize) Usage(ErrorCodes["opts"]) } if outputDir == "" { fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n") Usage(ErrorCodes["opts"]) } if cache == "" { fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>") Usage(ErrorCodes["opts"]) } if len(args) != 1 { fmt.Fprintln(os.Stderr, "Expected a path to the graph file") Usage(ErrorCodes["opts"]) } getReader := func() (io.Reader, func()) { return Input(args[0]) } if cpuProfile != "" { f, err := os.Create(cpuProfile) if err != nil { log.Fatal(err) } defer f.Close() err = pprof.StartCPUProfile(f) if err != nil { log.Fatal(err) } defer pprof.StopCPUProfile() } var memProfFile io.WriteCloser if memProfile != "" { f, err := os.Create(memProfile) if err != nil { log.Fatal(err) } 
memProfFile = f defer f.Close() } nodePath := path.Join(outputDir, "node-attrs.bptree") nodeBf, err := fmap.CreateBlockFile(nodePath) if err != nil { log.Fatal(err) } defer nodeBf.Close() nodeAttrs, err := bptree.New(nodeBf, 4, -1) if err != nil { log.Fatal(err) } G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil) if err != nil { log.Println("Error loading the graph") log.Panic(err) } log.Print("Loaded graph, about to start mining") sgCount := 0 sgMaker := func() store.SubGraphs { name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount) sgCount++ path := path.Join(cache, name) s := store.NewFs2BpTree(G, path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } idxCount := 0 idxMaker := func() store.UniqueIndex { name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount) idxCount++ path := path.Join(cache, name) s := store.NewFs2UniqueIndex(G, path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } setsCount := 0 setsMaker := func() store.SetsMap { name := fmt.Sprintf("sets-%d.b+tree", setsCount) setsCount++ path := path.Join(cache, name) s := store.NewFs2Sets(path) // os.Remove(path) // s, err := store.NewSqlite(G, path) // if err != nil { // log.Panic(err) // } return s } // memFsMaker := func() store.SubGraphs { // return store.AnonFs2BpTree(G) // } m := mine.RandomWalk( G, support, minVertices, sampleSize, memProfFile, sgMaker, idxMaker, setsMaker, ) keys := list.NewSorted(10, false) counts := hashtable.NewLinearHash() for label := range m.Report { key := types.ByteSlice(label) count := 0 if counts.Has(key) { c, err := counts.Get(key) if err != nil { log.Panic(err) } count = c.(int) } counts.Put(key, count+1) keys.Add(key) } log.Println("Tries", m.Tries) triesPath := path.Join(outputDir, "tries") if f, e := os.Create(triesPath); e != nil { log.Fatal(err) } else { fmt.Fprintln(f, m.Tries) f.Close() } { log.Println("Finished mining! 
Writing output...") keyCh := make(chan []byte) go func() { for k, next := keys.Items()(); next != nil; k, next = next() { keyCh <- []byte(k.(types.ByteSlice)) } close(keyCh) }() writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir) } if !compute_prs { log.Println("Done!") return } log.Println("Finished writing patterns. Computing probabilities...") count := 0 for k, next := keys.Items()(); next != nil; k, next = next() { patDir := path.Join(outputDir, fmt.Sprintf("%d", count)) log.Println("-----------------------------------") c, err := counts.Get(k) if err != nil { log.Fatal(err) } key := []byte(k.(types.ByteSlice)) dupCount := c.(int) // if max.Count(key) < support { // log.Println("wat not enough subgraphs", max.Count(key)) // continue // } if c, err := os.Create(path.Join(patDir, "duplicates")); err != nil { log.Fatal(err) } else { fmt.Fprintln(c, dupCount) c.Close() } for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() { vp, Q, R, u, err := m.PrMatrices(sg) if err != nil { log.Println(err) errPath := path.Join(patDir, "error") if f, e := os.Create(errPath); e != nil { log.Fatal(err) } else { fmt.Fprintln(f, err) f.Close() } } else { bytes, err := json.Marshal(map[string]interface{}{ "Q": Q, "R": R, "u": u, "startingPoints": vp, }) if err != nil { log.Fatal(err) } matPath := path.Join(patDir, "matrices.json") if m, err := os.Create(matPath); err != nil { log.Fatal(err) } else { _, err := m.Write(bytes) if err != nil { m.Close() log.Fatal(err) } m.Close() } } break } count++ } log.Println("Done!") }