Exemple #1
0
// Less reports whether emb sorts before o, comparing the two embeddings by
// their serialized byte form. Any o that is not an *Embedding yields false.
func (emb *Embedding) Less(o types.Sortable) bool {
	mine := types.ByteSlice(emb.Serialize())
	if other, ok := o.(*Embedding); ok {
		return mine.Less(types.ByteSlice(other.Serialize()))
	}
	return false
}
Exemple #2
0
// Less reports whether sg sorts before o, comparing the two subgraphs by
// their labels as byte slices. Any o that is not a *SubGraph yields false.
func (sg *SubGraph) Less(o types.Sortable) bool {
	mine := types.ByteSlice(sg.Label())
	if other, ok := o.(*SubGraph); ok {
		return mine.Less(types.ByteSlice(other.Label()))
	}
	return false
}
Exemple #3
0
// Equals reports whether o carries the same label as n. Any value that
// implements Labeled is compared by label; everything else is unequal.
func (n *SubgraphPattern) Equals(o types.Equatable) bool {
	mine := types.ByteSlice(n.Label())
	if other, ok := o.(Labeled); ok {
		return mine.Equals(types.ByteSlice(other.Label()))
	}
	return false
}
Exemple #4
0
// Equals reports whether o is equal to emb. Another *Embedding is compared
// by serialized bytes; a *SubGraph is compared against emb.SG via its
// equals method; any other type is unequal.
func (emb *Embedding) Equals(o types.Equatable) bool {
	mine := types.ByteSlice(emb.Serialize())
	if other, ok := o.(*Embedding); ok {
		return mine.Equals(types.ByteSlice(other.Serialize()))
	}
	if sg, ok := o.(*SubGraph); ok {
		return emb.SG.equals(sg)
	}
	return false
}
Exemple #5
0
// Add stores psg under key in the underlying bptree.BpTree. It panics if
// the tree reports an error.
func (self *MemBpTree) Add(key []byte, psg *goiso.SubGraph) {
	if err := (*bptree.BpTree)(self).Add(types.ByteSlice(key), psg); err != nil {
		panic(err)
	}
}
Exemple #6
0
// RejectingWalk filters the nodes arriving on samples and forwards the
// accepted ones on the returned channel.
//
// For every sampled node exactly one value is sent on terminate: true once
// the number of accepted nodes has reached w.Config.Samples, false
// otherwise. That send happens before the node (if accepted) is forwarded.
// When samples is closed, the returned channel and then terminate are closed.
//
// A node is rejected when w.Reject is set and w.Dt.Acceptable declines it,
// and is dropped as a duplicate when w.Config.Unique is set and its pattern
// label has already been seen.
func (w *Walker) RejectingWalk(samples chan lattice.Node, terminate chan bool) chan lattice.Node {
	accepted := make(chan lattice.Node)
	go func() {
		taken := 0
		seen := set.NewSortedSet(w.Config.Samples)
		for node := range samples {
			keep := false
			if w.Reject && !w.Dt.Acceptable(node) {
				errors.Logf("DEBUG", "rejected %v", node)
			} else if label := types.ByteSlice(node.Pattern().Label()); w.Config.Unique && seen.Has(label) {
				errors.Logf("DEBUG", "duplicate %v", node)
			} else {
				// Labels are only tracked when uniqueness is requested.
				if w.Config.Unique {
					seen.Add(label)
				}
				keep = true
				taken++
			}
			terminate <- (taken >= w.Config.Samples)
			if keep {
				accepted <- node
			}
		}
		close(accepted)
		close(terminate)
	}()
	return accepted
}
Exemple #7
0
// Remove deletes the entries stored under key for which where returns true,
// delegating to RemoveWhere on the underlying bptree.BpTree. Stored values
// are asserted to be *goiso.SubGraph before where is consulted.
func (self *MemBpTree) Remove(key []byte, where func(*goiso.SubGraph) bool) error {
	matches := func(val interface{}) bool {
		return where(val.(*goiso.SubGraph))
	}
	return (*bptree.BpTree)(self).RemoveWhere(types.ByteSlice(key), matches)
}
Exemple #8
0
// extensions returns the set of short labels produced by extending sgs.
//
// An empty sgs yields a fresh empty set. Results are memoized in m.extended
// under the short label of sgs[0]; on a cache miss every extension is also
// sent to m.AllEmbeddings while its short label is collected.
func (m *RandomWalkMiner) extensions(sgs []*goiso.SubGraph) *set.SortedSet {
	if len(sgs) == 0 {
		return set.NewSortedSet(10)
	}
	cacheKey := types.ByteSlice(sgs[0].ShortLabel())
	if !m.extended.Has(cacheKey) {
		found := set.NewSortedSet(10)
		collect := func(sg *goiso.SubGraph) {
			m.AllEmbeddings.send(sg)
			found.Add(types.ByteSlice(sg.ShortLabel()))
		}
		m.extend(sgs, collect)
		m.extended.Put(cacheKey, found)
		return found
	}
	return m.extended.Get(cacheKey)
}
Exemple #9
0
// Deque removes the head of the queue and returns its data (FIFO order).
//
// The queue lock is held for the whole call. An error is returned when the
// queue is empty or its bookkeeping (length, head, tail) is inconsistent.
// After the node has been unlinked, the occurrence count stored in the index
// under the hash of the data is decremented; the entry is removed once the
// count drops to zero or below. Note that the node is unlinked before the
// index is consulted, so an index failure is reported after the list has
// already been modified.
func (self *Queue) Deque() (data []byte, err error) {
	self.lock.Lock()
	defer self.lock.Unlock()

	switch {
	case self.length == 0:
		return nil, fmt.Errorf("List is empty")
	case self.length < 0:
		return nil, fmt.Errorf("List length is less than zero")
	case self.head == nil:
		return nil, fmt.Errorf("head is nil")
	}

	front := self.head

	// A head without a successor must also be the tail of a one-element list.
	if front.next == nil {
		switch {
		case front != self.tail:
			return nil, fmt.Errorf("Expected tail to equal head")
		case self.length != 1:
			return nil, fmt.Errorf("Expected list length to equal 1")
		}
		self.tail = nil
	}
	self.head = front.next
	self.length--

	digest := types.ByteSlice(Hash(front.data))
	if !self.index.Has(digest) {
		return nil, fmt.Errorf("integrity error, index did not have data")
	}

	stored, err := self.index.Get(digest)
	if err != nil {
		return nil, err
	}
	remaining := int(stored.(types.Int)) - 1
	if remaining <= 0 {
		if _, err = self.index.Remove(digest); err != nil {
			return nil, err
		}
	} else if err = self.index.Put(digest, types.Int(remaining)); err != nil {
		return nil, err
	}

	return front.data, nil
}
// randslice returns a types.ByteSlice of the given length filled with bytes
// read from /dev/urandom.
//
// It panics if the device cannot be opened and reports a read failure
// through t.Fatal.
func (t *T) randslice(length int) types.ByteSlice {
	urandom, err := os.Open("/dev/urandom")
	if err != nil {
		panic(err)
	}
	// Deferred so the handle is released on every exit path, including the
	// read-failure path where t.Fatal is called (assuming t.Fatal unwinds
	// the goroutine the way testing.T's does, running deferred calls).
	defer urandom.Close()

	slice := make([]byte, length)
	if _, err := urandom.Read(slice); err != nil {
		t.Fatal(err)
	}
	return types.ByteSlice(slice)
}
Exemple #11
0
// supportedKeys returns the subset of keys whose partition holds at least
// m.Support entries.
//
// Results are memoized in m.supportedExtensions under from: a cached set is
// returned as-is, otherwise the set is computed, stored, and returned. The
// computation fans the keys out to m.PLevel worker goroutines.
//
// NOTE(review): with m.PLevel == 0 no worker drains keysCh, so the feeder
// goroutine would block on its first send for a non-empty key set — confirm
// PLevel is always positive.
func (m *RandomWalkMiner) supportedKeys(from []byte, keys *set.SortedSet) *set.SortedSet {
	key := types.ByteSlice(from)
	if m.supportedExtensions.Has(key) {
		supKeys := m.supportedExtensions.Get(key)
		return supKeys
	}
	keysCh := make(chan []byte)   // candidate keys, feeder -> workers
	partKeys := make(chan []byte) // keys that met the support threshold
	done := make(chan bool)       // one signal per finished worker
	// Workers: each filters candidate keys by partition size.
	for i := 0; i < m.PLevel; i++ {
		go func() {
			// This loop variable shadows the outer cache key.
			for key := range keysCh {
				if len(m.partition(key)) >= m.Support {
					partKeys <- key
				}
			}
			done <- true
		}()
	}
	// Feeder: pushes every key from the input set, then closes keysCh so
	// the workers' range loops end.
	go func() {
		for k, next := keys.Items()(); next != nil; k, next = next() {
			keysCh <- []byte(k.(types.ByteSlice))
		}
		close(keysCh)
	}()
	// Closer: once every worker has signalled, close partKeys so the
	// collection loop below terminates.
	go func() {
		for i := 0; i < m.PLevel; i++ {
			<-done
		}
		close(partKeys)
		close(done)
	}()
	supKeys := set.NewSortedSet(10)
	for partKey := range partKeys {
		supKeys.Add(types.ByteSlice(partKey))
	}
	m.supportedExtensions.Put(key, supKeys)
	return supKeys
}
Exemple #12
0
// initial builds the starting state for the walk. Every vertex of m.Graph
// whose color frequency is at least m.Support is turned into a single-vertex
// subgraph and sent to a fresh set of collectors. The collectors are
// returned together with a sorted set of their keys.
func (m *RandomWalkMiner) initial() (Collectors, *set.SortedSet) {
	groups := m.makeCollectors(m.PLevel)
	for idx := range m.Graph.V {
		vertex := &m.Graph.V[idx]
		if m.Graph.ColorFrequency(vertex.Color) < m.Support {
			continue
		}
		sg, _ := m.Graph.VertexSubGraph(vertex.Idx)
		groups.send(sg)
	}

	startingPoints := set.NewSortedSet(10)
	key, next := groups.keys()()
	for next != nil {
		startingPoints.Add(types.ByteSlice(key))
		key, next = next()
	}
	return groups, startingPoints
}
Exemple #13
0
// Enque appends data to the tail of the queue.
//
// The queue lock is held for the whole call. The index tracks how many
// times each payload (keyed by its hash) is currently queued: when the
// payload is already present and duplicates are not allowed the call is a
// silent no-op; otherwise the stored count is incremented, or set to 1 for a
// new payload. The index is updated before the node is linked in.
func (self *Queue) Enque(data []byte) error {
	self.lock.Lock()
	defer self.lock.Unlock()

	digest := types.ByteSlice(Hash(data))
	occurrences := 1
	if self.index.Has(digest) {
		if !self.allowDups {
			return nil
		}
		prev, err := self.index.Get(digest)
		if err != nil {
			return err
		}
		occurrences = int(prev.(types.Int)) + 1
	}
	if err := self.index.Put(digest, types.Int(occurrences)); err != nil {
		return err
	}

	entry := &node{data: data}

	switch {
	case self.tail != nil:
		self.tail.next = entry
	case self.head != nil:
		return fmt.Errorf("List has head but no tail...")
	default:
		self.head = entry
	}
	self.tail = entry
	self.length++

	return nil
}
Exemple #14
0
// Hash returns the hash of the embedding's serialized form, as computed by
// types.ByteSlice.
func (emb *Embedding) Hash() int {
	serialized := types.ByteSlice(emb.Serialize())
	return serialized.Hash()
}
Exemple #15
0
// Hash returns the SHA-256 digest of data as a freshly allocated slice of
// sha256.Size (32) bytes. A nil or empty input yields the digest of the
// empty message.
func Hash(data []byte) []byte {
	sum := sha256.Sum256(data)
	// Slicing the local array already produces the []byte to return; no
	// intermediate conversion through another slice type is needed.
	return sum[:]
}
Exemple #16
0
// Has reports whether the index contains an entry for hash. Any hash whose
// length is not sha256.Size is rejected without consulting the index.
func (self *Queue) Has(hash []byte) bool {
	return len(hash) == sha256.Size && self.index.Has(types.ByteSlice(hash))
}
// TestIteratorPrefixFindDotty inserts a fixed list of keys (one of them
// twice) into a TST, writes the tree's Dotty output to TestDotty.dot, and
// checks that Iterate yields the distinct keys in sorted order and that
// PrefixFind("co") yields exactly coo, coon, cow.
func TestIteratorPrefixFindDotty(t *testing.T) {
	toSlices := func(words ...string) ByteSlices {
		out := make(ByteSlices, 0, len(words))
		for _, w := range words {
			out = append(out, types.ByteSlice(w))
		}
		return out
	}

	items := toSlices(
		"0:java.io.File;",
		"cat", "catty", "car", "cow", "candy", "coo", "coon",
		"0:java.io.File;1:new,0:java.util.ArrayList;",
		"andy", "alex", "andrie", "alexander", "alexi",
		"bob",
		"0:java.io.File;",
		"bobcat", "barnaby", "baskin", "balm",
	)

	table := new(TST)
	for _, key := range items {
		if err := table.Put(key, nil); err != nil {
			t.Error(table, err)
		}
		if !table.Has(key) {
			t.Error(table, "Missing key")
		}
	}
	write("TestDotty.dot", table.Dotty())

	sort.Sort(items)
	pos := 0
	k, _, next := table.Iterate()()
	for next != nil {
		if !k.Equals(types.ByteSlice(items[pos])) {
			t.Error(string(k.(types.ByteSlice)), "!=", string(items[pos]))
		}
		pos++
		// Skip repeated fixture entries; the iterator is compared against
		// each distinct key once.
		for pos < len(items) && items[pos].Equals(items[pos-1]) {
			pos++
		}
		k, _, next = next()
	}

	coItems := toSlices("coo", "coon", "cow")
	pos = 0
	k, _, next = table.PrefixFind([]byte("co"))()
	for next != nil {
		if !k.Equals(types.ByteSlice(coItems[pos])) {
			t.Error(string(k.(types.ByteSlice)), "!=", string(coItems[pos]))
		}
		pos++
		k, _, next = next()
	}
}
Exemple #18
0
// Has reports whether the underlying bptree.BpTree contains key.
func (self *MemBpTree) Has(key []byte) bool {
	return (*bptree.BpTree)(self).Has(types.ByteSlice(key))
}
// TestComplete4 inserts 64 four-letter keys over the alphabet {a,b,c,d},
// all starting with 'a', into a TST, writes the tree's Dotty output to
// TestComplete4.dot, and checks that Iterate yields the keys in sorted
// order.
//
// NOTE(review): the final fixture entry is "addd", which repeats an earlier
// entry, while "acdd" never appears — possibly a typo; kept as-is.
func TestComplete4(t *testing.T) {
	words := []string{
		"abaa", "abab", "abac", "abad",
		"abba", "abbb", "abbc", "abbd",
		"abca", "abcb", "abcc", "abcd",
		"abda", "abdb", "abdc", "abdd",
		"aaaa", "aaab", "aaac", "aaad",
		"aaba", "aabb", "aabc", "aabd",
		"aaca", "aacb", "aacc", "aacd",
		"aada", "aadb", "aadc", "aadd",
		"adaa", "adab", "adac", "adad",
		"adba", "adbb", "adbc", "adbd",
		"adca", "adcb", "adcc", "adcd",
		"adda", "addb", "addc", "addd",
		"acaa", "acab", "acac", "acad",
		"acba", "acbb", "acbc", "acbd",
		"acca", "accb", "accc", "accd",
		"acda", "acdb", "acdc", "addd",
	}
	items := make(ByteSlices, 0, len(words))
	for _, w := range words {
		items = append(items, types.ByteSlice(w))
	}

	table := new(TST)
	for _, key := range items {
		if err := table.Put(key, nil); err != nil {
			t.Error(table, err)
		}
		if !table.Has(key) {
			t.Error(table, "Missing key")
		}
	}
	write("TestComplete4.dot", table.Dotty())

	sort.Sort(items)
	pos := 0
	k, _, next := table.Iterate()()
	for next != nil {
		if !k.Equals(types.ByteSlice(items[pos])) {
			t.Error(string(k.(types.ByteSlice)), "!=", string(items[pos]))
		}
		pos++
		// Skip repeated fixture entries (never advancing past the final one).
		for pos+1 < len(items) && items[pos].Equals(items[pos-1]) {
			pos++
		}
		k, _, next = next()
	}
}
Exemple #20
0
// Hash returns the hash of the subgraph's label, as computed by
// types.ByteSlice.
func (sg *SubGraph) Hash() int {
	label := types.ByteSlice(sg.Label())
	return label.Hash()
}
Exemple #21
0
// Hash returns the hash of the pattern's label, as computed by
// types.ByteSlice.
func (n *SubgraphPattern) Hash() int {
	label := types.ByteSlice(n.Label())
	return label.Hash()
}
Exemple #22
0
// Count returns the count the underlying bptree.BpTree reports for key.
func (self *MemBpTree) Count(key []byte) int {
	return (*bptree.BpTree)(self).Count(types.ByteSlice(key))
}
Exemple #23
0
// Find looks key up in the underlying bptree.BpTree and adapts the tree's
// result into an Iterator via kvIter.
func (self *MemBpTree) Find(key []byte) Iterator {
	return self.kvIter((*bptree.BpTree)(self).Find(types.ByteSlice(key)))
}
Exemple #24
0
// main is the command-line entry point for the random-walk miner.
//
// It parses the options, validates that support, sample-size, output and
// cache were supplied along with exactly one positional graph path,
// optionally starts CPU profiling and opens a memory-profile file, loads the
// graph, runs mine.RandomWalk, and writes the results under the output
// directory:
//
//   - "tries" holds m.Tries,
//   - the maximal patterns are written by writeMaximalPatterns,
//   - with --probabilities, each pattern directory additionally receives a
//     "duplicates" file and either "matrices.json" or an "error" file.
//
// Fatal conditions terminate the process via log.Fatal / log.Panic / Usage.
func main() {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		"hs:m:o:c:",
		[]string{
			"help",
			"support=",
			"cache=",
			"min-vertices=",
			"sample-size=",
			"mem-profile=",
			"cpu-profile=",
			"output=",
			"probabilities",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		Usage(ErrorCodes["opts"])
	}
	log.Printf("Number of goroutines = %v", runtime.NumGoroutine())

	// Sentinel defaults; the numeric ones are validated below.
	support := -1
	minVertices := -1
	sampleSize := -1
	memProfile := ""
	cpuProfile := ""
	outputDir := ""
	cache := ""
	compute_prs := false
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			Usage(0)
		case "-o", "--output":
			outputDir = EmptyDir(AssertDir(oa.Arg()))
		case "-s", "--support":
			support = ParseInt(oa.Arg())
		case "-m", "--min-vertices":
			minVertices = ParseInt(oa.Arg())
		case "-c", "--cache":
			cache = AssertDir(oa.Arg())
		case "--probabilities":
			compute_prs = true
		case "--sample-size":
			sampleSize = ParseInt(oa.Arg())
		case "--mem-profile":
			memProfile = AssertFile(oa.Arg())
		case "--cpu-profile":
			cpuProfile = AssertFile(oa.Arg())
		}
	}

	if support < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support)
		Usage(ErrorCodes["opts"])
	}

	if sampleSize < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize)
		Usage(ErrorCodes["opts"])
	}

	if outputDir == "" {
		fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n")
		Usage(ErrorCodes["opts"])
	}

	if cache == "" {
		fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>")
		Usage(ErrorCodes["opts"])
	}

	if len(args) != 1 {
		fmt.Fprintln(os.Stderr, "Expected a path to the graph file")
		Usage(ErrorCodes["opts"])
	}

	getReader := func() (io.Reader, func()) { return Input(args[0]) }

	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		err = pprof.StartCPUProfile(f)
		if err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	// memProfFile stays nil unless --mem-profile was given; it is handed to
	// the miner as-is.
	var memProfFile io.WriteCloser
	if memProfile != "" {
		f, err := os.Create(memProfile)
		if err != nil {
			log.Fatal(err)
		}
		memProfFile = f
		defer f.Close()
	}

	nodePath := path.Join(outputDir, "node-attrs.bptree")

	nodeBf, err := fmap.CreateBlockFile(nodePath)
	if err != nil {
		log.Fatal(err)
	}
	defer nodeBf.Close()
	nodeAttrs, err := bptree.New(nodeBf, 4, -1)
	if err != nil {
		log.Fatal(err)
	}

	G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil)
	if err != nil {
		log.Println("Error loading the graph")
		log.Panic(err)
	}
	log.Print("Loaded graph, about to start mining")

	// Each factory below creates a store backed by a uniquely numbered file
	// in the cache directory.
	sgCount := 0
	sgMaker := func() store.SubGraphs {
		name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount)
		sgCount++
		path := path.Join(cache, name)
		s := store.NewFs2BpTree(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	idxCount := 0
	idxMaker := func() store.UniqueIndex {
		name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount)
		idxCount++
		path := path.Join(cache, name)
		s := store.NewFs2UniqueIndex(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	setsCount := 0
	setsMaker := func() store.SetsMap {
		name := fmt.Sprintf("sets-%d.b+tree", setsCount)
		setsCount++
		path := path.Join(cache, name)
		s := store.NewFs2Sets(path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	// memFsMaker := func() store.SubGraphs {
	// 	return store.AnonFs2BpTree(G)
	// }

	m := mine.RandomWalk(
		G,
		support,
		minVertices,
		sampleSize,
		memProfFile,
		sgMaker,
		idxMaker,
		setsMaker,
	)
	// Drain the report: record every label in keys and count how many times
	// each one was reported.
	keys := list.NewSorted(10, false)
	counts := hashtable.NewLinearHash()
	for label := range m.Report {
		key := types.ByteSlice(label)
		count := 0
		if counts.Has(key) {
			c, err := counts.Get(key)
			if err != nil {
				log.Panic(err)
			}
			count = c.(int)
		}
		counts.Put(key, count+1)
		keys.Add(key)
	}
	log.Println("Tries", m.Tries)
	triesPath := path.Join(outputDir, "tries")
	if f, e := os.Create(triesPath); e != nil {
		// Report the creation error itself, not the unrelated outer err.
		log.Fatal(e)
	} else {
		fmt.Fprintln(f, m.Tries)
		f.Close()
	}
	{
		log.Println("Finished mining! Writing output...")
		keyCh := make(chan []byte)
		go func() {
			for k, next := keys.Items()(); next != nil; k, next = next() {
				keyCh <- []byte(k.(types.ByteSlice))
			}
			close(keyCh)
		}()
		writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir)
	}

	if !compute_prs {
		log.Println("Done!")
		return
	}

	log.Println("Finished writing patterns. Computing probabilities...")
	count := 0
	for k, next := keys.Items()(); next != nil; k, next = next() {
		// Pattern directories are addressed by their position in keys.
		patDir := path.Join(outputDir, fmt.Sprintf("%d", count))
		log.Println("-----------------------------------")
		c, err := counts.Get(k)
		if err != nil {
			log.Fatal(err)
		}
		key := []byte(k.(types.ByteSlice))
		dupCount := c.(int)
		// if max.Count(key) < support {
		// 	log.Println("wat not enough subgraphs", max.Count(key))
		// 	continue
		// }
		if dupFile, err := os.Create(path.Join(patDir, "duplicates")); err != nil {
			log.Fatal(err)
		} else {
			fmt.Fprintln(dupFile, dupCount)
			dupFile.Close()
		}
		// Only the first embedding found for the key is used; the loop body
		// always ends with break.
		for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() {
			vp, Q, R, u, err := m.PrMatrices(sg)
			if err != nil {
				log.Println(err)
				errPath := path.Join(patDir, "error")
				if f, e := os.Create(errPath); e != nil {
					// Report why the error file could not be created; the
					// PrMatrices error was already logged above.
					log.Fatal(e)
				} else {
					fmt.Fprintln(f, err)
					f.Close()
				}
			} else {
				bytes, err := json.Marshal(map[string]interface{}{
					"Q":              Q,
					"R":              R,
					"u":              u,
					"startingPoints": vp,
				})
				if err != nil {
					log.Fatal(err)
				}
				matPath := path.Join(patDir, "matrices.json")
				if matFile, err := os.Create(matPath); err != nil {
					log.Fatal(err)
				} else {
					_, err := matFile.Write(bytes)
					if err != nil {
						matFile.Close()
						log.Fatal(err)
					}
					matFile.Close()
				}
			}
			break
		}
		count++
	}
	log.Println("Done!")
}