Example #1
func GetUserStore(dir string) (*UserFileStore, error) {
	var bf *fmap.BlockFile
	var users *bptree.BpTree
	path := filepath.Join(dir, "users.bptree")
	err := createOrOpen(path,
		func(path string) (err error) {
			bf, err = fmap.CreateBlockFile(path)
			if err != nil {
				return err
			}
			users, err = bptree.New(bf, -1, -1)
			return err
		},
		func(path string) (err error) {
			bf, err = fmap.OpenBlockFile(path)
			if err != nil {
				return err
			}
			users, err = bptree.Open(bf)
			return err
		},
	)
	if err != nil {
		return nil, err
	}
	s := &UserFileStore{
		path:  path,
		bf:    bf,
		users: users,
	}
	return s, bf.Sync()
}
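This example relies on a createOrOpen helper that is not shown. A minimal sketch of such a helper, assuming it merely dispatches on whether the file already exists (the name and signature come from the call above; the body is hypothetical and requires the standard "os" package):

// createOrOpen is not part of the example above; this hypothetical sketch
// calls `create` when the file does not yet exist and `open` otherwise.
func createOrOpen(path string, create, open func(path string) error) error {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		return create(path)
	} else if err != nil {
		return err
	}
	return open(path)
}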
Example #2
func newFs2Sets(bf *fmap.BlockFile) *Fs2Sets {
	bpt, err := bptree.New(bf, -1, -1)
	assert_ok(err)
	return &Fs2Sets{
		bf:  bf,
		bpt: bpt,
	}
}
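Examples #2 through #4 call an assert_ok helper that is not defined in the snippets. A plausible minimal version, shown here as an assumption rather than the project's actual helper, simply panics on a non-nil error:

// assert_ok is not shown in the examples; a hypothetical definition that
// aborts on any error could be as simple as:
func assert_ok(err error) {
	if err != nil {
		panic(err)
	}
}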
Example #3
func newFs2BpTree(g *goiso.Graph, bf *fmap.BlockFile) *Fs2BpTree {
	bpt, err := bptree.New(bf, -1, -1)
	assert_ok(err)
	return &Fs2BpTree{
		g:   g,
		bf:  bf,
		bpt: bpt,
	}
}
Example #4
func newFs2UniqueIndex(g *goiso.Graph, bf *fmap.BlockFile) *Fs2UniqueIndex {
	bpt, err := bptree.New(bf, -1, 0)
	assert_ok(err)
	return &Fs2UniqueIndex{
		g:   g,
		bf:  bf,
		bpt: bpt,
	}
}
Example #5
func newBpTree(bf *fmap.BlockFile) (*BpTree, error) {
	bpt, err := bptree.New(bf, -1, -1)
	if err != nil {
		return nil, err
	}
	b := &BpTree{
		bf:  bf,
		bpt: bpt,
	}
	return b, nil
}
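For context, a hedged usage sketch of the pattern these constructors wrap: create a block file, build a tree with variable-length keys and values (the -1, -1 arguments used throughout these examples), insert a pair, and test membership. Has and Sync appear in Examples #6 and #1 respectively; the Add call and its signature are an assumption about the fs2 bptree API, not taken from the examples.

// Hypothetical usage sketch; Add is assumed to take a key and a value as
// byte slices, and Has is used as in Example #6.
func exampleUsage(dir string) error {
	bf, err := fmap.CreateBlockFile(filepath.Join(dir, "example.bptree"))
	if err != nil {
		return err
	}
	defer bf.Close()
	// -1, -1 as in the examples above: variable-length keys and values.
	bpt, err := bptree.New(bf, -1, -1)
	if err != nil {
		return err
	}
	if err := bpt.Add([]byte("key"), []byte("value")); err != nil {
		return err
	}
	has, err := bpt.Has([]byte("key"))
	if err != nil {
		return err
	}
	log.Println("has key:", has)
	return bf.Sync()
}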
Example #6
func MaximalSubGraphs(all store.SubGraphs, nodeAttrs *bptree.BpTree, tempDir string) (<-chan []byte, error) {
	labelsBf, err := fmap.CreateBlockFile(path.Join(tempDir, "labels.bptree"))
	if err != nil {
		return nil, err
	}
	labels, err := bptree.New(labelsBf, -1, 1)
	if err != nil {
		return nil, err
	}
	keys := make(chan []byte)
	go func() {
		defer labelsBf.Close()
		var cur []byte
		had := false
		for key, sg, next := all.Backward()(); next != nil; key, sg, next = next() {
			if cur != nil && !bytes.Equal(key, cur) {
				if !had {
					keys <- cur
				}
				had = false
			}
			has, err := labels.Has(key)
			if err != nil {
				log.Fatal(err)
			}
			if has {
				had = true
			}
			if !bytes.Equal(cur, key) {
				// add all of the (potential) parents of this node
				for eIdx := range sg.E {
					nsg, _ := sg.RemoveEdge(eIdx)
					addToLabels(labels, nsg.ShortLabel())
				}
			}
			cur = key
		}
		if !had && cur != nil {
			keys <- cur
		}
		close(keys)
	}()
	return keys, nil
}
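This example also depends on an addToLabels helper that is not shown. A hypothetical version that records each label at most once might look like this (the Add call and its signature are an assumption about the bptree API):

// addToLabels is not defined above; hypothetical sketch that inserts a
// label only if it is not already present.
func addToLabels(labels *bptree.BpTree, label []byte) {
	has, err := labels.Has(label)
	if err != nil {
		log.Fatal(err)
	}
	if !has {
		// the labels tree was created with a 1-byte value size above,
		// so store a single placeholder byte
		if err := labels.Add(label, []byte{0}); err != nil {
			log.Fatal(err)
		}
	}
}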
Example #7
func newBpTree(
	bf *fmap.BlockFile,
	serializeKey func([]byte) []byte,
	serializeValue func(*goiso.SubGraph) []byte,
	deserializeKey func([]byte) []byte,
	deserializeValue func([]byte) *goiso.SubGraph,
) (*BpTree, error) {
	bpt, err := bptree.New(bf, -1, -1)
	if err != nil {
		return nil, err
	}
	b := &BpTree{
		bf:               bf,
		bpt:              bpt,
		serializeKey:     serializeKey,
		serializeValue:   serializeValue,
		deserializeKey:   deserializeKey,
		deserializeValue: deserializeValue,
	}
	return b, nil
}
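The wrapper above only stores the serialization callbacks. A hedged sketch of how an Add-style method might apply them before touching the underlying tree (this method and the bpt.Add call are assumptions, not part of the original example):

// Hypothetical method on the wrapper from Example #7: run the key and the
// subgraph through the stored serializers, then insert the raw bytes.
func (b *BpTree) Add(key []byte, sg *goiso.SubGraph) error {
	return b.bpt.Add(b.serializeKey(key), b.serializeValue(sg))
}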
Example #8
func main() {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		"hs:m:o:c:",
		[]string{
			"help",
			"support=",
			"cache=",
			"min-vertices=",
			"sample-size=",
			"mem-profile=",
			"cpu-profile=",
			"output=",
			"probabilities",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		Usage(ErrorCodes["opts"])
	}
	log.Printf("Number of goroutines = %v", runtime.NumGoroutine())

	support := -1
	minVertices := -1
	sampleSize := -1
	memProfile := ""
	cpuProfile := ""
	outputDir := ""
	cache := ""
	compute_prs := false
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			Usage(0)
		case "-o", "--output":
			outputDir = EmptyDir(AssertDir(oa.Arg()))
		case "-s", "--support":
			support = ParseInt(oa.Arg())
		case "-m", "--min-vertices":
			minVertices = ParseInt(oa.Arg())
		case "-c", "--cache":
			cache = AssertDir(oa.Arg())
		case "--probabilities":
			compute_prs = true
		case "--sample-size":
			sampleSize = ParseInt(oa.Arg())
		case "--mem-profile":
			memProfile = AssertFile(oa.Arg())
		case "--cpu-profile":
			cpuProfile = AssertFile(oa.Arg())
		}
	}

	if support < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support)
		Usage(ErrorCodes["opts"])
	}

	if sampleSize < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize)
		Usage(ErrorCodes["opts"])
	}

	if outputDir == "" {
		fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n")
		Usage(ErrorCodes["opts"])
	}

	if cache == "" {
		fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>")
		Usage(ErrorCodes["opts"])
	}

	if len(args) != 1 {
		fmt.Fprintln(os.Stderr, "Expected a path to the graph file")
		Usage(ErrorCodes["opts"])
	}

	getReader := func() (io.Reader, func()) { return Input(args[0]) }

	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		err = pprof.StartCPUProfile(f)
		if err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	var memProfFile io.WriteCloser
	if memProfile != "" {
		f, err := os.Create(memProfile)
		if err != nil {
			log.Fatal(err)
		}
		memProfFile = f
		defer f.Close()
	}

	nodePath := path.Join(outputDir, "node-attrs.bptree")

	nodeBf, err := fmap.CreateBlockFile(nodePath)
	if err != nil {
		log.Fatal(err)
	}
	defer nodeBf.Close()
	nodeAttrs, err := bptree.New(nodeBf, 4, -1)
	if err != nil {
		log.Fatal(err)
	}

	G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil)
	if err != nil {
		log.Println("Error loading the graph")
		log.Panic(err)
	}
	log.Print("Loaded graph, about to start mining")

	sgCount := 0
	sgMaker := func() store.SubGraphs {
		name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount)
		sgCount++
		path := path.Join(cache, name)
		s := store.NewFs2BpTree(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	idxCount := 0
	idxMaker := func() store.UniqueIndex {
		name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount)
		idxCount++
		path := path.Join(cache, name)
		s := store.NewFs2UniqueIndex(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	setsCount := 0
	setsMaker := func() store.SetsMap {
		name := fmt.Sprintf("sets-%d.b+tree", setsCount)
		setsCount++
		path := path.Join(cache, name)
		s := store.NewFs2Sets(path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	// memFsMaker := func() store.SubGraphs {
	// 	return store.AnonFs2BpTree(G)
	// }

	m := mine.RandomWalk(
		G,
		support,
		minVertices,
		sampleSize,
		memProfFile,
		sgMaker,
		idxMaker,
		setsMaker,
	)
	keys := list.NewSorted(10, false)
	counts := hashtable.NewLinearHash()
	for label := range m.Report {
		key := types.ByteSlice(label)
		count := 0
		if counts.Has(key) {
			c, err := counts.Get(key)
			if err != nil {
				log.Panic(err)
			}
			count = c.(int)
		}
		counts.Put(key, count+1)
		keys.Add(key)
	}
	log.Println("Tries", m.Tries)
	triesPath := path.Join(outputDir, "tries")
	if f, e := os.Create(triesPath); e != nil {
		log.Fatal(e)
	} else {
		fmt.Fprintln(f, m.Tries)
		f.Close()
	}
	{
		log.Println("Finished mining! Writing output...")
		keyCh := make(chan []byte)
		go func() {
			for k, next := keys.Items()(); next != nil; k, next = next() {
				keyCh <- []byte(k.(types.ByteSlice))
			}
			close(keyCh)
		}()
		writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir)
	}

	if !compute_prs {
		log.Println("Done!")
		return
	}

	log.Println("Finished writing patterns. Computing probabilities...")
	count := 0
	for k, next := keys.Items()(); next != nil; k, next = next() {
		patDir := path.Join(outputDir, fmt.Sprintf("%d", count))
		log.Println("-----------------------------------")
		c, err := counts.Get(k)
		if err != nil {
			log.Fatal(err)
		}
		key := []byte(k.(types.ByteSlice))
		dupCount := c.(int)
		// if max.Count(key) < support {
		// 	log.Println("wat not enough subgraphs", max.Count(key))
		// 	continue
		// }
		if c, err := os.Create(path.Join(patDir, "duplicates")); err != nil {
			log.Fatal(err)
		} else {
			fmt.Fprintln(c, dupCount)
			c.Close()
		}
		for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() {
			vp, Q, R, u, err := m.PrMatrices(sg)
			if err != nil {
				log.Println(err)
				errPath := path.Join(patDir, "error")
				if f, e := os.Create(errPath); e != nil {
					log.Fatal(e)
				} else {
					fmt.Fprintln(f, err)
					f.Close()
				}
			} else {
				bytes, err := json.Marshal(map[string]interface{}{
					"Q":              Q,
					"R":              R,
					"u":              u,
					"startingPoints": vp,
				})
				if err != nil {
					log.Fatal(err)
				}
				matPath := path.Join(patDir, "matrices.json")
				if m, err := os.Create(matPath); err != nil {
					log.Fatal(err)
				} else {
					_, err := m.Write(bytes)
					if err != nil {
						m.Close()
						log.Fatal(err)
					}
					m.Close()
				}
			}
			break
		}
		count++
	}
	log.Println("Done!")
}