Example #1
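// FilterAutomorphs wraps an EmbIterator and suppresses automorphic embeddings:
// each embedding's vertex ids are collected into a sorted list used as a hash
// key, and an embedding is only yielded the first time its id set is seen.
// The dropped collector is passed through unchanged.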
func FilterAutomorphs(it EmbIterator, dropped *VertexEmbeddings) (ei EmbIterator, _ *VertexEmbeddings) {
	idSet := func(emb *Embedding) *list.Sorted {
		ids := list.NewSorted(len(emb.Ids), true)
		for _, id := range emb.Ids {
			ids.Add(types.Int(id))
		}
		return ids
	}
	seen := hashtable.NewLinearHash()
	ei = func(stop bool) (emb *Embedding, _ EmbIterator) {
		if it == nil {
			return nil, nil
		}
		for emb, it = it(stop); it != nil; emb, it = it(stop) {
			ids := idSet(emb)
			// errors.Logf("AUTOMORPH-DEBUG", "emb %v ids %v has %v", emb, ids, seen.Has(ids))
			if !seen.Has(ids) {
				seen.Put(ids, nil)
				return emb, ei
			}
		}
		return nil, nil
	}
	return ei, dropped
}
Example #2
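// NewSortedSet builds a SortedSet on top of a *list.Sorted of the given
// initial size; the second argument to list.NewSorted is false here,
// presumably to disallow duplicate entries, matching set semantics.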
func NewSortedSet(initialSize int) *SortedSet {
	return &SortedSet{*list.NewSorted(initialSize, false)}
}
Example #3
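// TestSortedCast is a compile-time check that *list.Sorted satisfies the
// types.Hashable and types.OrderedList interfaces.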
func TestSortedCast(t *testing.T) {
	s := list.NewSorted(17, false)
	_ = types.Hashable(s)
	_ = types.OrderedList(s)
}
Example #4
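// main parses the command-line options, loads the input graph alongside a
// B+tree-backed node-attribute store, runs mine.RandomWalk over it, writes the
// sampled maximal patterns to the output directory, and, when --probabilities
// is given, also writes the per-pattern matrices computed by m.PrMatrices.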
func main() {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		"hs:m:o:c:",
		[]string{
			"help",
			"support=",
			"cache=",
			"min-vertices=",
			"sample-size=",
			"mem-profile=",
			"cpu-profile=",
			"output=",
			"probabilities",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		Usage(ErrorCodes["opts"])
	}
	log.Printf("Number of goroutines = %v", runtime.NumGoroutine())

	support := -1
	minVertices := -1
	sampleSize := -1
	memProfile := ""
	cpuProfile := ""
	outputDir := ""
	cache := ""
	computePrs := false
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			Usage(0)
		case "-o", "--output":
			outputDir = EmptyDir(AssertDir(oa.Arg()))
		case "-s", "--support":
			support = ParseInt(oa.Arg())
		case "-m", "--min-vertices":
			minVertices = ParseInt(oa.Arg())
		case "-c", "--cache":
			cache = AssertDir(oa.Arg())
		case "--probabilities":
			computePrs = true
		case "--sample-size":
			sampleSize = ParseInt(oa.Arg())
		case "--mem-profile":
			memProfile = AssertFile(oa.Arg())
		case "--cpu-profile":
			cpuProfile = AssertFile(oa.Arg())
		}
	}

	if support < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a support greater than 0, you gave %v\n", support)
		Usage(ErrorCodes["opts"])
	}

	if sampleSize < 1 {
		fmt.Fprintf(os.Stderr, "You must supply a sample-size greater than 0, you gave %v\n", sampleSize)
		Usage(ErrorCodes["opts"])
	}

	if outputDir == "" {
		fmt.Fprintf(os.Stderr, "You must supply an output file (use -o)\n")
		Usage(ErrorCodes["opts"])
	}

	if cache == "" {
		fmt.Fprintln(os.Stderr, "you must supply a --cache=<dir>")
		Usage(ErrorCodes["opts"])
	}

	if len(args) != 1 {
		fmt.Fprintln(os.Stderr, "Expected a path to the graph file")
		Usage(ErrorCodes["opts"])
	}

	getReader := func() (io.Reader, func()) { return Input(args[0]) }

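	// Optional pprof CPU profiling; the memory-profile file, if requested, is
	// opened here and handed to the miner below.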
	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		err = pprof.StartCPUProfile(f)
		if err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	var memProfFile io.WriteCloser
	if memProfile != "" {
		f, err := os.Create(memProfile)
		if err != nil {
			log.Fatal(err)
		}
		memProfFile = f
		defer f.Close()
	}

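	// Node attributes are kept in a B+tree stored in a block file under the
	// output directory and passed to graph.LoadGraph.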
	nodePath := path.Join(outputDir, "node-attrs.bptree")

	nodeBf, err := fmap.CreateBlockFile(nodePath)
	if err != nil {
		log.Fatal(err)
	}
	defer nodeBf.Close()
	nodeAttrs, err := bptree.New(nodeBf, 4, -1)
	if err != nil {
		log.Fatal(err)
	}

	G, err := graph.LoadGraph(getReader, "", nodeAttrs, nil)
	if err != nil {
		log.Println("Error loading the graph")
		log.Panic(err)
	}
	log.Print("Loaded graph, about to start mining")

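	// Factory functions for the miner's on-disk stores: each call creates a
	// fresh, uniquely named fs2 B+tree file under the cache directory.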
	sgCount := 0
	sgMaker := func() store.SubGraphs {
		name := fmt.Sprintf("subgraphs-%d.b+tree", sgCount)
		sgCount++
		path := path.Join(cache, name)
		s := store.NewFs2BpTree(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	idxCount := 0
	idxMaker := func() store.UniqueIndex {
		name := fmt.Sprintf("unique-idx-%d.b+tree", idxCount)
		idxCount++
		path := path.Join(cache, name)
		s := store.NewFs2UniqueIndex(G, path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	setsCount := 0
	setsMaker := func() store.SetsMap {
		name := fmt.Sprintf("sets-%d.b+tree", setsCount)
		setsCount++
		path := path.Join(cache, name)
		s := store.NewFs2Sets(path)
		// os.Remove(path)
		// s, err := store.NewSqlite(G, path)
		// if err != nil {
		// 	log.Panic(err)
		// }
		return s
	}

	// memFsMaker := func() store.SubGraphs {
	// 	return store.AnonFs2BpTree(G)
	// }

	m := mine.RandomWalk(
		G,
		support,
		minVertices,
		sampleSize,
		memProfFile,
		sgMaker,
		idxMaker,
		setsMaker,
	)
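	// Tally how many times each pattern label is reported by the miner; keys
	// collects the labels in sorted order for the output passes below.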
	keys := list.NewSorted(10, false)
	counts := hashtable.NewLinearHash()
	for label := range m.Report {
		key := types.ByteSlice(label)
		count := 0
		if counts.Has(key) {
			c, err := counts.Get(key)
			if err != nil {
				log.Panic(err)
			}
			count = c.(int)
		}
		counts.Put(key, count+1)
		keys.Add(key)
	}
	log.Println("Tries", m.Tries)
	triesPath := path.Join(outputDir, "tries")
	if f, e := os.Create(triesPath); e != nil {
		log.Fatal(e)
	} else {
		fmt.Fprintln(f, m.Tries)
		f.Close()
	}
	{
		log.Println("Finished mining! Writing output...")
		keyCh := make(chan []byte)
		go func() {
			for k, next := keys.Items()(); next != nil; k, next = next() {
				keyCh <- []byte(k.(types.ByteSlice))
			}
			close(keyCh)
		}()
		writeMaximalPatterns(keyCh, m.AllEmbeddings, nodeAttrs, outputDir)
	}

	if !computePrs {
		log.Println("Done!")
		return
	}

	log.Println("Finished writing patterns. Computing probabilities...")
	count := 0
	for k, next := keys.Items()(); next != nil; k, next = next() {
		patDir := path.Join(outputDir, fmt.Sprintf("%d", count))
		log.Println("-----------------------------------")
		c, err := counts.Get(k)
		if err != nil {
			log.Fatal(err)
		}
		key := []byte(k.(types.ByteSlice))
		dupCount := c.(int)
		// if max.Count(key) < support {
		// 	log.Println("wat not enough subgraphs", max.Count(key))
		// 	continue
		// }
		if c, err := os.Create(path.Join(patDir, "duplicates")); err != nil {
			log.Fatal(err)
		} else {
			fmt.Fprintln(c, dupCount)
			c.Close()
		}
		for _, sg, next := m.AllEmbeddings.Find(key)(); next != nil; _, sg, next = next() {
			vp, Q, R, u, err := m.PrMatrices(sg)
			if err != nil {
				log.Println(err)
				errPath := path.Join(patDir, "error")
				if f, e := os.Create(errPath); e != nil {
					log.Fatal(e)
				} else {
					fmt.Fprintln(f, err)
					f.Close()
				}
			} else {
				bytes, err := json.Marshal(map[string]interface{}{
					"Q":              Q,
					"R":              R,
					"u":              u,
					"startingPoints": vp,
				})
				if err != nil {
					log.Fatal(err)
				}
				matPath := path.Join(patDir, "matrices.json")
				if m, err := os.Create(matPath); err != nil {
					log.Fatal(err)
				} else {
					_, err := m.Write(bytes)
					if err != nil {
						m.Close()
						log.Fatal(err)
					}
					m.Close()
				}
			}
			break
		}
		count++
	}
	log.Println("Done!")
}