Exemplo n.º 1
0
// digraphCommonAncestor mines the largest sub-digraph shared by the given
// patterns. It copies every pattern into a single in-memory digraph (tagging
// each vertex with the index of the pattern it came from via the "gid"
// attribute), runs the fastmax walker over that graph and returns the
// collected pattern with the highest Level().
//
// Every element of patterns must be a *digraph.SubgraphPattern. An error is
// returned when patterns is empty, when an element has a different type, when
// any setup or mining step fails, or when mining reports no pattern at all.
// A failure while closing the walker is escalated with log.Panic.
func digraphCommonAncestor(patterns []lattice.Pattern) (lattice.Pattern, error) {
	// The label table is borrowed from patterns[0], so there is nothing
	// meaningful to compute for an empty input.
	if len(patterns) == 0 {
		return nil, errors.Errorf("cannot compute the common ancestor of zero patterns")
	}

	// Resolve the concrete type of every pattern once and track the smallest
	// edge and vertex counts; a common sub-digraph cannot exceed either.
	subgraphs := make([]*digraph.SubgraphPattern, 0, len(patterns))
	minEdges := int(math.MaxInt32)
	minVertices := int(math.MaxInt32)
	for i, pat := range patterns {
		sn, ok := pat.(*digraph.SubgraphPattern)
		if !ok {
			return nil, errors.Errorf("pattern %d has type %T, expected *digraph.SubgraphPattern", i, pat)
		}
		if len(sn.Pat.E) < minEdges {
			minEdges = len(sn.Pat.E)
		}
		if len(sn.Pat.V) < minVertices {
			minVertices = len(sn.Pat.V)
		}
		subgraphs = append(subgraphs, sn)
	}

	// construct an in-memory configuration for finding common subdigraphs of
	// all patterns
	// NOTE(review): Support is set to the number of patterns, presumably so
	// that only sub-digraphs occurring in every pattern count as frequent —
	// confirm against the config/digraph packages.
	conf := &config.Config{
		Cache:   "",
		Output:  "",
		Support: len(patterns),
		Samples: 5,
		Unique:  false,
	}
	wlkr := fastmax.NewWalker(conf)
	wlkr.Reject = false

	// closing the walker releases the memory
	defer func() {
		if err := wlkr.Close(); err != nil {
			log.Panic(err)
		}
	}()

	// init the datatype (we are now ready to mine)
	dt, err := digraph.NewDigraph(conf, &digraph.Config{
		MinEdges:    0,
		MaxEdges:    minEdges,
		MinVertices: 0,
		MaxVertices: minVertices,
		Mode:        digraph.GIS | digraph.OverlapPruning | digraph.ExtFromEmb,
	})
	if err != nil {
		return nil, err
	}

	labels := subgraphs[0].Dt.Labels

	// construct the digraph from the patterns: the vertices of pattern gid
	// occupy ids [offset, offset+len(V)) in the combined graph, and its edges
	// are re-based by the same offset.
	b := dg.Build(10, 10)
	offset := 0
	for gid, sn := range subgraphs {
		for i := range sn.Pat.V {
			b.AddVertex(sn.Pat.V[i].Color)
			err := dt.NodeAttrs.Add(int32(offset+i), map[string]interface{}{"gid": gid})
			if err != nil {
				return nil, err
			}
		}
		for i := range sn.Pat.E {
			e := &sn.Pat.E[i]
			b.AddEdge(&b.V[offset+e.Src], &b.V[offset+e.Targ], e.Color)
		}
		offset += len(sn.Pat.V)
	}

	// Initialize the *Digraph with the graph being used.
	if err := dt.Init(b, labels); err != nil {
		return nil, err
	}

	// create the reporter
	fmtr := digraph.NewFormatter(dt, nil)
	collector := &reporters.Collector{Nodes: make([]lattice.Node, 0, 10)}
	uniq, err := reporters.NewUnique(conf, fmtr, collector, "")
	if err != nil {
		return nil, err
	}
	// For debugging, the reporter can be chained with a logger instead:
	// rptr := &reporters.Chain{[]miners.Reporter{reporters.NewLog(fmtr, false, "DEBUG", "common-ancestor"), uniq}}
	rptr := uniq

	// mine
	if err := wlkr.Mine(dt, rptr, fmtr); err != nil {
		return nil, err
	}

	// Nothing collected means there is no candidate to return; report it
	// rather than indexing into an empty slice.
	if len(collector.Nodes) == 0 {
		return nil, errors.Errorf("mining found no sub-digraph common to all %d patterns", len(patterns))
	}

	// extract the largest common subdigraph (first one wins on ties)
	maxPattern := collector.Nodes[0].Pattern()
	maxLevel := maxPattern.Level()
	for _, n := range collector.Nodes[1:] {
		p := n.Pattern()
		if lvl := p.Level(); lvl > maxLevel {
			maxLevel = lvl
			maxPattern = p
		}
	}
	errors.Logf("DEBUG", "ancestor %v", maxPattern)

	return maxPattern, nil
}
Exemplo n.º 2
0
Arquivo: cmd.go Projeto: timtadh/sfp
// digraphType parses the digraph-specific command line options in argv and
// builds the matching loader.
//
// It returns:
//   - the lattice.Loader selected by -l/--loader ("veg" by default, or "dot"),
//     configured from the parsed options;
//   - a factory that turns a lattice.DataType (which must be a
//     *digraph.Digraph) and a lattice.PrFormatter into a digraph formatter;
//   - the args value returned by getopt.GetOpt (presumably the arguments left
//     over after option parsing — confirm against the getopt package).
//
// Invalid options are reported on stderr followed by a call to Usage.
// NOTE(review): the code after each Usage call assumes Usage does not return
// (e.g. it exits the process) — confirm. A loader construction error is
// escalated with log.Panic.
//
// NOTE(review): "fully-optimistic" is registered as a long option but has no
// case in the switch below, so it appears to fall through to the
// "Unknown flag" branch — confirm whether it should be handled or removed.
func digraphType(argv []string, conf *config.Config) (lattice.Loader, func(lattice.DataType, lattice.PrFormatter) lattice.Formatter, []string) {
	args, optargs, err := getopt.GetOpt(
		argv,
		"hl:c:i:e:",
		[]string{"help",
			"loader=",
			"count-mode=",
			"fully-optimistic",
			"overlap-pruning",
			"extension-pruning",
			"unsup-embs-pruning",
			"extend-from-embeddings",
			"extend-from-freq-edges",
			"no-caching",
			"emb-search-starting-point=",
			"min-edges=",
			"max-edges=",
			"min-vertices=",
			"max-vertices=",
			"include=",
			"exclude=",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		Usage(ErrorCodes["opts"])
	}

	// Defaults used when the corresponding option is absent.
	loaderType := "veg"
	modeStr := "MNI"
	overlapPruning := false
	extensionPruning := false
	unsupEmbsPruning := false
	extendFromEmb := false
	extendFromEdges := false
	embSearchStartingPoint := subgraph.MostConnected
	caching := true
	minE := 0
	maxE := int(math.MaxInt32)
	minV := 0
	maxV := int(math.MaxInt32)
	var includes []string
	var excludes []string
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			Usage(0)
		case "-l", "--loader":
			loaderType = oa.Arg()
		case "-c", "--count-mode":
			modeStr = oa.Arg()
		case "--overlap-pruning":
			overlapPruning = true
		case "--extension-pruning":
			extensionPruning = true
		case "--unsup-embs-pruning":
			unsupEmbsPruning = true
		case "--emb-search-starting-point":
			switch oa.Arg() {
			case "random-start":
				embSearchStartingPoint = subgraph.RandomStart
			case "most-connected":
				embSearchStartingPoint = subgraph.MostConnected
			case "least-connected":
				embSearchStartingPoint = subgraph.LeastConnected
			case "most-frequent":
				embSearchStartingPoint = subgraph.MostFrequent
			case "least-frequent":
				embSearchStartingPoint = subgraph.LeastFrequent
			case "most-extensions":
				embSearchStartingPoint = subgraph.MostExtensions
			case "fewest-extensions":
				embSearchStartingPoint = subgraph.FewestExtensions
			case "lowest-cardinality":
				embSearchStartingPoint = subgraph.LowestCardinality
			case "highest-cardinality":
				embSearchStartingPoint = subgraph.HighestCardinality
			default:
				// The trailing newline keeps this message from running into
				// the "valid modes" line printed right after it.
				fmt.Fprintf(os.Stderr, "unknown mode for --emb-search-starting-point %v\n", oa.Arg())
				fmt.Fprintln(os.Stderr, "valid modes: random-start, (most|least)-connected, (most|least)-frequent")
				fmt.Fprintln(os.Stderr, "             (most|fewest)-extensions, (lowest|highest)-cardinality")
				Usage(ErrorCodes["opts"])
			}
		case "--no-caching":
			caching = false
		case "--min-edges":
			minE = ParseInt(oa.Arg())
		case "--max-edges":
			maxE = ParseInt(oa.Arg())
		case "--min-vertices":
			minV = ParseInt(oa.Arg())
		case "--max-vertices":
			maxV = ParseInt(oa.Arg())
		case "--extend-from-embeddings":
			extendFromEmb = true
		case "--extend-from-freq-edges":
			extendFromEdges = true
		case "-i", "--include":
			// Each pattern is parenthesised so the patterns can later be
			// joined with "|" without changing their meaning.
			includes = append(includes, "("+AssertRegex(oa.Arg())+")")
		case "-e", "--exclude":
			excludes = append(excludes, "("+AssertRegex(oa.Arg())+")")
		default:
			fmt.Fprintf(os.Stderr, "Unknown flag '%v'\n", oa.Opt())
			Usage(ErrorCodes["opts"])
		}
	}

	// Extension strategy: the two flags are mutually exclusive, and extending
	// from embeddings is both the explicit and the default choice.
	var mode digraph.Mode
	switch {
	case extendFromEmb && extendFromEdges:
		fmt.Fprintf(os.Stderr, "Cannot have both --extend-from-embeddings and --extend-from-freq-edges\n")
		Usage(ErrorCodes["opts"])
	case extendFromEdges:
		mode |= digraph.ExtFromFreqEdges
	default:
		mode |= digraph.ExtFromEmb
	}

	// Support counting mode.
	switch modeStr {
	case "MNI":
		mode |= digraph.MNI
	case "FIS":
		mode |= digraph.FIS
	case "GIS":
		mode |= digraph.GIS
	default:
		fmt.Fprintf(os.Stderr, "Unknown support mode '%v'\n", modeStr)
		fmt.Fprintf(os.Stderr, "support modes: MNI (min-image support), FIS (fully independent subgraphs)\n")
		fmt.Fprintf(os.Stderr, "               GIS (greedy independent subgraphs)\n")
		Usage(ErrorCodes["opts"])
	}

	// Optional pruning / caching flags.
	if overlapPruning {
		mode |= digraph.OverlapPruning
	}
	if extensionPruning {
		mode |= digraph.ExtensionPruning
	}
	if unsupEmbsPruning {
		mode |= digraph.EmbeddingPruning
	}
	if caching {
		mode |= digraph.Caching
	}

	// Combine all include (resp. exclude) patterns into one alternation; the
	// filters stay nil when no pattern was supplied.
	var include, exclude *regexp.Regexp
	if len(includes) > 0 {
		include = regexp.MustCompile(strings.Join(includes, "|"))
		errors.Logf("INFO", "including labels matching '%v'", include)
	}
	if len(excludes) > 0 {
		exclude = regexp.MustCompile(strings.Join(excludes, "|"))
		errors.Logf("INFO", "excluding labels matching '%v'", exclude)
	}

	dc := &digraph.Config{
		MinEdges:            minE,
		MaxEdges:            maxE,
		MinVertices:         minV,
		MaxVertices:         maxV,
		Mode:                mode,
		Include:             include,
		Exclude:             exclude,
		EmbSearchStartPoint: embSearchStartingPoint,
	}

	var loader lattice.Loader
	switch loaderType {
	case "veg":
		loader, err = digraph.NewVegLoader(conf, dc)
	case "dot":
		loader, err = digraph.NewDotLoader(conf, dc)
	default:
		fmt.Fprintf(os.Stderr, "Unknown digraph loader '%v'\n", loaderType)
		Usage(ErrorCodes["opts"])
	}
	if err != nil {
		log.Panic(err)
	}

	// The formatter factory panics if dt is not a *digraph.Digraph.
	fmtr := func(dt lattice.DataType, prfmt lattice.PrFormatter) lattice.Formatter {
		return digraph.NewFormatter(dt.(*digraph.Digraph), prfmt)
	}
	return loader, fmtr, args
}