func digraphCommonAncestor(patterns []lattice.Pattern) (lattice.Pattern, error) { // construct a in memory configuration for finding common subdigraphs of all patterns conf := &config.Config{ Cache: "", Output: "", Support: len(patterns), Samples: 5, Unique: false, } wlkr := fastmax.NewWalker(conf) wlkr.Reject = false // closing the walker releases the memory defer func() { err := wlkr.Close() if err != nil { log.Panic(err) } }() maxE := int(math.MaxInt32) maxV := int(math.MaxInt32) for _, pat := range patterns { sg := pat.(*digraph.SubgraphPattern).Pat if len(sg.E) < maxE { maxE = len(sg.E) } if len(sg.V) < maxV { maxV = len(sg.V) } } // init the datatype (we are now ready to mine) dt, err := digraph.NewDigraph(conf, &digraph.Config{ MinEdges: 0, MaxEdges: maxE, MinVertices: 0, MaxVertices: maxV, Mode: digraph.GIS | digraph.OverlapPruning | digraph.ExtFromEmb, }) if err != nil { return nil, err } var labels *dg.Labels = patterns[0].(*digraph.SubgraphPattern).Dt.Labels // construct the digraph from the patterns b := dg.Build(10, 10) offset := 0 for gid, pat := range patterns { sn := pat.(*digraph.SubgraphPattern) for i := range sn.Pat.V { vid := offset + i b.AddVertex(sn.Pat.V[i].Color) err := dt.NodeAttrs.Add(int32(vid), map[string]interface{}{"gid": gid}) if err != nil { return nil, err } } for i := range sn.Pat.E { b.AddEdge(&b.V[offset+sn.Pat.E[i].Src], &b.V[offset+sn.Pat.E[i].Targ], sn.Pat.E[i].Color) } offset += len(sn.Pat.V) } // Initialize the *Digraph with the graph G being used. err = dt.Init(b, labels) if err != nil { return nil, err } // errors.Logf("DEBUG", "patterns %v %v", len(patterns), G) // create the reporter fmtr := digraph.NewFormatter(dt, nil) collector := &reporters.Collector{make([]lattice.Node, 0, 10)} uniq, err := reporters.NewUnique(conf, fmtr, collector, "") if err != nil { return nil, err } // rptr := &reporters.Chain{[]miners.Reporter{reporters.NewLog(fmtr, false, "DEBUG", "common-ancestor"), uniq}} rptr := uniq // mine err = wlkr.Mine(dt, rptr, fmtr) if err != nil { return nil, err } // extract the largest common subdigraph maxLevel := collector.Nodes[0].Pattern().Level() maxPattern := collector.Nodes[0].Pattern() for _, n := range collector.Nodes { p := n.Pattern() if p.Level() > maxLevel { maxLevel = p.Level() maxPattern = p } } errors.Logf("DEBUG", "ancestor %v", maxPattern) return maxPattern, nil }
func digraphType(argv []string, conf *config.Config) (lattice.Loader, func(lattice.DataType, lattice.PrFormatter) lattice.Formatter, []string) { args, optargs, err := getopt.GetOpt( argv, "hl:c:i:e:", []string{"help", "loader=", "count-mode=", "fully-optimistic", "overlap-pruning", "extension-pruning", "unsup-embs-pruning", "extend-from-embeddings", "extend-from-freq-edges", "no-caching", "emb-search-starting-point=", "min-edges=", "max-edges=", "min-vertices=", "max-vertices=", "include=", "exclude=", }, ) if err != nil { fmt.Fprintln(os.Stderr, err) Usage(ErrorCodes["opts"]) } loaderType := "veg" modeStr := "MNI" overlapPruning := false extensionPruning := false unsupEmbsPruning := false extendFromEmb := false extendFromEdges := false embSearchStartingPoint := subgraph.MostConnected caching := true minE := 0 maxE := int(math.MaxInt32) minV := 0 maxV := int(math.MaxInt32) includes := make([]string, 0, 10) excludes := make([]string, 0, 10) for _, oa := range optargs { switch oa.Opt() { case "-h", "--help": Usage(0) case "-l", "--loader": loaderType = oa.Arg() case "-c", "--count-mode": modeStr = oa.Arg() case "--overlap-pruning": overlapPruning = true case "--extension-pruning": extensionPruning = true case "--unsup-embs-pruning": unsupEmbsPruning = true case "--emb-search-starting-point": switch oa.Arg() { case "random-start": embSearchStartingPoint = subgraph.RandomStart case "most-connected": embSearchStartingPoint = subgraph.MostConnected case "least-connected": embSearchStartingPoint = subgraph.LeastConnected case "most-frequent": embSearchStartingPoint = subgraph.MostFrequent case "least-frequent": embSearchStartingPoint = subgraph.LeastFrequent case "most-extensions": embSearchStartingPoint = subgraph.MostExtensions case "fewest-extensions": embSearchStartingPoint = subgraph.FewestExtensions case "lowest-cardinality": embSearchStartingPoint = subgraph.LowestCardinality case "highest-cardinality": embSearchStartingPoint = subgraph.HighestCardinality default: fmt.Fprintf(os.Stderr, "unknown mode for --emb-search-starting-point %v", oa.Arg()) fmt.Fprintln(os.Stderr, "valid modes: random-start, (most|least)-connected, (most|least)-frequent") fmt.Fprintln(os.Stderr, " (most|fewest)-extensions, (lowest|highest)-cardinality") Usage(ErrorCodes["opts"]) } case "--no-caching": caching = false case "--min-edges": minE = ParseInt(oa.Arg()) case "--max-edges": maxE = ParseInt(oa.Arg()) case "--min-vertices": minV = ParseInt(oa.Arg()) case "--max-vertices": maxV = ParseInt(oa.Arg()) case "--extend-from-embeddings": extendFromEmb = true case "--extend-from-freq-edges": extendFromEdges = true case "-i", "--include": includes = append(includes, "("+AssertRegex(oa.Arg())+")") case "-e", "--exclude": excludes = append(excludes, "("+AssertRegex(oa.Arg())+")") default: fmt.Fprintf(os.Stderr, "Unknown flag '%v'\n", oa.Opt()) Usage(ErrorCodes["opts"]) } } var mode digraph.Mode if extendFromEmb && extendFromEdges { fmt.Fprintf(os.Stderr, "Cannot have both --extend-from-embeddings and --extend-from-freq-edges\n") Usage(ErrorCodes["opts"]) } else if extendFromEmb { mode |= digraph.ExtFromEmb } else if extendFromEdges { mode |= digraph.ExtFromFreqEdges } else { mode |= digraph.ExtFromEmb } switch modeStr { case "MNI": mode |= digraph.MNI case "FIS": mode |= digraph.FIS case "GIS": mode |= digraph.GIS default: fmt.Fprintf(os.Stderr, "Unknown support mode '%v'\n", modeStr) fmt.Fprintf(os.Stderr, "support modes: MNI (min-image support), FIS (fully independent subgraphs)\n") fmt.Fprintf(os.Stderr, " GIS (greedy independent subgraphs)\n") Usage(ErrorCodes["opts"]) } if overlapPruning { mode |= digraph.OverlapPruning } if extensionPruning { mode |= digraph.ExtensionPruning } if unsupEmbsPruning { mode |= digraph.EmbeddingPruning } if caching { mode |= digraph.Caching } var include *regexp.Regexp = nil var exclude *regexp.Regexp = nil if len(includes) > 0 { include = regexp.MustCompile(strings.Join(includes, "|")) errors.Logf("INFO", "including labels matching '%v'", include) } if len(excludes) > 0 { exclude = regexp.MustCompile(strings.Join(excludes, "|")) errors.Logf("INFO", "excluding labels matching '%v'", exclude) } dc := &digraph.Config{ MinEdges: minE, MaxEdges: maxE, MinVertices: minV, MaxVertices: maxV, Mode: mode, Include: include, Exclude: exclude, EmbSearchStartPoint: embSearchStartingPoint, } var loader lattice.Loader switch loaderType { case "veg": loader, err = digraph.NewVegLoader(conf, dc) case "dot": loader, err = digraph.NewDotLoader(conf, dc) default: fmt.Fprintf(os.Stderr, "Unknown itemset loader '%v'\n", loaderType) Usage(ErrorCodes["opts"]) } if err != nil { log.Panic(err) } fmtr := func(dt lattice.DataType, prfmt lattice.PrFormatter) lattice.Formatter { g := dt.(*digraph.Digraph) return digraph.NewFormatter(g, prfmt) } return loader, fmtr, args }