Beispiel #1
0
// run is the entry point of the embedding-counting command.
//
// It parses the command-line options, collects the patterns (either from
// repeated -p/--pattern flags or from a -n/--names file, but not both), loads
// one graph per data-type specification found in the remaining arguments, and
// then, for every graph/pattern pair, parses the pattern, counts its
// embeddings in the graph, logs the count, and prints sg.Dotty output on
// stdout.
//
// The return value is the process exit code: 0 on success, 1 when a names
// file, a graph, or a pattern cannot be loaded. Option errors are reported
// through cmd.Usage.
// NOTE(review): cmd.Usage is assumed to terminate the process — confirm.
func run() int {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		// -h is a plain switch (no trailing ':'), mirroring the long form
		// "help" which takes no value; -p and -n require an argument.
		"hp:n:",
		[]string{
			"help",
			"pattern=",
			"cpu-profile=",
			"names=",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		cmd.Usage(cmd.ErrorCodes["opts"])
	}

	patterns := make([]string, 0, 10)
	namesPath := ""
	cpuProfile := ""
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			cmd.Usage(0)
		case "-p", "--pattern":
			patterns = append(patterns, oa.Arg())
		case "-n", "--names":
			namesPath = cmd.AssertFileExists(oa.Arg())
		case "--cpu-profile":
			cpuProfile = cmd.AssertFile(oa.Arg())
		default:
			fmt.Fprintf(os.Stderr, "Unknown flag '%v'\n", oa.Opt())
			cmd.Usage(cmd.ErrorCodes["opts"])
		}
	}

	// Exactly one pattern source must be given.
	if namesPath != "" && len(patterns) > 0 {
		fmt.Fprintf(os.Stderr, "You cannot supply patterns with both (-p) and (-n)\n")
		cmd.Usage(cmd.ErrorCodes["opts"])
	}
	if len(patterns) == 0 && namesPath == "" {
		fmt.Fprintf(os.Stderr, "You must supply a pattern (-p, -n)\n")
		cmd.Usage(cmd.ErrorCodes["opts"])
	}

	if namesPath != "" {
		// A distinct name avoids shadowing the option-parsing err above.
		var loadErr error
		patterns, _, loadErr = loadNames(namesPath)
		if loadErr != nil {
			fmt.Fprintf(os.Stderr, "There was error loading the names file\n")
			fmt.Fprintf(os.Stderr, "%v\n", loadErr)
			return 1
		}
	}

	conf := &config.Config{}

	// Each call to cmd.ParseType consumes part of args and yields a loader
	// for one data set; keep going until every argument has been consumed.
	graphs := make([]*digraph.Digraph, 0, 10)
	for len(args) > 0 {
		loadDt, rest := cmd.ParseType(args, conf)
		args = rest
		dt, _ := loadDt(nil)
		// Only digraph data types are supported here; report anything else
		// instead of panicking on the type assertion.
		graph, ok := dt.(*digraph.Digraph)
		if !ok {
			fmt.Fprintf(os.Stderr, "Expected a digraph data type, got %T\n", dt)
			return 1
		}
		graphs = append(graphs, graph)
	}

	// Start profiling only after all inputs are loaded; the function returned
	// by cmd.CPUProfile is deferred so it runs when run returns.
	if cpuProfile != "" {
		defer cmd.CPUProfile(cpuProfile)()
	}

	errors.Logf("INFO", "looking for embeddings")
	for _, graph := range graphs {
		for _, pattern := range patterns {
			sg, err := subgraph.ParsePretty(pattern, graph.Labels)
			if err != nil {
				fmt.Fprintf(os.Stderr, "There was error during the parsing the pattern '%v'\n", pattern)
				fmt.Fprintf(os.Stderr, "%v\n", err)
				return 1
			}
			// Round-trip check: the parsed subgraph must print back to the
			// exact pattern text that was supplied.
			pretty := sg.Pretty(graph.Labels)
			if pretty != pattern {
				errors.Logf("ERROR", "bad load of pattern")
				errors.Logf("ERROR", "expected %v", pattern)
				errors.Logf("ERROR", "got      %v", pretty)
				return 1
			}
			errors.Logf("INFO", "cur sg: %v", pretty)
			ei, _ := sg.IterEmbeddings(subgraph.MostConnected, graph.Indices, nil, nil, nil)
			// The iterator hands back its own continuation; a nil
			// continuation marks the end. Only the count is needed.
			count := 0
			for _, next := ei(false); next != nil; _, next = next(false) {
				count++
			}
			errors.Logf("EMB", "total embs: %v", count)
			fmt.Println(sg.Dotty(graph.Labels, nil, nil))
		}
	}

	return 0
}
Beispiel #2
0
// run is the entry point of the match-estimation command.
//
// It parses the command-line options, obtains the patterns from exactly one
// source (repeated -p/--pattern flags, a --names file, or a --probabilities
// file), loads one graph per data-type specification in the remaining
// arguments, and for every graph/pattern pair calls sg.EstimateMatch and
// accumulates the match (scaled by the pattern's edge count) and the edge
// count. Sample totals and averages are printed on stdout as ", value, label"
// rows. When selection probabilities are available, the estimated population
// totals, means, variances, standard deviations and intervals computed by the
// est*/samplingPrs helpers are printed as well. With -v/--visualize, the
// output of VisualizeEmbedding for each pattern is written to the given file.
//
// The return value is the process exit code: 0 on success, 1 when an input
// cannot be loaded, a pattern cannot be parsed, matching fails, or the
// visualization file cannot be created or written. Option errors are reported
// through cmd.Usage.
// NOTE(review): cmd.Usage is assumed to terminate the process — confirm.
func run() int {
	args, optargs, err := getopt.GetOpt(
		os.Args[1:],
		// -h is a plain switch (no trailing ':'), mirroring the long form
		// "help" which takes no value; -p and -v require an argument.
		"hp:v:",
		[]string{
			"help",
			"pattern=",
			"cpu-profile=",
			"visualize=",
			"probabilities=",
			"samples=",
			"names=",
		},
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		cmd.Usage(cmd.ErrorCodes["opts"])
	}

	visual := ""
	patterns := make([]string, 0, 10)
	prPath := ""
	namesPath := ""
	cpuProfile := ""
	samples := -1 // -1 means "not supplied"; resolved after the inputs are loaded.
	for _, oa := range optargs {
		switch oa.Opt() {
		case "-h", "--help":
			cmd.Usage(0)
		case "-p", "--pattern":
			patterns = append(patterns, oa.Arg())
		case "--probabilities":
			prPath = cmd.AssertFileExists(oa.Arg())
		case "--names":
			namesPath = cmd.AssertFileExists(oa.Arg())
		case "--samples":
			samples = cmd.ParseInt(oa.Arg())
		case "--cpu-profile":
			cpuProfile = cmd.AssertFile(oa.Arg())
		// Must match the long option registered above ("visualize=").
		case "-v", "--visualize":
			visual = cmd.AssertFile(oa.Arg())
		default:
			fmt.Fprintf(os.Stderr, "Unknown flag '%v'\n", oa.Opt())
			cmd.Usage(cmd.ErrorCodes["opts"])
		}
	}

	// Exactly one pattern source must be given.
	if (prPath != "" || namesPath != "") && len(patterns) > 0 {
		fmt.Fprintf(os.Stderr, "You cannot supply patterns with both (-p) and (--names, --probabilities)\n")
		cmd.Usage(cmd.ErrorCodes["opts"])
	}
	if len(patterns) == 0 && prPath == "" && namesPath == "" {
		fmt.Fprintf(os.Stderr, "You must supply a pattern (-p, --names, --probabilities)\n")
		cmd.Usage(cmd.ErrorCodes["opts"])
	}

	patternCount := len(patterns)
	var prs []float64
	// --probabilities takes precedence over --names when both are given.
	switch {
	case prPath != "":
		var loadErr error
		prs, patterns, patternCount, loadErr = loadProbabilities(prPath)
		if loadErr != nil {
			fmt.Fprintf(os.Stderr, "There was error loading the probability file\n")
			fmt.Fprintf(os.Stderr, "%v\n", loadErr)
			return 1
		}
	case namesPath != "":
		var loadErr error
		prs, patterns, patternCount, loadErr = loadNames(namesPath)
		if loadErr != nil {
			fmt.Fprintf(os.Stderr, "There was error loading the names file\n")
			fmt.Fprintf(os.Stderr, "%v\n", loadErr)
			return 1
		}
	}

	// If --samples was omitted (or is smaller than the number of loaded
	// probabilities), fall back to the pattern count reported by the loader.
	if samples < len(prs) {
		errors.Logf("INFO", "assuming # of samples is the total number of patterns supplied: %v", patternCount)
		samples = patternCount
	}

	conf := &config.Config{}

	// Each call to cmd.ParseType consumes part of args and yields a loader
	// for one data set; keep going until every argument has been consumed.
	graphs := make([]*digraph.Digraph, 0, 10)
	for len(args) > 0 {
		loadDt, rest := cmd.ParseType(args, conf)
		args = rest
		dt, _ := loadDt(nil)
		// Only digraph data types are supported here; report anything else
		// instead of panicking on the type assertion.
		graph, ok := dt.(*digraph.Digraph)
		if !ok {
			fmt.Fprintf(os.Stderr, "Expected a digraph data type, got %T\n", dt)
			return 1
		}
		graphs = append(graphs, graph)
	}

	// Start profiling only after all inputs are loaded; the function returned
	// by cmd.CPUProfile is deferred so it runs when run returns.
	if cpuProfile != "" {
		defer cmd.CPUProfile(cpuProfile)()
	}

	// visualize stays nil unless a visualization output file was requested.
	var visualize io.Writer
	if visual != "" {
		f, err := os.Create(visual)
		if err != nil {
			fmt.Fprintf(os.Stderr, "There was error opening the visualization output file\n")
			fmt.Fprintf(os.Stderr, "%v\n", err)
			return 1
		}
		defer f.Close()
		visualize = f
	}

	matches := make([]float64, 0, len(patterns))
	sgEdges := make([]float64, 0, len(patterns))
	total := 0.0
	totalEdges := 0.0
	for _, graph := range graphs {
		for _, pattern := range patterns {
			sg, err := subgraph.ParsePretty(pattern, graph.Labels)
			if err != nil {
				fmt.Fprintf(os.Stderr, "There was error during the parsing the pattern '%v'\n", pattern)
				fmt.Fprintf(os.Stderr, "%v\n", err)
				return 1
			}
			match, csg, err := sg.EstimateMatch(graph.Indices)
			// Check the error before touching match or csg.
			if err != nil {
				errors.Logf("ERROR", "%v", err)
				return 1
			}
			// Scale the returned match by the pattern's edge count.
			// NOTE(review): this reads as "fraction matched -> matched
			// edges"; confirm against EstimateMatch's contract.
			edges := float64(len(sg.E))
			match *= edges
			matches = append(matches, match)
			sgEdges = append(sgEdges, edges)
			total += match
			totalEdges += edges
			if visualize != nil {
				dotty, err := csg.VisualizeEmbedding(sg.AsIndices(), graph.Labels)
				if err != nil {
					fmt.Fprintf(os.Stderr, "There was error visualizing the embedding '%v'\n", csg)
					fmt.Fprintf(os.Stderr, "%v\n", err)
					return 1
				}
				if _, err := fmt.Fprintln(visualize, dotty); err != nil {
					fmt.Fprintf(os.Stderr, "There was error writing the visualization output file\n")
					fmt.Fprintf(os.Stderr, "%v\n", err)
					return 1
				}
			}
		}
	}

	// Raw sample statistics. Rows start with an empty first column.
	nPatterns := float64(len(patterns))
	errors.Logf("DEBUG", "prs %v", sum(prs))
	fmt.Printf(", %v, sample total covered edges\n", total)
	fmt.Printf(", %v, sample total edges\n", totalEdges)
	fmt.Printf(", %v, sample covered/total\n", total/totalEdges)
	fmt.Printf(", %v, sample avg covered\n", total/nPatterns)
	fmt.Printf(", %v, sample avg edges\n", totalEdges/nPatterns)

	// Population estimates are only possible when selection probabilities
	// were loaded alongside the patterns.
	if len(prs) > 0 {
		pis := samplingPrs(samples, prs)
		jpis := jointSamplingPrs(samples, prs, pis)
		estN := estPopSize(pis)
		estTotalMatch := estPopTotal(pis, matches)
		estVarTotalMatch := estVarTotal(pis, jpis, matches)
		estTotalEdges := estPopTotal(pis, sgEdges)
		estVarTotalEdges := estVarTotal(pis, jpis, sgEdges)

		fmt.Printf("\n")
		fmt.Printf(", %v, estimated population total of matched edges\n", estTotalMatch)
		fmt.Printf(", %v, estimated population total of total edges\n", estTotalEdges)
		fmt.Printf(", %v, estimated var population total of match edges\n", estVarTotalMatch)
		fmt.Printf(", %v, estimated var population total of total edges\n", estVarTotalEdges)
		fmt.Printf(", %v, estimated std population total of match edges\n", math.Sqrt(estVarTotalMatch))
		fmt.Printf(", %v, estimated std population total of total edges\n", math.Sqrt(estVarTotalEdges))
		fmt.Printf(", %v, estimated population mean\n", estTotalMatch/estTotalEdges)

		estMeanMatch := estPopMean(estTotalMatch, estN)
		estMeanEdges := estPopMean(estTotalEdges, estN)
		fmt.Printf("\n")
		fmt.Printf(", %v, est. mean matches\n", estMeanMatch)
		fmt.Printf(", %v, est. mean edges\n", estMeanEdges)
		fmt.Printf(", %v, est. cover\n", estMeanMatch/estMeanEdges)

		varMeanMatch := estVarMean(estN, estMeanMatch, pis, jpis, matches)
		varMeanEdges := estVarMean(estN, estMeanEdges, pis, jpis, sgEdges)
		stdMeanMatch := math.Sqrt(varMeanMatch)
		stdMeanEdges := math.Sqrt(varMeanEdges)
		fmt.Printf("\n")
		fmt.Printf(", %v, var. mean matches\n", varMeanMatch)
		fmt.Printf(", %v, var. mean edges\n", varMeanEdges)
		fmt.Printf(", %v, std. mean matches\n", stdMeanMatch)
		fmt.Printf(", %v, std. mean edges\n", stdMeanEdges)

		// NOTE(review): t_alpha_05 is defined elsewhere and samples-1 is not
		// range-checked here — confirm the table covers every sample count
		// this command can be run with.
		t := t_alpha_05[samples-1]
		matchLo, matchHi := estMeanMatch-t*stdMeanMatch, estMeanMatch+t*stdMeanMatch
		edgesLo, edgesHi := estMeanEdges-t*stdMeanEdges, estMeanEdges+t*stdMeanEdges
		fmt.Printf("\n")
		fmt.Printf(", %v - %v, interval. mean matches\n", matchLo, matchHi)
		fmt.Printf(", %v - %v, interval. mean edges\n", edgesLo, edgesHi)
		// The cover interval pairs the extreme numerators with the opposite
		// extreme denominators and is clamped to [0, 1].
		fmt.Printf(", %v - %v, interval. cover\n",
			math.Max(matchLo/edgesHi, 0.0),
			math.Min(matchHi/edgesLo, 1.0))
	}
	return 0
}