Beispiel #1
0
// popdb2Bins collects popularity records and merges them into bins of the
// given metric with the help of updateDict.
//
// The returned BinRecord maps a bin value to a de-duplicated list of names.
// Besides the requested bins, two extra bins are always filled:
//
//	 0 - names present in the site content but absent from the popularity
//	     records, excluding the ones moved to bin -1;
//	-1 - the subset of those names returned by oldDatasets for the start of
//	     the time interval.
//
// tstamps must be non-empty: tstamps[0] is used as the interval start.
// Every record must carry metric as a float64 and "name" as a string; the
// type assertions are unchecked and panic otherwise.
func popdb2Bins(metric string, bins []int, records []Record, siteName string, tstamps []string) BinRecord {
	rdict := make(BinRecord)
	for _, bin := range bins {
		rdict[bin] = []string{} // init all bins
	}

	// Names seen in the popularity records. Kept as a set so the zero-bin
	// pass below is a constant-time lookup per site name instead of a linear
	// scan over all records.
	popdbNames := make(map[string]struct{}, len(records))
	recType := "dataset" // type of record we'll process
	for idx, rec := range records {
		mval := int(rec[metric].(float64))
		name := rec["name"].(string)
		// Block names contain '#'; the first record decides the type
		// requested from the site content below.
		if idx == 0 && strings.Contains(name, "#") {
			recType = "block"
		}
		popdbNames[name] = struct{}{}
		updateDict(bins, rdict, mval, name)
	}

	// Loop over site content and collect the zero bin for the given metric:
	// everything on site which has no popularity record.
	var zeroMetricNames []string
	for _, name := range siteContent(siteName, tstamps[0], recType) {
		if _, seen := popdbNames[name]; !seen {
			zeroMetricNames = append(zeroMetricNames, name)
		}
	}

	// Split the zero bin: names reported by oldDatasets for the interval
	// start go to bin -1, the remainder stays in bin 0.
	thr := float64(utils.UnixTime(tstamps[0]))
	olds := oldDatasets(zeroMetricNames, thr)
	newd := utils.Substruct(zeroMetricNames, olds)
	if utils.VERBOSE > 0 {
		fmt.Println("Bin-zero division, bin0-old", len(olds), "bin0-new", len(newd))
	}
	rdict[-1] = olds
	rdict[0] = newd

	// Make sure that we have a unique list of names in every bin.
	allbins := make([]int, 0, len(bins)+2)
	allbins = append(allbins, -1, 0)
	allbins = append(allbins, bins...)
	for _, bin := range allbins {
		rdict[bin] = utils.List2Set(rdict[bin].([]string))
		fmt.Println(siteName, "bin ", bin, " contains ", len(rdict[bin].([]string)))
	}
	return rdict
}
Beispiel #2
0
// Process handles a single user request and prints the outcome to stdout.
//
// It calls the utils.Test* checks for the environment, metric and breakdown,
// and exits with status -1 when tier is non-empty and not among dataTiers().
// When PBRDB is set, the sqlite3 database is opened, stored in PDB and used
// to load PBRMAP. One goroutine per site returned by siteNames(siteName) is
// started, and one Record per site is collected from them.
//
// format selects the output: "json" prints the marshalled records, "csv"
// delegates to formatCSV, anything else prints a summary line followed by
// formatResults. When utils.PROFILE is set, the URL counter and the elapsed
// time are printed as well.
func Process(metric, siteName, tstamp, tier, breakdown, binValues, format string) {
	startTime := time.Now()
	utils.TestEnv()
	utils.TestMetric(metric)
	utils.TestBreakdown(breakdown)
	tiers := dataTiers()
	if tier != "" && !utils.InList(tier, tiers) {
		msg := fmt.Sprintf("Wrong data tier '%s'", tier)
		fmt.Println(msg)
		os.Exit(-1)
	}
	if PBRDB != "" { // we got PBR name, open DB
		db, err := sql.Open("sqlite3", PBRDB)
		if err != nil {
			panic(err)
		}
		// NOTE: the handle is closed when Process returns, while PDB keeps
		// referring to it; the os.Exit paths below skip this deferred call.
		defer db.Close()
		// db.SetMaxIdleConns(100)
		if err = db.Ping(); err != nil {
			panic(err)
		}
		PDB = db
		var pbr PBR
		PBRMAP = pbr.Map()
		if utils.VERBOSE > 0 {
			fmt.Println("Loaded PBRMAP", len(PBRMAP), "items")
		}
	}
	sites := siteNames(siteName)
	bins := utils.Bins(binValues)
	tstamps := utils.TimeStamps(tstamp)
	if utils.VERBOSE > 0 {
		fmt.Printf("Site: %s, sites %v, tstamp %s, interval %v\n", siteName, sites, tstamp, tstamps)
	}

	// Fan out: one worker per site, all reporting on the same channel.
	ch := make(chan Record)
	for _, site := range sites {
		go process(metric, site, tstamps, tier, breakdown, bins, ch)
	}
	// Collect exactly one result per site. A blocking receive wakes up as
	// soon as a worker sends, so no sleep/poll loop is needed.
	var out []Record
	for range sites {
		out = append(out, <-ch)
	}

	switch format {
	case "json":
		// BinRecord is keyed by int, so every bin is re-keyed by its decimal
		// string form before the records are marshalled.
		var records []Record
		for _, rec := range out {
			nrec := make(Record)
			for site, sdict := range rec {
				nrow := make(map[string]interface{})
				for key, val := range sdict.(Record) { // key=results|breakdown, val is a dict
					row := make(map[string]interface{})
					for bin, names := range val.(BinRecord) {
						row[fmt.Sprintf("%d", bin)] = names
					}
					nrow[key] = row
				}
				nrec[site] = nrow
			}
			records = append(records, nrec)
		}
		res, err := json.Marshal(records)
		if err != nil {
			fmt.Println("Unable to marshal json out of found results")
			fmt.Println(err)
			os.Exit(-1)
		}
		fmt.Println(string(res))
	case "csv":
		formatCSV(bins, out)
	default:
		msg := fmt.Sprintf("Final results: metric %s, site, %s, time interval %s %v", metric, siteName, tstamp, tstamps)
		if tier != "" {
			msg += fmt.Sprintf(", tier %s", tier)
		}
		if breakdown != "" {
			msg += fmt.Sprintf(", breakdown %s", breakdown)
		}
		fmt.Println(msg)
		formatResults(metric, bins, out, breakdown)
	}
	if utils.PROFILE {
		fmt.Printf("Processed %d urls\n", utils.UrlCounter)
		fmt.Printf("Elapsed time %s\n", time.Since(startTime))
	}
}