// helper function to collect popularity results and merge them into bins of given metric // with the help of updateDict function. // return rdict which is a dictionary of bins and corresponding dataset names func popdb2Bins(metric string, bins []int, records []Record, siteName string, tstamps []string) BinRecord { var popdbNames []string rdict := make(BinRecord) for _, bin := range bins { rdict[bin] = []string{} // init all bins } recType := "dataset" // type of record we'll process for idx, rec := range records { // loop over popularity records mval := int(rec[metric].(float64)) name := rec["name"].(string) if idx == 0 && strings.Contains(name, "#") { recType = "block" } popdbNames = append(popdbNames, name) updateDict(bins, rdict, mval, name) } // loop over site content and collect zero bin for given metric siteNames := siteContent(siteName, tstamps[0], recType) var zeroMetricNames []string for _, name := range siteNames { if !utils.InList(name, popdbNames) { zeroMetricNames = append(zeroMetricNames, name) } } rdict[0] = zeroMetricNames // fetch old datasets, those who are in zero bin but their creation time // is older then interval we're intersting. thr := float64(utils.UnixTime(tstamps[0])) olds := oldDatasets(rdict[0].([]string), thr) rdict[-1] = olds newd := utils.Substruct(rdict[0].([]string), rdict[-1].([]string)) if utils.VERBOSE > 0 { fmt.Println("Bin-zero division, bin0-old", len(olds), "bin0-new", len(newd)) } rdict[0] = newd // make sure that we have unique list of datasets in every bin allbins := []int{-1, 0} for _, bin := range bins { allbins = append(allbins, bin) } for _, bin := range allbins { arr := rdict[bin].([]string) rdict[bin] = utils.List2Set(arr) val := rdict[bin] fmt.Println(siteName, "bin ", bin, " contains ", len(val.([]string))) } return rdict }
// exported function which process user request func Process(metric, siteName, tstamp, tier, breakdown, binValues, format string) { startTime := time.Now() utils.TestEnv() utils.TestMetric(metric) utils.TestBreakdown(breakdown) tiers := dataTiers() if tier != "" && !utils.InList(tier, tiers) { msg := fmt.Sprintf("Wrong data tier '%s'", tier) fmt.Println(msg) os.Exit(-1) } if PBRDB != "" { // we got PBR name, open DB db, err := sql.Open("sqlite3", PBRDB) if err != nil { panic(err) } defer db.Close() // db.SetMaxIdleConns(100) err = db.Ping() if err != nil { panic(err) } PDB = db var pbr PBR PBRMAP = pbr.Map() if utils.VERBOSE > 0 { fmt.Println("Loaded PBRMAP", len(PBRMAP), "items") } } sites := siteNames(siteName) bins := utils.Bins(binValues) tstamps := utils.TimeStamps(tstamp) if utils.VERBOSE > 0 { fmt.Printf("Site: %s, sites %v, tstamp %s, interval %v\n", siteName, sites, tstamp, tstamps) } ch := make(chan Record) for _, siteName := range sites { go process(metric, siteName, tstamps, tier, breakdown, bins, ch) } // collect results var out []Record for { select { case r := <-ch: out = append(out, r) default: time.Sleep(time.Duration(10) * time.Millisecond) // wait for response } if len(out) == len(sites) { break } } if format == "json" { var records []Record for _, rec := range out { nrec := make(Record) for site, sdict := range rec { nrow := make(map[string]interface{}) for key, val := range sdict.(Record) { // key=results|breakdown, val is a dict row := make(map[string]interface{}) for kkk, vvv := range val.(BinRecord) { row[fmt.Sprintf("%d", kkk)] = vvv } nrow[key] = row } nrec[site] = nrow } records = append(records, nrec) } res, err := json.Marshal(records) if err != nil { fmt.Println("Unable to marshal json out of found results") fmt.Println(err) os.Exit(-1) } fmt.Println(string(res)) } else if format == "csv" { formatCSV(bins, out) } else { msg := fmt.Sprintf("Final results: metric %s, site, %s, time interval %s %v", metric, siteName, tstamp, tstamps) if tier != "" { msg += fmt.Sprintf(", tier %s", tier) } if breakdown != "" { msg += fmt.Sprintf(", breakdown %s", breakdown) } fmt.Println(msg) formatResults(metric, bins, out, breakdown) } if utils.PROFILE { fmt.Printf("Processed %d urls\n", utils.UrlCounter) fmt.Printf("Elapsed time %s\n", time.Since(startTime)) } }