// helper function to find all dataset at a given tier-site func datasetsDictAtSite(siteName, tstamp string) Record { rdict := make(Record) api := "blockreplicas" furl := fmt.Sprintf("%s/%s?node=%s&create_since=%d", phedexUrl(), api, siteName, utils.UnixTime(tstamp)) if utils.VERBOSE > 1 { fmt.Println("furl", furl) } if strings.HasPrefix(siteName, "T1_") && !strings.HasSuffix(siteName, "_Disk") { siteName += "_Disk" } response := utils.FetchResponse(furl, "") if response.Error == nil { records := loadPhedexData(furl, response.Data) for _, rec := range records { val := rec["phedex"].(map[string]interface{}) blocks := val["block"].([]interface{}) for _, item := range blocks { brec := item.(map[string]interface{}) dataset := strings.Split(brec["name"].(string), "#")[0] bytes := brec["bytes"].(float64) val, ok := rdict[dataset] if ok { rdict[dataset] = bytes + val.(float64) } else { rdict[dataset] = bytes } } } } return rdict }
// helper function to get site content. Return either list of blocks or datasets on site. func siteContent(siteName, tstamp, recType string) []string { api := "blockreplicasummary" if strings.HasPrefix(siteName, "T1_") && !strings.HasSuffix(siteName, "_Disk") { siteName += "_Disk" } furl := fmt.Sprintf("%s/%s?node=%s&create_since=%d", phedexUrl(), api, siteName, utils.UnixTime(tstamp)) if utils.VERBOSE > 1 { fmt.Println("furl", furl) } response := utils.FetchResponse(furl, "") // use a map to collect dataset names as keys ddict := make(Record) if response.Error == nil { records := loadPhedexData(furl, response.Data) for _, rec := range records { val := rec["phedex"].(map[string]interface{}) blocks := val["block"].([]interface{}) for _, item := range blocks { brec := item.(map[string]interface{}) blk := brec["name"].(string) if recType == "block" { ddict[blk] = struct{}{} } else { // look-up dataset name dataset := strings.Split(blk, "#")[0] if datasetNameOk(dataset) { ddict[dataset] = struct{}{} } } } } // return map keys, they're unique already return utils.MapKeys(ddict) } return []string{} }
// helper function to collect popularity results and merge them into bins of given metric // with the help of updateDict function. // return rdict which is a dictionary of bins and corresponding dataset names func popdb2Bins(metric string, bins []int, records []Record, siteName string, tstamps []string) BinRecord { var popdbNames []string rdict := make(BinRecord) for _, bin := range bins { rdict[bin] = []string{} // init all bins } recType := "dataset" // type of record we'll process for idx, rec := range records { // loop over popularity records mval := int(rec[metric].(float64)) name := rec["name"].(string) if idx == 0 && strings.Contains(name, "#") { recType = "block" } popdbNames = append(popdbNames, name) updateDict(bins, rdict, mval, name) } // loop over site content and collect zero bin for given metric siteNames := siteContent(siteName, tstamps[0], recType) var zeroMetricNames []string for _, name := range siteNames { if !utils.InList(name, popdbNames) { zeroMetricNames = append(zeroMetricNames, name) } } rdict[0] = zeroMetricNames // fetch old datasets, those who are in zero bin but their creation time // is older then interval we're intersting. thr := float64(utils.UnixTime(tstamps[0])) olds := oldDatasets(rdict[0].([]string), thr) rdict[-1] = olds newd := utils.Substruct(rdict[0].([]string), rdict[-1].([]string)) if utils.VERBOSE > 0 { fmt.Println("Bin-zero division, bin0-old", len(olds), "bin0-new", len(newd)) } rdict[0] = newd // make sure that we have unique list of datasets in every bin allbins := []int{-1, 0} for _, bin := range bins { allbins = append(allbins, bin) } for _, bin := range allbins { arr := rdict[bin].([]string) rdict[bin] = utils.List2Set(arr) val := rdict[bin] fmt.Println(siteName, "bin ", bin, " contains ", len(val.([]string))) } return rdict }
// helper function to find all dataset at a given tier-site func datasetInfoAtSite(dataset, siteName, tstamp string, ch chan Record) { if !datasetNameOk(dataset) { ch <- Record{"dataset": dataset, "size": 0.0, "tier": "unknown"} return } api := "blockreplicas" furl := fmt.Sprintf("%s/%s?dataset=%s&node=%s&create_since=%d", phedexUrl(), api, dataset, siteName, utils.UnixTime(tstamp)) if utils.VERBOSE > 1 { fmt.Println("furl", furl) } // if strings.HasPrefix(siteName, "T1_") && !strings.HasSuffix(siteName, "_Disk") { // siteName += "_Disk" // } response := utils.FetchResponse(furl, "") size := 0. if response.Error == nil { records := loadPhedexData(furl, response.Data) for _, rec := range records { val := rec["phedex"].(map[string]interface{}) blocks := val["block"].([]interface{}) for _, item := range blocks { brec := item.(map[string]interface{}) bytes := brec["bytes"].(float64) size += bytes } } } ch <- Record{"dataset": dataset, "size": size, "tier": utils.DataTier(dataset)} }