// function to generate datamap func GenerateMap(datamap, fout string) { startTime := time.Now() if fout == "" { fout = fmt.Sprintf("%s.txt", datamap) } utils.TestEnv() var names []string var records []Record for dbsinst, dbsId := range dbsInstances() { if datamap == "dataset" || datamap == "datasets" { names = dbsDatasets(dbsinst) } else if datamap == "tier" || datamap == "tiers" { names = dbsTiers(dbsinst) } else { fmt.Printf("Unsupported map name '%s'\n", datamap) os.Exit(-1) } for _, n := range names { rec := make(Record) rec[datamap] = n rec["dbsinst"] = dbsId rec["hash"] = utils.Hash1(n) records = append(records, rec) } } // process extra dataset writeRecords(records, fout) if utils.PROFILE { fmt.Printf("Processed %d urls\n", utils.UrlCounter) fmt.Printf("Elapsed time %s\n", time.Since(startTime)) } if utils.VERBOSE > 0 { fmt.Println("Job finished", time.Now()) } }
// function which process user request func Process(start, stop, fout string, newdata bool, dbsExtra, chunkSize int, test bool) { startTime := time.Now() utils.TestEnv() if start == "" { start = utils.Today() } if stop == "" { stop = utils.Today() } if fout == "" { fout = fmt.Sprintf("dataframe-%s-%s.csv", start, stop) if newdata { fout = fmt.Sprintf("new%s", fout) } } if utils.VERBOSE > 0 { fmt.Println("Job started", time.Now()) fmt.Println(fout) } var results, popdbRecords, missRecords []Record if test { for _, rec := range testRecords() { popdbRecords = append(popdbRecords, rec) } } else if newdata { for _, rec := range newRecords(start, stop) { popdbRecords = append(popdbRecords, rec) } if utils.VERBOSE > 0 { fmt.Printf("Process %d new DBS records\n", len(popdbRecords)) } } else { // get popularity DB records for given time interval popdbRecords = datasetStats(start, stop) if len(popdbRecords) == 0 { fmt.Println("No records from PopDB, better stop here") os.Exit(-1) } extraRecords := extraRecords(dbsDatasets("prod/global"), popdbRecords, dbsExtra) if utils.VERBOSE > 0 { fmt.Printf("Process %d popdb records\n", len(popdbRecords)) fmt.Printf("Process %d extra records\n", len(extraRecords)) } // combine all records togther for _, rec := range extraRecords { popdbRecords = append(popdbRecords, rec) } } // for every dataset in popdbRecords we need to collect its information for cdx, chunk := range makeChunksOfRecords(popdbRecords, chunkSize) { if utils.VERBOSE == 1 { fmt.Printf("process chunk=%d, %d records\n", cdx, len(chunk)) } if utils.VERBOSE == 2 { fmt.Println("process chunk", chunk) } var counter int ch := make(chan Record) for _, prec := range chunk { if prec != nil { go datasetInfo(prec, start, stop, ch) counter += 1 } } var out []Record var missCount int for { // collect results from a given chunk select { case r := <-ch: v := r["dataset"] if v == nil { missRecords = append(missRecords, r) missCount += 1 } else { switch v.(type) { case string: missRecords = append(missRecords, r) missCount += 1 default: out = append(out, r) } } default: time.Sleep(time.Duration(10) * time.Millisecond) // wait for response } if len(out)+missCount == len(chunk) { for _, rec := range out { results = append(results, rec) } break } } if utils.PROFILE { fmt.Printf("Processed %d chunk in %s\n", cdx, time.Since(startTime)) } } // check if we miss some datasets and try to recover them if len(missRecords) > 0 { if utils.VERBOSE > 0 { fmt.Printf("Process missed dataset %d records\n", len(missRecords)) } var missCount int for _, chunk := range makeChunksOfRecords(missRecords, 10) { ch := make(chan Record) for _, prec := range chunk { if prec != nil { go datasetInfo(prec, start, stop, ch) } } var out []Record missCount = 0 for { // collect results from a given chunk select { case r := <-ch: v := r["dataset"] switch v.(type) { case uint64: if v.(uint64) == 0 { fmt.Println("Miss record", r) missCount += 1 } else { out = append(out, r) } default: fmt.Println("Miss record", r) missCount += 1 } default: time.Sleep(time.Duration(10) * time.Millisecond) // wait for response } if len(out)+missCount == len(chunk) { for _, rec := range out { results = append(results, rec) } break } } } if missCount > 0 { fmt.Printf("Number of missed records: %d\n", missCount) } } // process extra dataset writeRecords(results, fout) if utils.PROFILE { fmt.Printf("Processed %d urls\n", utils.UrlCounter) fmt.Printf("Elapsed time %s\n", time.Since(startTime)) } if utils.VERBOSE > 0 { fmt.Println("Job finished", time.Now()) } }