Ejemplo n.º 1
0
// GenerateMap builds the records of a data map and passes them to
// writeRecords together with the output name fout.
//
// datamap selects which names are collected for every entry returned by
// dbsInstances(): "dataset"/"datasets" uses dbsDatasets and "tier"/"tiers"
// uses dbsTiers. Any other value prints a message and terminates the
// process with exit status -1. The value of datamap is also used verbatim
// as the record key under which each name is stored.
//
// fout is the output name; when empty it defaults to "<datamap>.txt".
func GenerateMap(datamap, fout string) {
	startTime := time.Now()
	if fout == "" {
		fout = fmt.Sprintf("%s.txt", datamap)
	}
	utils.TestEnv()

	// Validate the map name once, up front: the check does not depend on the
	// DBS instance, and doing it here rejects an unsupported name even when
	// dbsInstances() yields nothing.
	var wantDatasets bool
	switch datamap {
	case "dataset", "datasets":
		wantDatasets = true
	case "tier", "tiers":
		wantDatasets = false
	default:
		fmt.Printf("Unsupported map name '%s'\n", datamap)
		os.Exit(-1)
	}

	var records []Record
	for dbsinst, dbsID := range dbsInstances() {
		var names []string
		if wantDatasets {
			names = dbsDatasets(dbsinst)
		} else {
			names = dbsTiers(dbsinst)
		}
		// One record per name: the name itself, the instance value it came
		// from and the result of utils.Hash1 applied to the name.
		for _, n := range names {
			rec := make(Record)
			rec[datamap] = n
			rec["dbsinst"] = dbsID
			rec["hash"] = utils.Hash1(n)
			records = append(records, rec)
		}
	}

	// hand the collected records over for output
	writeRecords(records, fout)
	if utils.PROFILE {
		fmt.Printf("Processed %d urls\n", utils.UrlCounter)
		fmt.Printf("Elapsed time %s\n", time.Since(startTime))
	}
	if utils.VERBOSE > 0 {
		fmt.Println("Job finished", time.Now())
	}
}
Ejemplo n.º 2
0
// Process handles a user request: it assembles the input records for the
// start..stop interval, resolves every record through datasetInfo in
// chunks, retries the records that could not be resolved, and passes the
// resolved records to writeRecords together with the output name fout.
//
// Parameters:
//   - start, stop: interval boundaries; an empty value is replaced by
//     utils.Today().
//   - fout: output name; when empty it defaults to
//     "dataframe-<start>-<stop>.csv", prefixed with "new" if newdata is set.
//   - newdata: take the input records from newRecords(start, stop).
//   - dbsExtra: forwarded to extraRecords when the input comes from
//     datasetStats.
//   - chunkSize: forwarded to makeChunksOfRecords for the main pass (the
//     retry pass always uses chunks of 10).
//   - test: take the input records from testRecords(); takes precedence
//     over newdata.
//
// The process terminates with exit status -1 when datasetStats returns no
// records.
func Process(start, stop, fout string, newdata bool, dbsExtra, chunkSize int, test bool) {
	startTime := time.Now()
	utils.TestEnv()
	if start == "" {
		start = utils.Today()
	}
	if stop == "" {
		stop = utils.Today()
	}
	if fout == "" {
		fout = fmt.Sprintf("dataframe-%s-%s.csv", start, stop)
		if newdata {
			fout = fmt.Sprintf("new%s", fout)
		}
	}
	if utils.VERBOSE > 0 {
		fmt.Println("Job started", time.Now())
		fmt.Println(fout)
	}

	var results, popdbRecords, missRecords []Record
	switch {
	case test:
		for _, rec := range testRecords() {
			popdbRecords = append(popdbRecords, rec)
		}
	case newdata:
		for _, rec := range newRecords(start, stop) {
			popdbRecords = append(popdbRecords, rec)
		}
		if utils.VERBOSE > 0 {
			fmt.Printf("Process %d new DBS records\n", len(popdbRecords))
		}
	default:
		// get popularity DB records for given time interval
		popdbRecords = datasetStats(start, stop)
		if len(popdbRecords) == 0 {
			fmt.Println("No records from PopDB, better stop here")
			os.Exit(-1)
		}
		extras := extraRecords(dbsDatasets("prod/global"), popdbRecords, dbsExtra)
		if utils.VERBOSE > 0 {
			fmt.Printf("Process %d popdb records\n", len(popdbRecords))
			fmt.Printf("Process %d extra records\n", len(extras))
		}
		// combine all records together
		for _, rec := range extras {
			popdbRecords = append(popdbRecords, rec)
		}
	}

	// for every dataset in popdbRecords we need to collect its information
	for cdx, chunk := range makeChunksOfRecords(popdbRecords, chunkSize) {
		if utils.VERBOSE == 1 {
			fmt.Printf("process chunk=%d, %d records\n", cdx, len(chunk))
		}
		if utils.VERBOSE == 2 {
			fmt.Println("process chunk", chunk)
		}
		ch := make(chan Record)
		// Count only the goroutines actually started: nil entries are
		// skipped, so waiting for len(chunk) results would never finish.
		var pending int
		for _, prec := range chunk {
			if prec != nil {
				go datasetInfo(prec, start, stop, ch)
				pending++
			}
		}
		// Blocking receive, one per launched goroutine. This relies on
		// datasetInfo sending exactly one record on ch per call, which the
		// previous completion check assumed as well.
		for ; pending > 0; pending-- {
			r := <-ch
			switch r["dataset"].(type) {
			case nil, string:
				// missing or still a plain string: keep it for the retry pass
				missRecords = append(missRecords, r)
			default:
				results = append(results, r)
			}
		}
		if utils.PROFILE {
			fmt.Printf("Processed %d chunk in %s\n", cdx, time.Since(startTime))
		}
	}

	// check if we miss some datasets and try to recover them
	if len(missRecords) > 0 {
		if utils.VERBOSE > 0 {
			fmt.Printf("Process missed dataset %d records\n", len(missRecords))
		}
		// Total across all retry chunks, so the summary below reports every
		// record that stayed unresolved, not just those of the last chunk.
		var missCount int
		for _, chunk := range makeChunksOfRecords(missRecords, 10) {
			ch := make(chan Record)
			var pending int
			for _, prec := range chunk {
				if prec != nil {
					go datasetInfo(prec, start, stop, ch)
					pending++
				}
			}
			for ; pending > 0; pending-- {
				r := <-ch
				// only a non-zero uint64 "dataset" value counts as recovered
				if id, ok := r["dataset"].(uint64); ok && id != 0 {
					results = append(results, r)
				} else {
					fmt.Println("Miss record", r)
					missCount++
				}
			}
		}
		if missCount > 0 {
			fmt.Printf("Number of missed records: %d\n", missCount)
		}
	}

	// hand the resolved records over for output
	writeRecords(results, fout)
	if utils.PROFILE {
		fmt.Printf("Processed %d urls\n", utils.UrlCounter)
		fmt.Printf("Elapsed time %s\n", time.Since(startTime))
	}
	if utils.VERBOSE > 0 {
		fmt.Println("Job finished", time.Now())
	}
}