Beispiel #1
0
func mergeUploadCSVFiles(localOutputDir, pathToPyFiles, runID, remoteDir string, gs *util.GsUtil) error {
	// Move all results into a single directory.
	fileInfos, err := ioutil.ReadDir(localOutputDir)
	if err != nil {
		return fmt.Errorf("Unable to read %s: %s", localOutputDir, err)
	}
	for _, fileInfo := range fileInfos {
		if !fileInfo.IsDir() {
			continue
		}
		outputFile := filepath.Join(localOutputDir, fileInfo.Name(), "results-pivot-table.csv")
		newFile := filepath.Join(localOutputDir, fmt.Sprintf("%s.csv", fileInfo.Name()))
		if err := os.Rename(outputFile, newFile); err != nil {
			glog.Errorf("Could not rename %s to %s: %s", outputFile, newFile, err)
			continue
		}
		// Add the rank of the page to the CSV file.
		headers, values, err := getRowsFromCSV(newFile)
		if err != nil {
			glog.Errorf("Could not read %s: %s", newFile, err)
			continue
		}
		pageRank := strings.Split(fileInfo.Name(), "_")[1]
		for i := range headers {
			for j := range values {
				if headers[i] == "page" {
					values[j][i] = fmt.Sprintf("%s (#%s)", values[j][i], pageRank)
				}
			}
		}
		if err := writeRowsToCSV(newFile, headers, values); err != nil {
			glog.Errorf("Could not write to %s: %s", newFile, err)
			continue
		}
	}
	// Call csv_pivot_table_merger.py to merge all results into a single results CSV.
	pathToCsvMerger := filepath.Join(pathToPyFiles, "csv_pivot_table_merger.py")
	outputFileName := runID + ".output"
	args := []string{
		pathToCsvMerger,
		"--csv_dir=" + localOutputDir,
		"--output_csv_name=" + filepath.Join(localOutputDir, outputFileName),
	}
	err = util.ExecuteCmd("python", args, []string{}, util.CSV_PIVOT_TABLE_MERGER_TIMEOUT, nil,
		nil)
	if err != nil {
		return fmt.Errorf("Error running csv_pivot_table_merger.py: %s", err)
	}
	// Copy the output file to Google Storage.
	remoteOutputDir := filepath.Join(remoteDir, fmt.Sprintf("slave%d", *workerNum), "outputs")
	if err := gs.UploadFile(outputFileName, localOutputDir, remoteOutputDir); err != nil {
		return fmt.Errorf("Unable to upload %s to %s: %s", outputFileName, remoteOutputDir, err)
	}
	return nil
}
Beispiel #2
0
func mergeUploadCSVFiles(runID string, gs *util.GsUtil) error {
	localOutputDir := filepath.Join(util.StorageDir, util.BenchmarkRunsDir, runID)
	skutil.MkdirAll(localOutputDir, 0700)
	// Copy outputs from all slaves locally.
	for i := 0; i < util.NUM_WORKERS; i++ {
		workerNum := i + 1
		workerLocalOutputPath := filepath.Join(localOutputDir, fmt.Sprintf("slave%d", workerNum)+".csv")
		workerRemoteOutputPath := filepath.Join(util.BenchmarkRunsDir, runID, fmt.Sprintf("slave%d", workerNum), "outputs", runID+".output")
		respBody, err := gs.GetRemoteFileContents(workerRemoteOutputPath)
		if err != nil {
			glog.Errorf("Could not fetch %s: %s", workerRemoteOutputPath, err)
			// TODO(rmistry): Should we instead return here? We can only return
			// here if all 100 slaves reliably run without any failures which they
			// really should.
			continue
		}
		defer skutil.Close(respBody)
		out, err := os.Create(workerLocalOutputPath)
		if err != nil {
			return fmt.Errorf("Unable to create file %s: %s", workerLocalOutputPath, err)
		}
		defer skutil.Close(out)
		defer skutil.Remove(workerLocalOutputPath)
		if _, err = io.Copy(out, respBody); err != nil {
			return fmt.Errorf("Unable to copy to file %s: %s", workerLocalOutputPath, err)
		}
	}
	// Call csv_merger.py to merge all results into a single results CSV.
	_, currentFile, _, _ := runtime.Caller(0)
	pathToPyFiles := filepath.Join(
		filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))),
		"py")
	pathToCsvMerger := filepath.Join(pathToPyFiles, "csv_merger.py")
	outputFileName := runID + ".output"
	args := []string{
		pathToCsvMerger,
		"--csv_dir=" + localOutputDir,
		"--output_csv_name=" + filepath.Join(localOutputDir, outputFileName),
	}
	if err := util.ExecuteCmd("python", args, []string{}, 1*time.Hour, nil, nil); err != nil {
		return fmt.Errorf("Error running csv_merger.py: %s", err)
	}
	// Copy the output file to Google Storage.
	remoteOutputDir := filepath.Join(util.BenchmarkRunsDir, runID, "consolidated_outputs")
	if err := gs.UploadFile(outputFileName, localOutputDir, remoteOutputDir); err != nil {
		return fmt.Errorf("Unable to upload %s to %s: %s", outputFileName, remoteOutputDir, err)
	}
	return nil
}
Beispiel #3
0
func mergeUploadCSVFiles(runID string, gs *util.GsUtil) ([]string, error) {
	localOutputDir := filepath.Join(util.StorageDir, util.BenchmarkRunsDir, runID)
	skutil.MkdirAll(localOutputDir, 0700)
	noOutputSlaves := []string{}
	// Copy outputs from all slaves locally.
	for i := 0; i < util.NumWorkers(); i++ {
		workerNum := i + 1
		workerLocalOutputPath := filepath.Join(localOutputDir, fmt.Sprintf("slave%d", workerNum)+".csv")
		workerRemoteOutputPath := filepath.Join(util.BenchmarkRunsDir, runID, fmt.Sprintf("slave%d", workerNum), "outputs", runID+".output")
		respBody, err := gs.GetRemoteFileContents(workerRemoteOutputPath)
		if err != nil {
			glog.Errorf("Could not fetch %s: %s", workerRemoteOutputPath, err)
			noOutputSlaves = append(noOutputSlaves, fmt.Sprintf(util.WORKER_NAME_TEMPLATE, workerNum))
			continue
		}
		defer skutil.Close(respBody)
		out, err := os.Create(workerLocalOutputPath)
		if err != nil {
			return noOutputSlaves, fmt.Errorf("Unable to create file %s: %s", workerLocalOutputPath, err)
		}
		defer skutil.Close(out)
		defer skutil.Remove(workerLocalOutputPath)
		if _, err = io.Copy(out, respBody); err != nil {
			return noOutputSlaves, fmt.Errorf("Unable to copy to file %s: %s", workerLocalOutputPath, err)
		}
	}
	// Call csv_merger.py to merge all results into a single results CSV.
	_, currentFile, _, _ := runtime.Caller(0)
	pathToPyFiles := filepath.Join(
		filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))),
		"py")
	pathToCsvMerger := filepath.Join(pathToPyFiles, "csv_merger.py")
	outputFileName := runID + ".output"
	args := []string{
		pathToCsvMerger,
		"--csv_dir=" + localOutputDir,
		"--output_csv_name=" + filepath.Join(localOutputDir, outputFileName),
	}
	err := util.ExecuteCmd("python", args, []string{}, util.CSV_MERGER_TIMEOUT, nil, nil)
	if err != nil {
		return noOutputSlaves, fmt.Errorf("Error running csv_merger.py: %s", err)
	}
	// Copy the output file to Google Storage.
	remoteOutputDir := filepath.Join(util.BenchmarkRunsDir, runID, "consolidated_outputs")
	if err := gs.UploadFile(outputFileName, localOutputDir, remoteOutputDir); err != nil {
		return noOutputSlaves, fmt.Errorf("Unable to upload %s to %s: %s", outputFileName, remoteOutputDir, err)
	}
	return noOutputSlaves, nil
}