func mergeUploadCSVFiles(localOutputDir, pathToPyFiles, runID, remoteDir string, gs *util.GsUtil) error { // Move all results into a single directory. fileInfos, err := ioutil.ReadDir(localOutputDir) if err != nil { return fmt.Errorf("Unable to read %s: %s", localOutputDir, err) } for _, fileInfo := range fileInfos { if !fileInfo.IsDir() { continue } outputFile := filepath.Join(localOutputDir, fileInfo.Name(), "results-pivot-table.csv") newFile := filepath.Join(localOutputDir, fmt.Sprintf("%s.csv", fileInfo.Name())) if err := os.Rename(outputFile, newFile); err != nil { glog.Errorf("Could not rename %s to %s: %s", outputFile, newFile, err) continue } // Add the rank of the page to the CSV file. headers, values, err := getRowsFromCSV(newFile) if err != nil { glog.Errorf("Could not read %s: %s", newFile, err) continue } pageRank := strings.Split(fileInfo.Name(), "_")[1] for i := range headers { for j := range values { if headers[i] == "page" { values[j][i] = fmt.Sprintf("%s (#%s)", values[j][i], pageRank) } } } if err := writeRowsToCSV(newFile, headers, values); err != nil { glog.Errorf("Could not write to %s: %s", newFile, err) continue } } // Call csv_pivot_table_merger.py to merge all results into a single results CSV. pathToCsvMerger := filepath.Join(pathToPyFiles, "csv_pivot_table_merger.py") outputFileName := runID + ".output" args := []string{ pathToCsvMerger, "--csv_dir=" + localOutputDir, "--output_csv_name=" + filepath.Join(localOutputDir, outputFileName), } err = util.ExecuteCmd("python", args, []string{}, util.CSV_PIVOT_TABLE_MERGER_TIMEOUT, nil, nil) if err != nil { return fmt.Errorf("Error running csv_pivot_table_merger.py: %s", err) } // Copy the output file to Google Storage. remoteOutputDir := filepath.Join(remoteDir, fmt.Sprintf("slave%d", *workerNum), "outputs") if err := gs.UploadFile(outputFileName, localOutputDir, remoteOutputDir); err != nil { return fmt.Errorf("Unable to upload %s to %s: %s", outputFileName, remoteOutputDir, err) } return nil }
func mergeUploadCSVFiles(runID string, gs *util.GsUtil) error { localOutputDir := filepath.Join(util.StorageDir, util.BenchmarkRunsDir, runID) skutil.MkdirAll(localOutputDir, 0700) // Copy outputs from all slaves locally. for i := 0; i < util.NUM_WORKERS; i++ { workerNum := i + 1 workerLocalOutputPath := filepath.Join(localOutputDir, fmt.Sprintf("slave%d", workerNum)+".csv") workerRemoteOutputPath := filepath.Join(util.BenchmarkRunsDir, runID, fmt.Sprintf("slave%d", workerNum), "outputs", runID+".output") respBody, err := gs.GetRemoteFileContents(workerRemoteOutputPath) if err != nil { glog.Errorf("Could not fetch %s: %s", workerRemoteOutputPath, err) // TODO(rmistry): Should we instead return here? We can only return // here if all 100 slaves reliably run without any failures which they // really should. continue } defer skutil.Close(respBody) out, err := os.Create(workerLocalOutputPath) if err != nil { return fmt.Errorf("Unable to create file %s: %s", workerLocalOutputPath, err) } defer skutil.Close(out) defer skutil.Remove(workerLocalOutputPath) if _, err = io.Copy(out, respBody); err != nil { return fmt.Errorf("Unable to copy to file %s: %s", workerLocalOutputPath, err) } } // Call csv_merger.py to merge all results into a single results CSV. _, currentFile, _, _ := runtime.Caller(0) pathToPyFiles := filepath.Join( filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))), "py") pathToCsvMerger := filepath.Join(pathToPyFiles, "csv_merger.py") outputFileName := runID + ".output" args := []string{ pathToCsvMerger, "--csv_dir=" + localOutputDir, "--output_csv_name=" + filepath.Join(localOutputDir, outputFileName), } if err := util.ExecuteCmd("python", args, []string{}, 1*time.Hour, nil, nil); err != nil { return fmt.Errorf("Error running csv_merger.py: %s", err) } // Copy the output file to Google Storage. remoteOutputDir := filepath.Join(util.BenchmarkRunsDir, runID, "consolidated_outputs") if err := gs.UploadFile(outputFileName, localOutputDir, remoteOutputDir); err != nil { return fmt.Errorf("Unable to upload %s to %s: %s", outputFileName, remoteOutputDir, err) } return nil }
func mergeUploadCSVFiles(runID string, gs *util.GsUtil) ([]string, error) { localOutputDir := filepath.Join(util.StorageDir, util.BenchmarkRunsDir, runID) skutil.MkdirAll(localOutputDir, 0700) noOutputSlaves := []string{} // Copy outputs from all slaves locally. for i := 0; i < util.NumWorkers(); i++ { workerNum := i + 1 workerLocalOutputPath := filepath.Join(localOutputDir, fmt.Sprintf("slave%d", workerNum)+".csv") workerRemoteOutputPath := filepath.Join(util.BenchmarkRunsDir, runID, fmt.Sprintf("slave%d", workerNum), "outputs", runID+".output") respBody, err := gs.GetRemoteFileContents(workerRemoteOutputPath) if err != nil { glog.Errorf("Could not fetch %s: %s", workerRemoteOutputPath, err) noOutputSlaves = append(noOutputSlaves, fmt.Sprintf(util.WORKER_NAME_TEMPLATE, workerNum)) continue } defer skutil.Close(respBody) out, err := os.Create(workerLocalOutputPath) if err != nil { return noOutputSlaves, fmt.Errorf("Unable to create file %s: %s", workerLocalOutputPath, err) } defer skutil.Close(out) defer skutil.Remove(workerLocalOutputPath) if _, err = io.Copy(out, respBody); err != nil { return noOutputSlaves, fmt.Errorf("Unable to copy to file %s: %s", workerLocalOutputPath, err) } } // Call csv_merger.py to merge all results into a single results CSV. _, currentFile, _, _ := runtime.Caller(0) pathToPyFiles := filepath.Join( filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))), "py") pathToCsvMerger := filepath.Join(pathToPyFiles, "csv_merger.py") outputFileName := runID + ".output" args := []string{ pathToCsvMerger, "--csv_dir=" + localOutputDir, "--output_csv_name=" + filepath.Join(localOutputDir, outputFileName), } err := util.ExecuteCmd("python", args, []string{}, util.CSV_MERGER_TIMEOUT, nil, nil) if err != nil { return noOutputSlaves, fmt.Errorf("Error running csv_merger.py: %s", err) } // Copy the output file to Google Storage. remoteOutputDir := filepath.Join(util.BenchmarkRunsDir, runID, "consolidated_outputs") if err := gs.UploadFile(outputFileName, localOutputDir, remoteOutputDir); err != nil { return noOutputSlaves, fmt.Errorf("Unable to upload %s to %s: %s", outputFileName, remoteOutputDir, err) } return noOutputSlaves, nil }