func main() { defer common.LogPanic() worker_common.Init() defer util.TimeTrack(time.Now(), "Capturing Archives") defer glog.Flush() // Create the task file so that the master knows this worker is still busy. skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES)) defer util.DeleteTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES) if *chromiumBuild == "" { glog.Error("Must specify --chromium_build") return } // Reset the local chromium checkout. if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err) return } // Sync the local chromium checkout. if err := util.SyncDir(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err) return } // Delete and remake the local webpage archives directory. pathToArchives := filepath.Join(util.WebArchivesDir, *pagesetType) skutil.RemoveAll(pathToArchives) skutil.MkdirAll(pathToArchives, 0700) // Instantiate GsUtil object. gs, err := util.NewGsUtil(nil) if err != nil { glog.Error(err) return } // Download the specified chromium build if it does not exist locally. if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil { glog.Error(err) return } // Download pagesets if they do not exist locally. if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME) recordWprBinary := filepath.Join(util.TelemetryBinariesDir, util.BINARY_RECORD_WPR) timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CaptureArchivesTimeoutSecs // Loop through all pagesets. fileInfos, err := ioutil.ReadDir(pathToPagesets) if err != nil { glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err) return } glog.Infof("The %s fileInfos are: %s", len(fileInfos), fileInfos) for _, fileInfo := range fileInfos { pagesetBaseName := filepath.Base(fileInfo.Name()) if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(pagesetBaseName) == ".pyc" { // Ignore timestamp files and .pyc files. continue } // Read the pageset. pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) decodedPageset, err := util.ReadPageset(pagesetPath) if err != nil { glog.Errorf("Could not read %s: %s", pagesetPath, err) return } glog.Infof("===== Processing %s =====", pagesetPath) args := []string{ util.CAPTURE_ARCHIVES_DEFAULT_CT_BENCHMARK, "--extra-browser-args=--disable-setuid-sandbox", "--browser=exact", "--browser-executable=" + chromiumBinary, "--user-agent=" + decodedPageset.UserAgent, "--urls-list=" + decodedPageset.UrlsList, "--archive-data-file=" + decodedPageset.ArchiveDataFile, } env := []string{ fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets), "DISPLAY=:0", } skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.Duration(timeoutSecs)*time.Second, nil, nil)) } // Write timestamp to the webpage archives dir. skutil.LogErr(util.CreateTimestampFile(pathToArchives)) // Upload webpage archives dir to Google Storage. if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } }
func main() { defer common.LogPanic() worker_common.Init() defer util.TimeTrack(time.Now(), "Fixing archives") defer glog.Flush() // Create the task file so that the master knows this worker is still busy. skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_FIXING_ARCHIVES)) defer util.DeleteTaskFile(util.ACTIVITY_FIXING_ARCHIVES) if *pagesetType == "" { glog.Error("Must specify --pageset_type") return } if *chromiumBuild == "" { glog.Error("Must specify --chromium_build") return } if *runID == "" { glog.Error("Must specify --run_id") return } // Reset the local chromium checkout. if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err) return } // Sync the local chromium checkout. if err := util.SyncDir(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err) return } // Instantiate GsUtil object. gs, err := util.NewGsUtil(nil) if err != nil { glog.Error(err) return } // Download the specified chromium build. if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil { glog.Error(err) return } // Delete the chromium build to save space when we are done. defer skutil.RemoveAll(filepath.Join(util.ChromiumBuildsDir, *chromiumBuild)) chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME) // Download pagesets if they do not exist locally. if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) // Download archives if they do not exist locally. if err := gs.DownloadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } // Establish output paths. localOutputDir := filepath.Join(util.StorageDir, util.FixArchivesRunsDir, *runID) skutil.RemoveAll(localOutputDir) skutil.MkdirAll(localOutputDir, 0700) defer skutil.RemoveAll(localOutputDir) // Construct path to the ct_run_benchmark python script. _, currentFile, _, _ := runtime.Caller(0) pathToPyFiles := filepath.Join( filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))), "py") timeoutSecs := util.PagesetTypeToInfo[*pagesetType].RunChromiumPerfTimeoutSecs fileInfos, err := ioutil.ReadDir(pathToPagesets) if err != nil { glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err) return } // Location of the WPR logs. wprLogs := filepath.Join(util.ChromiumSrcDir, "webpagereplay_logs", "logs.txt") // Slice that will contain inconsistentArchives := []string{} // Loop through all pagesets. for _, fileInfo := range fileInfos { benchmarkResults := []float64{} resourceMissingCounts := []int{} pagesetBaseName := filepath.Base(fileInfo.Name()) if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(pagesetBaseName) == ".pyc" { // Ignore timestamp files and .pyc files. continue } // Convert the filename into a format consumable by the run_benchmarks // binary. pagesetName := strings.TrimSuffix(pagesetBaseName, filepath.Ext(pagesetBaseName)) pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) glog.Infof("===== Processing %s =====", pagesetPath) // Repeat runs the specified number of times. for repeatNum := 1; repeatNum <= *repeatBenchmark; repeatNum++ { // Delete webpagereplay_logs before every run. skutil.RemoveAll(wprLogs) skutil.LogErr(os.Chdir(pathToPyFiles)) args := []string{ util.BINARY_RUN_BENCHMARK, fmt.Sprintf("%s.%s", *benchmarkName, util.BenchmarksToTelemetryName[*benchmarkName]), "--page-set-name=" + pagesetName, "--page-set-base-dir=" + pathToPagesets, "--also-run-disabled-tests", } // Add output dir. outputDirArgValue := filepath.Join(localOutputDir, pagesetName, strconv.Itoa(repeatNum)) args = append(args, "--output-dir="+outputDirArgValue) // Figure out which browser should be used. args = append(args, "--browser=exact", "--browser-executable="+chromiumBinary) // Split benchmark args if not empty and append to args. if *benchmarkArgs != "" { for _, benchmarkArg := range strings.Split(*benchmarkArgs, " ") { args = append(args, benchmarkArg) } } // Add browserArgs if not empty to args. if *browserArgs != "" { args = append(args, "--extra-browser-args="+*browserArgs) } // Set the PYTHONPATH to the pagesets and the telemetry dirs. env := []string{ fmt.Sprintf("PYTHONPATH=%s:%s:%s:$PYTHONPATH", pathToPagesets, util.TelemetryBinariesDir, util.TelemetrySrcDir), "DISPLAY=:0", } skutil.LogErr( util.ExecuteCmd("python", args, env, time.Duration(timeoutSecs)*time.Second, nil, nil)) // Examine the results-pivot-table.csv file and store the mean frame time. resultsCSV := filepath.Join(outputDirArgValue, "results-pivot-table.csv") // TODO(rmistry): The format has changed from results.csv to results-pivot-table.csv headers, values, err := getRowsFromCSV(resultsCSV) if err != nil { glog.Errorf("Could not read %s: %s", resultsCSV, err) continue } for i := range headers { if headers[i] == *benchmarkHeaderToCheck { value, _ := strconv.ParseFloat(values[i], 64) benchmarkResults = append(benchmarkResults, value) break } } // Find how many times "Could not replay" showed up in wprLogs. content, err := ioutil.ReadFile(wprLogs) if err != nil { glog.Errorf("Could not read %s: %s", wprLogs, err) continue } resourceMissingCount := strings.Count(string(content), "Could not replay") resourceMissingCounts = append(resourceMissingCounts, resourceMissingCount) } glog.Infof("Benchmark results for %s are: %v", fileInfo.Name(), benchmarkResults) percentageChange := getPercentageChange(benchmarkResults) glog.Infof("Percentage change of results is: %f", percentageChange) glog.Infof("\"Could not replay\" showed up %v times in %s", resourceMissingCounts, wprLogs) maxResourceMissingCount := 0 for _, count := range resourceMissingCounts { if maxResourceMissingCount < count { maxResourceMissingCount = count } } if percentageChange > *percentageChangeThreshold || maxResourceMissingCount > *resourceMissingCountThreshold { glog.Infof("The archive for %s is inconsistent!", fileInfo.Name()) inconsistentArchives = append(inconsistentArchives, fmt.Sprintf("%s percentageChange: %f maxResourceMissingCount: %v", fileInfo.Name(), percentageChange, maxResourceMissingCount)) if *deletePageset { // Delete the pageset. skutil.RemoveAll(pagesetPath) } } } if len(inconsistentArchives) > 0 { glog.Infof("%d archives are inconsistent!", len(inconsistentArchives)) glog.Infof("The list of inconsistentArchives is: %v", inconsistentArchives) if *deletePageset { // Write new timestamp to the pagesets dir. skutil.RemoveAll(filepath.Join(pathToPagesets, util.TIMESTAMP_FILE_NAME)) skutil.LogErr(util.CreateTimestampFile(pathToPagesets)) // Inconsistent pagesets were deleted locally. Upload local pagesets dir // to Google Storage. if err := gs.UploadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } } } }
func main() { defer common.LogPanic() worker_common.Init() if !*worker_common.Local { defer util.CleanTmpDir() } defer util.TimeTrack(time.Now(), "Capturing SKPs") defer glog.Flush() // Validate required arguments. if *chromiumBuild == "" { glog.Error("Must specify --chromium_build") return } if *runID == "" { glog.Error("Must specify --run_id") return } if *targetPlatform == util.PLATFORM_ANDROID { glog.Error("Android is not yet supported for capturing SKPs.") return } // Reset the local chromium checkout. if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err) return } // Sync the local chromium checkout. if err := util.SyncDir(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err) return } // Create the task file so that the master knows this worker is still busy. skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CAPTURING_SKPS)) defer util.DeleteTaskFile(util.ACTIVITY_CAPTURING_SKPS) // Instantiate GsUtil object. gs, err := util.NewGsUtil(nil) if err != nil { glog.Error(err) return } // Download the specified chromium build. if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil { glog.Error(err) return } // Delete the chromium build to save space when we are done. defer skutil.RemoveAll(filepath.Join(util.ChromiumBuildsDir, *chromiumBuild)) chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME) if *targetPlatform == util.PLATFORM_ANDROID { // Install the APK on the Android device. if err := util.InstallChromeAPK(*chromiumBuild); err != nil { glog.Errorf("Could not install the chromium APK: %s", err) return } } // Download pagesets if they do not exist locally. if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) // Download archives if they do not exist locally. if err := gs.DownloadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } // Create the dir that SKPs will be stored in. pathToSkps := filepath.Join(util.SkpsDir, *pagesetType, *chromiumBuild) // Delete and remake the local SKPs directory. skutil.RemoveAll(pathToSkps) skutil.MkdirAll(pathToSkps, 0700) // Establish output paths. localOutputDir := filepath.Join(util.StorageDir, util.BenchmarkRunsDir, *runID) skutil.RemoveAll(localOutputDir) skutil.MkdirAll(localOutputDir, 0700) defer skutil.RemoveAll(localOutputDir) // Construct path to the ct_run_benchmark python script. _, currentFile, _, _ := runtime.Caller(0) pathToPyFiles := filepath.Join( filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))), "py") timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CaptureSKPsTimeoutSecs fileInfos, err := ioutil.ReadDir(pathToPagesets) if err != nil { glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err) return } // Create channel that contains all pageset file names. This channel will // be consumed by the worker pool. pagesetRequests := util.GetClosedChannelOfPagesets(fileInfos) var wg sync.WaitGroup // Use a RWMutex for the chromeProcessesCleaner goroutine to communicate to // the workers (acting as "readers") when it wants to be the "writer" and // kill all zombie chrome processes. var mutex sync.RWMutex // Loop through workers in the worker pool. for i := 0; i < WORKER_POOL_SIZE; i++ { // Increment the WaitGroup counter. wg.Add(1) // Create and run a goroutine closure that captures SKPs. go func() { // Decrement the WaitGroup counter when the goroutine completes. defer wg.Done() for pagesetName := range pagesetRequests { mutex.RLock() // Read the pageset. pagesetPath := filepath.Join(pathToPagesets, pagesetName) decodedPageset, err := util.ReadPageset(pagesetPath) if err != nil { glog.Errorf("Could not read %s: %s", pagesetPath, err) continue } glog.Infof("===== Processing %s =====", pagesetPath) skutil.LogErr(os.Chdir(pathToPyFiles)) args := []string{ filepath.Join(util.TelemetryBinariesDir, util.BINARY_RUN_BENCHMARK), util.BenchmarksToTelemetryName[util.BENCHMARK_SKPICTURE_PRINTER], "--also-run-disabled-tests", "--page-repeat=1", // Only need one run for SKPs. "--skp-outdir=" + pathToSkps, "--extra-browser-args=" + util.DEFAULT_BROWSER_ARGS, "--user-agent=" + decodedPageset.UserAgent, "--urls-list=" + decodedPageset.UrlsList, "--archive-data-file=" + decodedPageset.ArchiveDataFile, } // Figure out which browser should be used. if *targetPlatform == util.PLATFORM_ANDROID { args = append(args, "--browser=android-chromium") } else { args = append(args, "--browser=exact", "--browser-executable="+chromiumBinary) } // Set the PYTHONPATH to the pagesets and the telemetry dirs. env := []string{ fmt.Sprintf("PYTHONPATH=%s:%s:%s:$PYTHONPATH", pathToPagesets, util.TelemetryBinariesDir, util.TelemetrySrcDir), "DISPLAY=:0", } skutil.LogErr( util.ExecuteCmd("python", args, env, time.Duration(timeoutSecs)*time.Second, nil, nil)) mutex.RUnlock() } }() } if !*worker_common.Local { // Start the cleaner. go util.ChromeProcessesCleaner(&mutex, *chromeCleanerTimer) } // Wait for all spawned goroutines to complete. wg.Wait() // Move, validate and upload all SKP files. // List all directories in pathToSkps and copy out the skps. skpFileInfos, err := ioutil.ReadDir(pathToSkps) if err != nil { glog.Errorf("Unable to read %s: %s", pathToSkps, err) return } for _, fileInfo := range skpFileInfos { if !fileInfo.IsDir() { // We are only interested in directories. continue } skpName := fileInfo.Name() // Find the largest layer in this directory. layerInfos, err := ioutil.ReadDir(filepath.Join(pathToSkps, skpName)) if err != nil { glog.Errorf("Unable to read %s: %s", filepath.Join(pathToSkps, skpName), err) } if len(layerInfos) > 0 { largestLayerInfo := layerInfos[0] for _, layerInfo := range layerInfos { fmt.Println(layerInfo.Size()) if layerInfo.Size() > largestLayerInfo.Size() { largestLayerInfo = layerInfo } } // Only save SKPs greater than 6000 bytes. Less than that are probably // malformed. if largestLayerInfo.Size() > 6000 { layerPath := filepath.Join(pathToSkps, skpName, largestLayerInfo.Name()) skutil.Rename(layerPath, filepath.Join(pathToSkps, skpName+".skp")) } else { glog.Warningf("Skipping %s because size was less than 5000 bytes", skpName) } } // We extracted what we needed from the directory, now delete it. skutil.RemoveAll(filepath.Join(pathToSkps, skpName)) } glog.Info("Calling remove_invalid_skps.py") // Sync Skia tree. skutil.LogErr(util.SyncDir(util.SkiaTreeDir)) // Build tools. skutil.LogErr(util.BuildSkiaTools()) // Run remove_invalid_skps.py pathToRemoveSKPs := filepath.Join(pathToPyFiles, "remove_invalid_skps.py") pathToSKPInfo := filepath.Join(util.SkiaTreeDir, "out", "Release", "skpinfo") args := []string{ pathToRemoveSKPs, "--skp_dir=" + pathToSkps, "--path_to_skpinfo=" + pathToSKPInfo, } skutil.LogErr(util.ExecuteCmd("python", args, []string{}, util.REMOVE_INVALID_SKPS_TIMEOUT, nil, nil)) // Write timestamp to the SKPs dir. skutil.LogErr(util.CreateTimestampFile(pathToSkps)) // Upload SKPs dir to Google Storage. if err := gs.UploadWorkerArtifacts(util.SKPS_DIR_NAME, filepath.Join(*pagesetType, *chromiumBuild), *workerNum); err != nil { glog.Error(err) return } }
func main() { defer common.LogPanic() common.Init() defer util.TimeTrack(time.Now(), "Creating Pagesets") defer glog.Flush() // Create the task file so that the master knows this worker is still busy. skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CREATING_PAGESETS)) defer util.DeleteTaskFile(util.ACTIVITY_CREATING_PAGESETS) // Delete and remake the local pagesets directory. pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) skutil.RemoveAll(pathToPagesets) skutil.MkdirAll(pathToPagesets, 0700) // Get info about the specified pageset type. pagesetTypeInfo := util.PagesetTypeToInfo[*pagesetType] csvSource := pagesetTypeInfo.CSVSource numPages := pagesetTypeInfo.NumPages userAgent := pagesetTypeInfo.UserAgent // Download the CSV file from Google Storage to a tmp location. gs, err := util.NewGsUtil(nil) if err != nil { glog.Error(err) return } respBody, err := gs.GetRemoteFileContents(csvSource) if err != nil { glog.Error(err) return } defer skutil.Close(respBody) csvFile := filepath.Join(os.TempDir(), filepath.Base(csvSource)) out, err := os.Create(csvFile) if err != nil { glog.Errorf("Unable to create file %s: %s", csvFile, err) return } defer skutil.Close(out) defer skutil.Remove(csvFile) if _, err = io.Copy(out, respBody); err != nil { glog.Error(err) return } // Figure out which pagesets this worker should generate. numPagesPerSlave := numPages / util.NUM_WORKERS startNum := (*workerNum-1)*numPagesPerSlave + 1 endNum := *workerNum * numPagesPerSlave // Construct path to the create_page_set.py python script. _, currentFile, _, _ := runtime.Caller(0) createPageSetScript := filepath.Join( filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))), "py", "create_page_set.py") // Execute the create_page_set.py python script. timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CreatePagesetsTimeoutSecs for currNum := startNum; currNum <= endNum; currNum++ { args := []string{ createPageSetScript, "-s", strconv.Itoa(currNum), "-e", strconv.Itoa(currNum), "-c", csvFile, "-p", *pagesetType, "-u", userAgent, "-o", pathToPagesets, } if err := util.ExecuteCmd("python", args, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil { glog.Error(err) return } } // Write timestamp to the pagesets dir. skutil.LogErr(util.CreateTimestampFile(pathToPagesets)) // Upload pagesets dir to Google Storage. if err := gs.UploadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } }
func main() { common.Init() defer util.TimeTrack(time.Now(), "Capturing Archives") defer glog.Flush() // Create the task file so that the master knows this worker is still busy. skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES)) defer util.DeleteTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES) if *chromiumBuild == "" { glog.Error("Must specify --chromium_build") return } // Reset the local chromium checkout. if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err) return } // Sync the local chromium checkout. if err := util.SyncDir(util.ChromiumSrcDir); err != nil { glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err) return } // Delete and remake the local webpage archives directory. pathToArchives := filepath.Join(util.WebArchivesDir, *pagesetType) skutil.RemoveAll(pathToArchives) skutil.MkdirAll(pathToArchives, 0700) // Instantiate GsUtil object. gs, err := util.NewGsUtil(nil) if err != nil { glog.Error(err) return } // Download the specified chromium build if it does not exist locally. if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil { glog.Error(err) return } // Download pagesets if they do not exist locally. if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType) chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME) recordWprBinary := filepath.Join(util.TelemetryBinariesDir, util.BINARY_RECORD_WPR) timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CaptureArchivesTimeoutSecs // Loop through all pagesets. fileInfos, err := ioutil.ReadDir(pathToPagesets) if err != nil { glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err) return } // TODO(rmistry): Remove this hack once the 1M webpage archives have been captured. glog.Infof("The length of fileInfos is: %s", len(fileInfos)) fileInfos = fileInfos[18500:20000] glog.Infof("The fileInfos are: %s", fileInfos) for _, fileInfo := range fileInfos { pagesetBaseName := filepath.Base(fileInfo.Name()) if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(pagesetBaseName) == ".pyc" { // Ignore timestamp files and .pyc files. continue } // Convert the filename into a format consumable by the record_wpr binary. pagesetArchiveName := strings.TrimSuffix(pagesetBaseName, filepath.Ext(pagesetBaseName)) pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name()) glog.Infof("===== Processing %s =====", pagesetPath) args := []string{ "--extra-browser-args=--disable-setuid-sandbox", "--browser=exact", "--browser-executable=" + chromiumBinary, fmt.Sprintf("%s_page_set", pagesetArchiveName), "--page-set-base-dir=" + pathToPagesets, } env := []string{ fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets), "DISPLAY=:0", } skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.Duration(timeoutSecs)*time.Second, nil, nil)) } // Write timestamp to the webpage archives dir. skutil.LogErr(util.CreateTimestampFile(pathToArchives)) // Upload webpage archives dir to Google Storage. if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil { glog.Error(err) return } }