Example #1
0
func main() {
	defer common.LogPanic()
	worker_common.Init()
	defer util.TimeTrack(time.Now(), "Capturing Archives")
	defer glog.Flush()

	// Create the task file so that the master knows this worker is still busy.
	skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES))
	defer util.DeleteTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES)

	if *chromiumBuild == "" {
		glog.Error("Must specify --chromium_build")
		return
	}

	// Reset the local chromium checkout.
	if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err)
		return
	}
	// Sync the local chromium checkout.
	if err := util.SyncDir(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err)
		return
	}

	// Delete and remake the local webpage archives directory.
	pathToArchives := filepath.Join(util.WebArchivesDir, *pagesetType)
	skutil.RemoveAll(pathToArchives)
	skutil.MkdirAll(pathToArchives, 0700)

	// Instantiate GsUtil object.
	gs, err := util.NewGsUtil(nil)
	if err != nil {
		glog.Error(err)
		return
	}

	// Download the specified chromium build if it does not exist locally.
	if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil {
		glog.Error(err)
		return
	}

	// Download pagesets if they do not exist locally.
	if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}

	pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType)
	chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME)
	recordWprBinary := filepath.Join(util.TelemetryBinariesDir, util.BINARY_RECORD_WPR)
	timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CaptureArchivesTimeoutSecs
	// Loop through all pagesets.
	fileInfos, err := ioutil.ReadDir(pathToPagesets)
	if err != nil {
		glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err)
		return
	}
	glog.Infof("The %s fileInfos are: %s", len(fileInfos), fileInfos)
	for _, fileInfo := range fileInfos {
		pagesetBaseName := filepath.Base(fileInfo.Name())
		if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(pagesetBaseName) == ".pyc" {
			// Ignore timestamp files and .pyc files.
			continue
		}

		// Read the pageset.
		pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name())
		decodedPageset, err := util.ReadPageset(pagesetPath)
		if err != nil {
			glog.Errorf("Could not read %s: %s", pagesetPath, err)
			return
		}

		glog.Infof("===== Processing %s =====", pagesetPath)
		args := []string{
			util.CAPTURE_ARCHIVES_DEFAULT_CT_BENCHMARK,
			"--extra-browser-args=--disable-setuid-sandbox",
			"--browser=exact",
			"--browser-executable=" + chromiumBinary,
			"--user-agent=" + decodedPageset.UserAgent,
			"--urls-list=" + decodedPageset.UrlsList,
			"--archive-data-file=" + decodedPageset.ArchiveDataFile,
		}
		env := []string{
			fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets),
			"DISPLAY=:0",
		}
		skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.Duration(timeoutSecs)*time.Second, nil, nil))
	}

	// Write timestamp to the webpage archives dir.
	skutil.LogErr(util.CreateTimestampFile(pathToArchives))

	// Upload webpage archives dir to Google Storage.
	if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}
}
Example #2
0
func main() {
	defer common.LogPanic()
	worker_common.Init()
	defer util.TimeTrack(time.Now(), "Fixing archives")
	defer glog.Flush()

	// Create the task file so that the master knows this worker is still busy.
	skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_FIXING_ARCHIVES))
	defer util.DeleteTaskFile(util.ACTIVITY_FIXING_ARCHIVES)

	if *pagesetType == "" {
		glog.Error("Must specify --pageset_type")
		return
	}
	if *chromiumBuild == "" {
		glog.Error("Must specify --chromium_build")
		return
	}
	if *runID == "" {
		glog.Error("Must specify --run_id")
		return
	}

	// Reset the local chromium checkout.
	if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err)
		return
	}
	// Sync the local chromium checkout.
	if err := util.SyncDir(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err)
		return
	}

	// Instantiate GsUtil object.
	gs, err := util.NewGsUtil(nil)
	if err != nil {
		glog.Error(err)
		return
	}

	// Download the specified chromium build.
	if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil {
		glog.Error(err)
		return
	}
	// Delete the chromium build to save space when we are done.
	defer skutil.RemoveAll(filepath.Join(util.ChromiumBuildsDir, *chromiumBuild))
	chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME)

	// Download pagesets if they do not exist locally.
	if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}
	pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType)

	// Download archives if they do not exist locally.
	if err := gs.DownloadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}

	// Establish output paths.
	localOutputDir := filepath.Join(util.StorageDir, util.FixArchivesRunsDir, *runID)
	skutil.RemoveAll(localOutputDir)
	skutil.MkdirAll(localOutputDir, 0700)
	defer skutil.RemoveAll(localOutputDir)

	// Construct path to the ct_run_benchmark python script.
	_, currentFile, _, _ := runtime.Caller(0)
	pathToPyFiles := filepath.Join(
		filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))),
		"py")

	timeoutSecs := util.PagesetTypeToInfo[*pagesetType].RunChromiumPerfTimeoutSecs
	fileInfos, err := ioutil.ReadDir(pathToPagesets)
	if err != nil {
		glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err)
		return
	}

	// Location of the WPR logs.
	wprLogs := filepath.Join(util.ChromiumSrcDir, "webpagereplay_logs", "logs.txt")
	// Slice that will contain
	inconsistentArchives := []string{}

	// Loop through all pagesets.
	for _, fileInfo := range fileInfos {
		benchmarkResults := []float64{}
		resourceMissingCounts := []int{}
		pagesetBaseName := filepath.Base(fileInfo.Name())
		if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(pagesetBaseName) == ".pyc" {
			// Ignore timestamp files and .pyc files.
			continue
		}

		// Convert the filename into a format consumable by the run_benchmarks
		// binary.
		pagesetName := strings.TrimSuffix(pagesetBaseName, filepath.Ext(pagesetBaseName))
		pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name())

		glog.Infof("===== Processing %s =====", pagesetPath)

		// Repeat runs the specified number of times.
		for repeatNum := 1; repeatNum <= *repeatBenchmark; repeatNum++ {
			// Delete webpagereplay_logs before every run.
			skutil.RemoveAll(wprLogs)

			skutil.LogErr(os.Chdir(pathToPyFiles))
			args := []string{
				util.BINARY_RUN_BENCHMARK,
				fmt.Sprintf("%s.%s", *benchmarkName, util.BenchmarksToTelemetryName[*benchmarkName]),
				"--page-set-name=" + pagesetName,
				"--page-set-base-dir=" + pathToPagesets,
				"--also-run-disabled-tests",
			}
			// Add output dir.
			outputDirArgValue := filepath.Join(localOutputDir, pagesetName, strconv.Itoa(repeatNum))
			args = append(args, "--output-dir="+outputDirArgValue)
			// Figure out which browser should be used.
			args = append(args, "--browser=exact", "--browser-executable="+chromiumBinary)
			// Split benchmark args if not empty and append to args.
			if *benchmarkArgs != "" {
				for _, benchmarkArg := range strings.Split(*benchmarkArgs, " ") {
					args = append(args, benchmarkArg)
				}
			}
			// Add browserArgs if not empty to args.
			if *browserArgs != "" {
				args = append(args, "--extra-browser-args="+*browserArgs)
			}
			// Set the PYTHONPATH to the pagesets and the telemetry dirs.
			env := []string{
				fmt.Sprintf("PYTHONPATH=%s:%s:%s:$PYTHONPATH", pathToPagesets, util.TelemetryBinariesDir, util.TelemetrySrcDir),
				"DISPLAY=:0",
			}
			skutil.LogErr(
				util.ExecuteCmd("python", args, env, time.Duration(timeoutSecs)*time.Second, nil, nil))

			// Examine the results-pivot-table.csv file and store the mean frame time.
			resultsCSV := filepath.Join(outputDirArgValue, "results-pivot-table.csv")
			// TODO(rmistry): The format has changed from results.csv to results-pivot-table.csv
			headers, values, err := getRowsFromCSV(resultsCSV)
			if err != nil {
				glog.Errorf("Could not read %s: %s", resultsCSV, err)
				continue
			}
			for i := range headers {
				if headers[i] == *benchmarkHeaderToCheck {
					value, _ := strconv.ParseFloat(values[i], 64)
					benchmarkResults = append(benchmarkResults, value)
					break
				}
			}

			// Find how many times "Could not replay" showed up in wprLogs.
			content, err := ioutil.ReadFile(wprLogs)
			if err != nil {
				glog.Errorf("Could not read %s: %s", wprLogs, err)
				continue
			}
			resourceMissingCount := strings.Count(string(content), "Could not replay")
			resourceMissingCounts = append(resourceMissingCounts, resourceMissingCount)
		}

		glog.Infof("Benchmark results for %s are: %v", fileInfo.Name(), benchmarkResults)
		percentageChange := getPercentageChange(benchmarkResults)
		glog.Infof("Percentage change of results is: %f", percentageChange)
		glog.Infof("\"Could not replay\" showed up %v times in %s", resourceMissingCounts, wprLogs)
		maxResourceMissingCount := 0
		for _, count := range resourceMissingCounts {
			if maxResourceMissingCount < count {
				maxResourceMissingCount = count
			}
		}
		if percentageChange > *percentageChangeThreshold || maxResourceMissingCount > *resourceMissingCountThreshold {
			glog.Infof("The archive for %s is inconsistent!", fileInfo.Name())
			inconsistentArchives = append(inconsistentArchives, fmt.Sprintf("%s percentageChange: %f maxResourceMissingCount: %v", fileInfo.Name(), percentageChange, maxResourceMissingCount))
			if *deletePageset {
				// Delete the pageset.
				skutil.RemoveAll(pagesetPath)
			}
		}
	}

	if len(inconsistentArchives) > 0 {
		glog.Infof("%d archives are inconsistent!", len(inconsistentArchives))
		glog.Infof("The list of inconsistentArchives is: %v", inconsistentArchives)
		if *deletePageset {
			// Write new timestamp to the pagesets dir.
			skutil.RemoveAll(filepath.Join(pathToPagesets, util.TIMESTAMP_FILE_NAME))
			skutil.LogErr(util.CreateTimestampFile(pathToPagesets))
			// Inconsistent pagesets were deleted locally. Upload local pagesets dir
			// to Google Storage.
			if err := gs.UploadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil {
				glog.Error(err)
				return
			}
		}
	}
}
Example #3
0
func main() {
	defer common.LogPanic()
	worker_common.Init()
	if !*worker_common.Local {
		defer util.CleanTmpDir()
	}
	defer util.TimeTrack(time.Now(), "Capturing SKPs")
	defer glog.Flush()

	// Validate required arguments.
	if *chromiumBuild == "" {
		glog.Error("Must specify --chromium_build")
		return
	}
	if *runID == "" {
		glog.Error("Must specify --run_id")
		return
	}
	if *targetPlatform == util.PLATFORM_ANDROID {
		glog.Error("Android is not yet supported for capturing SKPs.")
		return
	}

	// Reset the local chromium checkout.
	if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err)
		return
	}
	// Sync the local chromium checkout.
	if err := util.SyncDir(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err)
		return
	}

	// Create the task file so that the master knows this worker is still busy.
	skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CAPTURING_SKPS))
	defer util.DeleteTaskFile(util.ACTIVITY_CAPTURING_SKPS)

	// Instantiate GsUtil object.
	gs, err := util.NewGsUtil(nil)
	if err != nil {
		glog.Error(err)
		return
	}

	// Download the specified chromium build.
	if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil {
		glog.Error(err)
		return
	}
	// Delete the chromium build to save space when we are done.
	defer skutil.RemoveAll(filepath.Join(util.ChromiumBuildsDir, *chromiumBuild))
	chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME)
	if *targetPlatform == util.PLATFORM_ANDROID {
		// Install the APK on the Android device.
		if err := util.InstallChromeAPK(*chromiumBuild); err != nil {
			glog.Errorf("Could not install the chromium APK: %s", err)
			return
		}
	}

	// Download pagesets if they do not exist locally.
	if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}
	pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType)

	// Download archives if they do not exist locally.
	if err := gs.DownloadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}

	// Create the dir that SKPs will be stored in.
	pathToSkps := filepath.Join(util.SkpsDir, *pagesetType, *chromiumBuild)
	// Delete and remake the local SKPs directory.
	skutil.RemoveAll(pathToSkps)
	skutil.MkdirAll(pathToSkps, 0700)

	// Establish output paths.
	localOutputDir := filepath.Join(util.StorageDir, util.BenchmarkRunsDir, *runID)
	skutil.RemoveAll(localOutputDir)
	skutil.MkdirAll(localOutputDir, 0700)
	defer skutil.RemoveAll(localOutputDir)

	// Construct path to the ct_run_benchmark python script.
	_, currentFile, _, _ := runtime.Caller(0)
	pathToPyFiles := filepath.Join(
		filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))),
		"py")

	timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CaptureSKPsTimeoutSecs
	fileInfos, err := ioutil.ReadDir(pathToPagesets)
	if err != nil {
		glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err)
		return
	}

	// Create channel that contains all pageset file names. This channel will
	// be consumed by the worker pool.
	pagesetRequests := util.GetClosedChannelOfPagesets(fileInfos)

	var wg sync.WaitGroup
	// Use a RWMutex for the chromeProcessesCleaner goroutine to communicate to
	// the workers (acting as "readers") when it wants to be the "writer" and
	// kill all zombie chrome processes.
	var mutex sync.RWMutex

	// Loop through workers in the worker pool.
	for i := 0; i < WORKER_POOL_SIZE; i++ {
		// Increment the WaitGroup counter.
		wg.Add(1)

		// Create and run a goroutine closure that captures SKPs.
		go func() {
			// Decrement the WaitGroup counter when the goroutine completes.
			defer wg.Done()

			for pagesetName := range pagesetRequests {

				mutex.RLock()

				// Read the pageset.
				pagesetPath := filepath.Join(pathToPagesets, pagesetName)
				decodedPageset, err := util.ReadPageset(pagesetPath)
				if err != nil {
					glog.Errorf("Could not read %s: %s", pagesetPath, err)
					continue
				}

				glog.Infof("===== Processing %s =====", pagesetPath)

				skutil.LogErr(os.Chdir(pathToPyFiles))
				args := []string{
					filepath.Join(util.TelemetryBinariesDir, util.BINARY_RUN_BENCHMARK),
					util.BenchmarksToTelemetryName[util.BENCHMARK_SKPICTURE_PRINTER],
					"--also-run-disabled-tests",
					"--page-repeat=1", // Only need one run for SKPs.
					"--skp-outdir=" + pathToSkps,
					"--extra-browser-args=" + util.DEFAULT_BROWSER_ARGS,
					"--user-agent=" + decodedPageset.UserAgent,
					"--urls-list=" + decodedPageset.UrlsList,
					"--archive-data-file=" + decodedPageset.ArchiveDataFile,
				}
				// Figure out which browser should be used.
				if *targetPlatform == util.PLATFORM_ANDROID {
					args = append(args, "--browser=android-chromium")
				} else {
					args = append(args, "--browser=exact", "--browser-executable="+chromiumBinary)
				}
				// Set the PYTHONPATH to the pagesets and the telemetry dirs.
				env := []string{
					fmt.Sprintf("PYTHONPATH=%s:%s:%s:$PYTHONPATH", pathToPagesets, util.TelemetryBinariesDir, util.TelemetrySrcDir),
					"DISPLAY=:0",
				}
				skutil.LogErr(
					util.ExecuteCmd("python", args, env, time.Duration(timeoutSecs)*time.Second, nil, nil))

				mutex.RUnlock()

			}
		}()
	}

	if !*worker_common.Local {
		// Start the cleaner.
		go util.ChromeProcessesCleaner(&mutex, *chromeCleanerTimer)
	}

	// Wait for all spawned goroutines to complete.
	wg.Wait()

	// Move, validate and upload all SKP files.
	// List all directories in pathToSkps and copy out the skps.
	skpFileInfos, err := ioutil.ReadDir(pathToSkps)
	if err != nil {
		glog.Errorf("Unable to read %s: %s", pathToSkps, err)
		return
	}
	for _, fileInfo := range skpFileInfos {
		if !fileInfo.IsDir() {
			// We are only interested in directories.
			continue
		}
		skpName := fileInfo.Name()
		// Find the largest layer in this directory.
		layerInfos, err := ioutil.ReadDir(filepath.Join(pathToSkps, skpName))
		if err != nil {
			glog.Errorf("Unable to read %s: %s", filepath.Join(pathToSkps, skpName), err)
		}
		if len(layerInfos) > 0 {
			largestLayerInfo := layerInfos[0]
			for _, layerInfo := range layerInfos {
				fmt.Println(layerInfo.Size())
				if layerInfo.Size() > largestLayerInfo.Size() {
					largestLayerInfo = layerInfo
				}
			}
			// Only save SKPs greater than 6000 bytes. Less than that are probably
			// malformed.
			if largestLayerInfo.Size() > 6000 {
				layerPath := filepath.Join(pathToSkps, skpName, largestLayerInfo.Name())
				skutil.Rename(layerPath, filepath.Join(pathToSkps, skpName+".skp"))
			} else {
				glog.Warningf("Skipping %s because size was less than 5000 bytes", skpName)
			}
		}
		// We extracted what we needed from the directory, now delete it.
		skutil.RemoveAll(filepath.Join(pathToSkps, skpName))
	}

	glog.Info("Calling remove_invalid_skps.py")
	// Sync Skia tree.
	skutil.LogErr(util.SyncDir(util.SkiaTreeDir))
	// Build tools.
	skutil.LogErr(util.BuildSkiaTools())
	// Run remove_invalid_skps.py
	pathToRemoveSKPs := filepath.Join(pathToPyFiles, "remove_invalid_skps.py")
	pathToSKPInfo := filepath.Join(util.SkiaTreeDir, "out", "Release", "skpinfo")
	args := []string{
		pathToRemoveSKPs,
		"--skp_dir=" + pathToSkps,
		"--path_to_skpinfo=" + pathToSKPInfo,
	}
	skutil.LogErr(util.ExecuteCmd("python", args, []string{}, util.REMOVE_INVALID_SKPS_TIMEOUT,
		nil, nil))

	// Write timestamp to the SKPs dir.
	skutil.LogErr(util.CreateTimestampFile(pathToSkps))

	// Upload SKPs dir to Google Storage.
	if err := gs.UploadWorkerArtifacts(util.SKPS_DIR_NAME, filepath.Join(*pagesetType, *chromiumBuild), *workerNum); err != nil {
		glog.Error(err)
		return
	}
}
Example #4
0
func main() {
	defer common.LogPanic()
	common.Init()
	defer util.TimeTrack(time.Now(), "Creating Pagesets")
	defer glog.Flush()
	// Create the task file so that the master knows this worker is still busy.
	skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CREATING_PAGESETS))
	defer util.DeleteTaskFile(util.ACTIVITY_CREATING_PAGESETS)

	// Delete and remake the local pagesets directory.
	pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType)
	skutil.RemoveAll(pathToPagesets)
	skutil.MkdirAll(pathToPagesets, 0700)

	// Get info about the specified pageset type.
	pagesetTypeInfo := util.PagesetTypeToInfo[*pagesetType]
	csvSource := pagesetTypeInfo.CSVSource
	numPages := pagesetTypeInfo.NumPages
	userAgent := pagesetTypeInfo.UserAgent

	// Download the CSV file from Google Storage to a tmp location.
	gs, err := util.NewGsUtil(nil)
	if err != nil {
		glog.Error(err)
		return
	}
	respBody, err := gs.GetRemoteFileContents(csvSource)
	if err != nil {
		glog.Error(err)
		return
	}
	defer skutil.Close(respBody)
	csvFile := filepath.Join(os.TempDir(), filepath.Base(csvSource))
	out, err := os.Create(csvFile)
	if err != nil {
		glog.Errorf("Unable to create file %s: %s", csvFile, err)
		return
	}
	defer skutil.Close(out)
	defer skutil.Remove(csvFile)
	if _, err = io.Copy(out, respBody); err != nil {
		glog.Error(err)
		return
	}

	// Figure out which pagesets this worker should generate.
	numPagesPerSlave := numPages / util.NUM_WORKERS
	startNum := (*workerNum-1)*numPagesPerSlave + 1
	endNum := *workerNum * numPagesPerSlave

	// Construct path to the create_page_set.py python script.
	_, currentFile, _, _ := runtime.Caller(0)
	createPageSetScript := filepath.Join(
		filepath.Dir((filepath.Dir(filepath.Dir(filepath.Dir(currentFile))))),
		"py", "create_page_set.py")

	// Execute the create_page_set.py python script.
	timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CreatePagesetsTimeoutSecs
	for currNum := startNum; currNum <= endNum; currNum++ {
		args := []string{
			createPageSetScript,
			"-s", strconv.Itoa(currNum),
			"-e", strconv.Itoa(currNum),
			"-c", csvFile,
			"-p", *pagesetType,
			"-u", userAgent,
			"-o", pathToPagesets,
		}
		if err := util.ExecuteCmd("python", args, []string{}, time.Duration(timeoutSecs)*time.Second, nil, nil); err != nil {
			glog.Error(err)
			return
		}
	}
	// Write timestamp to the pagesets dir.
	skutil.LogErr(util.CreateTimestampFile(pathToPagesets))

	// Upload pagesets dir to Google Storage.
	if err := gs.UploadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}
}
Example #5
0
func main() {
	common.Init()
	defer util.TimeTrack(time.Now(), "Capturing Archives")
	defer glog.Flush()

	// Create the task file so that the master knows this worker is still busy.
	skutil.LogErr(util.CreateTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES))
	defer util.DeleteTaskFile(util.ACTIVITY_CAPTURING_ARCHIVES)

	if *chromiumBuild == "" {
		glog.Error("Must specify --chromium_build")
		return
	}

	// Reset the local chromium checkout.
	if err := util.ResetCheckout(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not reset %s: %s", util.ChromiumSrcDir, err)
		return
	}
	// Sync the local chromium checkout.
	if err := util.SyncDir(util.ChromiumSrcDir); err != nil {
		glog.Errorf("Could not gclient sync %s: %s", util.ChromiumSrcDir, err)
		return
	}

	// Delete and remake the local webpage archives directory.
	pathToArchives := filepath.Join(util.WebArchivesDir, *pagesetType)
	skutil.RemoveAll(pathToArchives)
	skutil.MkdirAll(pathToArchives, 0700)

	// Instantiate GsUtil object.
	gs, err := util.NewGsUtil(nil)
	if err != nil {
		glog.Error(err)
		return
	}

	// Download the specified chromium build if it does not exist locally.
	if err := gs.DownloadChromiumBuild(*chromiumBuild); err != nil {
		glog.Error(err)
		return
	}

	// Download pagesets if they do not exist locally.
	if err := gs.DownloadWorkerArtifacts(util.PAGESETS_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}

	pathToPagesets := filepath.Join(util.PagesetsDir, *pagesetType)
	chromiumBinary := filepath.Join(util.ChromiumBuildsDir, *chromiumBuild, util.BINARY_CHROME)
	recordWprBinary := filepath.Join(util.TelemetryBinariesDir, util.BINARY_RECORD_WPR)
	timeoutSecs := util.PagesetTypeToInfo[*pagesetType].CaptureArchivesTimeoutSecs
	// Loop through all pagesets.
	fileInfos, err := ioutil.ReadDir(pathToPagesets)
	if err != nil {
		glog.Errorf("Unable to read the pagesets dir %s: %s", pathToPagesets, err)
		return
	}
	// TODO(rmistry): Remove this hack once the 1M webpage archives have been captured.
	glog.Infof("The length of fileInfos is: %s", len(fileInfos))
	fileInfos = fileInfos[18500:20000]
	glog.Infof("The fileInfos are: %s", fileInfos)
	for _, fileInfo := range fileInfos {
		pagesetBaseName := filepath.Base(fileInfo.Name())
		if pagesetBaseName == util.TIMESTAMP_FILE_NAME || filepath.Ext(pagesetBaseName) == ".pyc" {
			// Ignore timestamp files and .pyc files.
			continue
		}

		// Convert the filename into a format consumable by the record_wpr binary.
		pagesetArchiveName := strings.TrimSuffix(pagesetBaseName, filepath.Ext(pagesetBaseName))
		pagesetPath := filepath.Join(pathToPagesets, fileInfo.Name())

		glog.Infof("===== Processing %s =====", pagesetPath)
		args := []string{
			"--extra-browser-args=--disable-setuid-sandbox",
			"--browser=exact",
			"--browser-executable=" + chromiumBinary,
			fmt.Sprintf("%s_page_set", pagesetArchiveName),
			"--page-set-base-dir=" + pathToPagesets,
		}
		env := []string{
			fmt.Sprintf("PYTHONPATH=%s:$PYTHONPATH", pathToPagesets),
			"DISPLAY=:0",
		}
		skutil.LogErr(util.ExecuteCmd(recordWprBinary, args, env, time.Duration(timeoutSecs)*time.Second, nil, nil))
	}

	// Write timestamp to the webpage archives dir.
	skutil.LogErr(util.CreateTimestampFile(pathToArchives))

	// Upload webpage archives dir to Google Storage.
	if err := gs.UploadWorkerArtifacts(util.WEB_ARCHIVES_DIR_NAME, *pagesetType, *workerNum); err != nil {
		glog.Error(err)
		return
	}
}