// NewFileDiffStore initializes and returns a file-based implementation of
// DiffStore. The optional http.Client is used to make HTTP requests to Google
// Storage. If nil is supplied then a default client is used. The baseDir is
// the local base directory where the DEFAULT_IMG_DIR_NAME,
// DEFAULT_DIFF_DIR_NAME and the DEFAULT_DIFFMETRICS_DIR_NAME directories
// exist. gsBucketName is the bucket images will be downloaded from.
// storageBaseDir is the directory in the bucket (if empty
// DEFAULT_GS_IMG_DIR_NAME is used). workerPoolSize is the max number of
// simultaneous goroutines that will be created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to allocate image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan *diff.DigestFailure, 10)

	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_STATUS_DIR_NAME)))
	failureDB, err := bolt.Open(filepath.Join(statusDir, FAILUREDB_NAME), 0600, nil)
	if err != nil {
		return nil, fmt.Errorf("Unable to open failuredb: %s", err)
	}

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]*diff.DigestFailure{},
		unavailableChan:     unavailableChan,
		failureDB:           failureDB,
	}

	if err := fs.loadDigestFailures(); err != nil {
		return nil, err
	}

	// Persist incoming digest failures and refresh the in-memory view of them.
	go func() {
		for {
			digestFailure := <-unavailableChan
			if err := fs.addDigestFailure(digestFailure); err != nil {
				glog.Errorf("Unable to store digest failure: %s", err)
			} else if err = fs.loadDigestFailures(); err != nil {
				glog.Errorf("Unable to load failures: %s", err)
			}
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
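// Illustrative sketch (not from the original source): how a caller might wire up
// NewFileDiffStore. The base directory and bucket name below are made-up values,
// and the cacheFactory is assumed to be supplied by the caller; only the
// NewFileDiffStore signature and RECOMMENDED_WORKER_POOL_SIZE above are taken
// from the code itself.
func exampleNewFileDiffStore(cacheFactory CacheFactory) (diff.DiffStore, error) {
	// Passing a nil client falls back to util.NewTimeoutClient(), and an empty
	// storageBaseDir falls back to DEFAULT_GS_IMG_DIR_NAME, as handled above.
	return NewFileDiffStore(nil, "/tmp/diffstore", "skia-example-gm", "", cacheFactory, RECOMMENDED_WORKER_POOL_SIZE)
}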
// NewFileDiffStore initializes and returns a file-based implementation of
// DiffStore. The optional http.Client is used to make HTTP requests to Google
// Storage. If nil is supplied then a default client is used. The baseDir is the
// local base directory where the DEFAULT_IMG_DIR_NAME, DEFAULT_DIFF_DIR_NAME and
// the DEFAULT_DIFFMETRICS_DIR_NAME directories exist. gsBucketName is the bucket
// images will be downloaded from. storageBaseDir is the directory in the
// bucket (if empty DEFAULT_GS_IMG_DIR_NAME is used).
// workerPoolSize is the max number of simultaneous goroutines that will be
// created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to allocate image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan string, 10)

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]bool{},
		unavailableChan:     unavailableChan,
	}

	// TODO(stephana): Clean this up and store digests to ignore in the
	// database and expose them on the front-end.

	// This is the hash of the empty file; we should ignore this right away.
	unavailableChan <- "d41d8cd98f00b204e9800998ecf8427e"

	// Record digests that arrive on unavailableChan as unavailable.
	go func() {
		var ignoreDigest string
		for {
			ignoreDigest = <-unavailableChan
			func() {
				fs.unavailableMutex.Lock()
				defer fs.unavailableMutex.Unlock()
				fs.unavailableDigests[ignoreDigest] = true
			}()
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
// start starts up the BinaryAggregator. It refreshes all status it needs and builds
// a debug and a release version of Skia for use in analysis. It then spawns the
// aggregation pipeline and a monitoring thread.
func (agg *BinaryAggregator) start() error {
	if _, err := fileutil.EnsureDirExists(config.Aggregator.BinaryFuzzPath); err != nil {
		return err
	}
	if _, err := fileutil.EnsureDirExists(config.Aggregator.ExecutablePath); err != nil {
		return err
	}
	if err := common.BuildClangDM("Debug", true); err != nil {
		return err
	}
	if err := common.BuildClangDM("Release", true); err != nil {
		return err
	}
	// Set the wait groups to fresh
	agg.monitoringWaitGroup = &sync.WaitGroup{}
	agg.pipelineWaitGroup = &sync.WaitGroup{}
	agg.analysisCount = 0
	agg.uploadCount = 0
	agg.bugReportCount = 0

	agg.monitoringWaitGroup.Add(1)
	go agg.scanForNewCandidates()

	numAnalysisProcesses := config.Aggregator.NumAnalysisProcesses
	if numAnalysisProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores
		numAnalysisProcesses = 20
	}
	for i := 0; i < numAnalysisProcesses; i++ {
		agg.pipelineWaitGroup.Add(1)
		go agg.waitForAnalysis(i, analyzeSkp)
	}

	numUploadProcesses := config.Aggregator.NumUploadProcesses
	if numUploadProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores/number
		// of aggregation processes
		numUploadProcesses = 5
	}
	for i := 0; i < numUploadProcesses; i++ {
		agg.pipelineWaitGroup.Add(1)
		go agg.waitForUploads(i)
	}
	agg.pipelineWaitGroup.Add(1)
	go agg.waitForBugReporting()

	agg.pipelineShutdown = make(chan bool, numAnalysisProcesses+numUploadProcesses+1)

	// start background routine to monitor queue details
	agg.monitoringWaitGroup.Add(1)
	go agg.monitorStatus(numAnalysisProcesses, numUploadProcesses)
	return nil
}
// NewServer returns an instance that implements the ShareDBServer interface that
// was generated via the sharedb.proto file.
// It can then be used to run an RPC server. See tests for details.
func NewServer(dataDir string) ShareDBServer {
	ret := &rpcServer{
		dataDir:   fileutil.Must(fileutil.EnsureDirExists(dataDir)),
		databases: map[string]*bolt.DB{},
	}
	return ret
}
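// Illustrative sketch only: serving the ShareDBServer returned by NewServer over
// gRPC. The listen address, the grpc package usage and the RegisterShareDBServer
// call are assumptions based on the generated sharedb.proto bindings mentioned
// above; the tests referenced in the comment are the authoritative usage.
func exampleRunShareDB(dataDir string) error {
	lis, err := net.Listen("tcp", ":9000")
	if err != nil {
		return err
	}
	grpcServer := grpc.NewServer()
	// RegisterShareDBServer is the (assumed) registration helper generated from sharedb.proto.
	RegisterShareDBServer(grpcServer, NewServer(dataDir))
	return grpcServer.Serve(lis)
}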
func writeFlagsToConfig() error {
	// Check the required ones and terminate if they are not provided
	for _, f := range requiredFlags {
		if flag.Lookup(f).Value.String() == "" {
			return fmt.Errorf("Required flag %s is empty.", f)
		}
	}
	var err error
	config.Generator.AflOutputPath, err = fileutil.EnsureDirExists(*aflOutputPath)
	if err != nil {
		return err
	}
	config.Generator.SkiaRoot, err = fileutil.EnsureDirExists(*skiaRoot)
	if err != nil {
		return err
	}
	config.Generator.AflRoot, err = fileutil.EnsureDirExists(*aflRoot)
	if err != nil {
		return err
	}
	config.Generator.ClangPath = *clangPath
	config.Generator.ClangPlusPlusPath = *clangPlusPlusPath
	config.Generator.NumFuzzProcesses = *numFuzzProcesses
	config.Aggregator.Bucket = *bucket
	config.Aggregator.BinaryFuzzPath, err = fileutil.EnsureDirExists(*binaryFuzzPath)
	if err != nil {
		return err
	}
	config.Aggregator.ExecutablePath, err = fileutil.EnsureDirExists(*executablePath)
	if err != nil {
		return err
	}
	config.Aggregator.NumAnalysisProcesses = *numAnalysisProcesses
	config.Aggregator.NumUploadProcesses = *numUploadProcesses
	config.Aggregator.StatusPeriod = *statusPeriod
	config.Aggregator.RescanPeriod = *rescanPeriod
	config.Aggregator.AnalysisTimeout = *analysisTimeout
	return nil
}
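// Illustrative sketch only: the typical call order around writeFlagsToConfig.
// Flags must be parsed before their values are copied into the config package;
// the glog.Fatalf call and the trailing comment are assumptions for the sketch.
func exampleMain() {
	flag.Parse()
	if err := writeFlagsToConfig(); err != nil {
		glog.Fatalf("Problem with configuration: %s", err)
	}
	// ... start the fuzzer processes using the now-populated config package ...
}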
func writeFlagsToConfig() error {
	// Check the required ones and terminate if they are not provided
	for _, f := range requiredFlags {
		if flag.Lookup(f).Value.String() == "" {
			return fmt.Errorf("Required flag %s is empty.", f)
		}
	}
	var err error
	config.Generator.SkiaRoot, err = fileutil.EnsureDirExists(*skiaRoot)
	if err != nil {
		return err
	}
	config.Generator.ClangPath = *clangPath
	config.Generator.ClangPlusPlusPath = *clangPlusPlusPath
	config.Aggregator.Bucket = *bucket
	return nil
}
// NewIngester creates an Ingester given the repo and tilestore specified.
func NewIngester(git *gitinfo.GitInfo, tileStoreDir string, datasetName string, ri ResultIngester, nCommits int, minDuration time.Duration, config map[string]string, statusDir, metricName string) (*Ingester, error) {
	var storageService *storage.Service = nil
	var err error = nil
	// check if the ingestion source is coming from Google Storage
	if config["GSDir"] != "" {
		storageService, err = storage.New(client)
		if err != nil {
			return nil, fmt.Errorf("Failed to create interface to Google Storage: %s\n", err)
		}
	}

	var processedFiles *leveldb.DB = nil
	if statusDir != "" {
		statusDir = fileutil.Must(fileutil.EnsureDirExists(filepath.Join(statusDir, datasetName)))
		processedFiles, err = leveldb.OpenFile(filepath.Join(statusDir, "processed_files.ldb"), nil)
		if err != nil {
			glog.Fatalf("Unable to open status db at %s: %s", filepath.Join(statusDir, "processed_files.ldb"), err)
		}
	}

	i := &Ingester{
		git:                            git,
		tileStore:                      filetilestore.NewFileTileStore(tileStoreDir, datasetName, -1),
		storage:                        storageService,
		hashToNumber:                   map[string]int{},
		resultIngester:                 ri,
		config:                         config,
		datasetName:                    datasetName,
		elapsedTimePerUpdate:           newGauge(metricName, "update"),
		metricsProcessed:               newCounter(metricName, "processed"),
		lastSuccessfulUpdate:           time.Now(),
		timeSinceLastSucceessfulUpdate: newGauge(metricName, "time-since-last-successful-update"),
		nCommits:                       nCommits,
		minDuration:                    minDuration,
		processedFiles:                 processedFiles,
	}
	i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))

	// Periodically refresh the time-since-last-successful-update gauge.
	go func() {
		for _ = range time.Tick(time.Minute) {
			i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
		}
	}()
	return i, nil
}
// NewIngester creates a new ingester with the given id and configuration around
// the supplied vcs (version control system), input sources and Processor instance.
func NewIngester(ingesterID string, ingesterConf *sharedconfig.IngesterConfig, vcs vcsinfo.VCS, sources []Source, processor Processor) (*Ingester, error) {
	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(ingesterConf.StatusDir, ingesterID)))
	dbName := filepath.Join(statusDir, fmt.Sprintf("%s-status.db", ingesterID))
	statusDB, err := bolt.Open(dbName, 0600, &bolt.Options{Timeout: 1 * time.Second})
	if err != nil {
		return nil, fmt.Errorf("Unable to open db at %s. Got error: %s", dbName, err)
	}

	ret := &Ingester{
		id:          ingesterID,
		vcs:         vcs,
		nCommits:    ingesterConf.NCommits,
		minDuration: time.Duration(ingesterConf.MinDays) * time.Hour * 24,
		runEvery:    ingesterConf.RunEvery.Duration,
		sources:     sources,
		processor:   processor,
		statusDB:    statusDB,
	}
	ret.setupMetrics()
	return ret, nil
}
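// Illustrative sketch only: constructing an Ingester from an already-parsed
// configuration. The ingester id and the vcs/source/processor values are assumed
// to be supplied by the caller; only the NewIngester signature above is taken
// from the code itself.
func exampleNewIngester(conf *sharedconfig.IngesterConfig, vcs vcsinfo.VCS, src Source, proc Processor) (*Ingester, error) {
	// conf.StatusDir does not need to exist beforehand; NewIngester creates the
	// per-ingester status directory via fileutil.EnsureDirExists.
	return NewIngester("gold-example", conf, vcs, []Source{src}, proc)
}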
func writeFlagsToConfig() error {
	// Check the required ones and terminate if they are not provided
	for _, f := range requiredFlags {
		if flag.Lookup(f).Value.String() == "" {
			return fmt.Errorf("Required flag %s is empty.", f)
		}
	}
	var err error
	config.FrontEnd.SkiaRoot, err = fileutil.EnsureDirExists(*skiaRoot)
	if err != nil {
		return err
	}
	config.FrontEnd.BoltDBPath = *boltDBPath
	config.FrontEnd.VersionCheckPeriod = *versionCheckPeriod
	config.Common.ClangPath = *clangPath
	config.Common.ClangPlusPlusPath = *clangPlusPlusPath
	config.Common.DepotToolsPath = *depotToolsPath
	config.GS.Bucket = *bucket
	config.FrontEnd.NumDownloadProcesses = *downloadProcesses
	config.FrontEnd.FuzzSyncPeriod = *fuzzSyncPeriod
	return nil
}
func TestPDFProcessor(t *testing.T) {
	testutils.SkipIfShort(t)

	// Get the service account client from meta data or a local config file.
	client, err := auth.NewJWTServiceAccountClient("", auth.DEFAULT_JWT_FILENAME, nil, storage.ScopeFullControl)
	assert.Nil(t, err)

	cacheDir, err := fileutil.EnsureDirExists(CACHE_DIR)
	assert.Nil(t, err)

	// Clean up after the test.
	defer func() {
		defer util.RemoveAll(cacheDir)
		deleteFolderContent(t, TEST_BUCKET, IMAGES_OUT_DIR, client)
		deleteFolderContent(t, TEST_BUCKET, JSON_OUT_DIR, client)
	}()

	// Configure the processor.
	ingesterConf := &sharedconfig.IngesterConfig{
		ExtraParams: map[string]string{
			CONFIG_INPUT_IMAGES_BUCKET:  TEST_BUCKET,
			CONFIG_INPUT_IMAGES_DIR:     IMAGES_IN_DIR,
			CONFIG_OUTPUT_JSON_BUCKET:   TEST_BUCKET,
			CONFIG_OUTPUT_JSON_DIR:      JSON_OUT_DIR,
			CONFIG_OUTPUT_IMAGES_BUCKET: TEST_BUCKET,
			CONFIG_OUTPUT_IMAGES_DIR:    IMAGES_OUT_DIR,
			CONFIG_PDF_CACHEDIR:         cacheDir,
		},
	}
	processor, err := newPDFProcessor(nil, ingesterConf, client)
	assert.Nil(t, err)

	// Load the example file and process it.
	fsResult, err := ingestion.FileSystemResult(TEST_INGESTION_FILE, "./")
	assert.Nil(t, err)

	err = processor.Process(fsResult)
	assert.Nil(t, err)

	// Fetch the json output and parse it.
	pProcessor := processor.(*pdfProcessor)

	// download the result.
	resultFileName := filepath.Join(CACHE_DIR, "result-file.json")
	assert.Nil(t, pProcessor.download(TEST_BUCKET, JSON_OUT_DIR, fsResult.Name(), resultFileName))

	// Make sure we get the expected result.
	fsResult, err = ingestion.FileSystemResult(TEST_INGESTION_FILE, "./")
	assert.Nil(t, err)
	r, err := fsResult.Open()
	assert.Nil(t, err)
	fsDMResults, err := goldingestion.ParseDMResultsFromReader(r)
	assert.Nil(t, err)

	foundResult, err := ingestion.FileSystemResult(resultFileName, "./")
	assert.Nil(t, err)
	r, err = foundResult.Open()
	assert.Nil(t, err)
	foundDMResults, err := goldingestion.ParseDMResultsFromReader(r)
	assert.Nil(t, err)

	// Compare everything except the individual results first.
	dmResult1 := *fsDMResults
	dmResult2 := *foundDMResults
	dmResult1.Results = nil
	dmResult2.Results = nil
	assert.Equal(t, dmResult1, dmResult2)

	// Every pdf input result should show up as one or more rasterized png results.
	foundIdx := 0
	srcResults := fsDMResults.Results
	tgtResults := foundDMResults.Results
	for _, result := range srcResults {
		assert.True(t, foundIdx < len(tgtResults))
		if result.Options["ext"] == "pdf" {
			for ; (foundIdx < len(tgtResults)) && (result.Key["name"] == tgtResults[foundIdx].Key["name"]); foundIdx++ {
				assert.True(t, tgtResults[foundIdx].Key["rasterizer"] != "")
				delete(tgtResults[foundIdx].Key, "rasterizer")
				assert.Equal(t, result.Key, tgtResults[foundIdx].Key)
				assert.Equal(t, "png", tgtResults[foundIdx].Options["ext"])
			}
		}
	}
	assert.Equal(t, len(foundDMResults.Results), foundIdx)
}
// StartBinaryAggregator will find new bad binary fuzzes generated by afl-fuzz and create the
// metadata required for them.
// It does this by searching in the specified AflOutputPath for new crashes and moves them to a
// temporary holding folder (specified by BinaryFuzzPath) for parsing, before uploading them to GCS.
func StartBinaryAggregator(s *storage.Service) error {
	if _, err := fileutil.EnsureDirExists(config.Aggregator.BinaryFuzzPath); err != nil {
		return err
	}
	if _, err := fileutil.EnsureDirExists(config.Aggregator.ExecutablePath); err != nil {
		return err
	}
	if err := BuildClangDM("Debug", true); err != nil {
		return err
	}
	if err := BuildClangDM("Release", true); err != nil {
		return err
	}
	storageService = s

	// For passing the paths of new binaries that should be scanned.
	forAnalysis := make(chan string, 10000)

	// For passing the file names of analyzed fuzzes that should be uploaded from where they rest on
	// disk in config.Aggregator.BinaryFuzzPath
	forUpload := make(chan uploadPackage, 100)

	// For passing the names of go routines that had to stop. If the aggregation process fails,
	// everything else will be killed.
	terminated := make(chan string)
	go scanForNewCandidates(forAnalysis, terminated)

	numAnalysisProcesses := config.Aggregator.NumAnalysisProcesses
	if numAnalysisProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores
		numAnalysisProcesses = 20
	}
	for i := 0; i < numAnalysisProcesses; i++ {
		go performAnalysis(i, analyzeSkp, forAnalysis, forUpload, terminated)
	}

	numUploadProcesses := config.Aggregator.NumUploadProcesses
	if numUploadProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores/number
		// of aggregation processes
		numUploadProcesses = 5
	}
	for i := 0; i < numUploadProcesses; i++ {
		go waitForUploads(i, forUpload, terminated)
	}

	t := time.Tick(config.Aggregator.StatusPeriod)
	for {
		select {
		case _ = <-t:
			// TODO(kjlubick): Keep track of these numbers via metrics so we can use
			// mon.skia.org and write alerts for it.
			glog.Infof("There are %d fuzzes waiting for analysis and %d waiting for upload.", len(forAnalysis), len(forUpload))
			glog.Infof("There are %d aggregation processes alive and %d upload processes alive.", numAnalysisProcesses, numUploadProcesses)
		case deadService := <-terminated:
			glog.Errorf("%s died", deadService)
			if deadService == "scanner" {
				return fmt.Errorf("Ending aggregator: The afl-fuzz scanner died.")
			} else if strings.HasPrefix(deadService, "analyzer") {
				if numAnalysisProcesses--; numAnalysisProcesses <= 0 {
					return fmt.Errorf("Ending aggregator: No more analysis processes alive")
				}
			} else if strings.HasPrefix(deadService, "uploader") {
				if numUploadProcesses--; numUploadProcesses <= 0 {
					return fmt.Errorf("Ending aggregator: No more upload processes alive")
				}
			}
		}
	}
}
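// Illustrative sketch only: StartBinaryAggregator blocks until the scanner or all
// analysis/upload goroutines have died, so a caller would typically run it last,
// after building a Google Storage service. The authenticated http.Client is assumed
// to be provided by the caller; only StartBinaryAggregator's signature and the
// storage.New usage seen elsewhere in this code are taken from the source.
func exampleRunAggregator(client *http.Client) {
	storageService, err := storage.New(client)
	if err != nil {
		glog.Fatalf("Could not create storage client: %s", err)
	}
	// Blocks for the lifetime of the aggregation pipeline; any returned error is fatal.
	glog.Fatal(StartBinaryAggregator(storageService))
}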