Example #1
// NewFileDiffStore initializes and returns a file-based implementation of
// DiffStore. The optional http.Client is used to make HTTP requests to Google
// Storage. If nil is supplied then a default client is used. The baseDir is
// the local base directory where the DEFAULT_IMG_DIR_NAME,
// DEFAULT_DIFF_DIR_NAME and the DEFAULT_DIFFMETRICS_DIR_NAME directories
// exist. gsBucketName is the bucket images will be downloaded from.
// storageBaseDir is the directory in the bucket (if empty
// DEFAULT_GS_IMG_DIR_NAME is used).  workerPoolSize is the max number of
// simultaneous goroutines that will be created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to alloace image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan *diff.DigestFailure, 10)

	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_STATUS_DIR_NAME)))
	failureDB, err := bolt.Open(filepath.Join(statusDir, FAILUREDB_NAME), 0600, nil)
	if err != nil {
		return nil, fmt.Errorf("Unable to open failuredb: %s", err)
	}

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]*diff.DigestFailure{},
		unavailableChan:     unavailableChan,
		failureDB:           failureDB,
	}

	if err := fs.loadDigestFailures(); err != nil {
		return nil, err
	}
	go func() {
		for {
			digestFailure := <-unavailableChan
			if err := fs.addDigestFailure(digestFailure); err != nil {
				glog.Errorf("Unable to store digest failure: %s", err)
			} else if err = fs.loadDigestFailures(); err != nil {
				glog.Errorf("Unable to load failures: %s", err)
			}
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
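A minimal usage sketch, assuming a hypothetical CacheFactory named myCacheFactory and placeholder directory and bucket names; RECOMMENDED_WORKER_POOL_SIZE is the package constant the doc comment recommends.

// Hedged wiring for NewFileDiffStore; myCacheFactory and the names are stand-ins.
store, err := NewFileDiffStore(nil, "/tmp/diffstore", "my-image-bucket", "", myCacheFactory, RECOMMENDED_WORKER_POOL_SIZE)
if err != nil {
	glog.Fatalf("Unable to create FileDiffStore: %s", err)
}
// store satisfies diff.DiffStore; passing a nil client selects the default timeout client.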
Example #2
// NewFileDiffStore initializes and returns a file-based implementation of
// DiffStore. The optional http.Client is used to make HTTP requests to Google
// Storage. If nil is supplied then a default client is used. The baseDir is the
// local base directory where the DEFAULT_IMG_DIR_NAME, DEFAULT_DIFF_DIR_NAME and
// the DEFAULT_DIFFMETRICS_DIR_NAME directories exist. gsBucketName is the bucket
// images will be downloaded from. storageBaseDir is the directory in the
// bucket (if empty DEFAULT_GS_IMG_DIR_NAME is used).
// workerPoolSize is the max number of simultaneous goroutines that will be
// created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to alloace image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan string, 10)

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]bool{},
		unavailableChan:     unavailableChan,
	}

	// TODO(stephana): Clean this up and store digests to ignore in the
	// database and expose them on the front-end.
	// This is the hash of the empty file; we should ignore it right away.
	unavailableChan <- "d41d8cd98f00b204e9800998ecf8427e"
	go func() {
		var ignoreDigest string
		for {
			ignoreDigest = <-unavailableChan
			func() {
				fs.unavailableMutex.Lock()
				defer fs.unavailableMutex.Unlock()
				fs.unavailableDigests[ignoreDigest] = true
			}()
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
Example #3
// start starts up the BinaryAggregator. It refreshes all the status it needs
// and builds a debug and a release version of Skia for use in analysis. It then
// spawns the aggregation pipeline and a monitoring thread.
func (agg *BinaryAggregator) start() error {
	if _, err := fileutil.EnsureDirExists(config.Aggregator.BinaryFuzzPath); err != nil {
		return err
	}
	if _, err := fileutil.EnsureDirExists(config.Aggregator.ExecutablePath); err != nil {
		return err
	}
	if err := common.BuildClangDM("Debug", true); err != nil {
		return err
	}
	if err := common.BuildClangDM("Release", true); err != nil {
		return err
	}
	// Reset the wait groups and counters to a fresh state.
	agg.monitoringWaitGroup = &sync.WaitGroup{}
	agg.pipelineWaitGroup = &sync.WaitGroup{}
	agg.analysisCount = 0
	agg.uploadCount = 0
	agg.bugReportCount = 0
	agg.monitoringWaitGroup.Add(1)
	go agg.scanForNewCandidates()

	numAnalysisProcesses := config.Aggregator.NumAnalysisProcesses
	if numAnalysisProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores
		numAnalysisProcesses = 20
	}
	for i := 0; i < numAnalysisProcesses; i++ {
		agg.pipelineWaitGroup.Add(1)
		go agg.waitForAnalysis(i, analyzeSkp)
	}

	numUploadProcesses := config.Aggregator.NumUploadProcesses
	if numUploadProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores/number
		// of aggregation processes
		numUploadProcesses = 5
	}
	for i := 0; i < numUploadProcesses; i++ {
		agg.pipelineWaitGroup.Add(1)
		go agg.waitForUploads(i)
	}
	agg.pipelineWaitGroup.Add(1)
	go agg.waitForBugReporting()
	agg.pipelineShutdown = make(chan bool, numAnalysisProcesses+numUploadProcesses+1)
	// start background routine to monitor queue details
	agg.monitoringWaitGroup.Add(1)
	go agg.monitorStatus(numAnalysisProcesses, numUploadProcesses)
	return nil
}
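A hedged call-site sketch; constructing a BinaryAggregator is not shown in the source, so agg below is a placeholder for a fully configured value.

// Hypothetical call site; agg must be a configured *BinaryAggregator.
if err := agg.start(); err != nil {
	glog.Fatalf("Could not start the aggregation pipeline: %s", err)
}
// The pipeline goroutines are now tracked by pipelineWaitGroup and
// monitoringWaitGroup for a later orderly shutdown.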
Example #4
// NewServer returns an instance that implements the ShareDBServer interface that
// was generated via the sharedb.proto file.
// It can then be used to run an RPC server. See tests for details.
func NewServer(dataDir string) ShareDBServer {
	ret := &rpcServer{
		dataDir:   fileutil.Must(fileutil.EnsureDirExists(dataDir)),
		databases: map[string]*bolt.DB{},
	}
	return ret
}
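A hedged serving sketch: it assumes the standard grpc-go code generated from sharedb.proto, which would include a RegisterShareDBServer helper; the port and data directory are placeholders.

lis, err := net.Listen("tcp", ":9090") // hypothetical port
if err != nil {
	glog.Fatalf("Failed to listen: %s", err)
}
grpcServer := grpc.NewServer()
// RegisterShareDBServer is the registration helper grpc-go would generate.
RegisterShareDBServer(grpcServer, NewServer("/tmp/sharedb-data"))
if err := grpcServer.Serve(lis); err != nil {
	glog.Fatalf("RPC server exited: %s", err)
}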
Example #5
func writeFlagsToConfig() error {
	// Check the required ones and terminate if they are not provided
	for _, f := range requiredFlags {
		if flag.Lookup(f).Value.String() == "" {
			return fmt.Errorf("Required flag %s is empty.", f)
		}
	}
	var err error
	config.Generator.AflOutputPath, err = fileutil.EnsureDirExists(*aflOutputPath)
	if err != nil {
		return err
	}
	config.Generator.SkiaRoot, err = fileutil.EnsureDirExists(*skiaRoot)
	if err != nil {
		return err
	}
	config.Generator.AflRoot, err = fileutil.EnsureDirExists(*aflRoot)
	if err != nil {
		return err
	}
	config.Generator.ClangPath = *clangPath
	config.Generator.ClangPlusPlusPath = *clangPlusPlusPath
	config.Generator.NumFuzzProcesses = *numFuzzProcesses

	config.Aggregator.Bucket = *bucket
	config.Aggregator.BinaryFuzzPath, err = fileutil.EnsureDirExists(*binaryFuzzPath)
	if err != nil {
		return err
	}
	config.Aggregator.ExecutablePath, err = fileutil.EnsureDirExists(*executablePath)
	if err != nil {
		return err
	}
	config.Aggregator.NumAnalysisProcesses = *numAnalysisProcesses
	config.Aggregator.NumUploadProcesses = *numUploadProcesses
	config.Aggregator.StatusPeriod = *statusPeriod
	config.Aggregator.RescanPeriod = *rescanPeriod
	config.Aggregator.AnalysisTimeout = *analysisTimeout
	return nil
}
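A sketch of the assumed call site: flags must be parsed before writeFlagsToConfig copies them into the config. This wiring is not part of the source above.

flag.Parse()
if err := writeFlagsToConfig(); err != nil {
	// An empty required flag or an uncreatable directory lands here.
	glog.Fatalf("Problem with configuration: %s", err)
}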
Example #6
func writeFlagsToConfig() error {
	// Check the required ones and terminate if they are not provided
	for _, f := range requiredFlags {
		if flag.Lookup(f).Value.String() == "" {
			return fmt.Errorf("Required flag %s is empty.", f)
		}
	}
	var err error
	config.Generator.SkiaRoot, err = fileutil.EnsureDirExists(*skiaRoot)
	if err != nil {
		return err
	}
	config.Generator.ClangPath = *clangPath
	config.Generator.ClangPlusPlusPath = *clangPlusPlusPath
	config.Aggregator.Bucket = *bucket
	return nil
}
Example #7
// NewIngester creates an Ingester given the repo and tilestore specified.
func NewIngester(git *gitinfo.GitInfo, tileStoreDir string, datasetName string, ri ResultIngester, nCommits int, minDuration time.Duration, config map[string]string, statusDir, metricName string) (*Ingester, error) {
	var storageService *storage.Service
	var err error
	// Check whether the ingestion source is coming from Google Storage.
	if config["GSDir"] != "" {
		storageService, err = storage.New(client)
		if err != nil {
			return nil, fmt.Errorf("Failed to create interace to Google Storage: %s\n", err)
		}
	}
	var processedFiles *leveldb.DB = nil
	if statusDir != "" {
		statusDir = fileutil.Must(fileutil.EnsureDirExists(filepath.Join(statusDir, datasetName)))
		processedFiles, err = leveldb.OpenFile(filepath.Join(statusDir, "processed_files.ldb"), nil)
		if err != nil {
			glog.Fatalf("Unable to open status db at %s: %s", filepath.Join(statusDir, "processed_files.ldb"), err)
		}
	}

	i := &Ingester{
		git:                            git,
		tileStore:                      filetilestore.NewFileTileStore(tileStoreDir, datasetName, -1),
		storage:                        storageService,
		hashToNumber:                   map[string]int{},
		resultIngester:                 ri,
		config:                         config,
		datasetName:                    datasetName,
		elapsedTimePerUpdate:           newGauge(metricName, "update"),
		metricsProcessed:               newCounter(metricName, "processed"),
		lastSuccessfulUpdate:           time.Now(),
		timeSinceLastSucceessfulUpdate: newGauge(metricName, "time-since-last-successful-update"),
		nCommits:                       nCommits,
		minDuration:                    minDuration,
		processedFiles:                 processedFiles,
	}

	i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
	go func() {
		for range time.Tick(time.Minute) {
			i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
		}
	}()
	return i, nil
}
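A hedged construction sketch for this variant; git, resultIngester, and the metric name are placeholders, and the function also relies on a package-level HTTP client that is not shown in the excerpt.

conf := map[string]string{"GSDir": "stats-json-v2"} // hypothetical GS directory
ingester, err := NewIngester(git, "/tmp/tilestore", "nano", resultIngester,
	50, 24*time.Hour, conf, "/tmp/ingest-status", "nano-ingest")
if err != nil {
	glog.Fatalf("Unable to create ingester: %s", err)
}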
Example #8
// NewIngester creates a new ingester with the given id and configuration around
// the supplied vcs (version control system), input sources and Processor instance.
func NewIngester(ingesterID string, ingesterConf *sharedconfig.IngesterConfig, vcs vcsinfo.VCS, sources []Source, processor Processor) (*Ingester, error) {
	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(ingesterConf.StatusDir, ingesterID)))
	dbName := filepath.Join(statusDir, fmt.Sprintf("%s-status.db", ingesterID))
	statusDB, err := bolt.Open(dbName, 0600, &bolt.Options{Timeout: 1 * time.Second})
	if err != nil {
		return nil, fmt.Errorf("Unable to open db at %s. Got error: %s", dbName, err)
	}

	ret := &Ingester{
		id:          ingesterID,
		vcs:         vcs,
		nCommits:    ingesterConf.NCommits,
		minDuration: time.Duration(ingesterConf.MinDays) * time.Hour * 24,
		runEvery:    ingesterConf.RunEvery.Duration,
		sources:     sources,
		processor:   processor,
		statusDB:    statusDB,
	}
	ret.setupMetrics()
	return ret, nil
}
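A hedged construction sketch for the newer variant; vcs, sources, and processor are placeholders, and only the config fields the function above actually reads are set.

conf := &sharedconfig.IngesterConfig{
	StatusDir: "/tmp/ingester-status", // hypothetical path
	NCommits:  50,
	MinDays:   3,
}
ingester, err := NewIngester("gold", conf, vcs, sources, processor)
if err != nil {
	glog.Fatalf("Unable to create ingester: %s", err)
}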
Example #9
func writeFlagsToConfig() error {
	// Check the required ones and terminate if they are not provided
	for _, f := range requiredFlags {
		if flag.Lookup(f).Value.String() == "" {
			return fmt.Errorf("Required flag %s is empty.", f)
		}
	}
	var err error
	config.FrontEnd.SkiaRoot, err = fileutil.EnsureDirExists(*skiaRoot)
	if err != nil {
		return err
	}
	config.FrontEnd.BoltDBPath = *boltDBPath
	config.FrontEnd.VersionCheckPeriod = *versionCheckPeriod
	config.Common.ClangPath = *clangPath
	config.Common.ClangPlusPlusPath = *clangPlusPlusPath
	config.Common.DepotToolsPath = *depotToolsPath
	config.GS.Bucket = *bucket
	config.FrontEnd.NumDownloadProcesses = *downloadProcesses
	config.FrontEnd.FuzzSyncPeriod = *fuzzSyncPeriod
	return nil
}
Example #10
func TestPDFProcessor(t *testing.T) {
	testutils.SkipIfShort(t)

	// Get the service account client from meta data or a local config file.
	client, err := auth.NewJWTServiceAccountClient("", auth.DEFAULT_JWT_FILENAME, nil, storage.ScopeFullControl)
	assert.Nil(t, err)

	cacheDir, err := fileutil.EnsureDirExists(CACHE_DIR)
	assert.Nil(t, err)

	// Clean up after the test.
	defer func() {
		defer util.RemoveAll(cacheDir)
		deleteFolderContent(t, TEST_BUCKET, IMAGES_OUT_DIR, client)
		deleteFolderContent(t, TEST_BUCKET, JSON_OUT_DIR, client)
	}()

	// Configure the processor.
	ingesterConf := &sharedconfig.IngesterConfig{
		ExtraParams: map[string]string{
			CONFIG_INPUT_IMAGES_BUCKET:  TEST_BUCKET,
			CONFIG_INPUT_IMAGES_DIR:     IMAGES_IN_DIR,
			CONFIG_OUTPUT_JSON_BUCKET:   TEST_BUCKET,
			CONFIG_OUTPUT_JSON_DIR:      JSON_OUT_DIR,
			CONFIG_OUTPUT_IMAGES_BUCKET: TEST_BUCKET,
			CONFIG_OUTPUT_IMAGES_DIR:    IMAGES_OUT_DIR,
			CONFIG_PDF_CACHEDIR:         cacheDir,
		},
	}
	processor, err := newPDFProcessor(nil, ingesterConf, client)
	assert.Nil(t, err)

	// Load the example file and process it.
	fsResult, err := ingestion.FileSystemResult(TEST_INGESTION_FILE, "./")
	assert.Nil(t, err)

	err = processor.Process(fsResult)
	assert.Nil(t, err)

	// Fetch the json output and parse it.
	pProcessor := processor.(*pdfProcessor)

	// Download the result.
	resultFileName := filepath.Join(CACHE_DIR, "result-file.json")
	assert.Nil(t, pProcessor.download(TEST_BUCKET, JSON_OUT_DIR, fsResult.Name(), resultFileName))

	// Make sure we get the expected result.
	fsResult, err = ingestion.FileSystemResult(TEST_INGESTION_FILE, "./")
	assert.Nil(t, err)
	r, err := fsResult.Open()
	assert.Nil(t, err)
	fsDMResults, err := goldingestion.ParseDMResultsFromReader(r)
	assert.Nil(t, err)

	foundResult, err := ingestion.FileSystemResult(resultFileName, "./")
	assert.Nil(t, err)
	r, err = foundResult.Open()
	assert.Nil(t, err)
	foundDMResults, err := goldingestion.ParseDMResultsFromReader(r)
	assert.Nil(t, err)

	dmResult1 := *fsDMResults
	dmResult2 := *foundDMResults
	dmResult1.Results = nil
	dmResult2.Results = nil
	assert.Equal(t, dmResult1, dmResult2)

	foundIdx := 0
	srcResults := fsDMResults.Results
	tgtResults := foundDMResults.Results
	for _, result := range srcResults {
		assert.True(t, foundIdx < len(tgtResults))
		if result.Options["ext"] == "pdf" {
			for ; (foundIdx < len(tgtResults)) && (result.Key["name"] == tgtResults[foundIdx].Key["name"]); foundIdx++ {
				assert.True(t, tgtResults[foundIdx].Key["rasterizer"] != "")
				delete(tgtResults[foundIdx].Key, "rasterizer")
				assert.Equal(t, result.Key, tgtResults[foundIdx].Key)
				assert.Equal(t, "png", tgtResults[foundIdx].Options["ext"])
			}
		}
	}
	assert.Equal(t, len(foundDMResults.Results), foundIdx)
}
Example #11
// StartBinaryAggregator will find new bad binary fuzzes generated by afl-fuzz and create the
// metadata required for them.
// It does this by searching in the specified AflOutputPath for new crashes and moves them to a
// temporary holding folder (specified by BinaryFuzzPath) for parsing, before uploading them to GCS.
func StartBinaryAggregator(s *storage.Service) error {
	if _, err := fileutil.EnsureDirExists(config.Aggregator.BinaryFuzzPath); err != nil {
		return err
	}
	if _, err := fileutil.EnsureDirExists(config.Aggregator.ExecutablePath); err != nil {
		return err
	}
	if err := BuildClangDM("Debug", true); err != nil {
		return err
	}
	if err := BuildClangDM("Release", true); err != nil {
		return err
	}
	storageService = s

	// For passing the paths of new binaries that should be scanned.
	forAnalysis := make(chan string, 10000)
	// For passing the file names of analyzed fuzzes that should be uploaded from where they rest on
	// disk in config.Aggregator.BinaryFuzzPath.
	forUpload := make(chan uploadPackage, 100)
	// For passing the names of go routines that had to stop.  If the aggregation process fails,
	// everything else will be killed.
	terminated := make(chan string)
	go scanForNewCandidates(forAnalysis, terminated)

	numAnalysisProcesses := config.Aggregator.NumAnalysisProcesses
	if numAnalysisProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores
		numAnalysisProcesses = 20
	}

	for i := 0; i < numAnalysisProcesses; i++ {
		go performAnalysis(i, analyzeSkp, forAnalysis, forUpload, terminated)
	}

	numUploadProcesses := config.Aggregator.NumUploadProcesses
	if numUploadProcesses <= 0 {
		// TODO(kjlubick): Actually make this smart based on the number of cores/number
		// of aggregation processes
		numUploadProcesses = 5
	}

	for i := 0; i < numUploadProcesses; i++ {
		go waitForUploads(i, forUpload, terminated)
	}

	t := time.Tick(config.Aggregator.StatusPeriod)
	for {
		select {
		case <-t:
			// TODO(kjlubick): Keep track of these numbers via metrics so we can use
			// mon.skia.org and write alerts for it.
			glog.Infof("There are %d fuzzes waiting for analysis and %d waiting for upload.", len(forAnalysis), len(forUpload))
			glog.Infof("There are %d aggregation processes alive and %d upload processes alive.", numAnalysisProcesses, numUploadProcesses)
		case deadService := <-terminated:
			glog.Errorf("%s died", deadService)
			if deadService == "scanner" {
				return fmt.Errorf("Ending aggregator: The afl-fuzz scanner died.")
			} else if strings.HasPrefix(deadService, "analyzer") {
				if numAnalysisProcesses--; numAnalysisProcesses <= 0 {
					return fmt.Errorf("Ending aggregator: No more analysis processes alive")
				}
			} else if strings.HasPrefix(deadService, "uploader") {
				if numUploadProcesses--; numUploadProcesses <= 0 {
					return fmt.Errorf("Ending aggregator: No more upload processes alive")
				}
			}
		}
	}
}
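A hedged sketch of a main-style call site, reusing the authenticated-client and storage.New calls that appear elsewhere in these examples; the overall flow here is an assumption.

client, err := auth.NewJWTServiceAccountClient("", auth.DEFAULT_JWT_FILENAME, nil, storage.ScopeFullControl)
if err != nil {
	glog.Fatalf("Unable to create authenticated client: %s", err)
}
svc, err := storage.New(client)
if err != nil {
	glog.Fatalf("Unable to create storage service: %s", err)
}
// StartBinaryAggregator blocks until the pipeline dies, so it runs last.
if err := StartBinaryAggregator(svc); err != nil {
	glog.Fatalf("Aggregator terminated: %s", err)
}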