// NewFileDiffStore initializes and returns a file based implementation of
// DiffStore.
//
// client is the optional http.Client used to make HTTP requests to Google
// Storage; if nil is supplied then util.NewTimeoutClient() is used. baseDir is
// the local base directory that holds the DEFAULT_IMG_DIR_NAME,
// DEFAULT_DIFF_DIR_NAME, DEFAULT_DIFFMETRICS_DIR_NAME,
// DEFAULT_TEMPFILE_DIR_NAME and DEFAULT_STATUS_DIR_NAME directories; each of
// them is passed through fileutil.EnsureDirExists. gsBucketName is the bucket
// images will be downloaded from. storageBaseDir is the directory in the
// bucket (if empty DEFAULT_GS_IMG_DIR_NAME is used). cacheFactory is called
// once to create the diff metrics cache. workerPoolSize is the max number of
// simultaneous goroutines that will be created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
//
// An error is returned if the image LRU cache cannot be allocated, if the
// failure database cannot be opened, or if the previously stored digest
// failures cannot be loaded. In the last case the failure database is closed
// again before returning so the handle is not leaked.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to allocate image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan *diff.DigestFailure, 10)

	// The failure database lives in its own status directory below baseDir.
	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_STATUS_DIR_NAME)))
	failureDB, err := bolt.Open(filepath.Join(statusDir, FAILUREDB_NAME), 0600, nil)
	if err != nil {
		return nil, fmt.Errorf("Unable to open failuredb: %s", err)
	}

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]*diff.DigestFailure{},
		unavailableChan:     unavailableChan,
		failureDB:           failureDB,
	}

	if err := fs.loadDigestFailures(); err != nil {
		// The store is discarded on this path, so nothing else would ever
		// close the database. Close it here to release the handle.
		if closeErr := failureDB.Close(); closeErr != nil {
			glog.Errorf("Unable to close failuredb: %s", closeErr)
		}
		return nil, err
	}

	// Background loop: persist every digest failure reported on the channel
	// and, when that succeeds, reload the failures from the database.
	go func() {
		for {
			digestFailure := <-unavailableChan
			if err := fs.addDigestFailure(digestFailure); err != nil {
				glog.Errorf("Unable to store digest failure: %s", err)
			} else if err = fs.loadDigestFailures(); err != nil {
				glog.Errorf("Unable to load failures: %s", err)
			}
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
// NewFileDiffStore builds a DiffStore implementation that keeps its data on
// the local file system.
//
// client is used for HTTP requests to Google Storage; passing nil selects
// util.NewTimeoutClient(). baseDir is the local directory that contains the
// DEFAULT_IMG_DIR_NAME, DEFAULT_DIFF_DIR_NAME, DEFAULT_DIFFMETRICS_DIR_NAME
// and DEFAULT_TEMPFILE_DIR_NAME sub directories. gsBucketName names the
// bucket images are downloaded from and storageBaseDir the directory inside
// that bucket (DEFAULT_GS_IMG_DIR_NAME when empty). cacheFactory creates the
// diff metrics cache. workerPoolSize is the max number of simultaneous
// goroutines that will be created when running Get or AbsPath; use
// RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}
	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	lruCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to alloace image LRU cache: %s", err)
	}
	metricsCache := cacheFactory("di", DiffMetricsCodec(0))

	// Set up the local working directories below baseDir.
	imgDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME)))
	diffDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME)))
	metricsDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME)))
	tempDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME)))

	// Digests that should be ignored are reported through this channel.
	ignoreCh := make(chan string, 10)

	store := &FileDiffStore{
		client:              client,
		localImgDir:         imgDir,
		localDiffDir:        diffDir,
		localDiffMetricsDir: metricsDir,
		localTempFileDir:    tempDir,
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          lruCache,
		diffCache:           metricsCache,
		unavailableDigests:  map[string]bool{},
		unavailableChan:     ignoreCh,
	}

	// TODO(stephana): Clean this up and store digests to ignore in the
	// database and expose them on the front-end.
	// Seed the channel with the MD5 hash of empty input so that digest is
	// ignored from the start.
	ignoreCh <- "d41d8cd98f00b204e9800998ecf8427e"

	// Background loop: mark every digest received on the channel as
	// unavailable, holding the mutex while the map is written.
	go func() {
		for {
			digest := <-ignoreCh
			store.unavailableMutex.Lock()
			store.unavailableDigests[digest] = true
			store.unavailableMutex.Unlock()
		}
	}()

	store.activateWorkers(workerPoolSize)
	return store, nil
}
Example #3
0
// NewServer creates an rpcServer rooted at dataDir and returns it as a
// ShareDBServer, the interface generated from the sharedb.proto file.
// The directory is passed through fileutil.EnsureDirExists and the server
// starts out with an empty set of open databases.
// The returned value can be used to run an RPC server. See tests for details.
func NewServer(dataDir string) ShareDBServer {
	dir := fileutil.Must(fileutil.EnsureDirExists(dataDir))
	return &rpcServer{
		dataDir:   dir,
		databases: map[string]*bolt.DB{},
	}
}
Example #4
0
// NewIngester creates an Ingester given the repo and tilestore specified.
//
// git is the repository information, tileStoreDir and datasetName identify
// the file tile store, and ri is the ResultIngester stored on the Ingester.
// nCommits and minDuration are stored on the Ingester unchanged. If
// config["GSDir"] is non-empty a Google Storage service is created. If
// statusDir is non-empty, a leveldb database named "processed_files.ldb" is
// opened in the datasetName sub directory of statusDir. metricName is the
// prefix handed to newGauge/newCounter for the metrics of this ingester.
//
// An error is returned if the Google Storage service cannot be created or if
// the status database cannot be opened.
func NewIngester(git *gitinfo.GitInfo, tileStoreDir string, datasetName string, ri ResultIngester, nCommits int, minDuration time.Duration, config map[string]string, statusDir, metricName string) (*Ingester, error) {
	var storageService *storage.Service
	var err error
	// Check if the ingestion source is coming from Google Storage.
	if config["GSDir"] != "" {
		// NOTE(review): client is not a parameter of this function; it is
		// expected to be declared elsewhere in the package.
		storageService, err = storage.New(client)
		if err != nil {
			return nil, fmt.Errorf("Failed to create interface to Google Storage: %s", err)
		}
	}

	var processedFiles *leveldb.DB
	if statusDir != "" {
		statusDir = fileutil.Must(fileutil.EnsureDirExists(filepath.Join(statusDir, datasetName)))
		dbPath := filepath.Join(statusDir, "processed_files.ldb")
		processedFiles, err = leveldb.OpenFile(dbPath, nil)
		if err != nil {
			// Report the failure to the caller instead of terminating the
			// process; this constructor already has an error return.
			return nil, fmt.Errorf("Unable to open status db at %s: %s", dbPath, err)
		}
	}

	i := &Ingester{
		git:                            git,
		tileStore:                      filetilestore.NewFileTileStore(tileStoreDir, datasetName, -1),
		storage:                        storageService,
		hashToNumber:                   map[string]int{},
		resultIngester:                 ri,
		config:                         config,
		datasetName:                    datasetName,
		elapsedTimePerUpdate:           newGauge(metricName, "update"),
		metricsProcessed:               newCounter(metricName, "processed"),
		lastSuccessfulUpdate:           time.Now(),
		timeSinceLastSucceessfulUpdate: newGauge(metricName, "time-since-last-successful-update"),
		nCommits:                       nCommits,
		minDuration:                    minDuration,
		processedFiles:                 processedFiles,
	}

	// Publish the metric once right away, then refresh it every minute.
	i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
	go func() {
		// NOTE(review): lastSuccessfulUpdate is read here without any
		// visible synchronization - confirm how writers elsewhere update it.
		for range time.Tick(time.Minute) {
			i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
		}
	}()
	return i, nil
}
Example #5
0
// NewIngester assembles an Ingester identified by ingesterID from the given
// configuration, vcs (version control system), input sources and Processor.
//
// A bolt database named "<ingesterID>-status.db" is opened (with a one second
// timeout) inside the ingesterID sub directory of ingesterConf.StatusDir; an
// error is returned if it cannot be opened. Metrics are set up via
// setupMetrics before the Ingester is returned.
func NewIngester(ingesterID string, ingesterConf *sharedconfig.IngesterConfig, vcs vcsinfo.VCS, sources []Source, processor Processor) (*Ingester, error) {
	// Each ingester keeps its status database in its own directory.
	dir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(ingesterConf.StatusDir, ingesterID)))
	dbPath := filepath.Join(dir, ingesterID+"-status.db")

	opts := &bolt.Options{Timeout: 1 * time.Second}
	db, err := bolt.Open(dbPath, 0600, opts)
	if err != nil {
		return nil, fmt.Errorf("Unable to open db at %s. Got error: %s", dbPath, err)
	}

	// MinDays is configured in days; convert it to a duration.
	minDur := time.Duration(ingesterConf.MinDays) * time.Hour * 24

	ing := &Ingester{
		id:          ingesterID,
		vcs:         vcs,
		nCommits:    ingesterConf.NCommits,
		minDuration: minDur,
		runEvery:    ingesterConf.RunEvery.Duration,
		sources:     sources,
		processor:   processor,
		statusDB:    db,
	}
	ing.setupMetrics()
	return ing, nil
}