// NewFileDiffStore initializes and returns a file based implementation of
// DiffStore. The optional http.Client is used to make HTTP requests to Google
// Storage. If nil is supplied then a default client is used. The baseDir is
// the local base directory where the DEFAULT_IMG_DIR_NAME,
// DEFAULT_DIFF_DIR_NAME and the DEFAULT_DIFFMETRICS_DIR_NAME directories
// exist. gsBucketName is the bucket images will be downloaded from.
// storageBaseDir is the directory in the bucket (if empty
// DEFAULT_GS_IMG_DIR_NAME is used). workerPoolSize is the max number of
// simultaneous goroutines that will be created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to allocate image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan *diff.DigestFailure, 10)

	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_STATUS_DIR_NAME)))
	failureDB, err := bolt.Open(filepath.Join(statusDir, FAILUREDB_NAME), 0600, nil)
	if err != nil {
		return nil, fmt.Errorf("Unable to open failuredb: %s", err)
	}

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]*diff.DigestFailure{},
		unavailableChan:     unavailableChan,
		failureDB:           failureDB,
	}

	if err := fs.loadDigestFailures(); err != nil {
		return nil, err
	}

	// Persist incoming digest failures and refresh the in-memory view.
	go func() {
		for {
			digestFailure := <-unavailableChan
			if err := fs.addDigestFailure(digestFailure); err != nil {
				glog.Errorf("Unable to store digest failure: %s", err)
			} else if err = fs.loadDigestFailures(); err != nil {
				glog.Errorf("Unable to load failures: %s", err)
			}
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
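// Usage sketch (not part of the original source): constructing a DiffStore with
// the recommended worker pool size. The base directory, bucket name and the
// cacheFactory argument are placeholders that would come from the caller's
// configuration; passing nil for the client selects the default timeout client
// and an empty storageBaseDir selects DEFAULT_GS_IMG_DIR_NAME.
func exampleNewFileDiffStore(cacheFactory CacheFactory) {
	diffStore, err := NewFileDiffStore(nil, "/tmp/gold", "my-gold-bucket", "", cacheFactory, RECOMMENDED_WORKER_POOL_SIZE)
	if err != nil {
		glog.Fatalf("Unable to create DiffStore: %s", err)
	}
	_ = diffStore
}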
// NewFileDiffStore initializes and returns a file based implementation of
// DiffStore. The optional http.Client is used to make HTTP requests to Google
// Storage. If nil is supplied then a default client is used. The baseDir is the
// local base directory where the DEFAULT_IMG_DIR_NAME, DEFAULT_DIFF_DIR_NAME and
// the DEFAULT_DIFFMETRICS_DIR_NAME directories exist. gsBucketName is the bucket
// images will be downloaded from. storageBaseDir is the directory in the
// bucket (if empty DEFAULT_GS_IMG_DIR_NAME is used).
// workerPoolSize is the max number of simultaneous goroutines that will be
// created when running Get or AbsPath.
// Use RECOMMENDED_WORKER_POOL_SIZE if unsure what this value should be.
func NewFileDiffStore(client *http.Client, baseDir, gsBucketName string, storageBaseDir string, cacheFactory CacheFactory, workerPoolSize int) (diff.DiffStore, error) {
	if client == nil {
		client = util.NewTimeoutClient()
	}

	if storageBaseDir == "" {
		storageBaseDir = DEFAULT_GS_IMG_DIR_NAME
	}

	imageCache, err := lru.New(IMAGE_LRU_CACHE_SIZE)
	if err != nil {
		return nil, fmt.Errorf("Unable to allocate image LRU cache: %s", err)
	}

	diffCache := cacheFactory("di", DiffMetricsCodec(0))
	unavailableChan := make(chan string, 10)

	fs := &FileDiffStore{
		client:              client,
		localImgDir:         fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_IMG_DIR_NAME))),
		localDiffDir:        fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFF_DIR_NAME))),
		localDiffMetricsDir: fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_DIFFMETRICS_DIR_NAME))),
		localTempFileDir:    fileutil.Must(fileutil.EnsureDirExists(filepath.Join(baseDir, DEFAULT_TEMPFILE_DIR_NAME))),
		gsBucketName:        gsBucketName,
		storageBaseDir:      storageBaseDir,
		imageCache:          imageCache,
		diffCache:           diffCache,
		unavailableDigests:  map[string]bool{},
		unavailableChan:     unavailableChan,
	}

	// TODO(stephana): Clean this up and store digests to ignore in the
	// database and expose them on the front-end.

	// This is the hash of the empty file (MD5 of zero bytes); ignore it right away.
	unavailableChan <- "d41d8cd98f00b204e9800998ecf8427e"

	// Record digests reported as unavailable, guarding the map with a mutex.
	go func() {
		var ignoreDigest string
		for {
			ignoreDigest = <-unavailableChan
			func() {
				fs.unavailableMutex.Lock()
				defer fs.unavailableMutex.Unlock()
				fs.unavailableDigests[ignoreDigest] = true
			}()
		}
	}()

	fs.activateWorkers(workerPoolSize)
	return fs, nil
}
// NewServer returns an instance that implements the ShareDBServer interface
// that was generated via the sharedb.proto file.
// It can then be used to run an RPC server. See tests for details.
func NewServer(dataDir string) ShareDBServer {
	ret := &rpcServer{
		dataDir:   fileutil.Must(fileutil.EnsureDirExists(dataDir)),
		databases: map[string]*bolt.DB{},
	}
	return ret
}
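// Usage sketch (not part of the original source): creating a ShareDBServer
// backed by a local data directory and registering it with a gRPC server. The
// data directory is a placeholder, and RegisterShareDBServer is assumed to be
// the registration function generated from sharedb.proto; the exact name
// depends on the generated code.
func exampleNewServer() {
	srv := NewServer("/tmp/sharedb-data")
	grpcServer := grpc.NewServer()
	RegisterShareDBServer(grpcServer, srv)
}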
// NewIngester creates an Ingester given the repo and tilestore specified.
func NewIngester(git *gitinfo.GitInfo, tileStoreDir string, datasetName string, ri ResultIngester, nCommits int, minDuration time.Duration, config map[string]string, statusDir, metricName string) (*Ingester, error) {
	var storageService *storage.Service = nil
	var err error = nil
	// Check if the ingestion source is coming from Google Storage.
	if config["GSDir"] != "" {
		storageService, err = storage.New(client)
		if err != nil {
			return nil, fmt.Errorf("Failed to create interface to Google Storage: %s\n", err)
		}
	}

	var processedFiles *leveldb.DB = nil
	if statusDir != "" {
		statusDir = fileutil.Must(fileutil.EnsureDirExists(filepath.Join(statusDir, datasetName)))
		processedFiles, err = leveldb.OpenFile(filepath.Join(statusDir, "processed_files.ldb"), nil)
		if err != nil {
			glog.Fatalf("Unable to open status db at %s: %s", filepath.Join(statusDir, "processed_files.ldb"), err)
		}
	}

	i := &Ingester{
		git:                            git,
		tileStore:                      filetilestore.NewFileTileStore(tileStoreDir, datasetName, -1),
		storage:                        storageService,
		hashToNumber:                   map[string]int{},
		resultIngester:                 ri,
		config:                         config,
		datasetName:                    datasetName,
		elapsedTimePerUpdate:           newGauge(metricName, "update"),
		metricsProcessed:               newCounter(metricName, "processed"),
		lastSuccessfulUpdate:           time.Now(),
		timeSinceLastSucceessfulUpdate: newGauge(metricName, "time-since-last-successful-update"),
		nCommits:                       nCommits,
		minDuration:                    minDuration,
		processedFiles:                 processedFiles,
	}
	i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))

	// Refresh the time-since-last-successful-update gauge once a minute.
	go func() {
		for _ = range time.Tick(time.Minute) {
			i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
		}
	}()
	return i, nil
}
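// Usage sketch (not part of the original source): constructing an Ingester whose
// input comes from Google Storage. The GitInfo and ResultIngester arguments, the
// directory paths, dataset name, metric name and the "GSDir" value are
// placeholders; setting "GSDir" is what triggers creation of the storage client
// in the constructor above.
func exampleNewIngester(git *gitinfo.GitInfo, ri ResultIngester) {
	config := map[string]string{"GSDir": "ingest-data"}
	ingester, err := NewIngester(git, "/tmp/tiles", "nano", ri, 50, 24*time.Hour, config, "/tmp/ingest-status", "nano-ingest")
	if err != nil {
		glog.Fatalf("Unable to create Ingester: %s", err)
	}
	_ = ingester
}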
// NewIngester creates a new ingester with the given id, configured around the
// supplied vcs (version control system), input sources and Processor instance.
func NewIngester(ingesterID string, ingesterConf *sharedconfig.IngesterConfig, vcs vcsinfo.VCS, sources []Source, processor Processor) (*Ingester, error) {
	statusDir := fileutil.Must(fileutil.EnsureDirExists(filepath.Join(ingesterConf.StatusDir, ingesterID)))
	dbName := filepath.Join(statusDir, fmt.Sprintf("%s-status.db", ingesterID))
	statusDB, err := bolt.Open(dbName, 0600, &bolt.Options{Timeout: 1 * time.Second})
	if err != nil {
		return nil, fmt.Errorf("Unable to open db at %s. Got error: %s", dbName, err)
	}

	ret := &Ingester{
		id:          ingesterID,
		vcs:         vcs,
		nCommits:    ingesterConf.NCommits,
		minDuration: time.Duration(ingesterConf.MinDays) * time.Hour * 24,
		runEvery:    ingesterConf.RunEvery.Duration,
		sources:     sources,
		processor:   processor,
		statusDB:    statusDB,
	}
	ret.setupMetrics()
	return ret, nil
}
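// Usage sketch (not part of the original source): creating an Ingester from a
// shared config entry. The ingester id, config, vcs, sources and processor
// values are placeholders supplied by the caller; how ingestion is subsequently
// started is not shown in this snippet.
func exampleNewIngesterFromConfig(conf *sharedconfig.IngesterConfig, vcs vcsinfo.VCS, sources []Source, processor Processor) {
	ingester, err := NewIngester("gold", conf, vcs, sources, processor)
	if err != nil {
		glog.Fatalf("Unable to create Ingester: %s", err)
	}
	_ = ingester
}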