// Will need a local valid google_storage_token.data file with read write access
// to run the below test.
func Auth_TestUploadWorkerArtifacts(t *testing.T) {
	client, _ := GetOAuthClient()
	gs, err := NewGsUtil(client)
	if err != nil {
		t.Errorf("Unexpected error: %s", err)
	}

	testDir := "testupload"
	testPagesetType := "10ktest"
	StorageDir = "testdata"
	if err := gs.UploadWorkerArtifacts(testDir, testPagesetType, 1); err != nil {
		t.Errorf("Unexpected error: %s", err)
	}

	// Examine contents of the remote directory and then clean it up.
	service, err := storage.New(gs.client)
	if err != nil {
		t.Errorf("Unexpected error: %s", err)
	}
	gsDir := filepath.Join(testDir, testPagesetType, "slave1")
	resp, err := service.Objects.List(GS_BUCKET_NAME).Prefix(gsDir + "/").Do()
	if err != nil {
		t.Errorf("Unexpected error: %s", err)
	}
	assert.Equal(t, 3, len(resp.Items))
	for index, fileName := range []string{"TIMESTAMP", "alexa1-1.py", "alexa2-2.py"} {
		filePath := fmt.Sprintf("%s/%s", gsDir, fileName)
		defer util.LogErr(service.Objects.Delete(GS_BUCKET_NAME, filePath).Do())
		assert.Equal(t, filePath, resp.Items[index].Name)
	}
}
func TestGetGSResultFileLocations(t *testing.T) {
	testutils.SkipIfShort(t)

	storage, err := storage.New(http.DefaultClient)
	assert.Nil(t, err)

	startTS := time.Date(2014, time.December, 10, 0, 0, 0, 0, time.UTC).Unix()
	endTS := time.Date(2014, time.December, 10, 23, 59, 59, 0, time.UTC).Unix()

	// TODO(stephana): Switch this to a dedicated test bucket, so we are not
	// in danger of removing it.
	resultFiles, err := getGSResultsFileLocations(startTS, endTS, storage, "chromium-skia-gm", "dm-json-v1")
	assert.Nil(t, err)

	// Read the expected list of files and compare them.
	content, err := ioutil.ReadFile("./testdata/filelist_dec_10.txt")
	assert.Nil(t, err)
	lines := strings.Split(strings.TrimSpace(string(content)), "\n")
	sort.Strings(lines)

	resultNames := make([]string, len(resultFiles))
	for idx, rf := range resultFiles {
		resultNames[idx] = rf.Name
	}
	sort.Strings(resultNames)

	assert.Equal(t, len(lines), len(resultNames))
	assert.Equal(t, lines, resultNames)
}
func init() {
	var err error
	st, err = storage.New(util.NewTimeoutClient())
	if err != nil {
		panic("Can't construct HTTP client")
	}
	ingester.Register(config.CONSTRUCTOR_NANO_TRYBOT, NewTrybotResultIngester)
}
func (b *Build) Upload(version string, filename string) error {
	file, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	}

	svc, err := storage.New(oauthClient)
	if err != nil {
		return err
	}
	obj := &storage.Object{
		Acl:  []*storage.ObjectAccessControl{{Entity: "allUsers", Role: "READER"}},
		Name: filename,
	}
	_, err = svc.Objects.Insert(*storageBucket, obj).Media(bytes.NewReader(file)).Do()
	if err != nil {
		return err
	}

	sum := fmt.Sprintf("%x", sha1.Sum(file))

	kind := "unknown"
	switch {
	case b.Source:
		kind = "source"
	case strings.HasSuffix(filename, ".tar.gz"), strings.HasSuffix(filename, ".zip"):
		kind = "archive"
	case strings.HasSuffix(filename, ".msi"), strings.HasSuffix(filename, ".pkg"):
		kind = "installer"
	}
	req, err := json.Marshal(File{
		Filename: filename,
		Version:  version,
		OS:       b.OS,
		Arch:     b.Arch,
		Checksum: sum,
		Size:     len(file),
		Kind:     kind,
	})
	if err != nil {
		return err
	}
	u := fmt.Sprintf("%s?%s", *uploadURL, url.Values{"key": []string{builderKey}}.Encode())
	resp, err := http.Post(u, "application/json", bytes.NewReader(req))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("upload status: %v", resp.Status)
	}

	return nil
}
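// A minimal usage sketch for Upload above. The Build fields OS and Arch come
// from the snippet; the version string and file name here are illustrative
// placeholders, not values used by the real builder.
func uploadExample() {
	b := &Build{OS: "linux", Arch: "amd64"}
	if err := b.Upload("go1.4.1", "go1.4.1.linux-amd64.tar.gz"); err != nil {
		log.Fatalf("upload failed: %v", err)
	}
}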
// NewGsUtil initializes and returns a utility for CT interactions with Google
// Storage. If client is nil then the client from GetOAuthClient is used.
func NewGsUtil(client *http.Client) (*GsUtil, error) {
	if client == nil {
		oauthClient, err := GetOAuthClient()
		if err != nil {
			return nil, err
		}
		client = oauthClient
	}
	service, err := storage.New(client)
	if err != nil {
		return nil, fmt.Errorf("Failed to create interface to Google Storage: %s", err)
	}
	return &GsUtil{client: client, service: service}, nil
}
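// A minimal usage sketch for NewGsUtil above: passing nil makes the helper
// obtain its own OAuth client via GetOAuthClient, per the doc comment. The
// wrapper function name is illustrative.
func newGsUtilExample() (*GsUtil, error) {
	gs, err := NewGsUtil(nil)
	if err != nil {
		return nil, fmt.Errorf("Could not create GsUtil: %s", err)
	}
	return gs, nil
}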
func storageMain(client *http.Client, argv []string) {
	if len(argv) != 2 {
		fmt.Fprintln(os.Stderr, "Usage: storage filename bucket (to upload an object)")
		return
	}
	service, _ := storage.New(client)
	filename := argv[0]
	bucket := argv[1]
	goFile, err := os.Open(filename)
	if err != nil {
		log.Fatalf("error opening %q: %v", filename, err)
	}
	storageObject, err := service.Objects.Insert(bucket, &storage.Object{Name: filename}).Media(goFile).Do()
	log.Printf("Got storage.Object, err: %#v, %v", storageObject, err)
}
// NewIngester creates an Ingester given the repo and tilestore specified.
func NewIngester(git *gitinfo.GitInfo, tileStoreDir string, datasetName string, ri ResultIngester, nCommits int, minDuration time.Duration, config map[string]string, statusDir, metricName string) (*Ingester, error) {
	var storageService *storage.Service = nil
	var err error = nil

	// Check if the ingestion source is coming from Google Storage.
	if config["GSDir"] != "" {
		storageService, err = storage.New(client)
		if err != nil {
			return nil, fmt.Errorf("Failed to create interface to Google Storage: %s\n", err)
		}
	}

	var processedFiles *leveldb.DB = nil
	if statusDir != "" {
		statusDir = fileutil.Must(fileutil.EnsureDirExists(filepath.Join(statusDir, datasetName)))
		processedFiles, err = leveldb.OpenFile(filepath.Join(statusDir, "processed_files.ldb"), nil)
		if err != nil {
			glog.Fatalf("Unable to open status db at %s: %s", filepath.Join(statusDir, "processed_files.ldb"), err)
		}
	}

	i := &Ingester{
		git:                            git,
		tileStore:                      filetilestore.NewFileTileStore(tileStoreDir, datasetName, -1),
		storage:                        storageService,
		hashToNumber:                   map[string]int{},
		resultIngester:                 ri,
		config:                         config,
		datasetName:                    datasetName,
		elapsedTimePerUpdate:           newGauge(metricName, "update"),
		metricsProcessed:               newCounter(metricName, "processed"),
		lastSuccessfulUpdate:           time.Now(),
		timeSinceLastSucceessfulUpdate: newGauge(metricName, "time-since-last-successful-update"),
		nCommits:                       nCommits,
		minDuration:                    minDuration,
		processedFiles:                 processedFiles,
	}
	i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
	go func() {
		for _ = range time.Tick(time.Minute) {
			i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
		}
	}()
	return i, nil
}
// NewImageProcessor constructs an imageProcessor which listens for input on the provided channel
// and logs to stderr with its name as the prefix.
func NewImageProcessor(c <-chan processImageReq, name string) *imageProcessor {
	client, err := serviceaccount.NewClient(&serviceaccount.Options{
		Transport: &RetryTransport{http.DefaultTransport, 5},
	})
	if err != nil {
		log.Panicf("Failed to create service account client: %v\n", err)
	}
	service, err := storage.New(client)
	if err != nil {
		log.Panicf("Failed to create GCS client: %v\n", err)
	}
	return &imageProcessor{
		c:      c,
		client: client,
		s:      service,
		l:      log.New(os.Stderr, name, log.LstdFlags),
	}
}
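// RetryTransport is used above but not defined in this snippet. A minimal
// sketch of what such a retrying http.RoundTripper could look like, assuming
// the second positional field is the maximum number of attempts; the real
// implementation may differ.
type RetryTransport struct {
	Base    http.RoundTripper
	Retries int
}

func (t *RetryTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	var resp *http.Response
	var err error
	for i := 0; i < t.Retries; i++ {
		resp, err = t.Base.RoundTrip(req)
		if err == nil && resp.StatusCode < 500 {
			return resp, nil
		}
		// Drop the failed response body before retrying, if there is one.
		if resp != nil && i < t.Retries-1 {
			resp.Body.Close()
		}
	}
	return resp, err
}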
// Downloads image file from Google Storage and caches it in a local directory. It
// is thread safe because it locks the diff store's mutex before accessing the
// digest cache. If the provided digest does not exist in Google Storage then
// downloadFailureCount is incremented.
func (fs *FileDiffStore) cacheImageFromGS(d string) error {
	storage, err := storage.New(fs.client)
	if err != nil {
		return fmt.Errorf("Failed to create interface to Google Storage: %s\n", err)
	}

	objLocation := filepath.Join(fs.storageBaseDir, fmt.Sprintf("%s.%s", d, IMG_EXTENSION))
	res, err := storage.Objects.Get(fs.gsBucketName, objLocation).Do()
	if err != nil {
		downloadFailureCount.Inc(1)
		return err
	}

	for i := 0; i < MAX_URI_GET_TRIES; i++ {
		if i > 0 {
			glog.Warningf("%d. retry for digest %s", i, d)
		}

		err = func() error {
			respBody, err := fs.getRespBody(res)
			if err != nil {
				return err
			}
			defer util.Close(respBody)

			// TODO(stephana): Creating and renaming temporary files this way
			// should be made into a generic utility function.
			// See also FileTileStore for a similar implementation.
			// Create a temporary file.
			tempOut, err := ioutil.TempFile(fs.localTempFileDir, fmt.Sprintf("tempfile-%s", d))
			if err != nil {
				return fmt.Errorf("Unable to create temp file: %s", err)
			}

			md5Hash := md5.New()
			multiOut := io.MultiWriter(md5Hash, tempOut)

			if _, err = io.Copy(multiOut, respBody); err != nil {
				return err
			}
			err = tempOut.Close()
			if err != nil {
				return fmt.Errorf("Error closing temp file: %s", err)
			}

			// Check the MD5.
			objMD5, err := base64.StdEncoding.DecodeString(res.Md5Hash)
			if err != nil {
				return fmt.Errorf("Unable to decode MD5 hash from %s", d)
			}
			if !bytes.Equal(md5Hash.Sum(nil), objMD5) {
				return fmt.Errorf("MD5 hash for digest %s incorrect.", d)
			}

			// Rename the file after we acquired a lock.
			outputBaseName := fs.getImageBaseName(d)
			outputFile, err := fs.createRadixPath(fs.localImgDir, outputBaseName)
			if err != nil {
				return fmt.Errorf("Error creating output file: %s", err)
			}

			fs.digestDirLock.Lock()
			defer fs.digestDirLock.Unlock()
			if err := os.Rename(tempOut.Name(), outputFile); err != nil {
				return fmt.Errorf("Unable to move file: %s", err)
			}

			downloadSuccessCount.Inc(1)
			return nil
		}()

		if err == nil {
			break
		}
		glog.Errorf("Error fetching file for digest %s: %s", d, err)
	}

	if err != nil {
		glog.Errorf("Failed fetching file after %d attempts", MAX_URI_GET_TRIES)
		downloadFailureCount.Inc(1)
	}
	return err
}
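// The TODO above suggests factoring the temp-file-then-rename pattern into a
// generic utility. A minimal sketch of what such a helper could look like; the
// name writeThenRename and its signature are illustrative and not part of the
// existing code.
func writeThenRename(r io.Reader, tempDir, finalPath string) error {
	tempOut, err := ioutil.TempFile(tempDir, "tempfile-")
	if err != nil {
		return fmt.Errorf("Unable to create temp file: %s", err)
	}
	if _, err := io.Copy(tempOut, r); err != nil {
		tempOut.Close()
		return err
	}
	if err := tempOut.Close(); err != nil {
		return fmt.Errorf("Error closing temp file: %s", err)
	}
	// Rename is atomic when source and destination are on the same filesystem.
	return os.Rename(tempOut.Name(), finalPath)
}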
// This example demonstrates loading objects from Google Cloud Storage into
// BigQuery. Objects are specified by their bucket and a name prefix. Each
// object will be loaded into a new table identified by the object name minus
// any file extension. All tables are added to the specified dataset (one will
// be created if necessary). Currently, tables will not be overwritten and an
// attempt to load an object into a dataset that already contains its table
// will emit an error message indicating the table already exists.
//
// A schema file must be provided and it will be applied to every object/table.
//
// Example usage:
//   go-api-demo -clientid="my-clientid" -secret="my-secret" bq myProject
//               myDataBucket datafile2013070 DataFiles2013
//               ./datafile_schema.json 100
//
// This will load all objects (e.g. all data files from July 2013) from
// gs://myDataBucket into a (possibly new) BigQuery dataset named DataFiles2013
// using the schema file provided and allowing up to 100 bad records. Assuming
// each object is named like datafileYYYYMMDD.csv.gz and all of July's files are
// stored in the bucket, 9 tables will be created named like datafile201307DD
// where DD ranges from 01 to 09, inclusive.
//
// When the program completes, it will emit a results line similar to:
//
//   9 files loaded in 3m58s (18m2.708s). Size: 7.18GB Rows: 7130725
//
// The total elapsed time from the start of the first job to the end of the last
// job (effectively wall clock time) is shown. In parentheses is the aggregate
// time taken to load all tables.
func bqMain(client *http.Client, argv []string) {
	if len(argv) != 6 {
		fmt.Fprintln(os.Stderr, "Usage: bq project_id bucket prefix dataset schema max_bad_records")
		return
	}

	var (
		project    = argv[0]
		bucket     = argv[1]
		objPrefix  = argv[2]
		datasetId  = argv[3]
		schemaFile = argv[4]
	)
	badRecords, err := strconv.ParseInt(argv[5], 10, 64)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	rand.Seed(time.Now().UnixNano())

	service, err := storage.New(client)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Get the list of objects in the bucket matching the specified prefix.
	list := service.Objects.List(bucket)
	list.Prefix(objPrefix)
	objects, err := list.Do()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Create the wrapper and insert the (new) dataset.
	dataset, err := newBQDataset(client, project, datasetId)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	if err = dataset.insert(true); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	objectSource := &tableSource{
		maxBadRecords: badRecords,
		disposition:   TableWriteEmptyDisposition,
	}

	// Load the schema from disk.
	f, err := ioutil.ReadFile(schemaFile)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	if err = json.Unmarshal(f, &objectSource.schema); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Assumes all objects have .csv, .csv.gz (or no) extension.
	tableIdFromObject := func(name string) string {
		return strings.TrimSuffix(strings.TrimSuffix(name, ".gz"), ".csv")
	}

	// A jobset is a way to group a collection of jobs together for monitoring.
	// For this example, we just use the name of the bucket and object prefix.
	jobset := fmt.Sprintf("%s:%s", bucket, objPrefix)
	fmt.Fprintf(os.Stderr, "\nLoading %d objects.\n", len(objects.Items))

	// Load each object into a dataset of the same name (minus any extension).
	// A successful insert call will inject the job into our queue for monitoring.
	for _, o := range objects.Items {
		objectSource.id = tableIdFromObject(o.Name)
		objectSource.uri = fmt.Sprintf("gs://%s/%s", o.Bucket, o.Name)
		if err = dataset.load(jobset, objectSource); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}

	dataset.monitor(jobset)
}
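// tableIdFromObject above strips .csv and .csv.gz extensions to derive table
// names. A small illustrative check of that mapping; the closure is duplicated
// here only because it is local to bqMain.
func ExampleTableIdFromObject() {
	tableIdFromObject := func(name string) string {
		return strings.TrimSuffix(strings.TrimSuffix(name, ".gz"), ".csv")
	}
	fmt.Println(tableIdFromObject("datafile20130701.csv.gz"))
	// Output: datafile20130701
}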
// GetStorageService returns a Cloud Storage service.
func GetStorageService() (*storage.Service, error) {
	return storage.New(http.DefaultClient)
}
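// A minimal usage sketch for GetStorageService above: list the objects in a
// bucket. The bucket name is a placeholder.
func listObjectsExample() error {
	service, err := GetStorageService()
	if err != nil {
		return err
	}
	objects, err := service.Objects.List("my-example-bucket").Do()
	if err != nil {
		return err
	}
	for _, o := range objects.Items {
		fmt.Println(o.Name)
	}
	return nil
}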