// Running this test requires a local, valid google_storage_token.data file with
// read/write access.
func Auth_TestUploadWorkerArtifacts(t *testing.T) {
	client, err := GetOAuthClient()
	if err != nil {
		t.Fatalf("Unexpected error: %s", err)
	}
	gs, err := NewGsUtil(client)
	if err != nil {
		t.Fatalf("Unexpected error: %s", err)
	}
	testDir := "testupload"
	testPagesetType := "10ktest"
	StorageDir = "testdata"
	if err := gs.UploadWorkerArtifacts(testDir, testPagesetType, 1); err != nil {
		t.Errorf("Unexpected error: %s", err)
	}

	// Examine contents of the remote directory and then clean it up.
	service, err := storage.New(gs.client)
	if err != nil {
		t.Fatalf("Unexpected error: %s", err)
	}
	gsDir := filepath.Join(testDir, testPagesetType, "slave1")
	resp, err := service.Objects.List(GS_BUCKET_NAME).Prefix(gsDir + "/").Do()
	if err != nil {
		t.Fatalf("Unexpected error: %s", err)
	}
	assert.Equal(t, 3, len(resp.Items))
	for index, fileName := range []string{"TIMESTAMP", "alexa1-1.py", "alexa2-2.py"} {
		filePath := fmt.Sprintf("%s/%s", gsDir, fileName)
		defer util.LogErr(service.Objects.Delete(GS_BUCKET_NAME, filePath).Do())
		assert.Equal(t, filePath, resp.Items[index].Name)
	}
}
func TestGetGSResultFileLocations(t *testing.T) {
	testutils.SkipIfShort(t)
	storage, err := storage.New(http.DefaultClient)
	assert.Nil(t, err)

	startTS := time.Date(2014, time.December, 10, 0, 0, 0, 0, time.UTC).Unix()
	endTS := time.Date(2014, time.December, 10, 23, 59, 59, 0, time.UTC).Unix()

	// TODO(stephana): Switch this to a dedicated test bucket, so we are not
	// in danger of removing it.
	resultFiles, err := getGSResultsFileLocations(startTS, endTS, storage, "chromium-skia-gm", "dm-json-v1")
	assert.Nil(t, err)

	// Read the expected list of files and compare them.
	content, err := ioutil.ReadFile("./testdata/filelist_dec_10.txt")
	assert.Nil(t, err)
	lines := strings.Split(strings.TrimSpace(string(content)), "\n")
	sort.Strings(lines)

	resultNames := make([]string, len(resultFiles))
	for idx, rf := range resultFiles {
		resultNames[idx] = rf.Name
	}
	sort.Strings(resultNames)
	assert.Equal(t, len(lines), len(resultNames))
	assert.Equal(t, lines, resultNames)
}
Example #3
func init() {
	var err error
	st, err = storage.New(util.NewTimeoutClient())
	if err != nil {
		panic("Can't construct storage service: " + err.Error())
	}

	ingester.Register(config.CONSTRUCTOR_NANO_TRYBOT, NewTrybotResultIngester)
}
Example #4
func (b *Build) Upload(version string, filename string) error {
	file, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	}

	svc, err := storage.New(oauthClient)
	if err != nil {
		return err
	}
	obj := &storage.Object{
		Acl:  []*storage.ObjectAccessControl{{Entity: "allUsers", Role: "READER"}},
		Name: filename,
	}
	_, err = svc.Objects.Insert(*storageBucket, obj).Media(bytes.NewReader(file)).Do()
	if err != nil {
		return err
	}

	sum := fmt.Sprintf("%x", sha1.Sum(file))
	kind := "unknown"
	switch {
	case b.Source:
		kind = "source"
	case strings.HasSuffix(filename, ".tar.gz"), strings.HasSuffix(filename, ".zip"):
		kind = "archive"
	case strings.HasSuffix(filename, ".msi"), strings.HasSuffix(filename, ".pkg"):
		kind = "installer"
	}
	req, err := json.Marshal(File{
		Filename: filename,
		Version:  version,
		OS:       b.OS,
		Arch:     b.Arch,
		Checksum: sum,
		Size:     len(file),
		Kind:     kind,
	})
	if err != nil {
		return err
	}
	u := fmt.Sprintf("%s?%s", *uploadURL, url.Values{"key": []string{builderKey}}.Encode())
	resp, err := http.Post(u, "application/json", bytes.NewReader(req))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("upload status: %v", resp.Status)
	}

	return nil
}
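A hedged usage sketch for Upload follows; the Build fields and the package-level oauthClient, storageBucket, uploadURL, and builderKey that Upload relies on are assumed to be configured elsewhere in the surrounding package.

// Illustrative only: assumes oauthClient, *storageBucket, *uploadURL and
// builderKey have been set up by the rest of the package before calling Upload.
b := &Build{OS: "linux", Arch: "amd64"}
if err := b.Upload("go1.4.1", "go1.4.1.linux-amd64.tar.gz"); err != nil {
	log.Fatalf("upload failed: %v", err)
}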
Example #5
// NewGsUtil initializes and returns a utility for CT interactions with Google
// Storage. If client is nil then the client from GetOAuthClient is used.
func NewGsUtil(client *http.Client) (*GsUtil, error) {
	if client == nil {
		oauthClient, err := GetOAuthClient()
		if err != nil {
			return nil, err
		}
		client = oauthClient
	}
	service, err := storage.New(client)
	if err != nil {
		return nil, fmt.Errorf("Failed to create interface to Google Storage: %s", err)
	}
	return &GsUtil{client: client, service: service}, nil
}
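A minimal sketch of the nil-client path described in the doc comment above; the glog call is illustrative and not part of the original code.

// Passing nil makes NewGsUtil obtain its own OAuth client via GetOAuthClient.
gs, err := NewGsUtil(nil)
if err != nil {
	glog.Fatalf("Failed to create GsUtil: %s", err)
}
_ = gs // use gs.UploadWorkerArtifacts(...) etc.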
func storageMain(client *http.Client, argv []string) {
	if len(argv) != 2 {
		fmt.Fprintln(os.Stderr, "Usage: storage filename bucket (to upload an object)")
		return
	}

	service, err := storage.New(client)
	if err != nil {
		log.Fatalf("unable to create storage service: %v", err)
	}
	filename := argv[0]
	bucket := argv[1]

	goFile, err := os.Open(filename)
	if err != nil {
		log.Fatalf("error opening %q: %v", filename, err)
	}
	storageObject, err := service.Objects.Insert(bucket, &storage.Object{Name: filename}).Media(goFile).Do()
	log.Printf("Got storage.Object, err: %#v, %v", storageObject, err)
}
// NewIngester creates an Ingester given the repo and tilestore specified.
func NewIngester(git *gitinfo.GitInfo, tileStoreDir string, datasetName string, ri ResultIngester, nCommits int, minDuration time.Duration, config map[string]string, statusDir, metricName string) (*Ingester, error) {
	var storageService *storage.Service
	var err error
	// Check if the ingestion source is coming from Google Storage.
	if config["GSDir"] != "" {
		storageService, err = storage.New(client)
		if err != nil {
			return nil, fmt.Errorf("Failed to create interface to Google Storage: %s\n", err)
		}
	}
	var processedFiles *leveldb.DB
	if statusDir != "" {
		statusDir = fileutil.Must(fileutil.EnsureDirExists(filepath.Join(statusDir, datasetName)))
		processedFiles, err = leveldb.OpenFile(filepath.Join(statusDir, "processed_files.ldb"), nil)
		if err != nil {
			glog.Fatalf("Unable to open status db at %s: %s", filepath.Join(statusDir, "processed_files.ldb"), err)
		}
	}

	i := &Ingester{
		git:                            git,
		tileStore:                      filetilestore.NewFileTileStore(tileStoreDir, datasetName, -1),
		storage:                        storageService,
		hashToNumber:                   map[string]int{},
		resultIngester:                 ri,
		config:                         config,
		datasetName:                    datasetName,
		elapsedTimePerUpdate:           newGauge(metricName, "update"),
		metricsProcessed:               newCounter(metricName, "processed"),
		lastSuccessfulUpdate:           time.Now(),
		timeSinceLastSucceessfulUpdate: newGauge(metricName, "time-since-last-successful-update"),
		nCommits:                       nCommits,
		minDuration:                    minDuration,
		processedFiles:                 processedFiles,
	}

	i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
	go func() {
		for range time.Tick(time.Minute) {
			i.timeSinceLastSucceessfulUpdate.Update(int64(time.Since(i.lastSuccessfulUpdate).Seconds()))
		}
	}()
	return i, nil
}
// NewImageProcessor constructs an imageProcessor which listens for input on the provided channel
// and logs to stderr with its name as the prefix.
func NewImageProcessor(c <-chan processImageReq, name string) *imageProcessor {
	client, err := serviceaccount.NewClient(&serviceaccount.Options{
		Transport: &RetryTransport{http.DefaultTransport, 5},
	})
	if err != nil {
		log.Panicf("Failed to create service account client: %v\n", err)
	}
	service, err := storage.New(client)
	if err != nil {
		log.Panicf("Failed to create GCS client: %v\n", err)
	}
	return &imageProcessor{
		c:      c,
		client: client,
		s:      service,
		l:      log.New(os.Stderr, name, log.LstdFlags),
	}
}
// cacheImageFromGS downloads an image file from Google Storage and caches it in
// a local directory. It is thread safe because it locks the diff store's mutex
// before accessing the digest cache. If the provided digest does not exist in
// Google Storage then downloadFailureCount is incremented.
func (fs *FileDiffStore) cacheImageFromGS(d string) error {
	storage, err := storage.New(fs.client)
	if err != nil {
		return fmt.Errorf("Failed to create interface to Google Storage: %s\n", err)
	}

	objLocation := filepath.Join(fs.storageBaseDir, fmt.Sprintf("%s.%s", d, IMG_EXTENSION))
	res, err := storage.Objects.Get(fs.gsBucketName, objLocation).Do()
	if err != nil {
		downloadFailureCount.Inc(1)
		return err
	}

	for i := 0; i < MAX_URI_GET_TRIES; i++ {
		if i > 0 {
			glog.Warningf("%d. retry for digest %s", i, d)
		}

		err = func() error {
			respBody, err := fs.getRespBody(res)
			if err != nil {
				return err
			}
			defer util.Close(respBody)

			// TODO(stephana): Creating and renaming temporary files this way
			// should be made into a generic utility function.
			// See also FileTileStore for a similar implementation.
			// Create a temporary file.
			tempOut, err := ioutil.TempFile(fs.localTempFileDir, fmt.Sprintf("tempfile-%s", d))
			if err != nil {
				return fmt.Errorf("Unable to create temp file: %s", err)
			}

			md5Hash := md5.New()
			multiOut := io.MultiWriter(md5Hash, tempOut)

			if _, err = io.Copy(multiOut, respBody); err != nil {
				return err
			}
			err = tempOut.Close()
			if err != nil {
				return fmt.Errorf("Error closing temp file: %s", err)
			}

			// Check the MD5.
			objMD5, err := base64.StdEncoding.DecodeString(res.Md5Hash)
			if err != nil {
				return fmt.Errorf("Unable to decode MD5 hash from %s", d)
			}

			if !bytes.Equal(md5Hash.Sum(nil), objMD5) {
				return fmt.Errorf("MD5 hash for digest %s incorrect.", d)
			}

			// Rename the file after acquiring the lock.
			outputBaseName := fs.getImageBaseName(d)
			outputFile, err := fs.createRadixPath(fs.localImgDir, outputBaseName)
			if err != nil {
				return fmt.Errorf("Error creating output file: %s", err)
			}

			fs.digestDirLock.Lock()
			defer fs.digestDirLock.Unlock()
			if err := os.Rename(tempOut.Name(), outputFile); err != nil {
				return fmt.Errorf("Unable to move file: %s", err)
			}

			downloadSuccessCount.Inc(1)
			return nil
		}()

		if err == nil {
			break
		}
		glog.Errorf("Error fetching file for digest %s: %s", d, err)
	}

	if err != nil {
		glog.Errorf("Failed fetching file after %d attempts", MAX_URI_GET_TRIES)
		downloadFailureCount.Inc(1)
	}
	return err
}
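The TODO inside cacheImageFromGS asks for a generic version of the download-verify-rename pattern used above. A minimal sketch of such a helper might look like the following; the name writeVerified and its signature are hypothetical, not part of the original code.

// writeVerified is a hypothetical helper sketching the pattern used above:
// stream r into a temp file, check its MD5 against the base64-encoded hash
// reported by Google Storage, then rename the file into place.
func writeVerified(r io.Reader, tempDir, dest, wantMD5Base64 string) error {
	tempOut, err := ioutil.TempFile(tempDir, "tempfile-")
	if err != nil {
		return fmt.Errorf("Unable to create temp file: %s", err)
	}
	hash := md5.New()
	_, copyErr := io.Copy(io.MultiWriter(hash, tempOut), r)
	if err := tempOut.Close(); err != nil {
		return fmt.Errorf("Error closing temp file: %s", err)
	}
	if copyErr != nil {
		return copyErr
	}
	want, err := base64.StdEncoding.DecodeString(wantMD5Base64)
	if err != nil {
		return fmt.Errorf("Unable to decode MD5 hash: %s", err)
	}
	if !bytes.Equal(hash.Sum(nil), want) {
		return fmt.Errorf("MD5 hash mismatch for %s", dest)
	}
	return os.Rename(tempOut.Name(), dest)
}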
// This example demonstrates loading objects from Google Cloud Storage into
// BigQuery. Objects are specified by their bucket and a name prefix. Each
// object will be loaded into a new table identified by the object name minus
// any file extension. All tables are added to the specified dataset (one will
// be created if necessary). Currently, tables will not be overwritten and an
// attempt to load an object into a dataset that already contains its table
// will emit an error message indicating the table already exists.
// A schema file must be provided and it will be applied to every object/table.
// Example usage:
//   go-api-demo -clientid="my-clientid" -secret="my-secret" bq myProject
//								myDataBucket datafile2013070 DataFiles2013
//								./datafile_schema.json 100
//
// This will load all objects (e.g. all data files from July 2013) from
// gs://myDataBucket into a (possibly new) BigQuery dataset named DataFiles2013
// using the schema file provided and allowing up to 100 bad records. Assuming
// each object is named like datafileYYYYMMDD.csv.gz and all of July's files are
// stored in the bucket, 9 tables will be created named like datafile201307DD
// where DD ranges from 01 to 09, inclusive.
// When the program completes, it will emit a results line similar to:
//
// 9 files loaded in 3m58s (18m2.708s). Size: 7.18GB Rows: 7130725
//
// The total elapsed time from the start of the first job to the end of the last
// job (effectively wall clock time) is shown. In parentheses is the aggregate
// time taken to load all tables.
func bqMain(client *http.Client, argv []string) {
	if len(argv) != 6 {
		fmt.Fprintln(os.Stderr,
			"Usage: bq project_id bucket prefix dataset schema max_bad_records")
		return
	}

	var (
		project    = argv[0]
		bucket     = argv[1]
		objPrefix  = argv[2]
		datasetId  = argv[3]
		schemaFile = argv[4]
	)
	badRecords, err := strconv.ParseInt(argv[5], 10, 64)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	rand.Seed(time.Now().UnixNano())

	service, err := storage.New(client)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Get the list of objects in the bucket matching the specified prefix.
	list := service.Objects.List(bucket)
	list.Prefix(objPrefix)
	objects, err := list.Do()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Create the wrapper and insert the (new) dataset.
	dataset, err := newBQDataset(client, project, datasetId)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	if err = dataset.insert(true); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	objectSource := &tableSource{
		maxBadRecords: badRecords,
		disposition:   TableWriteEmptyDisposition,
	}

	// Load the schema from disk.
	f, err := ioutil.ReadFile(schemaFile)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	if err = json.Unmarshal(f, &objectSource.schema); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}

	// Assumes all objects have .csv, .csv.gz (or no) extension.
	tableIdFromObject := func(name string) string {
		return strings.TrimSuffix(strings.TrimSuffix(name, ".gz"), ".csv")
	}

	// A jobset is a way to group a collection of jobs together for monitoring.
	// For this example, we just use the name of the bucket and object prefix.
	jobset := fmt.Sprintf("%s:%s", bucket, objPrefix)
	fmt.Fprintf(os.Stderr, "\nLoading %d objects.\n", len(objects.Items))

	// Load each object into a table of the same name (minus any extension).
	// A successful insert call will inject the job into our queue for monitoring.
	for _, o := range objects.Items {
		objectSource.id = tableIdFromObject(o.Name)
		objectSource.uri = fmt.Sprintf("gs://%s/%s", o.Bucket, o.Name)
		if err = dataset.load(jobset, objectSource); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}

	dataset.monitor(jobset)
}
Example #11
// GetStorageService returns a Cloud Storage service.
func GetStorageService() (*storage.Service, error) {
	return storage.New(http.DefaultClient)
}
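A minimal usage sketch for the service returned above; note that http.DefaultClient carries no credentials, so this only works against publicly readable buckets. The bucket and prefix names below are placeholders.

// Illustrative only: list the objects under a prefix in a public bucket.
svc, err := GetStorageService()
if err != nil {
	log.Fatalf("Failed to create storage service: %v", err)
}
resp, err := svc.Objects.List("my-public-bucket").Prefix("some/prefix/").Do()
if err != nil {
	log.Fatalf("Failed to list objects: %v", err)
}
for _, obj := range resp.Items {
	fmt.Println(obj.Name)
}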