Exemple #1
0
func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, wholeRef *blobref.BlobRef) {
	cb := &test.Blob{Contents: contents}
	id.BlobSource.AddBlob(cb)
	wholeRef = cb.BlobRef()
	_, err := id.Index.ReceiveBlob(wholeRef, cb.Reader())
	if err != nil {
		panic(err)
	}

	m := schema.NewFileMap(fileName)
	schema.PopulateParts(m, int64(len(contents)), []schema.BytesPart{
		schema.BytesPart{
			Size:    uint64(len(contents)),
			BlobRef: wholeRef,
		}})
	fjson, err := schema.MapToCamliJSON(m)
	if err != nil {
		panic(err)
	}
	fb := &test.Blob{Contents: fjson}
	log.Printf("Blob is: %s", fjson)
	id.BlobSource.AddBlob(fb)
	fileRef = fb.BlobRef()
	_, err = id.Index.ReceiveBlob(fileRef, fb.Reader())
	if err != nil {
		panic(err)
	}
	return
}
Exemple #2
0
func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, wholeRef *blobref.BlobRef) {
	cb := &test.Blob{Contents: contents}
	id.BlobSource.AddBlob(cb)
	wholeRef = cb.BlobRef()
	_, err := id.Index.ReceiveBlob(wholeRef, cb.Reader())
	if err != nil {
		id.Fatalf("UploadFile.ReceiveBlob: %v", err)
	}

	m := schema.NewFileMap(fileName)
	m.PopulateParts(int64(len(contents)), []schema.BytesPart{
		schema.BytesPart{
			Size:    uint64(len(contents)),
			BlobRef: wholeRef,
		}})
	fjson, err := m.JSON()
	if err != nil {
		id.Fatalf("UploadFile.JSON: %v", err)
	}
	fb := &test.Blob{Contents: fjson}
	id.BlobSource.AddBlob(fb)
	fileRef = fb.BlobRef()
	_, err = id.Index.ReceiveBlob(fileRef, fb.Reader())
	if err != nil {
		panic(err)
	}
	return
}
Exemple #3
0
// If modTime is zero, it's not used.
func (id *IndexDeps) UploadFile(fileName string, contents string, modTime time.Time) (fileRef, wholeRef blob.Ref) {
	wholeRef = id.UploadString(contents)

	m := schema.NewFileMap(fileName)
	m.PopulateParts(int64(len(contents)), []schema.BytesPart{
		schema.BytesPart{
			Size:    uint64(len(contents)),
			BlobRef: wholeRef,
		}})
	if !modTime.IsZero() {
		m.SetModTime(modTime)
	}
	fjson, err := m.JSON()
	if err != nil {
		id.Fatalf("UploadFile.JSON: %v", err)
	}
	fb := &test.Blob{Contents: fjson}
	id.BlobSource.AddBlob(fb)
	fileRef = fb.BlobRef()
	_, err = id.Index.ReceiveBlob(fileRef, fb.Reader())
	if err != nil {
		panic(err)
	}
	return
}
Exemple #4
0
// UploadFile uploads the contents of the file, as well as a file blob with
// filename for these contents. If the contents or the file blob are found on
// the server, they're not uploaded.
//
// Note: this method is still a work in progress, and might change to accomodate
// the needs of camput file.
func (cl *Client) UploadFile(filename string, contents io.Reader, opts *FileUploadOptions) (blob.Ref, error) {
	fileMap := schema.NewFileMap(filename)
	if opts != nil && opts.FileInfo != nil {
		fileMap = schema.NewCommonFileMap(filename, opts.FileInfo)
		modTime := opts.FileInfo.ModTime()
		if !modTime.IsZero() {
			fileMap.SetModTime(modTime)
		}
	}
	fileMap.SetType("file")

	var wholeRef blob.Ref
	if opts != nil && opts.WholeRef.Valid() {
		wholeRef = opts.WholeRef
	} else {
		var buf bytes.Buffer
		var err error
		wholeRef, err = cl.wholeRef(io.TeeReader(contents, &buf))
		if err != nil {
			return blob.Ref{}, err
		}
		contents = io.MultiReader(&buf, contents)
	}

	// TODO(mpl): should we consider the case (not covered by fileMapFromDuplicate)
	// where all the parts are there, but the file schema/blob does not exist? Can that
	// even happen ? I'm naively assuming it can't for now, since that's what camput file
	// does too.
	fileRef, err := cl.fileMapFromDuplicate(fileMap, wholeRef)
	if err != nil {
		return blob.Ref{}, err
	}
	if fileRef.Valid() {
		return fileRef, nil
	}

	return schema.WriteFileMap(cl, fileMap, contents)
}
Exemple #5
0
func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	var filebb *schema.Builder
	if up.fileOpts.contentsOnly {
		filebb = schema.NewFileMap("")
	} else {
		filebb = schema.NewCommonFileMap(n.fullPath, n.fi)
	}
	filebb.SetType("file")

	up.fdGate.Start()
	defer up.fdGate.Done()

	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	if !up.fileOpts.contentsOnly {
		if up.fileOpts.exifTime {
			ra, ok := file.(io.ReaderAt)
			if !ok {
				return nil, errors.New("Error asserting local file to io.ReaderAt")
			}
			modtime, err := schema.FileTime(ra)
			if err != nil {
				log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
			} else {
				filebb.SetModTime(modtime)
			}
		}
		if up.fileOpts.wantCapCtime() {
			filebb.CapCreationTime()
		}
	}

	var (
		size                           = n.fi.Size()
		fileContents io.Reader         = io.LimitReader(file, size)
		br           blob.Ref          // of file schemaref
		sum          string            // sha1 hashsum of the file to upload
		pr           *client.PutResult // of the final "file" schema blob
	)

	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				android.NoteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// we can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blob.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     uint32(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		android.NoteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if !br.Valid() {
		// br still zero means fileMapFromDuplicate did not find the file on the server,
		// and the file has not just been uploaded subsequently to a vivify request.
		// So we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// The work for those planned permanodes (and the claims) is redone
	// everytime we get here (i.e past the stat cache). However, they're
	// caught by the have cache, so they won't be reuploaded for nothing
	// at least.
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
		}
		err = up.uploadFilePermanode(sum, br, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross.  should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this?  or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: uint32(len(json)), Skipped: false}
	return pr, nil
}
Exemple #6
0
// Populates the bs, and the index at the same time through the sync handler
func populate(b *testing.B, dbfile string,
	sortedProvider func(dbfile string) (sorted.KeyValue, error)) *index.Index {
	b.Logf("populating %v", dbfile)
	kv, err := sortedProvider(dbfile)
	if err != nil {
		b.Fatal(err)
	}
	bsRoot := filepath.Join(filepath.Dir(dbfile), "bs")
	if err := os.MkdirAll(bsRoot, 0700); err != nil {
		b.Fatal(err)
	}
	dataDir, err := os.Open("testdata")
	if err != nil {
		b.Fatal(err)
	}
	fis, err := dataDir.Readdir(-1)
	if err != nil {
		b.Fatal(err)
	}
	if len(fis) == 0 {
		b.Fatalf("no files in %s dir", "testdata")
	}

	ks := doKeyStuff(b)

	bs, err := localdisk.New(bsRoot)
	if err != nil {
		b.Fatal(err)
	}
	if _, err := blobserver.Receive(bs, ks.pubKeyRef, strings.NewReader(ks.pubKey)); err != nil {
		b.Fatal(err)
	}
	idx, err := index.New(kv)
	if err != nil {
		b.Fatal(err)
	}
	idx.InitBlobSource(bs)
	sh := server.NewSyncHandler("/bs/", "/index/", bs, idx, sorted.NewMemoryKeyValue())

	b.ResetTimer()
	for _, v := range fis {
		f, err := os.Open(filepath.Join(dataDir.Name(), v.Name()))
		if err != nil {
			b.Fatal(err)
		}
		td := &trackDigestReader{r: f}
		fm := schema.NewFileMap(v.Name())
		fm.SetModTime(v.ModTime())
		fileRef, err := schema.WriteFileMap(bs, fm, td)
		if err != nil {
			b.Fatal(err)
		}
		f.Close()

		unsigned := schema.NewPlannedPermanode(td.Sum())
		unsigned.SetSigner(ks.pubKeyRef)
		sr := &jsonsign.SignRequest{
			UnsignedJSON: unsigned.Blob().JSON(),
			// TODO(mpl): if we make a bs that discards, replace this with a memory bs that has only the pubkey
			Fetcher:       bs,
			EntityFetcher: ks.entityFetcher,
			SignatureTime: time.Unix(0, 0),
		}
		signed, err := sr.Sign()
		if err != nil {
			b.Fatal("problem signing: " + err.Error())
		}
		pn := blob.SHA1FromString(signed)
		// N.B: use blobserver.Receive so that the blob hub gets notified, and the blob gets enqueued into the index
		if _, err := blobserver.Receive(bs, pn, strings.NewReader(signed)); err != nil {
			b.Fatal(err)
		}

		contentAttr := schema.NewSetAttributeClaim(pn, "camliContent", fileRef.String())
		claimTime, ok := fm.ModTime()
		if !ok {
			b.Fatal(err)
		}
		contentAttr.SetClaimDate(claimTime)
		contentAttr.SetSigner(ks.pubKeyRef)
		sr = &jsonsign.SignRequest{
			UnsignedJSON: contentAttr.Blob().JSON(),
			// TODO(mpl): if we make a bs that discards, replace this with a memory bs that has only the pubkey
			Fetcher:       bs,
			EntityFetcher: ks.entityFetcher,
			SignatureTime: claimTime,
		}
		signed, err = sr.Sign()
		if err != nil {
			b.Fatal("problem signing: " + err.Error())
		}
		cl := blob.SHA1FromString(signed)
		if _, err := blobserver.Receive(bs, cl, strings.NewReader(signed)); err != nil {
			b.Fatal(err)
		}
	}
	sh.IdleWait()

	return idx
}
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	//    - 1st chunk is 6 MB. ("blobA")
	//    - 2nd chunk is 6 MB. ("blobB")
	//    - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	//      which size causes the packer to write two zip files.

	// During the test we set zip overhead boundaries to 0, to
	// force the test to into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload first two chunks
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload second chunk
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}