func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, wholeRef *blobref.BlobRef) { cb := &test.Blob{Contents: contents} id.BlobSource.AddBlob(cb) wholeRef = cb.BlobRef() _, err := id.Index.ReceiveBlob(wholeRef, cb.Reader()) if err != nil { panic(err) } m := schema.NewFileMap(fileName) schema.PopulateParts(m, int64(len(contents)), []schema.BytesPart{ schema.BytesPart{ Size: uint64(len(contents)), BlobRef: wholeRef, }}) fjson, err := schema.MapToCamliJSON(m) if err != nil { panic(err) } fb := &test.Blob{Contents: fjson} log.Printf("Blob is: %s", fjson) id.BlobSource.AddBlob(fb) fileRef = fb.BlobRef() _, err = id.Index.ReceiveBlob(fileRef, fb.Reader()) if err != nil { panic(err) } return }
func (id *IndexDeps) UploadFile(fileName string, contents string) (fileRef, wholeRef *blobref.BlobRef) { cb := &test.Blob{Contents: contents} id.BlobSource.AddBlob(cb) wholeRef = cb.BlobRef() _, err := id.Index.ReceiveBlob(wholeRef, cb.Reader()) if err != nil { id.Fatalf("UploadFile.ReceiveBlob: %v", err) } m := schema.NewFileMap(fileName) m.PopulateParts(int64(len(contents)), []schema.BytesPart{ schema.BytesPart{ Size: uint64(len(contents)), BlobRef: wholeRef, }}) fjson, err := m.JSON() if err != nil { id.Fatalf("UploadFile.JSON: %v", err) } fb := &test.Blob{Contents: fjson} id.BlobSource.AddBlob(fb) fileRef = fb.BlobRef() _, err = id.Index.ReceiveBlob(fileRef, fb.Reader()) if err != nil { panic(err) } return }
// If modTime is zero, it's not used. func (id *IndexDeps) UploadFile(fileName string, contents string, modTime time.Time) (fileRef, wholeRef blob.Ref) { wholeRef = id.UploadString(contents) m := schema.NewFileMap(fileName) m.PopulateParts(int64(len(contents)), []schema.BytesPart{ schema.BytesPart{ Size: uint64(len(contents)), BlobRef: wholeRef, }}) if !modTime.IsZero() { m.SetModTime(modTime) } fjson, err := m.JSON() if err != nil { id.Fatalf("UploadFile.JSON: %v", err) } fb := &test.Blob{Contents: fjson} id.BlobSource.AddBlob(fb) fileRef = fb.BlobRef() _, err = id.Index.ReceiveBlob(fileRef, fb.Reader()) if err != nil { panic(err) } return }
// UploadFile uploads the contents of the file, as well as a file blob with
// filename for these contents. If the contents or the file blob are found on
// the server, they're not uploaded.
//
// Note: this method is still a work in progress, and might change to accommodate
// the needs of camput file.
func (cl *Client) UploadFile(filename string, contents io.Reader, opts *FileUploadOptions) (blob.Ref, error) {
	fileMap := schema.NewFileMap(filename)
	if opts != nil && opts.FileInfo != nil {
		// Prefer the richer map built from the os.FileInfo, and carry
		// over the modtime when one is set.
		fileMap = schema.NewCommonFileMap(filename, opts.FileInfo)
		modTime := opts.FileInfo.ModTime()
		if !modTime.IsZero() {
			fileMap.SetModTime(modTime)
		}
	}
	fileMap.SetType("file")

	var wholeRef blob.Ref
	if opts != nil && opts.WholeRef.Valid() {
		wholeRef = opts.WholeRef
	} else {
		// No precomputed whole-file digest: hash the contents while
		// buffering everything read, then stitch the buffer back in
		// front of the reader so the bytes can still be uploaded below.
		var buf bytes.Buffer
		var err error
		wholeRef, err = cl.wholeRef(io.TeeReader(contents, &buf))
		if err != nil {
			return blob.Ref{}, err
		}
		contents = io.MultiReader(&buf, contents)
	}

	// TODO(mpl): should we consider the case (not covered by fileMapFromDuplicate)
	// where all the parts are there, but the file schema/blob does not exist? Can that
	// even happen? I'm naively assuming it can't for now, since that's what camput file
	// does too.
	fileRef, err := cl.fileMapFromDuplicate(fileMap, wholeRef)
	if err != nil {
		return blob.Ref{}, err
	}
	if fileRef.Valid() {
		// Already on the server; nothing to upload.
		return fileRef, nil
	}

	return schema.WriteFileMap(cl, fileMap, contents)
}
// uploadNodeRegularFile uploads a regular file's chunks and its "file"
// schema blob, optionally creating a permanode for it, and returns the
// PutResult for the file schema blob.
func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	var filebb *schema.Builder
	if up.fileOpts.contentsOnly {
		// Deliberately anonymous: no filename or stat info recorded.
		filebb = schema.NewFileMap("")
	} else {
		filebb = schema.NewCommonFileMap(n.fullPath, n.fi)
	}
	filebb.SetType("file")

	// Bound the number of simultaneously open file descriptors.
	up.fdGate.Start()
	defer up.fdGate.Done()

	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	if !up.fileOpts.contentsOnly {
		if up.fileOpts.exifTime {
			// Prefer the EXIF capture time over the filesystem mtime
			// when requested; fall back silently on failure.
			ra, ok := file.(io.ReaderAt)
			if !ok {
				return nil, errors.New("Error asserting local file to io.ReaderAt")
			}
			modtime, err := schema.FileTime(ra)
			if err != nil {
				log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
			} else {
				filebb.SetModTime(modtime)
			}
		}
		if up.fileOpts.wantCapCtime() {
			filebb.CapCreationTime()
		}
	}

	var (
		size                       = n.fi.Size()
		fileContents io.Reader     = io.LimitReader(file, size)
		br           blob.Ref      // of file schemaref
		sum          string        // sha1 hashsum of the file to upload
		pr *client.PutResult       // of the final "file" schema blob
	)

	// For files above this size, first digest the whole file and ask the
	// server whether it already has it, to avoid re-uploading chunks.
	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				android.NoteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// we can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blob.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     uint32(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		android.NoteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if !br.Valid() {
		// br still zero means fileMapFromDuplicate did not find the file on the server,
		// and the file has not just been uploaded subsequently to a vivify request.
		// So we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			// Track the digest as we read, so the permanode claim
			// below can use it without re-reading the file.
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// The work for those planned permanodes (and the claims) is redone
	// everytime we get here (i.e past the stat cache). However, they're
	// caught by the have cache, so they won't be reuploaded for nothing
	// at least.
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
		}
		err = up.uploadFilePermanode(sum, br, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross.  should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this? or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: uint32(len(json)), Skipped: false}
	return pr, nil
}
// Populates the bs, and the index at the same time through the sync handler func populate(b *testing.B, dbfile string, sortedProvider func(dbfile string) (sorted.KeyValue, error)) *index.Index { b.Logf("populating %v", dbfile) kv, err := sortedProvider(dbfile) if err != nil { b.Fatal(err) } bsRoot := filepath.Join(filepath.Dir(dbfile), "bs") if err := os.MkdirAll(bsRoot, 0700); err != nil { b.Fatal(err) } dataDir, err := os.Open("testdata") if err != nil { b.Fatal(err) } fis, err := dataDir.Readdir(-1) if err != nil { b.Fatal(err) } if len(fis) == 0 { b.Fatalf("no files in %s dir", "testdata") } ks := doKeyStuff(b) bs, err := localdisk.New(bsRoot) if err != nil { b.Fatal(err) } if _, err := blobserver.Receive(bs, ks.pubKeyRef, strings.NewReader(ks.pubKey)); err != nil { b.Fatal(err) } idx, err := index.New(kv) if err != nil { b.Fatal(err) } idx.InitBlobSource(bs) sh := server.NewSyncHandler("/bs/", "/index/", bs, idx, sorted.NewMemoryKeyValue()) b.ResetTimer() for _, v := range fis { f, err := os.Open(filepath.Join(dataDir.Name(), v.Name())) if err != nil { b.Fatal(err) } td := &trackDigestReader{r: f} fm := schema.NewFileMap(v.Name()) fm.SetModTime(v.ModTime()) fileRef, err := schema.WriteFileMap(bs, fm, td) if err != nil { b.Fatal(err) } f.Close() unsigned := schema.NewPlannedPermanode(td.Sum()) unsigned.SetSigner(ks.pubKeyRef) sr := &jsonsign.SignRequest{ UnsignedJSON: unsigned.Blob().JSON(), // TODO(mpl): if we make a bs that discards, replace this with a memory bs that has only the pubkey Fetcher: bs, EntityFetcher: ks.entityFetcher, SignatureTime: time.Unix(0, 0), } signed, err := sr.Sign() if err != nil { b.Fatal("problem signing: " + err.Error()) } pn := blob.SHA1FromString(signed) // N.B: use blobserver.Receive so that the blob hub gets notified, and the blob gets enqueued into the index if _, err := blobserver.Receive(bs, pn, strings.NewReader(signed)); err != nil { b.Fatal(err) } contentAttr := schema.NewSetAttributeClaim(pn, "camliContent", 
fileRef.String()) claimTime, ok := fm.ModTime() if !ok { b.Fatal(err) } contentAttr.SetClaimDate(claimTime) contentAttr.SetSigner(ks.pubKeyRef) sr = &jsonsign.SignRequest{ UnsignedJSON: contentAttr.Blob().JSON(), // TODO(mpl): if we make a bs that discards, replace this with a memory bs that has only the pubkey Fetcher: bs, EntityFetcher: ks.entityFetcher, SignatureTime: claimTime, } signed, err = sr.Sign() if err != nil { b.Fatal("problem signing: " + err.Error()) } cl := blob.SHA1FromString(signed) if _, err := blobserver.Receive(bs, cl, strings.NewReader(signed)); err != nil { b.Fatal(err) } } sh.IdleWait() return idx }
// TestPackerBoundarySplits binary-searches the size of a third chunk that
// pushes a packed file over the 16 MB zip boundary, and verifies the packer
// exercises both its truncate-and-rewrite and its stop-before-overflow paths.
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	// - 1st chunk is 6 MB. ("blobA")
	// - 2nd chunk is 6 MB. ("blobB")
	// - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	//   which size causes the packer to write two zip files.

	// During the test we set zip overhead boundaries to 0, to
	// force the test to into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	// generatesTwoZips reports whether packing a file whose third chunk
	// is sizeC bytes produces two zip blobs; it is monotonic in sizeC,
	// making it usable as a sort.Search predicate.
	generatesTwoZips := func(sizeC int) (ret bool) {
		// Fresh storage per probe so earlier probes don't interfere.
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload first two chunks
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload third chunk (sliced to the probed size)
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	// Smallest third-chunk size that forces a second zip.
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}