func (pt *packTest) testOpenWholeRef(t *testing.T, wholeRef blob.Ref, wantSize int64) { rc, gotSize, err := pt.sto.OpenWholeRef(wholeRef, 0) if err != nil { t.Errorf("OpenWholeRef = %v", err) return } defer rc.Close() if gotSize != wantSize { t.Errorf("OpenWholeRef size = %v; want %v", gotSize, wantSize) return } h := blob.NewHash() n, err := io.Copy(h, rc) if err != nil { t.Errorf("OpenWholeRef read error: %v", err) return } if n != wantSize { t.Errorf("OpenWholeRef read %v bytes; want %v", n, wantSize) return } gotRef := blob.RefFromHash(h) if gotRef != wholeRef { t.Errorf("OpenWholeRef read contents = %v; want %v", gotRef, wholeRef) } }
// This is the simple 1MB chunk version. The rolling checksum version is below. func writeFileMapOld(bs blobserver.StatReceiver, file *Builder, r io.Reader) (blob.Ref, error) { parts, size := []BytesPart{}, int64(0) var buf bytes.Buffer for { buf.Reset() n, err := io.Copy(&buf, io.LimitReader(r, maxBlobSize)) if err != nil { return blob.Ref{}, err } if n == 0 { break } hash := blob.NewHash() io.Copy(hash, bytes.NewReader(buf.Bytes())) br := blob.RefFromHash(hash) hasBlob, err := serverHasBlob(bs, br) if err != nil { return blob.Ref{}, err } if !hasBlob { sb, err := bs.ReceiveBlob(br, &buf) if err != nil { return blob.Ref{}, err } if want := (blob.SizedRef{br, uint32(n)}); sb != want { return blob.Ref{}, fmt.Errorf("schema/filewriter: wrote %s, expect", sb, want) } } size += n parts = append(parts, BytesPart{ BlobRef: br, Size: uint64(n), Offset: 0, // into BlobRef to read from (not of dest) }) } err := file.PopulateParts(size, parts) if err != nil { return blob.Ref{}, err } json := file.Blob().JSON() if err != nil { return blob.Ref{}, err } br := blob.SHA1FromString(json) sb, err := bs.ReceiveBlob(br, strings.NewReader(json)) if err != nil { return blob.Ref{}, err } if expect := (blob.SizedRef{br, uint32(len(json))}); expect != sb { return blob.Ref{}, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb) } return br, nil }
func (pk *packer) pack() error { if err := pk.scanChunks(); err != nil { return err } // TODO: decide as a fuction of schemaRefs and dataRefs // already in s.large whether it makes sense to still compact // this from a savings standpoint. For now we just always do. // Maybe we'd have knobs in the future. Ideally not. // Don't pack a file if we already have its wholeref stored // otherwise (perhaps under a different filename). But that // means we have to compute its wholeref first. We assume the // blob source will cache these lookups so it's not too // expensive to do two passes over the input. h := blob.NewHash() var err error pk.wholeSize, err = io.Copy(h, pk.fr) if err != nil { return err } pk.wholeRef = blob.RefFromHash(h) wholeKey := wholeMetaPrefix + pk.wholeRef.String() _, err = pk.s.meta.Get(wholeKey) if err == nil { // Nil error means there was some knowledge of this wholeref. return fmt.Errorf("already have wholeref %v packed; not packing again", pk.wholeRef) } else if err != sorted.ErrNotFound { return err } pk.chunksRemain = pk.dataRefs var trunc blob.Ref MakingZips: for len(pk.chunksRemain) > 0 { if err := pk.writeAZip(trunc); err != nil { if needTrunc, ok := err.(needsTruncatedAfterError); ok { trunc = needTrunc.Ref if fn := testHookSawTruncate; fn != nil { fn(trunc) } continue MakingZips } return err } trunc = blob.Ref{} } // Record the final wholeMetaPrefix record: err = pk.s.meta.Set(wholeKey, fmt.Sprintf("%d %d", pk.wholeSize, len(pk.zips))) if err != nil { return fmt.Errorf("Error setting %s: %v", wholeKey, err) } return nil }
func (h *DeployHandler) storeInstanceConf(conf *InstanceConf) (blob.Ref, error) { contents, err := json.Marshal(conf) if err != nil { return blob.Ref{}, fmt.Errorf("could not json encode instance config: %v", err) } hash := blob.NewHash() _, err = io.Copy(hash, bytes.NewReader(contents)) if err != nil { return blob.Ref{}, fmt.Errorf("could not hash blob contents: %v", err) } br := blob.RefFromHash(hash) if _, err := blobserver.Receive(h.instConf, br, bytes.NewReader(contents)); err != nil { return blob.Ref{}, fmt.Errorf("could not store instance config blob: %v", err) } return br, nil }
// Blob builds the Blob. The builder continues to be usable after a call to Build. func (bb *Builder) Blob() *Blob { json, err := mapJSON(bb.m) if err != nil { panic(err) } ss, err := parseSuperset(strings.NewReader(json)) if err != nil { panic(err) } h := blob.NewHash() h.Write([]byte(json)) return &Blob{ str: json, ss: ss, br: blob.RefFromHash(h), } }
func TestPackLarge(t *testing.T) { if testing.Short() { t.Skip("skipping in short mode") } const fileSize = 17 << 20 // more than 16 MB, so more than one zip const fileName = "foo.dat" fileContents := randBytes(fileSize) hash := blob.NewHash() hash.Write(fileContents) wholeRef := blob.RefFromHash(hash) pt := testPack(t, func(sto blobserver.Storage) error { _, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents)) return err }, wantNumLargeBlobs(2), wantNumSmallBlobs(0), ) // Verify we wrote the correct "w:*" meta rows. got := map[string]string{} want := map[string]string{ "w:" + wholeRef.String(): "17825792 2", "w:" + wholeRef.String() + ":0": "sha1-9b4a3d114c059988075c87293c86ee7cbc6f4af5 37 0 16709479", "w:" + wholeRef.String() + ":1": "sha1-fe6326ac6b389ffe302623e4a501bfc8c6272e8e 37 16709479 1116313", } if err := sorted.Foreach(pt.sto.meta, func(key, value string) error { if strings.HasPrefix(key, "b:") { return nil } got[key] = value return nil }); err != nil { t.Fatal(err) } if !reflect.DeepEqual(got, want) { t.Errorf("'w:*' meta rows = %v; want %v", got, want) } // And verify we can read it back out. pt.testOpenWholeRef(t, wholeRef, fileSize) }
func stdinBlobHandle() (uh *client.UploadHandle, err error) { var buf bytes.Buffer size, err := io.Copy(&buf, cmdmain.Stdin) if err != nil { return } // TODO(bradfitz,mpl): limit this buffer size? file := buf.Bytes() h := blob.NewHash() size, err = io.Copy(h, bytes.NewReader(file)) if err != nil { return } return &client.UploadHandle{ BlobRef: blob.RefFromHash(h), Size: size, Contents: io.LimitReader(bytes.NewReader(file), size), }, nil }
func TestPackNormal(t *testing.T) { const fileSize = 5 << 20 const fileName = "foo.dat" fileContents := randBytes(fileSize) hash := blob.NewHash() hash.Write(fileContents) wholeRef := blob.RefFromHash(hash) pt := testPack(t, func(sto blobserver.Storage) error { _, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents)) return err }, wantNumLargeBlobs(1), wantNumSmallBlobs(0), ) // And verify we can read it back out. pt.testOpenWholeRef(t, wholeRef, fileSize) }
func stdinBlobHandle() (uh *client.UploadHandle, err error) { var buf bytes.Buffer size, err := io.CopyN(&buf, cmdmain.Stdin, constants.MaxBlobSize+1) if err != nil { return } if size > constants.MaxBlobSize { err = fmt.Errorf("blob size cannot be bigger than %d", constants.MaxBlobSize) } file := buf.Bytes() h := blob.NewHash() size, err = io.Copy(h, bytes.NewReader(file)) if err != nil { return } return &client.UploadHandle{ BlobRef: blob.RefFromHash(h), Size: uint32(size), Contents: io.LimitReader(bytes.NewReader(file), size), }, nil }
func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) { if photo.ID == "" { return errors.New("photo has no ID") } getMediaBytes := func() (io.ReadCloser, error) { log.Printf("Importing media from %v", photo.URL) resp, err := ctxutil.Client(ctx).Get(photo.URL) if err != nil { return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err) } if resp.StatusCode != http.StatusOK { resp.Body.Close() return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode) } return resp.Body, nil } var fileRefStr string idFilename := photo.ID + "-" + photo.Filename photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) { h := blob.NewHash() rc, err := getMediaBytes() if err != nil { return nil, err } fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, io.TeeReader(rc, h)) if err != nil { return nil, err } fileRefStr = fileRef.String() wholeRef := blob.RefFromHash(h) if pn, err := findExistingPermanode(r.Host.Searcher(), wholeRef); err == nil { return r.Host.ObjectFromRef(pn) } return r.Host.NewObject() }) if err != nil { return err } const attrMediaURL = "picasaMediaURL" if fileRefStr == "" { fileRefStr = photoNode.Attr(nodeattr.CamliContent) // Only re-download the source photo if its URL has changed. // Empirically this seems to work: cropping a photo in the // photos.google.com UI causes its URL to change. And it makes // sense, looking at the ugliness of the URLs with all their // encoded/signed state. 
if !mediaURLsEqual(photoNode.Attr(attrMediaURL), photo.URL) { rc, err := getMediaBytes() if err != nil { return err } fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, rc) rc.Close() if err != nil { return err } fileRefStr = fileRef.String() } } title := strings.TrimSpace(photo.Description) if strings.Contains(title, "\n") { title = title[:strings.Index(title, "\n")] } if title == "" && schema.IsInterestingTitle(photo.Filename) { title = photo.Filename } // TODO(tgulacsi): add more attrs (comments ?) // for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork attrs := []string{ nodeattr.CamliContent, fileRefStr, attrPicasaId, photo.ID, nodeattr.Title, title, nodeattr.Description, photo.Description, nodeattr.LocationText, photo.Location, nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated), nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published), nodeattr.URL, photo.PageURL, } if photo.Latitude != 0 || photo.Longitude != 0 { attrs = append(attrs, nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude), nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude), ) } if err := photoNode.SetAttrs(attrs...); err != nil { return err } if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil { return err } if photo.Position > 0 { if err := albumNode.SetAttr( nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1), photoNode.PermanodeRef().String()); err != nil { return err } } // Do this last, after we're sure the "camliContent" attribute // has been saved successfully, because this is the one that // causes us to do it again in the future or not. if err := photoNode.SetAttrs(attrMediaURL, photo.URL); err != nil { return err } return nil }
// writeAZip writes the next zip file for the file being packed, taking data
// chunks from the front of pk.chunksRemain. On success the zip has been
// stored in pk.s.large, its meta rows have been committed, and the written
// chunks have been removed from pk.chunksRemain.
//
// trunc is a hint about which blob to truncate after. It may be zero.
// If the returned error is of type 'needsTruncatedAfterError', then
// the zip should be attempted to be written again, but truncating the
// data after the listed blob.
func (pk *packer) writeAZip(trunc blob.Ref) (err error) {
	// A panic carrying an error value is converted into the returned error,
	// unless err is already set; anything else is re-panicked.
	// NOTE(review): presumably check (defined elsewhere) panics with its
	// non-nil argument, which is what this recovers from — confirm.
	defer func() {
		if e := recover(); e != nil {
			if v, ok := e.(error); ok && err == nil {
				err = v
			} else {
				panic(e)
			}
		}
	}()
	mf := Manifest{
		WholeRef:       pk.wholeRef,
		WholeSize:      pk.wholeSize,
		WholePartIndex: len(pk.zips),
	}
	var zbuf bytes.Buffer
	// cw counts the bytes reaching zbuf; cw.n is read after each Flush below
	// to learn where an entry's content starts.
	cw := &countWriter{w: &zbuf}
	zw := zip.NewWriter(cw)

	var approxSize = zipFixedOverhead // can't use zbuf.Len because zw buffers
	var dataRefsWritten []blob.Ref
	var dataBytesWritten int64
	var schemaBlobSeen = map[blob.Ref]bool{}
	var schemaBlobs []blob.Ref // to add after the main file

	baseFileName := pk.fr.FileName()
	if strings.Contains(baseFileName, "/") || strings.Contains(baseFileName, "\\") {
		return fmt.Errorf("File schema blob %v filename had a slash in it: %q", pk.fr.SchemaBlobRef(), baseFileName)
	}
	fh := &zip.FileHeader{
		Name:   baseFileName,
		Method: zip.Store, // uncompressed
	}
	fh.SetModTime(pk.fr.ModTime())
	fh.SetMode(0644)
	fw, err := zw.CreateHeader(fh)
	check(err)
	check(zw.Flush())
	// After the flush, cw.n is the offset in the zip at which the file's
	// data begins.
	dataStart := cw.n
	approxSize += zipPerEntryOverhead // for the first FileHeader w/ the data

	zipMax := pk.s.maxZipBlobSize()

	chunks := pk.chunksRemain
	// chunkWholeHash hashes all data written to this zip's main entry; it
	// becomes mf.DataBlobsOrigin.
	chunkWholeHash := blob.NewHash()
	for len(chunks) > 0 {
		dr := chunks[0] // the next chunk to maybe write

		// Stop at the truncation hint: dr and everything after it are left
		// for a later zip.
		if trunc.Valid() && trunc == dr {
			if approxSize == 0 {
				return errors.New("first blob is too large to pack, once you add the zip overhead")
			}
			break
		}

		// Account for schema blobs that reference this chunk and have not
		// been counted yet. The previous slice is saved so the additions can
		// be undone if this chunk turns out not to fit.
		schemaBlobsSave := schemaBlobs
		for _, parent := range pk.schemaParent[dr] {
			if !schemaBlobSeen[parent] {
				schemaBlobSeen[parent] = true
				schemaBlobs = append(schemaBlobs, parent)
				approxSize += int(pk.schemaBlob[parent].Size()) + zipPerEntryOverhead
			}
		}

		thisSize := pk.dataSize[dr]
		approxSize += int(thisSize)
		// Stop before this chunk if the estimate, including the manifest,
		// exceeds the maximum zip size.
		if approxSize+mf.approxSerializedSize() > zipMax {
			if fn := testHookStopBeforeOverflowing; fn != nil {
				fn()
			}
			schemaBlobs = schemaBlobsSave // restore it
			break
		}

		// Copy the data to the zip.
		rc, size, err := pk.s.Fetch(dr)
		check(err)
		if size != thisSize {
			rc.Close()
			return errors.New("unexpected size")
		}
		if n, err := io.Copy(io.MultiWriter(fw, chunkWholeHash), rc); err != nil || n != int64(size) {
			rc.Close()
			return fmt.Errorf("copy to zip = %v, %v; want %v bytes", n, err, size)
		}
		rc.Close()

		dataRefsWritten = append(dataRefsWritten, dr)
		dataBytesWritten += int64(size)
		chunks = chunks[1:]
	}
	mf.DataBlobsOrigin = blob.RefFromHash(chunkWholeHash)

	// zipBlobs is where a schema or data blob is relative to the beginning
	// of the zip file.
	var zipBlobs []BlobAndPos

	var dataOffset int64
	for _, br := range dataRefsWritten {
		size := pk.dataSize[br]
		// The manifest records offsets relative to the start of the data...
		mf.DataBlobs = append(mf.DataBlobs, BlobAndPos{blob.SizedRef{Ref: br, Size: size}, dataOffset})

		// ...while zipBlobs records offsets relative to the start of the zip.
		zipBlobs = append(zipBlobs, BlobAndPos{blob.SizedRef{Ref: br, Size: size}, dataStart + dataOffset})
		dataOffset += int64(size)
	}

	// Each schema blob gets its own uncompressed entry after the main file.
	for _, br := range schemaBlobs {
		fw, err := zw.CreateHeader(&zip.FileHeader{
			Name:   "camlistore/" + br.String() + ".json",
			Method: zip.Store, // uncompressed
		})
		check(err)
		check(zw.Flush())
		b := pk.schemaBlob[br]
		// After the flush, cw.n is where this schema blob's bytes start.
		zipBlobs = append(zipBlobs, BlobAndPos{blob.SizedRef{Ref: br, Size: b.Size()}, cw.n})
		rc := b.Open()
		n, err := io.Copy(fw, rc)
		rc.Close()
		check(err)
		if n != int64(b.Size()) {
			return fmt.Errorf("failed to write all of schema blob %v: %d bytes, not wanted %d", br, n, b.Size())
		}
	}

	// Manifest file
	fw, err = zw.Create(zipManifestPath)
	check(err)
	enc, err := json.MarshalIndent(mf, "", " ")
	check(err)
	_, err = fw.Write(enc)
	check(err)
	err = zw.Close()
	check(err)

	if zbuf.Len() > zipMax {
		// We guessed wrong. Back up. Find out how many blobs we went over.
		// Walk the written data blobs from last to first, subtracting each
		// one's size from the overage, and ask the caller to retry with the
		// blob reached once the overage is covered as the truncation point.
		overage := zbuf.Len() - zipMax
		for i := len(dataRefsWritten) - 1; i >= 0; i-- {
			dr := dataRefsWritten[i]
			if overage <= 0 {
				return needsTruncatedAfterError{dr}
			}
			overage -= int(pk.dataSize[dr])
		}
		return errors.New("file is unpackable; first blob is too big to fit")
	}

	zipRef := blob.SHA1FromBytes(zbuf.Bytes())
	zipSB, err := blobserver.ReceiveNoHash(pk.s.large, zipRef, bytes.NewReader(zbuf.Bytes()))
	if err != nil {
		return err
	}

	// Meta row for this zip, keyed by the wholeref and the zip's index.
	// Value: "<zipRef> <dataStart> <whole bytes written before this zip> <data bytes in this zip>".
	bm := pk.s.meta.BeginBatch()
	bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, pk.wholeRef, len(pk.zips)),
		fmt.Sprintf("%s %d %d %d", zipRef, dataStart, pk.wholeBytesWritten, dataBytesWritten))

	pk.wholeBytesWritten += dataBytesWritten
	pk.zips = append(pk.zips, writtenZip{
		SizedRef: zipSB,
		dataRefs: dataRefsWritten,
	})

	// One meta row per blob stored in the zip: "<size> <zipRef> <offset>".
	for _, zb := range zipBlobs {
		bm.Set(blobMetaPrefix+zb.Ref.String(), fmt.Sprintf("%d %v %d", zb.Size, zipRef, zb.Offset))
	}
	if err := pk.s.meta.CommitBatch(bm); err != nil {
		return err
	}

	// Delete from small
	if !pk.s.skipDelete {
		toDelete := make([]blob.Ref, 0, len(dataRefsWritten)+len(schemaBlobs))
		toDelete = append(toDelete, dataRefsWritten...)
		toDelete = append(toDelete, schemaBlobs...)
		if err := pk.s.small.RemoveBlobs(toDelete); err != nil {
			// Can't really do anything about it and doesn't really matter, so
			// just log for now.
			pk.s.Logf("Error removing blobs from %s: %v", pk.s.small, err)
		}
	}

	// On success, consume the chunks we wrote from pk.chunksRemain.
	pk.chunksRemain = pk.chunksRemain[len(dataRefsWritten):]
	return nil
}
func TestPackerBoundarySplits(t *testing.T) { if testing.Short() { t.Skip("skipping slow test") } // Test a file of three chunk sizes, totalling near the 16 MB // boundary: // - 1st chunk is 6 MB. ("blobA") // - 2nd chunk is 6 MB. ("blobB") // - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find // which size causes the packer to write two zip files. // During the test we set zip overhead boundaries to 0, to // force the test to into its pathological misprediction code paths, // where it needs to back up and rewrite the zip with one part less. // That's why the test starts with two zip files: so there's at // least one that can be removed to make room. defer setIntTemporarily(&zipPerEntryOverhead, 0)() const sizeAB = 12 << 20 const maxBlobSize = 16 << 20 bytesAB := randBytes(sizeAB) blobA := &test.Blob{string(bytesAB[:sizeAB/2])} blobB := &test.Blob{string(bytesAB[sizeAB/2:])} refA := blobA.BlobRef() refB := blobB.BlobRef() bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down // Mechanism to verify we hit the back-up code path: var ( mu sync.Mutex sawTruncate blob.Ref stoppedBeforeOverflow bool ) testHookSawTruncate = func(after blob.Ref) { if after != refB { t.Errorf("unexpected truncate point %v", after) } mu.Lock() defer mu.Unlock() sawTruncate = after } testHookStopBeforeOverflowing = func() { mu.Lock() defer mu.Unlock() stoppedBeforeOverflow = true } defer func() { testHookSawTruncate = nil testHookStopBeforeOverflowing = nil }() generatesTwoZips := func(sizeC int) (ret bool) { large := new(test.Fetcher) s := &storage{ small: new(test.Fetcher), large: large, meta: sorted.NewMemoryKeyValue(), log: test.NewLogger(t, "blobpacked: ", // Ignore these phrases: "Packing file ", "Packed file ", ), } s.init() // Upload first two chunks blobA.MustUpload(t, s) blobB.MustUpload(t, s) // Upload second chunk bytesC := bytesCFull[:sizeC] h := blob.NewHash() h.Write(bytesC) refC := blob.RefFromHash(h) _, err := s.ReceiveBlob(refC, 
bytes.NewReader(bytesC)) if err != nil { t.Fatal(err) } // Upload the file schema blob. m := schema.NewFileMap("foo.dat") m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{ schema.BytesPart{ Size: sizeAB / 2, BlobRef: refA, }, schema.BytesPart{ Size: sizeAB / 2, BlobRef: refB, }, schema.BytesPart{ Size: uint64(sizeC), BlobRef: refC, }, }) fjson, err := m.JSON() if err != nil { t.Fatalf("schema filemap JSON: %v", err) } fb := &test.Blob{Contents: fjson} fb.MustUpload(t, s) num := large.NumBlobs() if num < 1 || num > 2 { t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num) } return num == 2 } maxC := maxBlobSize - sizeAB smallestC := sort.Search(maxC, generatesTwoZips) if smallestC == maxC { t.Fatalf("never found a point at which we generated 2 zip files") } t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20)) t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB) mu.Lock() if sawTruncate != refB { t.Errorf("truncate after = %v; want %v", sawTruncate, refB) } if !stoppedBeforeOverflow { t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary") } }
func TestReindex(t *testing.T) { if testing.Short() { t.Skip("skipping in short mode") } type file struct { size int64 name string contents []byte } files := []file{ {17 << 20, "foo.dat", randBytesSrc(17<<20, 42)}, {10 << 20, "bar.dat", randBytesSrc(10<<20, 43)}, {5 << 20, "baz.dat", randBytesSrc(5<<20, 44)}, } pt := testPack(t, func(sto blobserver.Storage) error { for _, f := range files { if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil { return err } } return nil }, wantNumLargeBlobs(4), wantNumSmallBlobs(0), ) // backup the meta that is supposed to be lost/erased. // pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called. meta := pt.sto.meta // and build new meta index if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) { return sorted.NewMemoryKeyValue(), nil }); err != nil { t.Fatal(err) } validBlobKey := func(key, value string) error { if !strings.HasPrefix(key, "b:") { return errors.New("not a blob meta key") } wantRef, ok := blob.Parse(key[2:]) if !ok { return errors.New("bogus blobref in key") } m, err := parseMetaRow([]byte(value)) if err != nil { return err } rc, err := pt.large.SubFetch(m.largeRef, int64(m.largeOff), int64(m.size)) if err != nil { return err } defer rc.Close() h := wantRef.Hash() n, err := io.Copy(h, rc) if err != nil { return err } if !wantRef.HashMatches(h) { return errors.New("content doesn't match") } if n != int64(m.size) { return errors.New("size doesn't match") } return nil } // check that new meta is identical to "lost" one newRows := 0 if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error { oldValue, err := meta.Get(key) if err != nil { t.Fatalf("Could not get value for %v in old meta: %v", key, err) } newRows++ // Exact match is fine. if oldValue == newValue { return nil } // If it differs, it should at least be correct. 
(blob metadata // can now point to different packed zips, depending on sorting) err = validBlobKey(key, newValue) if err == nil { return nil } t.Errorf("Reindexing error: for key %v: %v\n got: %q\nwant: %q", key, err, newValue, oldValue) return nil // keep enumerating, regardless of errors }); err != nil { t.Fatal(err) } // make sure they have the same number of entries too, to be sure that the reindexing // did not miss entries that the old meta had. oldRows := countSortedRows(t, meta) if oldRows != newRows { t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows) } // And verify we can read one of the files back out. hash := blob.NewHash() hash.Write(files[0].contents) pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size) }
func TestPackLarge(t *testing.T) { if testing.Short() { t.Skip("skipping in short mode") } const fileSize = 17 << 20 // more than 16 MB, so more than one zip const fileName = "foo.dat" fileContents := randBytes(fileSize) hash := blob.NewHash() hash.Write(fileContents) wholeRef := blob.RefFromHash(hash) pt := testPack(t, func(sto blobserver.Storage) error { _, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents)) return err }, wantNumLargeBlobs(2), wantNumSmallBlobs(0), ) // Gather the "w:*" meta rows we wrote. got := map[string]string{} if err := sorted.Foreach(pt.sto.meta, func(key, value string) error { if strings.HasPrefix(key, "b:") { return nil } got[key] = value return nil }); err != nil { t.Fatal(err) } // Verify the two zips are correctly described. // There should be one row to say that we have two zip, and // that the overall file is 17MB: keyBase := "w:" + wholeRef.String() if g, w := got[keyBase], "17825792 2"; g != w { t.Fatalf("meta row for key %q = %q; want %q", keyBase, g, w) } // ... (and a little helper) ... parseMeta := func(n int) (zipOff, dataOff, dataLen int64) { key := keyBase + ":" + strconv.Itoa(n) v := got[key] f := strings.Fields(v) if len(f) != 4 { t.Fatalf("meta for key %q = %q; expected 4 space-separated fields", key, v) } i64 := func(n int) int64 { i, err := strconv.ParseInt(f[n], 10, 64) if err != nil { t.Fatalf("error parsing int64 %q in field index %d of meta key %q (value %q): %v", f[n], n, key, v, err) } return i } zipOff, dataOff, dataLen = i64(1), i64(2), i64(3) return } // And then verify if we have the two "w:<wholeref>:0" and // "w:<wholeref>:1" rows and that they're consistent. z0, d0, l0 := parseMeta(0) z1, d1, l1 := parseMeta(1) if z0 != z1 { t.Errorf("expected zip offset in zip0 and zip1 to match. 
got %d and %d", z0, z0) } if d0 != 0 { t.Errorf("zip0's data offset = %d; want 0", d0) } if d1 != l0 { t.Errorf("zip1 data offset %d != zip0 data length %d", d1, l0) } if d1+l1 != fileSize { t.Errorf("zip1's offset %d + length %d = %d; want %d (fileSize)", d1, l1, d1+l1, fileSize) } // And verify we can read it back out. pt.testOpenWholeRef(t, wholeRef, fileSize) }
func TestReindex(t *testing.T) { if testing.Short() { t.Skip("skipping in short mode") } type file struct { size int64 name string contents []byte } files := []file{ {17 << 20, "foo.dat", randBytesSrc(17<<20, 42)}, {10 << 20, "bar.dat", randBytesSrc(10<<20, 43)}, {5 << 20, "baz.dat", randBytesSrc(5<<20, 44)}, } pt := testPack(t, func(sto blobserver.Storage) error { for _, f := range files { if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil { return err } } return nil }, wantNumLargeBlobs(4), wantNumSmallBlobs(0), ) // backup the meta that is supposed to be lost/erased. // pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called. meta := pt.sto.meta // and build new meta index if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) { return sorted.NewMemoryKeyValue(), nil }); err != nil { t.Fatal(err) } // check that new meta is identical to "lost" one newRows := 0 if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error { oldValue, err := meta.Get(key) if err != nil { t.Fatalf("Could not get value for %v in old meta: %v", key, err) } if oldValue != newValue { t.Fatalf("Reindexing error: for key %v, got %v, want %v", key, newValue, oldValue) } newRows++ return nil }); err != nil { t.Fatal(err) } // make sure they have the same number of entries too, to be sure that the reindexing // did not miss entries that the old meta had. oldRows := countSortedRows(t, meta) if oldRows != newRows { t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows) } // And verify we can read one of the files back out. hash := blob.NewHash() hash.Write(files[0].contents) pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size) }