func TestStorage(t *testing.T) {
	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		s := &storage{
			small: new(test.Fetcher),
			large: new(test.Fetcher),
			meta:  sorted.NewMemoryKeyValue(),
			log:   test.NewLogger(t, "blobpacked: "),
		}
		s.init()
		return s, func() {}
	})
}
func testStreamBlobs(t *testing.T,
	small blobserver.Storage,
	large subFetcherStorage,
	populate func(*testing.T, *storage) []storagetest.StreamerTestOpt) {
	s := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	s.init()
	wants := populate(t, s)
	storagetest.TestStreamer(t, s, wants...)
}
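// testStreamBlobs takes a subFetcherStorage for its large store. That type is
// defined in the non-test code of this package; if it weren't visible, a
// minimal definition consistent with its use here would be the following
// (a sketch; the package's real declaration may differ):
type subFetcherStorage interface {
	blobserver.Storage
	blob.SubFetcher
}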
func TestStorageNoSmallSubfetch(t *testing.T) {
	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		s := &storage{
			// We need to hide SubFetcher, to test *storage's SubFetch, as it delegates
			// to the underlying SubFetcher, if small implements that interface.
			small: hideSubFetcher(new(test.Fetcher)),
			large: new(test.Fetcher),
			meta:  sorted.NewMemoryKeyValue(),
			log:   test.NewLogger(t, "blobpacked: "),
		}
		s.init()
		return s, func() {}
	})
}
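// hideSubFetcher is used above so that *storage's own SubFetch path is
// exercised rather than being delegated to small. If it isn't defined
// elsewhere in the package, a minimal version could wrap the storage in an
// anonymous struct so the SubFetch method is not promoted (a sketch; the
// package's real helper may differ):
func hideSubFetcher(sto blobserver.Storage) blobserver.Storage {
	if _, ok := sto.(blob.SubFetcher); ok {
		// Only blobserver.Storage's method set is promoted here,
		// hiding any SubFetch method the concrete type has.
		return struct{ blobserver.Storage }{sto}
	}
	return sto
}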
// TestSmallFallback verifies that storage proxies through to small for Fetch, Stat, and Enumerate.
func TestSmallFallback(t *testing.T) {
	small := new(test.Fetcher)
	s := &storage{
		small: small,
		large: new(test.Fetcher),
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	s.init()
	b1 := &test.Blob{"foo"}
	b1.MustUpload(t, small)
	wantSB := b1.SizedRef()

	// Fetch
	rc, _, err := s.Fetch(b1.BlobRef())
	if err != nil {
		t.Errorf("failed to Get blob: %v", err)
	} else {
		rc.Close()
	}

	// Stat.
	sb, err := blobserver.StatBlob(s, b1.BlobRef())
	if err != nil {
		t.Errorf("failed to Stat blob: %v", err)
	} else if sb != wantSB {
		t.Errorf("Stat = %v; want %v", sb, wantSB)
	}

	// Enumerate
	saw := false
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	if err := blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
		if sb != wantSB {
			return fmt.Errorf("saw blob %v; want %v", sb, wantSB)
		}
		saw = true
		return nil
	}); err != nil {
		t.Errorf("EnumerateAll: %v", err)
	}
	if !saw {
		t.Error("didn't see blob in Enumerate")
	}
}
func TestStreamBlobs(t *testing.T) {
	small := new(test.Fetcher)
	s := &storage{
		small: small,
		large: new(test.Fetcher),
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	s.init()

	all := map[blob.Ref]bool{}
	const nBlobs = 10
	for i := 0; i < nBlobs; i++ {
		b := &test.Blob{strconv.Itoa(i)}
		b.MustUpload(t, small)
		all[b.BlobRef()] = true
	}
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	token := "" // beginning

	got := map[blob.Ref]bool{}
	dest := make(chan blobserver.BlobAndToken, 16)
	done := make(chan bool)
	go func() {
		defer close(done)
		for bt := range dest {
			got[bt.Blob.Ref()] = true
		}
	}()
	err := s.StreamBlobs(ctx, dest, token)
	if err != nil {
		t.Fatalf("StreamBlobs = %v", err)
	}
	<-done
	if !reflect.DeepEqual(got, all) {
		t.Errorf("Got blobs %v; want %v", got, all)
	}
	storagetest.TestStreamer(t, s, storagetest.WantN(nBlobs))
}
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	//  - 1st chunk is 6 MB. ("blobA")
	//  - 2nd chunk is 6 MB. ("blobB")
	//  - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	//    which size causes the packer to write two zip files.
	// During the test we set zip overhead boundaries to 0, to
	// force the test into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload the first two chunks.
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload the third chunk.
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock() // no need to unlock; the test is over
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}
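// TestPackerBoundarySplits (and TestRemoveBlobs below) rely on two small test
// helpers. If they aren't defined elsewhere in this file, minimal versions
// could look like the following (sketches; the real helpers may differ, e.g.
// in the seed used; uses math/rand):

// setIntTemporarily sets *v to tmp and returns a func restoring the old value,
// for use as: defer setIntTemporarily(&zipPerEntryOverhead, 0)()
func setIntTemporarily(v *int, tmp int) (restore func()) {
	old := *v
	*v = tmp
	return func() { *v = old }
}

// randBytes returns n bytes of deterministic pseudo-random data, so test blobs
// are reproducible across runs.
func randBytes(n int) []byte {
	r := rand.New(rand.NewSource(42)) // fixed seed is an assumption, for reproducibility
	s := make([]byte, n)
	for i := range s {
		s[i] = byte(r.Int63())
	}
	return s
}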
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.

	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip.
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything.
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}
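// singleBlob is used above to locate the one packed zip. If it isn't defined
// elsewhere in this file, a minimal version could be (a sketch; the real
// helper may differ):
func singleBlob(src blobserver.BlobEnumerator) (blob.SizedRef, error) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	var ret blob.SizedRef
	n := 0
	if err := blobserver.EnumerateAll(ctx, src, func(sb blob.SizedRef) error {
		ret = sb
		n++
		return nil
	}); err != nil {
		return blob.SizedRef{}, err
	}
	if n != 1 {
		return blob.SizedRef{}, fmt.Errorf("saw %d blobs; want exactly 1", n)
	}
	return ret, nil
}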
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	})
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	//    no dup for 100% identical file bytes, test that uploading a
	//    49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}
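// The packTest helper type isn't shown in this excerpt; its shape can be
// inferred from how testPack uses it. A minimal definition consistent with
// those uses would be (a sketch; the package's real type may carry more fields):
type packTest struct {
	sto                   *storage
	logical, small, large *test.Fetcher

	wantLargeBlobs interface{} // nil means don't check; else an int
	wantSmallBlobs interface{} // nil means don't check; else an int

	okayNoMeta map[blob.Ref]bool // blobs allowed to be missing from the meta index
}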