/*
1KB ReadAt calls before:
	fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
	fileread_test.go:283: Blobs fetched: 4160 (63.03x)
	fileread_test.go:284: Bytes fetched: 361174780 (85.96x)

2KB ReadAt calls before:
	fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
	fileread_test.go:283: Blobs fetched: 2112 (32.00x)
	fileread_test.go:284: Bytes fetched: 182535389 (43.45x)

After fix:
	fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
	fileread_test.go:283: Blobs fetched: 66 (1.00x)
	fileread_test.go:284: Bytes fetched: 4201523 (1.00x)
*/

// TestReaderEfficiency verifies that scanning a file with small sequential
// ReadAt calls fetches each underlying blob, and each stored byte, exactly once.
func TestReaderEfficiency(t *testing.T) {
	const fileSize = 4 << 20
	bigFile := make([]byte, fileSize)
	rnd := rand.New(rand.NewSource(1))
	for i := range bigFile {
		bigFile[i] = byte(rnd.Intn(256))
	}
	sto := new(test.Fetcher) // in-memory blob storage
	fileMap := NewFileMap("testfile")
	fileref, err := WriteFileMap(sto, fileMap, bytes.NewReader(bigFile))
	if err != nil {
		t.Fatalf("WriteFileMap: %v", err)
	}

	fr, err := NewFileReader(sto, fileref)
	if err != nil {
		t.Fatal(err)
	}

	numBlobs := sto.NumBlobs()
	t.Logf("Num blobs = %d", numBlobs)
	sumSize := sto.SumBlobSize()
	t.Logf("Blob Size: %d raw, %d with meta (%.05fx)", fileSize, sumSize, float64(sumSize)/float64(fileSize))

	const readSize = 2 << 10
	buf := make([]byte, readSize)
	for off := int64(0); off < fileSize; off += readSize {
		n, err := fr.ReadAt(buf, off)
		if err != nil {
			t.Fatalf("ReadAt at offset %d: %v", off, err)
		}
		if n != readSize {
			t.Fatalf("Read %d bytes at offset %d; want %d", n, off, readSize)
		}
		got, want := buf, bigFile[off:off+readSize]
		if !bytes.Equal(buf, want) {
			t.Errorf("Incorrect read at offset %d:\n got: %s\n want: %s", off, summary(got), summary(want))
			// Find where the two buffers first diverge (this off shadows the loop offset on purpose).
			off := 0
			for len(got) > 0 && len(want) > 0 && got[0] == want[0] {
				off++
				got = got[1:]
				want = want[1:]
			}
			t.Errorf(" differences start at offset %d:\n got: %s\n want: %s\n", off, summary(got), summary(want))
			break
		}
	}
	fr.Close()

	blobsFetched, bytesFetched := sto.Stats()
	if blobsFetched != int64(numBlobs) {
		t.Errorf("Fetched %d blobs; want %d", blobsFetched, numBlobs)
	}
	if bytesFetched != sumSize {
		t.Errorf("Fetched %d bytes; want %d", bytesFetched, sumSize)
	}
}
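
// summary is used in the mismatch branches above but isn't defined in this
// excerpt. A minimal sketch of such a helper (hypothetical; the real one may
// format differently), assuming "fmt" is imported: it keeps test output short
// by printing only the buffer's length and a small hex prefix.
func summary(v []byte) string {
	const max = 16
	head := v
	if len(head) > max {
		head = head[:max]
	}
	return fmt.Sprintf("%d bytes, starting %x", len(v), head)
}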
// TestArchiverStress runs the archiver for many rounds over a large random
// file, then restores from the produced zips and verifies the original set
// of blobs is recovered.
func TestArchiverStress(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	src := new(test.Fetcher)
	fileRef, err := schema.WriteFileFromReader(src, "random", io.LimitReader(randReader{}, 10<<20))
	if err != nil {
		t.Fatal(err)
	}
	n0 := src.NumBlobs()
	t.Logf("Wrote %v in %d blobs", fileRef, n0)

	refs0 := src.BlobrefStrings()

	var zips [][]byte
	archived := map[blob.Ref]bool{}
	a := &Archiver{
		Source:                 src,
		MinZipSize:             1 << 20,
		DeleteSourceAfterStore: true,
		Store: func(zipd []byte, brs []blob.SizedRef) error {
			zips = append(zips, zipd)
			for _, sbr := range brs {
				if archived[sbr.Ref] {
					t.Errorf("duplicate archive of %v", sbr.Ref)
				}
				archived[sbr.Ref] = true
			}
			return nil
		},
	}
	for {
		err := a.RunOnce()
		if err == ErrSourceTooSmall {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
	}

	if len(archived) == 0 {
		t.Errorf("unexpectedly small number of archived blobs = %d", len(archived))
	}
	if len(zips) < 2 {
		t.Errorf("unexpectedly small number of zip files = %d", len(zips))
	}
	if n1 := src.NumBlobs() + len(archived); n0 != n1 {
		t.Errorf("original %d blobs != %d remaining + %d archived (= %d)", n0, src.NumBlobs(), len(archived), n1)
	}

	// And restore:
	for _, zipd := range zips {
		if err := foreachZipEntry(zipd, func(br blob.Ref, contents []byte) {
			tb := &test.Blob{Contents: string(contents)}
			if tb.BlobRef() != br {
				t.Fatal("corrupt zip callback")
			}
			src.AddBlob(tb)
		}); err != nil {
			t.Fatal(err)
		}
	}
	refs1 := src.BlobrefStrings()
	if !reflect.DeepEqual(refs0, refs1) {
		t.Error("restore failed: blob set after restore differs from original")
	}
}
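
// randReader is the source fed to io.LimitReader above but isn't defined in
// this excerpt. A minimal sketch (hypothetical; the real type may seed or
// generate bytes differently), assuming "math/rand" is imported: an io.Reader
// that fills every buffer with pseudo-random bytes and never returns EOF, so
// the surrounding io.LimitReader controls how much data is produced.
type randReader struct{}

func (randReader) Read(p []byte) (n int, err error) {
	for i := range p {
		p[i] = byte(rand.Intn(256))
	}
	return len(p), nil
}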
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB boundary:
	//  - 1st chunk is 6 MB ("blobA")
	//  - 2nd chunk is 6 MB ("blobB")
	//  - 3rd chunk ("blobC") is binary-searched (up to 4 MB) to find
	//    which size causes the packer to write two zip files.
	//
	// During the test we set zip overhead boundaries to 0, to force the
	// test into its pathological misprediction code paths, where it needs
	// to back up and rewrite the zip with one part less. That's why the
	// test starts with two zip files: so there's at least one that can be
	// removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{Contents: string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{Contents: string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload the first two chunks.
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload the third chunk.
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)

		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}

	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two-chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	defer mu.Unlock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}
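
// setIntTemporarily and randBytes are used above but aren't defined in this
// excerpt. Minimal sketches of what they might look like (hypothetical; the
// real helpers may differ), assuming zipPerEntryOverhead is a plain int var
// and "math/rand" is imported: the first swaps in a temporary value and
// returns a closure that restores the old one, which is why it's invoked as
// defer setIntTemporarily(...)(); the second returns n pseudo-random bytes.
func setIntTemporarily(v *int, tmp int) (restore func()) {
	old := *v
	*v = tmp
	return func() { *v = old }
}

func randBytes(n int) []byte {
	b := make([]byte, n)
	for i := range b {
		b[i] = byte(rand.Intn(256))
	}
	return b
}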
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.
	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip.
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything.
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}
	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' rows to be deleted")
	}
}
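
// singleBlob is used above to locate the packed zip but isn't defined in this
// excerpt. A minimal sketch (hypothetical; the real helper may differ),
// assuming "fmt" is imported: it enumerates a storage and returns its only
// blob, failing if the count isn't exactly one.
func singleBlob(src blobserver.BlobEnumerator) (blob.SizedRef, error) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	var found []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, src, func(sb blob.SizedRef) error {
		found = append(found, sb)
		return nil
	}); err != nil {
		return blob.SizedRef{}, err
	}
	if len(found) != 1 {
		return blob.SizedRef{}, fmt.Errorf("saw %d blobs; want exactly 1", len(found))
	}
	return found[0], nil
}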
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	if err := blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	}); err != nil {
		t.Fatalf("EnumerateAll over large blobs: %v", err)
	}
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}

		// The manifest is the last entry in the zip.
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	//    no dup for 100% identical file bytes, test that uploading a
	//    49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering
	//    from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks

	return pt
}
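
// packTest is the fixture returned by testPack above; its definition isn't
// part of this excerpt. A minimal sketch consistent with the fields the
// helper reads (hypothetical; the real struct may carry more state):
type packTest struct {
	logical, small, large *test.Fetcher
	sto                   *storage

	// wantLargeBlobs and wantSmallBlobs hold an int when a caller opts in
	// to the exact-count checks, and stay nil otherwise.
	wantLargeBlobs interface{}
	wantSmallBlobs interface{}

	// okayNoMeta lists logical blobs allowed to be absent from the meta index.
	okayNoMeta map[blob.Ref]bool
}

// A typical call site passes a write func plus option funcs, for example
// (illustrative only; the expected counts here are made up):
//
//	testPack(t,
//		func(sto blobserver.Storage) error {
//			_, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(randBytes(17<<20)))
//			return err
//		},
//		func(pt *packTest) { pt.wantLargeBlobs = 2 },
//		func(pt *packTest) { pt.wantSmallBlobs = 0 },
//	)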