Ejemplo n.º 1
0
/*

1KB ReadAt calls before:
fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
fileread_test.go:283: Blobs fetched: 4160 (63.03x)
fileread_test.go:284: Bytes fetched: 361174780 (85.96x)

2KB ReadAt calls before:
fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
fileread_test.go:283: Blobs fetched: 2112 (32.00x)
fileread_test.go:284: Bytes fetched: 182535389 (43.45x)

After fix:
fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
fileread_test.go:283: Blobs fetched: 66 (1.00x)
fileread_test.go:284: Bytes fetched: 4201523 (1.00x)
*/
func TestReaderEfficiency(t *testing.T) {
	const fileSize = 4 << 20
	bigFile := make([]byte, fileSize)
	rnd := rand.New(rand.NewSource(1))
	for i := range bigFile {
		bigFile[i] = byte(rnd.Intn(256))
	}

	sto := new(test.Fetcher) // in-memory blob storage
	fileMap := NewFileMap("testfile")
	fileref, err := WriteFileMap(sto, fileMap, bytes.NewReader(bigFile))
	if err != nil {
		t.Fatalf("WriteFileMap: %v", err)
	}

	fr, err := NewFileReader(sto, fileref)
	if err != nil {
		t.Fatal(err)
	}

	numBlobs := sto.NumBlobs()
	t.Logf("Num blobs = %d", numBlobs)
	sumSize := sto.SumBlobSize()
	t.Logf("Blob Size: %d raw, %d with meta (%.05fx)", fileSize, sumSize, float64(sumSize)/float64(fileSize))

	const readSize = 2 << 10
	buf := make([]byte, readSize)
	for off := int64(0); off < fileSize; off += readSize {
		n, err := fr.ReadAt(buf, off)
		if err != nil {
			t.Fatalf("ReadAt at offset %d: %v", off, err)
		}
		if n != readSize {
			t.Fatalf("Read %d bytes at offset %d; want %d", n, off, readSize)
		}
		got, want := buf, bigFile[off:off+readSize]
		if !bytes.Equal(buf, want) {
			t.Errorf("Incorrect read at offset %d:\n  got: %s\n want: %s", off, summary(got), summary(want))
			off := 0
			for len(got) > 0 && len(want) > 0 && got[0] == want[0] {
				off++
				got = got[1:]
				want = want[1:]
			}
			t.Errorf("  differences start at offset %d:\n    got: %s\n   want: %s\n", off, summary(got), summary(want))
			break
		}
	}
	fr.Close()
	blobsFetched, bytesFetched := sto.Stats()
	if blobsFetched != int64(numBlobs) {
		t.Errorf("Fetched %d blobs; want %d", blobsFetched, numBlobs)
	}
	if bytesFetched != sumSize {
		t.Errorf("Fetched %d bytes; want %d", bytesFetched, sumSize)
	}
}
Ejemplo n.º 2
0
// Tests a bunch of rounds on a bunch of data.
func TestArchiverStress(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	src := new(test.Fetcher)
	fileRef, err := schema.WriteFileFromReader(src, "random", io.LimitReader(randReader{}, 10<<20))
	if err != nil {
		t.Fatal(err)
	}
	n0 := src.NumBlobs()
	t.Logf("Wrote %v in %d blobs", fileRef, n0)

	refs0 := src.BlobrefStrings()

	var zips [][]byte
	archived := map[blob.Ref]bool{}
	a := &Archiver{
		Source:                 src,
		MinZipSize:             1 << 20,
		DeleteSourceAfterStore: true,
		Store: func(zipd []byte, brs []blob.SizedRef) error {
			zips = append(zips, zipd)
			for _, sbr := range brs {
				if archived[sbr.Ref] {
					t.Error("duplicate archive of %v", sbr.Ref)
				}
				archived[sbr.Ref] = true
			}
			return nil
		},
	}
	for {
		err := a.RunOnce()
		if err == ErrSourceTooSmall {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
	}

	if len(archived) == 0 {
		t.Errorf("unexpected small number of archived blobs = %d", len(archived))
	}
	if len(zips) < 2 {
		t.Errorf("unexpected small number of zip files = %d", len(zips))
	}
	if n1 := src.NumBlobs() + len(archived); n0 != n1 {
		t.Errorf("original %d blobs != %d after + %d archived (%d)", n0, src.NumBlobs(), len(archived), n1)
	}

	// And restore:
	for _, zipd := range zips {
		if err := foreachZipEntry(zipd, func(br blob.Ref, contents []byte) {
			tb := &test.Blob{Contents: string(contents)}
			if tb.BlobRef() != br {
				t.Fatal("corrupt zip callback")
			}
			src.AddBlob(tb)
		}); err != nil {
			t.Fatal(err)
		}
	}

	refs1 := src.BlobrefStrings()
	if !reflect.DeepEqual(refs0, refs1) {
		t.Error("Restore error.")
	}
}
Ejemplo n.º 3
0
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	//    - 1st chunk is 6 MB. ("blobA")
	//    - 2nd chunk is 6 MB. ("blobB")
	//    - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	//      which size causes the packer to write two zip files.

	// During the test we set zip overhead boundaries to 0, to
	// force the test to into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload first two chunks
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload second chunk
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}
Ejemplo n.º 4
0
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.

	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}
Ejemplo n.º 5
0
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	})
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	// no dup for 100% identical file bytes, test that uploading a
	// 49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}