Example 1
func TestReaderSeekStress(t *testing.T) {
	const fileSize = 750<<10 + 123
	bigFile := make([]byte, fileSize)
	rnd := rand.New(rand.NewSource(1))
	for i := range bigFile {
		bigFile[i] = byte(rnd.Intn(256))
	}

	sto := new(test.Fetcher) // in-memory blob storage
	fileMap := NewFileMap("testfile")
	fileref, err := WriteFileMap(sto, fileMap, bytes.NewReader(bigFile))
	if err != nil {
		t.Fatalf("WriteFileMap: %v", err)
	}
	c, ok := sto.BlobContents(fileref)
	if !ok {
		t.Fatal("expected file contents to be present")
	}
	const debug = false
	if debug {
		t.Logf("Fileref %s: %s", fileref, c)
	}

	// Test a bunch of reads at different offsets, making sure we always
	// get the same results.
	skipBy := int64(999)
	if testing.Short() {
		skipBy += 10 << 10
	}
	for off := int64(0); off < fileSize; off += skipBy {
		fr, err := NewFileReader(sto, fileref)
		if err != nil {
			t.Fatal(err)
		}

		skipBytes(fr, uint64(off))
		got, err := ioutil.ReadAll(fr)
		if err != nil {
			t.Fatal(err)
		}
		want := bigFile[off:]
		if !bytes.Equal(got, want) {
			t.Errorf("Incorrect read at offset %d:\n  got: %s\n want: %s", off, summary(got), summary(want))
			off := 0
			for len(got) > 0 && len(want) > 0 && got[0] == want[0] {
				off++
				got = got[1:]
				want = want[1:]
			}
			t.Errorf("  differences start at offset %d:\n    got: %s\n   want: %s\n", off, summary(got), summary(want))
			break
		}
		fr.Close()
	}
}
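The skipBytes helper called above is not part of this excerpt. A minimal stand-in (an assumption, not the original helper, which may also exercise Seek) simply discards the first n bytes of the reader:

package schema // assumed package for these filereader tests

import (
	"io"
	"io/ioutil"
)

// skipBytes is a hypothetical stand-in for the helper used in
// TestReaderSeekStress: it discards n bytes from fr so the subsequent
// ReadAll starts at the wanted offset. Errors are ignored for brevity.
func skipBytes(fr io.Reader, n uint64) {
	io.CopyN(ioutil.Discard, fr, int64(n))
}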
Example 2
func TestReceiveCorrupt(t *testing.T) {
	sto := new(test.Fetcher)
	data := []byte("some blob")
	br := blob.SHA1FromBytes(data)
	data[0] = 'X' // corrupt it
	_, err := blobserver.Receive(sto, br, bytes.NewReader(data))
	if err != blobserver.ErrCorruptBlob {
		t.Errorf("Receive = %v; want ErrCorruptBlob", err)
	}
	if len(sto.BlobrefStrings()) > 0 {
		t.Errorf("nothing should be stored. Got %q", sto.BlobrefStrings())
	}
}
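For context, the corruption check that Receive is expected to perform amounts to recomputing the digest of the received bytes and comparing it against the blobref. A rough sketch using the Hash/HashMatches helpers that appear in Example 11; this is not the actual blobserver.Receive code, whose details may differ:

package example // illustrative only; assumes the camlistore.org/pkg/blob import path

import (
	"fmt"

	"camlistore.org/pkg/blob"
)

// verifyBlob recomputes the digest of data with the ref's hash type and
// reports an error when the content does not match the ref, i.e. the blob
// is corrupt. Hypothetical helper, not part of the original tests.
func verifyBlob(br blob.Ref, data []byte) error {
	h := br.Hash()
	h.Write(data)
	if !br.HashMatches(h) {
		return fmt.Errorf("blob %v: content does not match ref (corrupt)", br)
	}
	return nil
}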
Example 3
func TestOutOfOrderIndexing(t *testing.T) {
	tf := new(test.Fetcher)
	s := sorted.NewMemoryKeyValue()

	ix, err := index.New(s)
	if err != nil {
		t.Fatal(err)
	}
	ix.BlobSource = tf

	t.Logf("file ref = %v", fileBlobRef)
	t.Logf("missing data chunks = %v, %v, %v", chunk1ref, chunk2ref, chunk3ref)

	add := func(b *test.Blob) {
		tf.AddBlob(b)
		if _, err := ix.ReceiveBlob(b.BlobRef(), b.Reader()); err != nil {
			t.Fatalf("ReceiveBlob(%v): %v", b.BlobRef(), err)
		}
	}

	add(fileBlob)

	{
		key := fmt.Sprintf("missing|%s|%s", fileBlobRef, chunk1ref)
		if got, err := s.Get(key); got == "" || err != nil {
			t.Errorf("key %q missing (err: %v); want 1", key, err)
		}
	}

	add(chunk1)
	add(chunk2)

	ix.Exp_AwaitReindexing(t)

	{
		key := fmt.Sprintf("missing|%s|%s", fileBlobRef, chunk3ref)
		if got, err := s.Get(key); got == "" || err != nil {
			t.Errorf("key %q missing (err: %v); want 1", key, err)
		}
	}

	add(chunk3)

	ix.Exp_AwaitReindexing(t)

	foreachSorted(t, s, func(k, v string) {
		if strings.HasPrefix(k, "missing|") {
			t.Errorf("Shouldn't have missing key: %q", k)
		}
	})
}
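Example 4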
func TestArchiver(t *testing.T) {
	src := new(test.Fetcher)
	blobHello := &test.Blob{Contents: "Hello"}
	blobWorld := &test.Blob{Contents: "World" + strings.Repeat("!", 1024)}

	golden := map[blob.Ref]string{
		blobHello.BlobRef(): blobHello.Contents,
		blobWorld.BlobRef(): blobWorld.Contents,
	}

	a := &Archiver{
		Source:                 src,
		DeleteSourceAfterStore: true,
	}

	src.AddBlob(blobHello)
	a.Store = func([]byte, []blob.SizedRef) error {
		return errors.New("Store shouldn't be called")
	}
	a.MinZipSize = 400 // empirically: the zip will be 416 bytes
	if err := a.RunOnce(); err != ErrSourceTooSmall {
		t.Fatalf("RunOnce with just Hello = %v; want ErrSourceTooSmall", err)
	}

	src.AddBlob(blobWorld)
	var zipData []byte
	var inZip []blob.SizedRef
	a.Store = func(zip []byte, brs []blob.SizedRef) error {
		zipData = zip
		inZip = brs
		return nil
	}
	if err := a.RunOnce(); err != nil {
		t.Fatalf("RunOnce with Hello and World = %v", err)
	}
	if zipData == nil {
		t.Error("no zip data stored")
	}
	if len(src.BlobrefStrings()) != 0 {
		t.Errorf("source still has blobs = %d; want none", len(src.BlobrefStrings()))
	}
	if len(inZip) != 2 {
		t.Errorf("expected 2 blobs reported as in zip to Store; got %v", inZip)
	}

	got := map[blob.Ref]string{}
	if err := foreachZipEntry(zipData, func(br blob.Ref, all []byte) {
		got[br] = string(all)
	}); err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(golden, got) {
		t.Errorf("zip contents didn't match. got: %v; want %v", got, golden)
	}
}
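foreachZipEntry, used above and again in Example 6, is not included in this excerpt. A plausible reconstruction, under the assumption that each zip entry is named after its blobref (which blob.Parse can decode); the real helper may differ:

package example // illustrative only

import (
	"archive/zip"
	"bytes"
	"fmt"
	"io/ioutil"

	"camlistore.org/pkg/blob"
)

// foreachZipEntry walks every file in zipData, parses the entry name as a
// blobref, reads the entry's contents, and passes both to fn.
// Hypothetical reconstruction of the test helper; assumes blobref entry names.
func foreachZipEntry(zipData []byte, fn func(blob.Ref, []byte)) error {
	zr, err := zip.NewReader(bytes.NewReader(zipData), int64(len(zipData)))
	if err != nil {
		return err
	}
	for _, zf := range zr.File {
		br, ok := blob.Parse(zf.Name)
		if !ok {
			return fmt.Errorf("zip entry %q is not a valid blobref", zf.Name)
		}
		rc, err := zf.Open()
		if err != nil {
			return err
		}
		contents, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			return err
		}
		fn(br, contents)
	}
	return nil
}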
Example 5
/*

1KB ReadAt calls before:
fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
fileread_test.go:283: Blobs fetched: 4160 (63.03x)
fileread_test.go:284: Bytes fetched: 361174780 (85.96x)

2KB ReadAt calls before:
fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
fileread_test.go:283: Blobs fetched: 2112 (32.00x)
fileread_test.go:284: Bytes fetched: 182535389 (43.45x)

After fix:
fileread_test.go:253: Blob Size: 4194304 raw, 4201523 with meta (1.00172x)
fileread_test.go:283: Blobs fetched: 66 (1.00x)
fileread_test.go:284: Bytes fetched: 4201523 (1.00x)
*/
func TestReaderEfficiency(t *testing.T) {
	const fileSize = 4 << 20
	bigFile := make([]byte, fileSize)
	rnd := rand.New(rand.NewSource(1))
	for i := range bigFile {
		bigFile[i] = byte(rnd.Intn(256))
	}

	sto := new(test.Fetcher) // in-memory blob storage
	fileMap := NewFileMap("testfile")
	fileref, err := WriteFileMap(sto, fileMap, bytes.NewReader(bigFile))
	if err != nil {
		t.Fatalf("WriteFileMap: %v", err)
	}

	fr, err := NewFileReader(sto, fileref)
	if err != nil {
		t.Fatal(err)
	}

	numBlobs := sto.NumBlobs()
	t.Logf("Num blobs = %d", numBlobs)
	sumSize := sto.SumBlobSize()
	t.Logf("Blob Size: %d raw, %d with meta (%.05fx)", fileSize, sumSize, float64(sumSize)/float64(fileSize))

	const readSize = 2 << 10
	buf := make([]byte, readSize)
	for off := int64(0); off < fileSize; off += readSize {
		n, err := fr.ReadAt(buf, off)
		if err != nil {
			t.Fatalf("ReadAt at offset %d: %v", off, err)
		}
		if n != readSize {
			t.Fatalf("Read %d bytes at offset %d; want %d", n, off, readSize)
		}
		got, want := buf, bigFile[off:off+readSize]
		if !bytes.Equal(buf, want) {
			t.Errorf("Incorrect read at offset %d:\n  got: %s\n want: %s", off, summary(got), summary(want))
			off := 0
			for len(got) > 0 && len(want) > 0 && got[0] == want[0] {
				off++
				got = got[1:]
				want = want[1:]
			}
			t.Errorf("  differences start at offset %d:\n    got: %s\n   want: %s\n", off, summary(got), summary(want))
			break
		}
	}
	fr.Close()
	blobsFetched, bytesFetched := sto.Stats()
	if blobsFetched != int64(numBlobs) {
		t.Errorf("Fetched %d blobs; want %d", blobsFetched, numBlobs)
	}
	if bytesFetched != sumSize {
		t.Errorf("Fetched %d bytes; want %d", bytesFetched, sumSize)
	}
}
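The summary helper used in the failure messages of Examples 1 and 5 is not shown either; something along these lines would do (an assumed shape, only meant to keep huge byte slices readable in test output):

package example // illustrative only

import "fmt"

// summary renders a byte slice compactly for test failure messages,
// eliding all but the first few bytes. Hypothetical stand-in for the
// helper referenced in Examples 1 and 5.
func summary(v []byte) string {
	const keep = 10
	if len(v) <= keep {
		return fmt.Sprintf("%d bytes: %x", len(v), v)
	}
	return fmt.Sprintf("%d bytes, starting with %x...", len(v), v[:keep])
}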
Example 6
// Tests a bunch of rounds on a bunch of data.
func TestArchiverStress(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	src := new(test.Fetcher)
	fileRef, err := schema.WriteFileFromReader(src, "random", io.LimitReader(randReader{}, 10<<20))
	if err != nil {
		t.Fatal(err)
	}
	n0 := src.NumBlobs()
	t.Logf("Wrote %v in %d blobs", fileRef, n0)

	refs0 := src.BlobrefStrings()

	var zips [][]byte
	archived := map[blob.Ref]bool{}
	a := &Archiver{
		Source:                 src,
		MinZipSize:             1 << 20,
		DeleteSourceAfterStore: true,
		Store: func(zipd []byte, brs []blob.SizedRef) error {
			zips = append(zips, zipd)
			for _, sbr := range brs {
				if archived[sbr.Ref] {
					t.Error("duplicate archive of %v", sbr.Ref)
				}
				archived[sbr.Ref] = true
			}
			return nil
		},
	}
	for {
		err := a.RunOnce()
		if err == ErrSourceTooSmall {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
	}

	if len(archived) == 0 {
		t.Errorf("unexpected small number of archived blobs = %d", len(archived))
	}
	if len(zips) < 2 {
		t.Errorf("unexpected small number of zip files = %d", len(zips))
	}
	if n1 := src.NumBlobs() + len(archived); n0 != n1 {
		t.Errorf("original %d blobs != %d after + %d archived (%d)", n0, src.NumBlobs(), len(archived), n1)
	}

	// And restore:
	for _, zipd := range zips {
		if err := foreachZipEntry(zipd, func(br blob.Ref, contents []byte) {
			tb := &test.Blob{Contents: string(contents)}
			if tb.BlobRef() != br {
				t.Fatal("corrupt zip callback")
			}
			src.AddBlob(tb)
		}); err != nil {
			t.Fatal(err)
		}
	}

	refs1 := src.BlobrefStrings()
	if !reflect.DeepEqual(refs0, refs1) {
		t.Error("Restore error.")
	}
}
Example 7
// tests that we add the missing wholeRef entries in FileInfo rows when going from
// a version 4 to a version 5 index.
func TestFixMissingWholeref(t *testing.T) {
	tf := new(test.Fetcher)
	s := sorted.NewMemoryKeyValue()

	ix, err := index.New(s)
	if err != nil {
		t.Fatal(err)
	}
	ix.InitBlobSource(tf)

	// populate with a file
	add := func(b *test.Blob) {
		tf.AddBlob(b)
		if _, err := ix.ReceiveBlob(b.BlobRef(), b.Reader()); err != nil {
			t.Fatalf("ReceiveBlob(%v): %v", b.BlobRef(), err)
		}
	}
	add(chunk1)
	add(chunk2)
	add(chunk3)
	add(fileBlob)

	// revert the row to the old form, by stripping the wholeRef suffix
	key := "fileinfo|" + fileBlobRef.String()
	val5, err := s.Get(key)
	if err != nil {
		t.Fatalf("could not get %v: %v", key, err)
	}
	parts := strings.SplitN(val5, "|", 4)
	val4 := strings.Join(parts[:3], "|")
	if err := s.Set(key, val4); err != nil {
		t.Fatalf("could not set (%v, %v): %v", key, val4, err)
	}

	// revert index version at 4 to trigger the fix
	if err := s.Set("schemaversion", "4"); err != nil {
		t.Fatal(err)
	}

	// init broken index
	ix, err = index.New(s)
	if err != index.Exp_ErrMissingWholeRef {
		t.Fatalf("wrong error upon index initialization: got %v, wanted %v", err, index.Exp_ErrMissingWholeRef)
	}
	// and fix it
	if err := ix.Exp_FixMissingWholeRef(tf); err != nil {
		t.Fatal(err)
	}

	// init fixed index
	ix, err = index.New(s)
	if err != nil {
		t.Fatal(err)
	}
	// and check that the value is now actually fixed
	fi, err := ix.GetFileInfo(fileBlobRef)
	if err != nil {
		t.Fatal(err)
	}
	if fi.WholeRef.String() != parts[3] {
		t.Fatalf("index fileInfo wholeref was not fixed: got %q, wanted %v", fi.WholeRef, parts[3])
	}
}
Example 8
func TestWriteThenRead(t *testing.T) {
	m := NewFileMap("test-file")
	const size = 5 << 20
	r := &randReader{seed: 123, length: size}
	sto := new(test.Fetcher)
	var buf bytes.Buffer
	br, err := WriteFileMap(sto, m, io.TeeReader(r, &buf))
	if err != nil {
		t.Fatal(err)
	}

	var got bytes.Buffer
	fr, err := NewFileReader(sto, br)
	if err != nil {
		t.Fatal(err)
	}

	n, err := io.Copy(&got, fr)
	if err != nil {
		t.Fatal(err)
	}
	if n != size {
		t.Errorf("read back %d bytes; want %d", n, size)
	}
	if !bytes.Equal(buf.Bytes(), got.Bytes()) {
		t.Error("bytes differ")
	}

	var offs []int

	getOffsets := func() error {
		offs = offs[:0]
		var off int
		return fr.ForeachChunk(func(_ []blob.Ref, p BytesPart) error {
			offs = append(offs, off)
			off += int(p.Size)
			return err
		})
	}

	if err := getOffsets(); err != nil {
		t.Fatal(err)
	}
	sort.Ints(offs)
	wantOffs := "[0 262144 358150 433428 525437 602690 675039 748088 816210 898743 980993 1053410 1120438 1188662 1265192 1332541 1398316 1463899 1530446 1596700 1668839 1738909 1817065 1891025 1961646 2031127 2099232 2170640 2238692 2304743 2374317 2440449 2514327 2582670 2653257 2753975 2827518 2905783 2975426 3053820 3134057 3204879 3271019 3346750 3421351 3487420 3557939 3624006 3701093 3768863 3842013 3918267 4001933 4069157 4139132 4208109 4281390 4348801 4422695 4490535 4568111 4642769 4709005 4785526 4866313 4933575 5005564 5071633 5152695 5227716]"
	gotOffs := fmt.Sprintf("%v", offs)
	if wantOffs != gotOffs {
		t.Errorf("Got chunk offsets %v; want %v", gotOffs, wantOffs)
	}

	// Now force a fetch failure on one of the filereader schema chunks, to
	// force a failure of GetChunkOffsets
	errFetch := errors.New("fake fetch error")
	var fetches struct {
		sync.Mutex
		n int
	}
	sto.FetchErr = func() error {
		fetches.Lock()
		defer fetches.Unlock()
		fetches.n++
		if fetches.n == 1 {
			return nil
		}
		return errFetch
	}

	fr, err = NewFileReader(sto, br)
	if err != nil {
		t.Fatal(err)
	}
	if err := getOffsets(); fmt.Sprint(err) != "schema/filereader: fetching file schema blob: fake fetch error" {
		t.Errorf("expected second call of GetChunkOffsets to return wrapped errFetch; got %v", err)
	}
}
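The randReader used above is also not part of the excerpt (Example 6 uses a zero-value randReader of its own, in a different test package, which may be defined differently). A deterministic sketch consistent with the seed/length usage in this test:

package example // illustrative only

import (
	"io"
	"math/rand"
)

// randReader is a hypothetical reconstruction of the pseudo-random reader
// used above: it yields deterministic bytes for a given seed and returns
// io.EOF after length bytes have been produced.
type randReader struct {
	seed   int64
	length int

	rnd  *rand.Rand // lazily seeded on first Read
	read int        // bytes produced so far
}

func (r *randReader) Read(p []byte) (int, error) {
	if r.rnd == nil {
		r.rnd = rand.New(rand.NewSource(r.seed))
	}
	if r.read >= r.length {
		return 0, io.EOF
	}
	if rem := r.length - r.read; len(p) > rem {
		p = p[:rem]
	}
	for i := range p {
		p[i] = byte(r.rnd.Intn(256))
	}
	r.read += len(p)
	return len(p), nil
}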
Example 9
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	//    - 1st chunk is 6 MB. ("blobA")
	//    - 2nd chunk is 6 MB. ("blobB")
	//    - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	//      which size causes the packer to write two zip files.

	// During the test we set zip overhead boundaries to 0, to
	// force the test into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload first two chunks
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload the third chunk
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}
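setIntTemporarily, used with defer at the top of this test, is a small helper not shown in the excerpt; its likely shape (an assumption) is a set-and-restore closure:

package example // illustrative only

// setIntTemporarily sets *v to tmp and returns a func that restores the
// previous value, intended for use with defer, as in:
//
//	defer setIntTemporarily(&zipPerEntryOverhead, 0)()
//
// Hypothetical reconstruction of the helper used in Example 9.
func setIntTemporarily(v *int, tmp int) (restore func()) {
	old := *v
	*v = tmp
	return func() { *v = old }
}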
Example 10
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.

	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}
Example 11
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	})
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	// no dup for 100% identical file bytes, test that uploading a
	// 49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}
Example 12
func TestForeachChunkAllSchemaBlobs(t *testing.T) {
	sto := new(test.Fetcher) // in-memory blob storage
	foo := &test.Blob{"foo"}
	bar := &test.Blob{"bar"}
	sto.AddBlob(foo)
	sto.AddBlob(bar)

	// Make a "bytes" schema blob referencing the "foo" and "bar" chunks.
	// Verify it works.
	bytesBlob := &test.Blob{`{"camliVersion": 1,
"camliType": "bytes",
"parts": [
   {"blobRef": "` + foo.BlobRef().String() + `", "size": 3},
   {"blobRef": "` + bar.BlobRef().String() + `", "size": 3}
]}`}
	sto.AddBlob(bytesBlob)

	var fr *FileReader
	mustRead := func(name string, br blob.Ref, want string) {
		var err error
		fr, err = NewFileReader(sto, br)
		if err != nil {
			t.Fatalf("%s: %v", name, err)
		}
		all, err := ioutil.ReadAll(fr)
		if err != nil {
			t.Fatalf("%s: %v", name, err)
		}
		if string(all) != want {
			t.Errorf("%s: read contents %q; want %q", name, all, want)
		}
	}
	mustRead("bytesBlob", bytesBlob.BlobRef(), "foobar")

	// Now make another bytes schema blob embedding the previous one.
	bytesBlob2 := &test.Blob{`{"camliVersion": 1,
"camliType": "bytes",
"parts": [
   {"bytesRef": "` + bytesBlob.BlobRef().String() + `", "size": 6}
]}`}
	sto.AddBlob(bytesBlob2)
	mustRead("bytesBlob2", bytesBlob2.BlobRef(), "foobar")

	sawSchema := map[blob.Ref]bool{}
	sawData := map[blob.Ref]bool{}
	if err := fr.ForeachChunk(func(path []blob.Ref, p BytesPart) error {
		for _, sref := range path {
			sawSchema[sref] = true
		}
		sawData[p.BlobRef] = true
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	want := []struct {
		name string
		tb   *test.Blob
		m    map[blob.Ref]bool
	}{
		{"bytesBlob", bytesBlob, sawSchema},
		{"bytesBlob2", bytesBlob2, sawSchema},
		{"foo", foo, sawData},
		{"bar", bar, sawData},
	}
	for _, tt := range want {
		if b := tt.tb.BlobRef(); !tt.m[b] {
			t.Errorf("didn't see %s (%s)", tt.name, b)
		}
	}
}
Example 13
func TestReaderForeachChunk(t *testing.T) {
	fileSize := 4 << 20
	if testing.Short() {
		fileSize = 1 << 20
	}
	bigFile := make([]byte, fileSize)
	rnd := rand.New(rand.NewSource(1))
	for i := range bigFile {
		bigFile[i] = byte(rnd.Intn(256))
	}
	sto := new(test.Fetcher) // in-memory blob storage
	fileMap := NewFileMap("testfile")
	fileref, err := WriteFileMap(sto, fileMap, bytes.NewReader(bigFile))
	if err != nil {
		t.Fatalf("WriteFileMap: %v", err)
	}

	fr, err := NewFileReader(sto, fileref)
	if err != nil {
		t.Fatal(err)
	}

	var back bytes.Buffer
	var totSize uint64
	err = fr.ForeachChunk(func(sref []blob.Ref, p BytesPart) error {
		if len(sref) < 1 {
			t.Fatal("expected at least one schemaPath blob")
		}
		for i, br := range sref {
			if !br.Valid() {
				t.Fatalf("invalid schema blob in path index %d", i)
			}
		}
		if p.BytesRef.Valid() {
			t.Fatal("should never see a valid BytesRef")
		}
		if !p.BlobRef.Valid() {
			t.Fatal("saw part with invalid blobref")
		}
		rc, size, err := sto.Fetch(p.BlobRef)
		if err != nil {
			return fmt.Errorf("Error fetching blobref of chunk %+v: %v", p, err)
		}
		defer rc.Close()
		totSize += p.Size
		if uint64(size) != p.Size {
			return fmt.Errorf("fetched size %d doesn't match expected for chunk %+v", size, p)
		}
		n, err := io.Copy(&back, rc)
		if err != nil {
			return err
		}
		if n != int64(size) {
			return fmt.Errorf("Copied unexpected %d bytes of chunk %+v", n, p)
		}
		return nil
	})
	if err != nil {
		t.Fatalf("ForeachChunk = %v", err)
	}
	if back.Len() != fileSize {
		t.Fatalf("Read file is %d bytes; want %d", back.Len(), fileSize)
	}
	if totSize != uint64(fileSize) {
		t.Errorf("sum of parts = %d; want %d", totSize, fileSize)
	}
	if !bytes.Equal(back.Bytes(), bigFile) {
		t.Errorf("file read mismatch")
	}
}