Example #1
// foreachZipBlob calls fn for each blob in the zip pack blob
// identified by zipRef.  If fn returns a non-nil error,
// foreachZipBlob stops enumerating with that error.
func (s *storage) foreachZipBlob(zipRef blob.Ref, fn func(BlobAndPos) error) error {
	sb, err := blobserver.StatBlob(s.large, zipRef)
	if err != nil {
		return err
	}
	zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
	if err != nil {
		return zipOpenError{zipRef, err}
	}
	var maniFile *zip.File // or nil if not found
	var firstOff int64     // offset of first file (the packed data chunks)
	for i, f := range zr.File {
		if i == 0 {
			firstOff, err = f.DataOffset()
			if err != nil {
				return err
			}
		}
		if f.Name == zipManifestPath {
			maniFile = f
			break
		}
	}
	if maniFile == nil {
		return errors.New("no camlistore manifest file found in zip")
	}
	for _, f := range zr.File {
		if !strings.HasPrefix(f.Name, "camlistore/") || f.Name == zipManifestPath ||
			!strings.HasSuffix(f.Name, ".json") {
			continue
		}
		brStr := strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json")
		br, ok := blob.Parse(brStr)
		if ok {
			off, err := f.DataOffset()
			if err != nil {
				return err
			}
			if err := fn(BlobAndPos{
				SizedRef: blob.SizedRef{Ref: br, Size: uint32(f.UncompressedSize64)},
				Offset:   off,
			}); err != nil {
				return err
			}
		}
	}
	maniRC, err := maniFile.Open()
	if err != nil {
		return err
	}
	defer maniRC.Close()
	var mf Manifest
	if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
		return err
	}
	if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
		return errors.New("incomplete blobpack manifest JSON")
	}
	for _, bap := range mf.DataBlobs {
		bap.Offset += firstOff
		if err := fn(bap); err != nil {
			return err
		}
	}
	return nil
}
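// countZipBlobs is a minimal usage sketch of foreachZipBlob (not part of the
// original source): it walks one packed zip and reports how many blobs that
// zip describes and their total uncompressed size.
func countZipBlobs(s *storage, zipRef blob.Ref) (n int, total int64, err error) {
	err = s.foreachZipBlob(zipRef, func(bp BlobAndPos) error {
		n++
		total += int64(bp.Size)
		return nil
	})
	return
}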
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	if err := blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
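		// The manifest is expected to be the last file in the zip.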
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	// no dup for 100% identical file bytes, test that uploading a
	// 49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}
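// TestPackSingleSmallBlobSketch is a hedged illustration (not from the original
// test file) of how a test might drive testPack: it writes one small loose blob
// and asserts, via an inline check option, how many blobs end up in the small
// and large (zip) storages. The blob contents and expected counts are
// assumptions for illustration; test.Blob and blobserver.Receive are assumed
// from Camlistore's test and blobserver packages.
func TestPackSingleSmallBlobSketch(t *testing.T) {
	tb := &test.Blob{Contents: "hello, blobpacked"} // hypothetical test data
	testPack(t,
		func(sto blobserver.Storage) error {
			_, err := blobserver.Receive(sto, tb.BlobRef(), tb.Reader())
			return err
		},
		func(pt *packTest) {
			pt.wantSmallBlobs = 1 // assumed: a lone loose blob stays in small storage
			pt.wantLargeBlobs = 0 // assumed: nothing gets packed into a zip
		},
	)
}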
Example #3
// reindex rebuilds the meta index for packed blobs. It calls newMeta to create
// a new KeyValue on which to write the index, and replaces s.meta with it. There
// is no locking whatsoever, so it should not be called while the storage is
// already in use. Its signature might change if/when it gets exported.
func (s *storage) reindex(ctx context.Context, newMeta func() (sorted.KeyValue, error)) error {
	meta, err := newMeta()
	if err != nil {
		return fmt.Errorf("failed to create new blobpacked meta index: %v", err)
	}

	wholeMetaByWholeRef := make(map[blob.Ref][]wholeMetaPrefixInfo)

	if err := blobserver.EnumerateAllFrom(ctx, s.large, "", func(sb blob.SizedRef) error {
		zipRef := sb.Ref
		zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
		if err != nil {
			return zipOpenError{zipRef, err}
		}
		var maniFile *zip.File
		var firstOff int64 // offset of first file (the packed data chunks)
		for i, f := range zr.File {
			if i == 0 {
				firstOff, err = f.DataOffset()
				if err != nil {
					return err
				}
			}
			if f.Name == zipManifestPath {
				maniFile = f
				break
			}
		}
		if maniFile == nil {
			return fmt.Errorf("no camlistore manifest file found in zip %v", zipRef)
		}
		maniRC, err := maniFile.Open()
		if err != nil {
			return err
		}
		defer maniRC.Close()
		var mf Manifest
		if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
			return err
		}
		if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
			return fmt.Errorf("incomplete blobpack manifest JSON in %v", zipRef)
		}

		bm := meta.BeginBatch()
		// In this loop, we write all the blobMetaPrefix entries for the
		// data blobs in this zip, and we also compute the dataBytesWritten, for later.
		var dataBytesWritten int64
		for _, bp := range mf.DataBlobs {
			bm.Set(blobMetaPrefix+bp.SizedRef.Ref.String(), fmt.Sprintf("%d %v %d", bp.SizedRef.Size, zipRef, firstOff+bp.Offset))
			dataBytesWritten += int64(bp.SizedRef.Size)
		}

		// In this loop, we write all the blobMetaPrefix entries for the schema blobs in this zip
		for _, f := range zr.File {
			if !(strings.HasPrefix(f.Name, "camlistore/") && strings.HasSuffix(f.Name, ".json")) ||
				f.Name == zipManifestPath {
				continue
			}
			br, ok := blob.Parse(strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json"))
			if !ok {
				return fmt.Errorf("schema file in zip %v does not have blobRef as name: %v", zipRef, f.Name)
			}
			offset, err := f.DataOffset()
			if err != nil {
				return err
			}
			bm.Set(blobMetaPrefix+br.String(), fmt.Sprintf("%d %v %d", f.UncompressedSize64, zipRef, offset))
		}
		if err := meta.CommitBatch(bm); err != nil {
			return err
		}

		// Record that info for later; once we have them all, we can write the wholeMetaPrefix entries.
		wholeMetas := wholeMetaByWholeRef[mf.WholeRef]
		wholeMetas = append(wholeMetas, wholeMetaPrefixInfo{
			wholePartIndex:   mf.WholePartIndex,
			zipRef:           zipRef,
			firstOffset:      firstOff,
			dataBytesWritten: dataBytesWritten,
			wholeSize:        mf.WholeSize,
		})
		wholeMetaByWholeRef[mf.WholeRef] = wholeMetas
		return nil

	}); err != nil {
		return err
	}

	// finally, write the wholeMetaPrefix entries
	bm := meta.BeginBatch()
	for wholeRef, wholeMetas := range wholeMetaByWholeRef {
		wm := wholeMetaPrefixInfos(wholeMetas)
		sort.Sort(wm)
		var wholeBytesWritten int64
		for _, w := range wm {
			bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, wholeRef, w.wholePartIndex),
				fmt.Sprintf("%s %d %d %d", w.zipRef, w.firstOffset, wholeBytesWritten, w.dataBytesWritten))
			wholeBytesWritten += w.dataBytesWritten
		}
		if wm[0].wholeSize != wholeBytesWritten {
			return fmt.Errorf("sum of all zips (%d bytes) does not match manifest's WholeSize (%d bytes) for %v",
				wholeBytesWritten, wm[0].wholeSize, wholeRef)
		}
		bm.Set(fmt.Sprintf("%s%s", wholeMetaPrefix, wholeRef),
			fmt.Sprintf("%d %d", wholeBytesWritten, wm[len(wm)-1].wholePartIndex+1))
	}

	if err := meta.CommitBatch(bm); err != nil {
		return err
	}

	// TODO(mpl): take into account removed blobs. It can't be done for now
	// (2015-01-29) because RemoveBlobs currently only updates the meta index.
	// So if the index was lost, all information about removals was lost too.

	s.meta = meta
	return nil
}
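// rebuildMetaSketch is a minimal sketch (not in the original source) of how
// reindex might be invoked to rebuild a lost meta index into a fresh
// in-memory sorted.KeyValue; a persistent KeyValue implementation could be
// returned from newMeta instead.
func rebuildMetaSketch(ctx context.Context, s *storage) error {
	return s.reindex(ctx, func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	})
}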