Example 1
// reindex rebuilds the meta index for packed blobs. It calls newMeta to create
// a new KeyValue on which to write the index, and replaces s.meta with it. There
// is no locking whatsoever so it should not be called when the storage is already
// in use. Its signature might change if/when it gets exported.
func (s *storage) reindex(ctx context.Context, newMeta func() (sorted.KeyValue, error)) error {
	meta, err := newMeta()
	if err != nil {
		return fmt.Errorf("failed to create new blobpacked meta index: %v", err)
	}

	// Accumulates, per wholeRef, one entry for each zip that holds a part of
	// that whole file. The wholeMetaPrefix rows can only be written once all
	// zips have been enumerated, because they need cumulative offsets.
	wholeMetaByWholeRef := make(map[blob.Ref][]wholeMetaPrefixInfo)

	if err := blobserver.EnumerateAllFrom(ctx, s.large, "", func(sb blob.SizedRef) error {
		zipRef := sb.Ref
		zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
		if err != nil {
			return zipOpenError{zipRef, err}
		}
		var maniFile *zip.File
		var firstOff int64 // offset of first file (the packed data chunks)
		for i, f := range zr.File {
			if i == 0 {
				firstOff, err = f.DataOffset()
				if err != nil {
					return err
				}
			}
			if f.Name == zipManifestPath {
				maniFile = f
				break
			}
		}
		if maniFile == nil {
			return fmt.Errorf("no camlistore manifest file found in zip %v", zipRef)
		}
		maniRC, err := maniFile.Open()
		if err != nil {
			return err
		}
		// Runs at the end of this per-zip callback, not at reindex return.
		defer maniRC.Close()
		var mf Manifest
		if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
			return err
		}
		if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
			return fmt.Errorf("incomplete blobpack manifest JSON in %v", zipRef)
		}

		bm := meta.BeginBatch()
		// In this loop, we write all the blobMetaPrefix entries for the
		// data blobs in this zip, and we also compute the dataBytesWritten, for later.
		var dataBytesWritten int64
		for _, bp := range mf.DataBlobs {
			bm.Set(blobMetaPrefix+bp.SizedRef.Ref.String(), fmt.Sprintf("%d %v %d", bp.SizedRef.Size, zipRef, firstOff+bp.Offset))
			dataBytesWritten += int64(bp.SizedRef.Size)
		}

		// In this loop, we write all the blobMetaPrefix entries for the schema blobs in this zip
		for _, f := range zr.File {
			if !(strings.HasPrefix(f.Name, "camlistore/") && strings.HasSuffix(f.Name, ".json")) ||
				f.Name == zipManifestPath {
				continue
			}
			br, ok := blob.Parse(strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json"))
			if !ok {
				return fmt.Errorf("schema file in zip %v does not have blobRef as name: %v", zipRef, f.Name)
			}
			offset, err := f.DataOffset()
			if err != nil {
				return err
			}
			bm.Set(blobMetaPrefix+br.String(), fmt.Sprintf("%d %v %d", f.UncompressedSize64, zipRef, offset))
		}
		if err := meta.CommitBatch(bm); err != nil {
			return err
		}

		// Record this zip's part info for later, when we have them all, so we
		// can write the wholeMetaPrefix entries with cumulative offsets.
		wholeMetaByWholeRef[mf.WholeRef] = append(wholeMetaByWholeRef[mf.WholeRef], wholeMetaPrefixInfo{
			wholePartIndex:   mf.WholePartIndex,
			zipRef:           zipRef,
			firstOffset:      firstOff,
			dataBytesWritten: dataBytesWritten,
			wholeSize:        mf.WholeSize,
		})
		return nil

	}); err != nil {
		return err
	}

	// finally, write the wholeMetaPrefix entries
	bm := meta.BeginBatch()
	for wholeRef, wholeMetas := range wholeMetaByWholeRef {
		wm := wholeMetaPrefixInfos(wholeMetas)
		// Sort by part index so wholeBytesWritten accumulates in file order.
		sort.Sort(wm)
		var wholeBytesWritten int64
		for _, w := range wm {
			bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, wholeRef, w.wholePartIndex),
				fmt.Sprintf("%s %d %d %d", w.zipRef, w.firstOffset, wholeBytesWritten, w.dataBytesWritten))
			wholeBytesWritten += w.dataBytesWritten
		}
		if wm[0].wholeSize != wholeBytesWritten {
			return fmt.Errorf("sum of all zips (%d bytes) does not match manifest's WholeSize (%d bytes) for %v",
				wholeBytesWritten, wm[0].wholeSize, wholeRef)
		}
		bm.Set(fmt.Sprintf("%s%s", wholeMetaPrefix, wholeRef),
			fmt.Sprintf("%d %d", wholeBytesWritten, wm[len(wm)-1].wholePartIndex+1))
	}

	if err := meta.CommitBatch(bm); err != nil {
		return err
	}

	// TODO(mpl): take into account removed blobs. It can't be done for now
	// (2015-01-29) because RemoveBlobs currently only updates the meta index.
	// So if the index was lost, all information about removals was lost too.

	s.meta = meta
	return nil
}
Example 2
// foreachZipBlob calls fn for each blob in the zip pack blob
// identified by zipRef.  If fn returns a non-nil error,
// foreachZipBlob stops enumerating with that error.
func (s *storage) foreachZipBlob(zipRef blob.Ref, fn func(BlobAndPos) error) error {
	sb, err := blobserver.StatBlob(s.large, zipRef)
	if err != nil {
		return err
	}
	zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
	if err != nil {
		return zipOpenError{zipRef, err}
	}
	var manifestFile *zip.File // nil until (if ever) found
	var firstOffset int64      // where the first file (the packed data chunks) starts
	for i, f := range zr.File {
		if i == 0 {
			if firstOffset, err = f.DataOffset(); err != nil {
				return err
			}
		}
		if f.Name == zipManifestPath {
			manifestFile = f
			break
		}
	}
	if manifestFile == nil {
		return errors.New("no camlistore manifest file found in zip")
	}
	// First pass: invoke fn on every schema blob stored under camlistore/*.json.
	for _, f := range zr.File {
		isSchema := strings.HasPrefix(f.Name, "camlistore/") &&
			strings.HasSuffix(f.Name, ".json") &&
			f.Name != zipManifestPath
		if !isSchema {
			continue
		}
		name := strings.TrimPrefix(f.Name, "camlistore/")
		br, ok := blob.Parse(strings.TrimSuffix(name, ".json"))
		if !ok {
			// NOTE(review): unparseable names are silently skipped here,
			// unlike reindex which treats them as an error — presumably
			// intentional best-effort behavior; confirm before changing.
			continue
		}
		offset, err := f.DataOffset()
		if err != nil {
			return err
		}
		pos := BlobAndPos{
			SizedRef: blob.SizedRef{Ref: br, Size: uint32(f.UncompressedSize64)},
			Offset:   offset,
		}
		if err := fn(pos); err != nil {
			return err
		}
	}
	rc, err := manifestFile.Open()
	if err != nil {
		return err
	}
	defer rc.Close()
	var manifest Manifest
	if err := json.NewDecoder(rc).Decode(&manifest); err != nil {
		return err
	}
	if !manifest.WholeRef.Valid() || manifest.WholeSize == 0 || !manifest.DataBlobsOrigin.Valid() {
		return errors.New("incomplete blobpack manifest JSON")
	}
	// Second pass: invoke fn on every data blob, with offsets made
	// absolute by adding the start of the packed-data region.
	for _, bap := range manifest.DataBlobs {
		bap.Offset += firstOffset
		if err := fn(bap); err != nil {
			return err
		}
	}
	return nil
}