// reindex rebuilds the meta index for packed blobs. It calls newMeta to create
// a new KeyValue on which to write the index, and replaces s.meta with it. There
// is no locking whatsoever, so it should not be called while the storage is in
// use. Its signature might change if/when it gets exported.
func (s *storage) reindex(ctx context.Context, newMeta func() (sorted.KeyValue, error)) error {
	meta, err := newMeta()
	if err != nil {
		return fmt.Errorf("failed to create new blobpacked meta index: %v", err)
	}

	wholeMetaByWholeRef := make(map[blob.Ref][]wholeMetaPrefixInfo)
	if err := blobserver.EnumerateAllFrom(ctx, s.large, "", func(sb blob.SizedRef) error {
		zipRef := sb.Ref
		zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
		if err != nil {
			return zipOpenError{zipRef, err}
		}
		var maniFile *zip.File
		var firstOff int64 // offset of first file (the packed data chunks)
		for i, f := range zr.File {
			if i == 0 {
				firstOff, err = f.DataOffset()
				if err != nil {
					return err
				}
			}
			if f.Name == zipManifestPath {
				maniFile = f
				break
			}
		}
		if maniFile == nil {
			return fmt.Errorf("no camlistore manifest file found in zip %v", zipRef)
		}
		maniRC, err := maniFile.Open()
		if err != nil {
			return err
		}
		defer maniRC.Close()
		var mf Manifest
		if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
			return err
		}
		if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
			return fmt.Errorf("incomplete blobpack manifest JSON in %v", zipRef)
		}

		bm := meta.BeginBatch()
		// In this loop, we write all the blobMetaPrefix entries for the
		// data blobs in this zip, and we also compute dataBytesWritten, for later.
		var dataBytesWritten int64
		for _, bp := range mf.DataBlobs {
			bm.Set(blobMetaPrefix+bp.SizedRef.Ref.String(), fmt.Sprintf("%d %v %d", bp.SizedRef.Size, zipRef, firstOff+bp.Offset))
			dataBytesWritten += int64(bp.SizedRef.Size)
		}

		// In this loop, we write all the blobMetaPrefix entries for the
		// schema blobs in this zip.
		for _, f := range zr.File {
			if !(strings.HasPrefix(f.Name, "camlistore/") && strings.HasSuffix(f.Name, ".json")) ||
				f.Name == zipManifestPath {
				continue
			}
			br, ok := blob.Parse(strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json"))
			if !ok {
				return fmt.Errorf("schema file in zip %v does not have blobRef as name: %v", zipRef, f.Name)
			}
			offset, err := f.DataOffset()
			if err != nil {
				return err
			}
			bm.Set(blobMetaPrefix+br.String(), fmt.Sprintf("%d %v %d", f.UncompressedSize64, zipRef, offset))
		}

		if err := meta.CommitBatch(bm); err != nil {
			return err
		}

		// Record that info for later; once we have it for all the zips, we
		// can write the wholeMetaPrefix entries.
		wholeMetas := wholeMetaByWholeRef[mf.WholeRef]
		wholeMetas = append(wholeMetas, wholeMetaPrefixInfo{
			wholePartIndex:   mf.WholePartIndex,
			zipRef:           zipRef,
			firstOffset:      firstOff,
			dataBytesWritten: dataBytesWritten,
			wholeSize:        mf.WholeSize,
		})
		wholeMetaByWholeRef[mf.WholeRef] = wholeMetas
		return nil
	}); err != nil {
		return err
	}

	// Finally, write the wholeMetaPrefix entries.
	bm := meta.BeginBatch()
	for wholeRef, wholeMetas := range wholeMetaByWholeRef {
		wm := wholeMetaPrefixInfos(wholeMetas)
		sort.Sort(wm)
		var wholeBytesWritten int64
		for _, w := range wm {
			bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, wholeRef, w.wholePartIndex),
				fmt.Sprintf("%s %d %d %d", w.zipRef, w.firstOffset, wholeBytesWritten, w.dataBytesWritten))
			wholeBytesWritten += w.dataBytesWritten
		}
		if wm[0].wholeSize != wholeBytesWritten {
			return fmt.Errorf("sum of all zips (%d bytes) does not match manifest's WholeSize (%d bytes) for %v", wholeBytesWritten, wm[0].wholeSize, wholeRef)
		}
		bm.Set(fmt.Sprintf("%s%s", wholeMetaPrefix, wholeRef),
			fmt.Sprintf("%d %d", wholeBytesWritten, wm[len(wm)-1].wholePartIndex+1))
	}
	if err := meta.CommitBatch(bm); err != nil {
		return err
	}

	// TODO(mpl): take into account removed blobs. It can't be done for now
	// (2015-01-29) because RemoveBlobs currently only updates the meta index.
	// So if the index was lost, all information about removals was lost too.

	s.meta = meta
	return nil
}
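// The sketch below is illustrative, not part of the package: it shows one way
// reindex might be driven, and the shape of the rows it writes (derived from
// the Sprintf calls above; the literal refs and offsets depend on the packs).
// sorted.NewMemoryKeyValue is assumed to be the in-memory implementation from
// camlistore.org/pkg/sorted.
//
//	err := s.reindex(ctx, func() (sorted.KeyValue, error) {
//		return sorted.NewMemoryKeyValue(), nil
//	})
//
// For every data or schema blob found in a zip, reindex writes one row:
//
//	<blobMetaPrefix><blobRef> => "<size> <zipRef> <offset in zip>"
//
// and for every whole file, one row per zip part plus a summary row:
//
//	<wholeMetaPrefix><wholeRef>:<partIndex> => "<zipRef> <first offset> <whole bytes before this zip> <data bytes in this zip>"
//	<wholeMetaPrefix><wholeRef>             => "<whole size> <zip count>"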
// foreachZipBlob calls fn for each blob in the zip pack blob
// identified by zipRef. If fn returns a non-nil error,
// foreachZipBlob stops enumerating with that error.
func (s *storage) foreachZipBlob(zipRef blob.Ref, fn func(BlobAndPos) error) error {
	sb, err := blobserver.StatBlob(s.large, zipRef)
	if err != nil {
		return err
	}
	zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
	if err != nil {
		return zipOpenError{zipRef, err}
	}
	var maniFile *zip.File // or nil if not found
	var firstOff int64     // offset of first file (the packed data chunks)
	for i, f := range zr.File {
		if i == 0 {
			firstOff, err = f.DataOffset()
			if err != nil {
				return err
			}
		}
		if f.Name == zipManifestPath {
			maniFile = f
			break
		}
	}
	if maniFile == nil {
		return errors.New("no camlistore manifest file found in zip")
	}
	// Apply fn to all the schema blobs.
	for _, f := range zr.File {
		if !strings.HasPrefix(f.Name, "camlistore/") || f.Name == zipManifestPath ||
			!strings.HasSuffix(f.Name, ".json") {
			continue
		}
		brStr := strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json")
		br, ok := blob.Parse(brStr)
		if ok {
			off, err := f.DataOffset()
			if err != nil {
				return err
			}
			if err := fn(BlobAndPos{
				SizedRef: blob.SizedRef{Ref: br, Size: uint32(f.UncompressedSize64)},
				Offset:   off,
			}); err != nil {
				return err
			}
		}
	}
	maniRC, err := maniFile.Open()
	if err != nil {
		return err
	}
	defer maniRC.Close()
	var mf Manifest
	if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
		return err
	}
	if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
		return errors.New("incomplete blobpack manifest JSON")
	}
	// Apply fn to all the data blobs.
	for _, bap := range mf.DataBlobs {
		bap.Offset += firstOff
		if err := fn(bap); err != nil {
			return err
		}
	}
	return nil
}
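// An illustrative sketch, not part of the package: walking one pack with
// foreachZipBlob. zipRef is assumed to be the ref of a zip blob held in
// s.large. fn sees the schema blobs first, then the data blobs, each with
// its size and absolute offset within the zip.
//
//	err := s.foreachZipBlob(zipRef, func(bap BlobAndPos) error {
//		log.Printf("%v: %d bytes at offset %d", bap.Ref, bap.Size, bap.Offset)
//		return nil
//	})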