// contToken is of forms:
//    ""             : start from beginning of zip files
//    "sha1-xxxxx:n" : start at == (sha1-xxxx, file n), else next zip
func (st largeBlobStreamer) StreamBlobs(ctx context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	defer close(dest)
	s := st.sto
	large := s.large

	var after string // for enumerateAll
	var skipFiles int
	var firstRef blob.Ref // first we care about

	if contToken != "" {
		f := strings.SplitN(contToken, ":", 2)
		if len(f) != 2 {
			return errContToken
		}
		firstRef, _ = blob.Parse(f[0])
		skipFiles, err = strconv.Atoi(f[1])
		if !firstRef.Valid() || err != nil {
			return errContToken
		}
		// EnumerateAllFrom takes a cursor that's greater, but
		// we want to start _at_ firstRef. So start
		// enumerating right before our target.
		after = firstRef.StringMinusOne()
	}
	return blobserver.EnumerateAllFrom(ctx, large, after, func(sb blob.SizedRef) error {
		if firstRef.Valid() {
			if sb.Ref.Less(firstRef) {
				// Skip.
				return nil
			}
			if firstRef.Less(sb.Ref) {
				skipFiles = 0 // reset it.
			}
		}
		fileN := 0
		return s.foreachZipBlob(sb.Ref, func(bap BlobAndPos) error {
			if skipFiles > 0 {
				skipFiles--
				fileN++
				return nil
			}
			select {
			case dest <- blobserver.BlobAndToken{
				Blob: blob.NewBlob(bap.Ref, bap.Size, func() types.ReadSeekCloser {
					return blob.NewLazyReadSeekCloser(s, bap.Ref)
				}),
				Token: fmt.Sprintf("%s:%d", sb.Ref, fileN),
			}:
				fileN++
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
	})
}
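// Hypothetical caller, not part of the original file: drain a stream of
// packed blobs, remembering the last token seen so a later call can resume.
// A token names the blob it was sent with, so resuming re-delivers that
// blob (at-least-once delivery). handleBlob is an assumed callback.
func drainLargeStream(ctx context.Context, st largeBlobStreamer, contToken string, handleBlob func(*blob.Blob)) (lastToken string, err error) {
	dest := make(chan blobserver.BlobAndToken)
	errc := make(chan error, 1)
	go func() { errc <- st.StreamBlobs(ctx, dest, contToken) }()
	lastToken = contToken
	for bt := range dest {
		handleBlob(bt.Blob)
		lastToken = bt.Token // e.g. "sha1-xxxx:3": resume at file 3 of that zip
	}
	return lastToken, <-errc // StreamBlobs closed dest; collect its error
}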
// doDest is false for source and true for dest.
func (sh *SyncHandler) startValidatePrefix(ctx context.Context, pfx string, doDest bool) (<-chan blob.SizedRef, <-chan error) {
	var e blobserver.BlobEnumerator
	if doDest {
		e = sh.to
	} else {
		e = sh.from
	}
	c := make(chan blob.SizedRef, 64)
	errc := make(chan error, 1)
	go func() {
		defer close(c)
		var last string // last blobref seen; to double-check that storage's enumeration works correctly.
		err := blobserver.EnumerateAllFrom(ctx, e, pfx, func(sb blob.SizedRef) error {
			// Just double-check that the storage target is returning sorted results correctly.
			brStr := sb.Ref.String()
			if brStr < pfx {
				log.Fatalf("Storage target %T enumerate not behaving: %q < requested prefix %q", e, brStr, pfx)
			}
			if last != "" && last >= brStr {
				log.Fatalf("Storage target %T enumerate not behaving: previous %q >= current %q", e, last, brStr)
			}
			last = brStr

			// TODO: could add a more efficient method on blob.Ref to do this,
			// that doesn't involve calling String().
			if !strings.HasPrefix(brStr, pfx) {
				return errNotPrefix
			}
			select {
			case c <- sb:
				sh.mu.Lock()
				if doDest {
					sh.vdestCount++
					sh.vdestBytes += int64(sb.Size)
				} else {
					sh.vsrcCount++
					sh.vsrcBytes += int64(sb.Size)
				}
				sh.mu.Unlock()
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
		if err == errNotPrefix {
			err = nil
		}
		if err != nil {
			// Send a zero value to shut down ListMissingDestinationBlobs.
			c <- blob.SizedRef{}
		}
		errc <- err
	}()
	return c, errc
}
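// Hypothetical sketch, not part of the original file: consuming a pair of
// channels from two startValidatePrefix calls (source and dest), with the
// error channels returned alongside them. In Camlistore the real consumer
// is blobserver.ListMissingDestinationBlobs; this hand-rolled merge of the
// two sorted streams only illustrates why the enumeration order matters
// and why an error is signaled with a zero blob.SizedRef.
func missingFromDest(src, dst <-chan blob.SizedRef, srcErr, dstErr <-chan error) ([]blob.SizedRef, error) {
	var missing []blob.SizedRef
	d, dok := <-dst
	for s := range src {
		if !s.Ref.Valid() {
			break // zero-value sentinel: the source enumeration failed
		}
		// Advance dst past everything smaller than the current source blob.
		for dok && d.Ref.Valid() && d.Ref.Less(s.Ref) {
			d, dok = <-dst
		}
		if !dok || !d.Ref.Valid() || s.Ref.Less(d.Ref) {
			missing = append(missing, s) // present in source, absent in dest
		}
	}
	if err := <-srcErr; err != nil {
		return nil, err
	}
	if err := <-dstErr; err != nil {
		return nil, err
	}
	return missing, nil
}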
// stream the loose blobs
func (st smallBlobStreamer) StreamBlobs(ctx context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	small := st.sto.small
	if bs, ok := small.(blobserver.BlobStreamer); ok {
		return bs.StreamBlobs(ctx, dest, contToken)
	}
	defer close(dest)
	donec := ctx.Done()
	return blobserver.EnumerateAllFrom(ctx, small, contToken, func(sb blob.SizedRef) error {
		select {
		case dest <- blobserver.BlobAndToken{
			Blob: blob.NewBlob(sb.Ref, sb.Size, func() types.ReadSeekCloser {
				return blob.NewLazyReadSeekCloser(small, sb.Ref)
			}),
			Token: sb.Ref.StringMinusOne(), // streamer is >=, enumerate is >
		}:
			return nil
		case <-donec:
			return ctx.Err()
		}
	})
}
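// Hypothetical test-style sketch, not part of the original file: the token
// handed out with each small blob is StringMinusOne() of its ref, which
// sorts immediately before the ref itself. Since the enumerate cursor is
// exclusive (">") while stream resumption is inclusive (">="), resuming at
// a blob's token must re-deliver that same blob first.
func checkSmallResume(ctx context.Context, st smallBlobStreamer, want blobserver.BlobAndToken) error {
	dest := make(chan blobserver.BlobAndToken)
	errc := make(chan error, 1)
	go func() { errc <- st.StreamBlobs(ctx, dest, want.Token) }()
	var gotFirst blob.Ref
	for bt := range dest { // drain fully so the streamer goroutine can finish
		if !gotFirst.Valid() {
			gotFirst = bt.Blob.Ref()
		}
	}
	if err := <-errc; err != nil {
		return err
	}
	if gotFirst != want.Blob.Ref() {
		return fmt.Errorf("resume at token %q started at %v; want %v", want.Token, gotFirst, want.Blob.Ref())
	}
	return nil
}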
// reindex rebuilds the meta index for packed blobs. It calls newMeta to create
// a new KeyValue on which to write the index, and replaces s.meta with it. There
// is no locking whatsoever, so it should not be called when the storage is already
// in use. Its signature might change if/when it gets exported.
func (s *storage) reindex(ctx context.Context, newMeta func() (sorted.KeyValue, error)) error {
	meta, err := newMeta()
	if err != nil {
		return fmt.Errorf("failed to create new blobpacked meta index: %v", err)
	}

	wholeMetaByWholeRef := make(map[blob.Ref][]wholeMetaPrefixInfo)
	if err := blobserver.EnumerateAllFrom(ctx, s.large, "", func(sb blob.SizedRef) error {
		zipRef := sb.Ref
		zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
		if err != nil {
			return zipOpenError{zipRef, err}
		}
		var maniFile *zip.File
		var firstOff int64 // offset of first file (the packed data chunks)
		for i, f := range zr.File {
			if i == 0 {
				firstOff, err = f.DataOffset()
				if err != nil {
					return err
				}
			}
			if f.Name == zipManifestPath {
				maniFile = f
				break
			}
		}
		if maniFile == nil {
			return fmt.Errorf("no camlistore manifest file found in zip %v", zipRef)
		}
		maniRC, err := maniFile.Open()
		if err != nil {
			return err
		}
		defer maniRC.Close()
		var mf Manifest
		if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
			return err
		}
		if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
			return fmt.Errorf("incomplete blobpack manifest JSON in %v", zipRef)
		}

		bm := meta.BeginBatch()
		// In this loop, we write all the blobMetaPrefix entries for the
		// data blobs in this zip, and we also compute the dataBytesWritten, for later.
		var dataBytesWritten int64
		for _, bp := range mf.DataBlobs {
			bm.Set(blobMetaPrefix+bp.SizedRef.Ref.String(), fmt.Sprintf("%d %v %d", bp.SizedRef.Size, zipRef, firstOff+bp.Offset))
			dataBytesWritten += int64(bp.SizedRef.Size)
		}

		// In this loop, we write all the blobMetaPrefix entries for the
		// schema blobs in this zip.
		for _, f := range zr.File {
			if !(strings.HasPrefix(f.Name, "camlistore/") && strings.HasSuffix(f.Name, ".json")) ||
				f.Name == zipManifestPath {
				continue
			}
			br, ok := blob.Parse(strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json"))
			if !ok {
				return fmt.Errorf("schema file in zip %v does not have blobRef as name: %v", zipRef, f.Name)
			}
			offset, err := f.DataOffset()
			if err != nil {
				return err
			}
			bm.Set(blobMetaPrefix+br.String(), fmt.Sprintf("%d %v %d", f.UncompressedSize64, zipRef, offset))
		}
		if err := meta.CommitBatch(bm); err != nil {
			return err
		}

		// Record that info for later, once we have got them all, so we can
		// write the wholeMetaPrefix entries.
		wholeMetas := wholeMetaByWholeRef[mf.WholeRef]
		wholeMetas = append(wholeMetas, wholeMetaPrefixInfo{
			wholePartIndex:   mf.WholePartIndex,
			zipRef:           zipRef,
			firstOffset:      firstOff,
			dataBytesWritten: dataBytesWritten,
			wholeSize:        mf.WholeSize,
		})
		wholeMetaByWholeRef[mf.WholeRef] = wholeMetas
		return nil
	}); err != nil {
		return err
	}

	// Finally, write the wholeMetaPrefix entries.
	bm := meta.BeginBatch()
	for wholeRef, wholeMetas := range wholeMetaByWholeRef {
		wm := wholeMetaPrefixInfos(wholeMetas)
		sort.Sort(wm)
		var wholeBytesWritten int64
		for _, w := range wm {
			bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, wholeRef, w.wholePartIndex),
				fmt.Sprintf("%s %d %d %d", w.zipRef, w.firstOffset, wholeBytesWritten, w.dataBytesWritten))
			wholeBytesWritten += w.dataBytesWritten
		}
		if wm[0].wholeSize != wholeBytesWritten {
			return fmt.Errorf("sum of all zips (%d bytes) does not match manifest's WholeSize (%d bytes) for %v", wholeBytesWritten, wm[0].wholeSize, wholeRef)
		}
		bm.Set(fmt.Sprintf("%s%s", wholeMetaPrefix, wholeRef),
			fmt.Sprintf("%d %d", wholeBytesWritten, wm[len(wholeMetas)-1].wholePartIndex+1))
	}
	if err := meta.CommitBatch(bm); err != nil {
		return err
	}

	// TODO(mpl): take into account removed blobs. It can't be done for now
	// (2015-01-29) because RemoveBlobs currently only updates the meta index.
	// So if the index was lost, all information about removals was lost too.

	s.meta = meta
	return nil
}
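// Hypothetical usage sketch, not part of the original file: rebuilding the
// meta index into a fresh in-memory sorted.KeyValue. A real recovery path
// would hand reindex a persistent implementation instead, and, per the
// comment above, nothing else may use the storage while this runs.
func rebuildMeta(ctx context.Context, s *storage) error {
	return s.reindex(ctx, func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	})
}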