func (s *Storage) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	// for this impl, contToken is >= blobref.String()
	defer close(dest)
	s.mu.RLock()
	defer s.mu.RUnlock()
	sorted := make([]blob.Ref, 0, len(s.m))
	for br := range s.m {
		sorted = append(sorted, br)
	}
	sort.Sort(blob.ByRef(sorted))
	for _, br := range sorted {
		if br.String() < contToken {
			continue
		}
		select {
		case <-ctx.Done():
			return context.ErrCanceled
		case dest <- blobserver.BlobAndToken{
			Blob: blob.NewBlob(br, uint32(len(s.m[br])), func() types.ReadSeekCloser {
				return blob.NewLazyReadSeekCloser(s, br)
			}),
			Token: br.String(),
		}:
		}
	}
	return nil
}
// contToken is of forms:
//    ""             : start from beginning of zip files
//    "sha1-xxxxx:n" : start at == (sha1-xxxx, file n), else next zip
func (st largeBlobStreamer) StreamBlobs(ctx context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	defer close(dest)
	s := st.sto
	large := s.large

	var after string // for enumerateAll
	var skipFiles int
	var firstRef blob.Ref // first we care about

	if contToken != "" {
		f := strings.SplitN(contToken, ":", 2)
		if len(f) != 2 {
			return errContToken
		}
		firstRef, _ = blob.Parse(f[0])
		skipFiles, err = strconv.Atoi(f[1])
		if !firstRef.Valid() || err != nil {
			return errContToken
		}
		// EnumerateAllFrom takes a cursor that's greater, but
		// we want to start _at_ firstRef. So start
		// enumerating right before our target.
		after = firstRef.StringMinusOne()
	}
	return blobserver.EnumerateAllFrom(ctx, large, after, func(sb blob.SizedRef) error {
		if firstRef.Valid() {
			if sb.Ref.Less(firstRef) {
				// Skip.
				return nil
			}
			if firstRef.Less(sb.Ref) {
				skipFiles = 0 // reset it.
			}
		}
		fileN := 0
		return s.foreachZipBlob(sb.Ref, func(bap BlobAndPos) error {
			if skipFiles > 0 {
				skipFiles--
				fileN++
				return nil
			}
			select {
			case dest <- blobserver.BlobAndToken{
				Blob: blob.NewBlob(bap.Ref, bap.Size, func() types.ReadSeekCloser {
					return blob.NewLazyReadSeekCloser(s, bap.Ref)
				}),
				Token: fmt.Sprintf("%s:%d", sb.Ref, fileN),
			}:
				fileN++
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
	})
}
func (tb *Blob) Blob() *blob.Blob {
	s := tb.Contents
	return blob.NewBlob(tb.BlobRef(), tb.Size(), func() types.ReadSeekCloser {
		return struct {
			io.ReadSeeker
			io.Closer
		}{
			io.NewSectionReader(strings.NewReader(s), 0, int64(len(s))),
			ioutil.NopCloser(nil),
		}
	})
}
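// Hypothetical usage sketch, not part of the original sources: it assumes a
// test Blob literal with a Contents field (which the method above reads) and
// that *blob.Blob exposes an Open method returning the lazy ReadSeekCloser
// (the diskpacked TODO later in this section refers to calling Open on a
// *blob.Blob). The function name and contents are illustrative only.
func exampleReadTestBlob() ([]byte, error) {
	tb := &Blob{Contents: "foo"} // assumed field, per s := tb.Contents above
	b := tb.Blob()
	rsc := b.Open() // lazily builds the in-memory section reader
	defer rsc.Close()
	return ioutil.ReadAll(rsc)
}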
// stream the loose blobs
func (st smallBlobStreamer) StreamBlobs(ctx context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	small := st.sto.small
	if bs, ok := small.(blobserver.BlobStreamer); ok {
		return bs.StreamBlobs(ctx, dest, contToken)
	}
	defer close(dest)
	donec := ctx.Done()
	return blobserver.EnumerateAllFrom(ctx, small, contToken, func(sb blob.SizedRef) error {
		select {
		case dest <- blobserver.BlobAndToken{
			Blob: blob.NewBlob(sb.Ref, sb.Size, func() types.ReadSeekCloser {
				return blob.NewLazyReadSeekCloser(small, sb.Ref)
			}),
			Token: sb.Ref.StringMinusOne(), // streamer is >=, enumerate is >
		}:
			return nil
		case <-donec:
			return ctx.Err()
		}
	})
}
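// Hypothetical consumer sketch, not part of the original sources: it shows one
// way a caller might drive a blobserver.BlobStreamer, draining dest in the
// calling goroutine while StreamBlobs runs in another, and remembering the
// last Token so a later call can resume. Because the stream token is ">="
// (see the comments above), resuming with the last received token may
// re-deliver that blob. The buffer size and function name are assumptions.
func streamSome(ctx context.Context, bs blobserver.BlobStreamer, contToken string) (lastToken string, err error) {
	dest := make(chan blobserver.BlobAndToken, 16)
	errc := make(chan error, 1)
	go func() {
		// StreamBlobs closes dest when it returns.
		errc <- bs.StreamBlobs(ctx, dest, contToken)
	}()
	for bt := range dest {
		lastToken = bt.Token // token of the blob just received
		_ = bt.Blob          // ... process the blob here ...
	}
	return lastToken, <-errc
}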
// StreamBlobs implements the blobserver.BlobStreamer interface.
func (s *storage) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	defer close(dest)
	fileNum, offset, err := parseContToken(contToken)
	if err != nil {
		return errors.New("diskpacked: invalid continuation token")
	}
	debug.Printf("Continuing blob streaming from pack %s, offset %d", s.filename(fileNum), offset)

	fd, err := os.Open(s.filename(fileNum))
	if err != nil {
		return err
	}
	// fd will change over time; Close whichever is current when we exit.
	defer func() {
		if fd != nil { // may be nil on os.Open error below
			fd.Close()
		}
	}()

	// ContToken always refers to the exact next place we will read from.
	// Note that seeking past the end is legal on Unix and for io.Seeker,
	// but that will just result in a mostly harmless EOF.
	//
	// TODO: probably be stricter here and don't allow seek past
	// the end, since we know the size of closed files and the
	// size of the file diskpacked currently still writing.
	_, err = fd.Seek(offset, os.SEEK_SET)
	if err != nil {
		return err
	}

	const ioBufSize = 256 * 1024

	// We'll use bufio to avoid read system call overhead.
	r := bufio.NewReaderSize(fd, ioBufSize)

	for {
		// Are we at the EOF of this pack?
		if _, err := r.Peek(1); err != nil {
			if err != io.EOF {
				return err
			}
			// EOF case; continue to the next pack, if any.
			fileNum += 1
			offset = 0
			fd.Close() // Close the previous pack
			fd, err = os.Open(s.filename(fileNum))
			if os.IsNotExist(err) {
				// We reached the end.
				return nil
			} else if err != nil {
				return err
			}
			r.Reset(fd)
			continue
		}

		thisOffset := offset // of current blob's header
		consumed, digest, size, err := readHeader(r)
		if err != nil {
			return err
		}

		offset += int64(consumed)
		if deletedBlobRef.Match(digest) {
			// Skip over deletion padding
			if _, err := io.CopyN(ioutil.Discard, r, int64(size)); err != nil {
				return err
			}
			offset += int64(size)
			continue
		}

		// Finally, read and send the blob.

		// TODO: remove this allocation per blob. We can make one instead
		// outside of the loop, guarded by a mutex, and re-use it, only to
		// lock the mutex and clone it if somebody actually calls Open
		// on the *blob.Blob. Otherwise callers just scanning all the blobs
		// to see if they have everything incur lots of garbage if they
		// don't open any blobs.
		data := make([]byte, size)
		if _, err := io.ReadFull(r, data); err != nil {
			return err
		}
		offset += int64(size)
		ref, ok := blob.ParseBytes(digest)
		if !ok {
			return fmt.Errorf("diskpacked: Invalid blobref %q", digest)
		}
		newReader := func() types.ReadSeekCloser {
			return newReadSeekNopCloser(bytes.NewReader(data))
		}
		blob := blob.NewBlob(ref, size, newReader)
		select {
		case dest <- blobserver.BlobAndToken{
			Blob:  blob,
			Token: fmt.Sprintf("%d %d", fileNum, thisOffset),
		}:
			// Nothing.
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
}
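// parseContToken is called above but not shown in this section. A minimal
// sketch of what it presumably does, assuming the "%d %d" (fileNum, offset)
// token format produced by the send loop above, with the empty token meaning
// "start at pack 0, offset 0". This is a reconstruction, not the original.
func parseContToken(token string) (fileNum int, offset int64, err error) {
	if token == "" {
		return 0, 0, nil
	}
	if _, err = fmt.Sscanf(token, "%d %d", &fileNum, &offset); err != nil {
		return 0, 0, err
	}
	return fileNum, offset, nil
}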