func (s *Storage) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	// for this impl, contToken is >= blobref.String()
	defer close(dest)
	s.mu.RLock()
	defer s.mu.RUnlock()

	sorted := make([]blob.Ref, 0, len(s.m))
	for br := range s.m {
		sorted = append(sorted, br)
	}
	sort.Sort(blob.ByRef(sorted))

	for _, br := range sorted {
		if br.String() < contToken {
			continue
		}
		select {
		case <-ctx.Done():
			return context.ErrCanceled
		case dest <- blobserver.BlobAndToken{
			Blob: blob.NewBlob(br, uint32(len(s.m[br])), func() types.ReadSeekCloser {
				return blob.NewLazyReadSeekCloser(s, br)
			}),
			Token: br.String(),
		}:
		}
	}
	return nil
}
func (sto *appengineStorage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)

	loan := ctxPool.Get()
	defer loan.Return()
	actx := loan

	prefix := sto.namespace + "|"
	keyBegin := datastore.NewKey(actx, memKind, prefix+after, 0, nil)
	keyEnd := datastore.NewKey(actx, memKind, sto.namespace+"~", 0, nil)

	q := datastore.NewQuery(memKind).Limit(int(limit)).Filter("__key__>", keyBegin).Filter("__key__<", keyEnd)
	it := q.Run(actx)
	var row memEnt
	for {
		key, err := it.Next(&row)
		if err == datastore.Done {
			break
		}
		if err != nil {
			return err
		}
		select {
		case dest <- blob.SizedRef{blob.ParseOrZero(key.StringID()[len(prefix):]), row.Size}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)
	t := s.index.Find(after, "")
	defer func() {
		closeErr := t.Close()
		if err == nil {
			err = closeErr
		}
	}()
	for i := 0; i < limit && t.Next(); {
		key := t.Key()
		if key <= after {
			// EnumerateBlobs' semantics are '>', but sorted.KeyValue.Find is '>='.
			continue
		}
		br, ok := blob.Parse(key)
		if !ok {
			return fmt.Errorf("diskpacked: couldn't parse index key %q", key)
		}
		m, ok := parseBlobMeta(t.Value())
		if !ok {
			return fmt.Errorf("diskpacked: couldn't parse index value %q: %q", key, t.Value())
		}
		select {
		case dest <- m.SizedRef(br):
		case <-ctx.Done():
			return context.ErrCanceled
		}
		i++
	}
	return nil
}
func (ix *Index) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)
	it := ix.s.Find("have:"+after, "have~")
	defer func() {
		closeErr := it.Close()
		if err == nil {
			err = closeErr
		}
	}()
	n := int(0)
	for n < limit && it.Next() {
		k := it.Key()
		if k <= after {
			continue
		}
		if !strings.HasPrefix(k, "have:") {
			break
		}
		n++
		br, ok := blob.Parse(k[len("have:"):])
		size, err := strconv.ParseUint(it.Value(), 10, 32)
		if ok && err == nil {
			select {
			case dest <- blob.SizedRef{br, int64(size)}:
			case <-ctx.Done():
				return context.ErrCanceled
			}
		}
	}
	return nil
}
func (s *storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	iter := s.index.Find(after, "")
	n := 0
	for iter.Next() {
		if iter.Key() == after {
			continue
		}
		br, ok := blob.Parse(iter.Key())
		if !ok {
			panic("Bogus encrypt index key: " + iter.Key())
		}
		plainSize, ok := parseMetaValuePlainSize(iter.Value())
		if !ok {
			panic("Bogus encrypt index value: " + iter.Value())
		}
		select {
		case dest <- blob.SizedRef{br, plainSize}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
		n++
		if limit != 0 && n >= limit {
			break
		}
	}
	return iter.Close()
}
func (sto *s3Storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)
	if faultEnumerate.FailErr(&err) {
		return
	}
	startAt := after
	if _, ok := blob.Parse(after); ok {
		startAt = nextStr(after)
	}
	objs, err := sto.s3Client.ListBucket(sto.bucket, startAt, limit)
	if err != nil {
		log.Printf("s3 ListBucket: %v", err)
		return err
	}
	for _, obj := range objs {
		if obj.Key == after {
			continue
		}
		br, ok := blob.Parse(obj.Key)
		if !ok {
			continue
		}
		select {
		case dest <- blob.SizedRef{Ref: br, Size: uint32(obj.Size)}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
// EnumeratePermanodesLastModifiedLocked sends all permanodes, sorted by most recently modified first, to ch,
// or until ctx is done.
//
// The Corpus must already be locked with RLock.
func (c *Corpus) EnumeratePermanodesLastModifiedLocked(ctx *context.Context, ch chan<- camtypes.BlobMeta) error {
	defer close(ch)
	// TODO: keep these sorted in memory
	pns := make([]pnAndTime, 0, len(c.permanodes))
	for pn := range c.permanodes {
		if modt, ok := c.PermanodeModtimeLocked(pn); ok {
			pns = append(pns, pnAndTime{pn, modt})
		}
	}
	sort.Sort(sort.Reverse(byPermanodeModtime(pns)))
	for _, cand := range pns {
		bm := c.blobs[cand.pn]
		if bm == nil {
			continue
		}
		select {
		case ch <- *bm:
			continue
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (ns *nsto) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	done := ctx.Done()
	it := ns.inventory.Find(after, "")
	first := true
	for limit > 0 && it.Next() {
		if first {
			first = false
			if after != "" && it.Key() == after {
				continue
			}
		}
		br, ok := blob.ParseBytes(it.KeyBytes())
		size, err := strutil.ParseUintBytes(it.ValueBytes(), 10, 32)
		if !ok || err != nil {
			log.Printf("Bogus namespace key %q / value %q", it.Key(), it.Value())
			continue
		}
		select {
		case dest <- blob.SizedRef{br, uint32(size)}:
		case <-done:
			return context.ErrCanceled
		}
		limit--
	}
	if err := it.Close(); err != nil {
		return err
	}
	return nil
}
func (m *mongoStorage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	var b blobDoc
	var qry bson.M
	if after != "" {
		qry = bson.M{"key": bson.M{"$gt": after}}
	}
	iter := m.c.Find(qry).Limit(limit).Select(bson.M{"key": 1, "size": 1}).Sort("key").Iter()
	for iter.Next(&b) {
		br, ok := blob.Parse(b.Key)
		if !ok {
			continue
		}
		select {
		case dest <- blob.SizedRef{Ref: br, Size: uint32(b.Size)}:
		case <-ctx.Done():
			// Close the iterator but ignore the error value since we are already cancelling
			if err := iter.Close(); err != nil {
				log.Printf("Error closing iterator after enumerating: %v", err)
			}
			return context.ErrCanceled
		}
	}
	if err := iter.Close(); err != nil {
		return err
	}
	return nil
}
func (s *Storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	s.mu.RLock()
	defer s.mu.RUnlock()

	// TODO(bradfitz): care about keeping this sorted like we used
	// to? I think it was more expensive than it was worth before,
	// since maintaining it was more costly than how often it was
	// used. But perhaps it'd make sense to maintain it lazily:
	// construct it on EnumerateBlobs but invalidate it everywhere
	// else. Probably doesn't matter much.
	sorted := make([]blob.Ref, 0, len(s.m))
	for br := range s.m {
		sorted = append(sorted, br)
	}
	sort.Sort(blob.ByRef(sorted))

	n := 0
	for _, br := range sorted {
		if after != "" && br.String() <= after {
			continue
		}
		select {
		case dest <- blob.SizedRef{br, uint32(len(s.m[br]))}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
		n++
		if limit > 0 && n == limit {
			break
		}
	}
	return nil
}
// contToken is of forms:
//    ""             : start from beginning of zip files
//    "sha1-xxxxx:n" : start at == (sha1-xxxx, file n), else next zip
func (st largeBlobStreamer) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	defer close(dest)
	s := st.sto
	large := s.large

	var after string // for enumerateAll
	var skipFiles int
	var firstRef blob.Ref // first we care about

	if contToken != "" {
		f := strings.SplitN(contToken, ":", 2)
		if len(f) != 2 {
			return errContToken
		}
		firstRef, _ = blob.Parse(f[0])
		skipFiles, err = strconv.Atoi(f[1])
		if !firstRef.Valid() || err != nil {
			return errContToken
		}
		// EnumerateAllFrom takes a cursor that's greater, but
		// we want to start _at_ firstRef. So start
		// enumerating right before our target.
		after = firstRef.StringMinusOne()
	}
	return blobserver.EnumerateAllFrom(ctx, large, after, func(sb blob.SizedRef) error {
		if firstRef.Valid() {
			if sb.Ref.Less(firstRef) {
				// Skip.
				return nil
			}
			if firstRef.Less(sb.Ref) {
				skipFiles = 0 // reset it.
			}
		}
		fileN := 0
		return s.foreachZipBlob(sb.Ref, func(bap BlobAndPos) error {
			if skipFiles > 0 {
				skipFiles--
				fileN++
				return nil
			}
			select {
			case dest <- blobserver.BlobAndToken{
				Blob: blob.NewBlob(bap.Ref, bap.Size, func() types.ReadSeekCloser {
					return blob.NewLazyReadSeekCloser(s, bap.Ref)
				}),
				Token: fmt.Sprintf("%s:%d", sb.Ref, fileN),
			}:
				fileN++
				return nil
			case <-ctx.Done():
				return context.ErrCanceled
			}
		})
	})
}
// doDest is false for source and true for dest.
func (sh *SyncHandler) startValidatePrefix(ctx *context.Context, pfx string, doDest bool) (<-chan blob.SizedRef, <-chan error) {
	var e blobserver.BlobEnumerator
	if doDest {
		e = sh.to
	} else {
		e = sh.from
	}
	c := make(chan blob.SizedRef, 64)
	errc := make(chan error, 1)
	go func() {
		defer close(c)
		var last string // last blobref seen; to double check storage's enumeration works correctly.
		err := blobserver.EnumerateAllFrom(ctx, e, pfx, func(sb blob.SizedRef) error {
			// Just double-check that the storage target is returning sorted results correctly.
			brStr := sb.Ref.String()
			if brStr < pfx {
				log.Fatalf("Storage target %T enumerate not behaving: %q < requested prefix %q", e, brStr, pfx)
			}
			if last != "" && last >= brStr {
				log.Fatalf("Storage target %T enumerate not behaving: previous %q >= current %q", e, last, brStr)
			}
			last = brStr

			// TODO: could add a more efficient method on blob.Ref to do this,
			// that doesn't involve calling String().
			if !strings.HasPrefix(brStr, pfx) {
				return errNotPrefix
			}
			select {
			case c <- sb:
				sh.mu.Lock()
				if doDest {
					sh.vdestCount++
					sh.vdestBytes += int64(sb.Size)
				} else {
					sh.vsrcCount++
					sh.vsrcBytes += int64(sb.Size)
				}
				sh.mu.Unlock()
				return nil
			case <-ctx.Done():
				return context.ErrCanceled
			}
		})
		if err == errNotPrefix {
			err = nil
		}
		if err != nil {
			// Send a zero value to shut down ListMissingDestinationBlobs.
			c <- blob.SizedRef{}
		}
		errc <- err
	}()
	return c, errc
}
// EnumerateBlobMetaLocked sends all known blobs to ch, or until the context is canceled.
//
// The Corpus must already be locked with RLock.
func (c *Corpus) EnumerateBlobMetaLocked(ctx *context.Context, ch chan<- camtypes.BlobMeta) error {
	defer close(ch)
	for _, bm := range c.blobs {
		select {
		case ch <- *bm:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (m testItemEnum) EnumerateItem(ctx *context.Context, it Item, dest chan<- Item) error {
	defer close(dest)
	for _, v := range m[it.(string)] {
		select {
		case dest <- v:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (s testEnum) Enumerate(ctx *context.Context, dest chan<- Item) error {
	defer close(dest)
	for _, v := range s {
		select {
		case dest <- v:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (ds *DiskStorage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	if limit == 0 {
		log.Printf("Warning: localdisk.EnumerateBlobs called with a limit of 0")
	}
	limitMutable := limit
	return ds.readBlobs(readBlobRequest{
		done:    ctx.Done(),
		ch:      dest,
		dirRoot: ds.root,
		after:   after,
		remain:  &limitMutable,
	})
}
// EnumerateBlobMeta sends all metadata about all known blobs to ch and then closes ch.
func (x *Index) EnumerateBlobMeta(ctx *context.Context, ch chan<- camtypes.BlobMeta) (err error) {
	if x.corpus != nil {
		x.corpus.RLock()
		defer x.corpus.RUnlock()
		return x.corpus.EnumerateBlobMetaLocked(ctx, ch)
	}
	defer close(ch)
	return enumerateBlobMeta(x.s, func(bm camtypes.BlobMeta) error {
		select {
		case ch <- bm:
		case <-ctx.Done():
			return context.ErrCanceled
		}
		return nil
	})
}
// EnumerateCamliBlobsLocked sends just camlistore meta blobs to ch.
//
// The Corpus must already be locked with RLock.
//
// If camType is empty, all camlistore blobs are sent, otherwise it specifies
// the camliType to send.
// ch is closed at the end. The err will either be nil or context.ErrCanceled.
func (c *Corpus) EnumerateCamliBlobsLocked(ctx *context.Context, camType string, ch chan<- camtypes.BlobMeta) error {
	defer close(ch)
	for t, m := range c.camBlobs {
		if camType != "" && camType != t {
			continue
		}
		for _, bm := range m {
			select {
			case ch <- *bm:
			case <-ctx.Done():
				return context.ErrCanceled
			}
		}
	}
	return nil
}
func enumerateAllBlobs(ctx *context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error {
	// Use *client.Client's support for enumerating all blobs if
	// possible, since it could probably do a better job knowing
	// HTTP boundaries and such.
	if c, ok := s.(*client.Client); ok {
		return c.SimpleEnumerateBlobs(ctx, destc)
	}

	defer close(destc)
	return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
		select {
		case destc <- sb:
		case <-ctx.Done():
			return context.ErrCanceled
		}
		return nil
	})
}
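// The enumerators above all follow the same producer contract: the enumerator
// owns dest, closes it on return, and returns context.ErrCanceled if ctx is
// done before it finishes. A minimal consumer-side sketch of that contract
// (the helper name countAllBlobs is hypothetical, not part of the package):
// run the producer in a goroutine, range over the channel until it is closed,
// then collect the producer's error.
func countAllBlobs(ctx *context.Context, s blobserver.Storage) (int, error) {
	destc := make(chan blob.SizedRef, 64)
	errc := make(chan error, 1)
	go func() {
		// enumerateAllBlobs (above) closes destc when it returns.
		errc <- enumerateAllBlobs(ctx, s, destc)
	}()
	n := 0
	for range destc {
		n++
	}
	return n, <-errc
}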
func (gs *Storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	objs, err := gs.client.EnumerateObjects(gs.bucket, after, limit)
	if err != nil {
		log.Printf("gstorage EnumerateObjects: %v", err)
		return err
	}
	for _, obj := range objs {
		br, ok := blob.Parse(obj.Key)
		if !ok {
			continue
		}
		select {
		case dest <- blob.SizedRef{Ref: br, Size: uint32(obj.Size)}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (sto *s3Storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	objs, err := sto.s3Client.ListBucket(sto.bucket, after, limit)
	if err != nil {
		log.Printf("s3 ListBucket: %v", err)
		return err
	}
	for _, obj := range objs {
		br, ok := blob.Parse(obj.Key)
		if !ok {
			continue
		}
		select {
		case dest <- blob.SizedRef{Ref: br, Size: obj.Size}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (sto *swiftStorage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	objs, err := sto.client.ObjectsAll(sto.container, nil)
	if err != nil {
		log.Printf("swift ObjectsAll: %v", err)
		return err
	}
	for _, obj := range objs {
		br, ok := blob.Parse(obj.Name)
		if !ok {
			continue
		}
		select {
		case dest <- blob.SizedRef{Ref: br, Size: uint32(obj.Bytes)}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
// stream the loose blobs
func (st smallBlobStreamer) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	small := st.sto.small
	if bs, ok := small.(blobserver.BlobStreamer); ok {
		return bs.StreamBlobs(ctx, dest, contToken)
	}
	defer close(dest)
	donec := ctx.Done()
	return blobserver.EnumerateAllFrom(ctx, small, contToken, func(sb blob.SizedRef) error {
		select {
		case dest <- blobserver.BlobAndToken{
			Blob: blob.NewBlob(sb.Ref, sb.Size, func() types.ReadSeekCloser {
				return blob.NewLazyReadSeekCloser(small, sb.Ref)
			}),
			Token: sb.Ref.StringMinusOne(), // streamer is >=, enumerate is >
		}:
			return nil
		case <-donec:
			return context.ErrCanceled
		}
	})
}
// doDest is false for source and true for dest.
func (sh *SyncHandler) startValidatePrefix(ctx *context.Context, pfx string, doDest bool) (<-chan blob.SizedRef, <-chan error) {
	var e blobserver.BlobEnumerator
	if doDest {
		e = sh.to
	} else {
		e = sh.from
	}
	c := make(chan blob.SizedRef, 64)
	errc := make(chan error, 1)
	go func() {
		defer close(c)
		err := blobserver.EnumerateAllFrom(ctx, e, pfx, func(sb blob.SizedRef) error {
			select {
			case c <- sb:
				// TODO: could add a more efficient method on blob.Ref to do this,
				// that doesn't involve calling String().
				if !strings.HasPrefix(sb.Ref.String(), pfx) {
					return errNotPrefix
				}
				sh.mu.Lock()
				if doDest {
					sh.vdestCount++
					sh.vdestBytes += int64(sb.Size)
				} else {
					sh.vsrcCount++
					sh.vsrcBytes += int64(sb.Size)
				}
				sh.mu.Unlock()
				return nil
			case <-ctx.Done():
				return context.ErrCanceled
			}
		})
		if err == errNotPrefix {
			err = nil
		}
		errc <- err
	}()
	return c, errc
}
func (s staticStreamer) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	defer close(dest)
	var pos int
	if contToken != "" {
		var err error
		pos, err = strconv.Atoi(contToken)
		if err != nil || pos < 0 || pos >= len(s) {
			return errors.New("invalid token")
		}
		s = s[pos:]
	}
	for len(s) > 0 {
		select {
		case dest <- blobserver.BlobAndToken{Blob: s[0], Token: strconv.Itoa(pos)}:
			pos++
			s = s[1:]
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
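// A similar sketch for the streaming side (the helper name drainStream is
// hypothetical): StreamBlobs closes dest when it returns, and each
// BlobAndToken carries a Token that can be handed back as contToken on a
// later call to resume at (or just after) that blob, per the ">=" vs ">"
// comments in the streamers above.
func drainStream(ctx *context.Context, bs blobserver.BlobStreamer, contToken string) (lastToken string, err error) {
	dest := make(chan blobserver.BlobAndToken, 16)
	errc := make(chan error, 1)
	go func() {
		errc <- bs.StreamBlobs(ctx, dest, contToken)
	}()
	for bt := range dest {
		lastToken = bt.Token
	}
	return lastToken, <-errc
}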
func (tf *Fetcher) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	tf.l.Lock()
	defer tf.l.Unlock()
	n := 0
	for _, k := range tf.sorted {
		if k <= after {
			continue
		}
		b := tf.m[k]
		select {
		case dest <- blob.SizedRef{b.BlobRef(), b.Size()}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
		n++
		if limit > 0 && n == limit {
			break
		}
	}
	return nil
}
func (s *Storage) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	objs, err := s.client.EnumerateObjects(s.bucket, s.dirPrefix+after, limit)
	if err != nil {
		log.Printf("gstorage EnumerateObjects: %v", err)
		return err
	}
	for _, obj := range objs {
		dir, file := path.Split(obj.Key)
		if dir != s.dirPrefix {
			continue
		}
		br, ok := blob.Parse(file)
		if !ok {
			return fmt.Errorf("Non-Camlistore object named %q found in bucket", file)
		}
		select {
		case dest <- blob.SizedRef{Ref: br, Size: uint32(obj.Size)}:
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
	return nil
}
func (im *imp) Run(ctx *context.Context) (err error) {
	log.Printf("Running picasa importer.")
	defer func() {
		log.Printf("picasa importer returned: %v", err)
	}()
	im.Lock()
	client := &http.Client{Transport: im.transport}
	im.Unlock()
	root, err := im.getRootNode()
	if err != nil {
		return err
	}
	itemch := make(chan imageFile)
	errch := make(chan error, parallelWorkers)
	tbd := make(chan imageFile)

	// For caching album name -> imported Object, to skip lookup by path
	// (Attr) as much as possible.
	var albumCacheMu sync.Mutex
	albumCache := make(map[string]*importer.Object)
	getParentObj := func(name, title string) *importer.Object {
		albumCacheMu.Lock()
		defer albumCacheMu.Unlock()
		parent, ok := albumCache[name]
		if ok {
			return parent
		}

		parent, err = im.getChildByPath(name)
		if err != nil {
			log.Printf("getParentObj(%s): %v", name, err)
		}
		if parent == nil {
			parent, err = root.ChildPathObject(name)
			if err != nil {
				log.Printf("error creating ChildPathObject(%s): %v", name, err)
				errch <- err
				parent = root
			}
		}
		albumCache[name] = parent
		if err = parent.SetAttrs("title", title, "tag", name); err != nil {
			errch <- err
		}
		return parent
	}

	var workers sync.WaitGroup
	worker := func() {
		for img := range tbd {
			parent := getParentObj(img.albumName, img.albumTitle)

			fn := img.albumName + "/" + img.fileName
			log.Printf("importing %s", fn)
			fileRef, err := schema.WriteFileFromReader(im.host.Target(), fn, img.r)
			img.r.Close()
			if err != nil {
				// FIXME(tgulacsi): cannot download movies
				log.Printf("error downloading %s: %v", img.fileName, err)
				continue
			}
			// parent will have an attr camliPath:img.fileName set to this permanode
			obj, err := parent.ChildPathObject(img.fileName)
			if err != nil {
				errch <- err
			}

			if err = obj.SetAttrs(
				"camliContent", fileRef.String(),
				"album", img.albumTitle,
				"tag", img.albumName,
			); err != nil {
				errch <- err
			}
		}
		workers.Done()
	}

	workers.Add(parallelWorkers)
	for i := 0; i < parallelWorkers; i++ {
		go worker()
	}

	// decide whether we should import this image
	filter := func(img imageFile) (bool, error) {
		intrErr := func(e error) error {
			if e != nil {
				return e
			}
			if ctx.IsCanceled() {
				return context.ErrCanceled
			}
			return nil
		}
		parent := getParentObj(img.albumName, img.albumTitle)
		if parent != nil {
			pn := parent.Attr("camliPath:" + img.fileName)
			if pn != "" {
				ref, ok := blob.Parse(pn)
				if !ok {
					return true, fmt.Errorf("cannot parse %s as blobRef", pn)
				}
				obj, err := im.host.ObjectFromRef(ref)
				if err != nil {
					return false, err
				}
				if obj != nil {
					log.Printf("%s/%s already imported as %s.",
						img.albumName, img.fileName, obj.PermanodeRef())
					return false, intrErr(nil)
				}
			}
		}
		return true, intrErr(nil)
	}
	go iterItems(itemch, errch, filter, client, "default")

	for {
		select {
		case err = <-errch:
			close(tbd)
			if err == context.ErrCanceled {
				log.Printf("Picasa importer has been interrupted.")
			} else {
				log.Printf("Picasa importer error: %v", err)
				workers.Wait()
			}
			return err
		case <-ctx.Done():
			log.Printf("Picasa importer has been interrupted.")
			close(tbd)
			return context.ErrCanceled
		case img := <-itemch:
			tbd <- img
		}
	}
	close(tbd)
	workers.Wait()
	return nil
}
// Note: closes ch.
func (c *Client) EnumerateBlobsOpts(ctx *context.Context, ch chan<- blob.SizedRef, opts EnumerateOpts) error {
	defer close(ch)
	if opts.After != "" && opts.MaxWait != 0 {
		return errors.New("client error: it's invalid to use enumerate After and MaxWaitSec together")
	}
	pfx, err := c.prefix()
	if err != nil {
		return err
	}

	error := func(msg string, e error) error {
		err := fmt.Errorf("client enumerate error: %s: %v", msg, e)
		c.log.Print(err.Error())
		return err
	}

	nSent := 0
	keepGoing := true
	after := opts.After
	for keepGoing {
		waitSec := 0
		if after == "" {
			if opts.MaxWait > 0 {
				waitSec = int(opts.MaxWait.Seconds())
				if waitSec == 0 {
					waitSec = 1
				}
			}
		}
		url_ := fmt.Sprintf("%s/camli/enumerate-blobs?after=%s&limit=%d&maxwaitsec=%d",
			pfx, url.QueryEscape(after), enumerateBatchSize, waitSec)
		req := c.newRequest("GET", url_)
		resp, err := c.httpClient.Do(req)
		if err != nil {
			return error("http request", err)
		}

		json, err := c.responseJSONMap("enumerate-blobs", resp)
		if err != nil {
			return error("stat json parse error", err)
		}

		blobs, ok := getJSONMapArray(json, "blobs")
		if !ok {
			return error("response JSON didn't contain 'blobs' array", nil)
		}
		for _, v := range blobs {
			itemJSON, ok := v.(map[string]interface{})
			if !ok {
				return error("item in 'blobs' was malformed", nil)
			}
			blobrefStr, ok := getJSONMapString(itemJSON, "blobRef")
			if !ok {
				return error("item in 'blobs' was missing string 'blobRef'", nil)
			}
			size, ok := getJSONMapInt64(itemJSON, "size")
			if !ok {
				return error("item in 'blobs' was missing numeric 'size'", nil)
			}
			br, ok := blob.Parse(blobrefStr)
			if !ok {
				return error("item in 'blobs' had invalid blobref.", nil)
			}
			select {
			case ch <- blob.SizedRef{Ref: br, Size: size}:
			case <-ctx.Done():
				return context.ErrCanceled
			}
			nSent++
			if opts.Limit == nSent {
				// nSent can't be zero at this point, so opts.Limit being 0
				// is okay.
				return nil
			}
		}

		after, keepGoing = getJSONMapString(json, "continueAfter")
	}
	return nil
}
// StreamBlobs implements the blobserver.BlobStreamer interface.
func (s *storage) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	defer close(dest)
	fileNum, offset, err := parseContToken(contToken)
	if err != nil {
		return errors.New("diskpacked: invalid continuation token")
	}
	debug.Printf("Continuing blob streaming from pack %s, offset %d",
		s.filename(fileNum), offset)

	fd, err := os.Open(s.filename(fileNum))
	if err != nil {
		return err
	}
	// fd will change over time; Close whichever is current when we exit.
	defer func() {
		if fd != nil { // may be nil on os.Open error below
			fd.Close()
		}
	}()

	// ContToken always refers to the exact next place we will read from.
	// Note that seeking past the end is legal on Unix and for io.Seeker,
	// but that will just result in a mostly harmless EOF.
	//
	// TODO: probably be stricter here and don't allow seek past
	// the end, since we know the size of closed files and the
	// size of the file diskpacked currently still writing.
	_, err = fd.Seek(offset, os.SEEK_SET)
	if err != nil {
		return err
	}

	const ioBufSize = 256 * 1024

	// We'll use bufio to avoid read system call overhead.
	r := bufio.NewReaderSize(fd, ioBufSize)

	for {
		// Are we at the EOF of this pack?
		if _, err := r.Peek(1); err != nil {
			if err != io.EOF {
				return err
			}
			// EOF case; continue to the next pack, if any.
			fileNum += 1
			offset = 0
			fd.Close() // Close the previous pack
			fd, err = os.Open(s.filename(fileNum))
			if os.IsNotExist(err) {
				// We reached the end.
				return nil
			} else if err != nil {
				return err
			}
			r.Reset(fd)
			continue
		}

		thisOffset := offset // of current blob's header
		consumed, digest, size, err := readHeader(r)
		if err != nil {
			return err
		}

		offset += int64(consumed)
		if deletedBlobRef.Match(digest) {
			// Skip over deletion padding
			if _, err := io.CopyN(ioutil.Discard, r, int64(size)); err != nil {
				return err
			}
			offset += int64(size)
			continue
		}

		// Finally, read and send the blob.

		// TODO: remove this allocation per blob. We can make one instead
		// outside of the loop, guarded by a mutex, and re-use it, only to
		// lock the mutex and clone it if somebody actually calls Open
		// on the *blob.Blob. Otherwise callers just scanning all the blobs
		// to see if they have everything incur lots of garbage if they
		// don't open any blobs.
		data := make([]byte, size)
		if _, err := io.ReadFull(r, data); err != nil {
			return err
		}
		offset += int64(size)
		ref, ok := blob.ParseBytes(digest)
		if !ok {
			return fmt.Errorf("diskpacked: Invalid blobref %q", digest)
		}
		newReader := func() types.ReadSeekCloser {
			return newReadSeekNopCloser(bytes.NewReader(data))
		}
		blob := blob.NewBlob(ref, size, newReader)
		select {
		case dest <- blobserver.BlobAndToken{
			Blob:  blob,
			Token: fmt.Sprintf("%d %d", fileNum, thisOffset),
		}:
			// Nothing.
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
}