Example #1
// contToken is one of two forms:
//    ""                : start from the beginning of the zip files
//    "sha1-xxxxx:n"    : start at == (sha1-xxxxx, file n), else next zip
func (st largeBlobStreamer) StreamBlobs(ctx context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	defer close(dest)
	s := st.sto
	large := s.large

	var after string // for enumerateAll
	var skipFiles int
	var firstRef blob.Ref // first we care about

	if contToken != "" {
		f := strings.SplitN(contToken, ":", 2)
		if len(f) != 2 {
			return errContToken
		}
		firstRef, _ = blob.Parse(f[0])
		skipFiles, err = strconv.Atoi(f[1])
		if !firstRef.Valid() || err != nil {
			return errContToken
		}
		// EnumerateAllFrom's cursor is exclusive (>), but we want
		// to start _at_ firstRef (>=). So start enumerating right
		// before our target.
		after = firstRef.StringMinusOne()
	}
	return blobserver.EnumerateAllFrom(ctx, large, after, func(sb blob.SizedRef) error {
		if firstRef.Valid() {
			if sb.Ref.Less(firstRef) {
				// Skip.
				return nil
			}
			if firstRef.Less(sb.Ref) {
				skipFiles = 0 // reset it.
			}
		}
		fileN := 0
		return s.foreachZipBlob(sb.Ref, func(bap BlobAndPos) error {
			if skipFiles > 0 {
				skipFiles--
				fileN++
				return nil
			}
			select {
			case dest <- blobserver.BlobAndToken{
				Blob: blob.NewBlob(bap.Ref, bap.Size, func() types.ReadSeekCloser {
					return blob.NewLazyReadSeekCloser(s, bap.Ref)
				}),
				Token: fmt.Sprintf("%s:%d", sb.Ref, fileN),
			}:
				fileN++
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
	})
}
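A minimal consumer sketch for the contract above, assuming the standard context package and the blobserver.BlobStreamer interface; streamOnce and its helper names are illustrative, not part of the package:

// streamOnce runs one StreamBlobs pass and records each blob's token.
// Because a token resumes streaming _at_ the blob it was sent with,
// feeding lastToken back in as contToken replays that blob on resume.
func streamOnce(ctx context.Context, bs blobserver.BlobStreamer, contToken string) (lastToken string, err error) {
	dest := make(chan blobserver.BlobAndToken, 16)
	errc := make(chan error, 1)
	go func() {
		// The streamer closes dest when it returns.
		errc <- bs.StreamBlobs(ctx, dest, contToken)
	}()
	for bt := range dest {
		lastToken = bt.Token
		// ... read bt.Blob here ...
	}
	return lastToken, <-errc
}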
Example #2
// doDest is false for source and true for dest.
func (sh *SyncHandler) startValidatePrefix(ctx context.Context, pfx string, doDest bool) (<-chan blob.SizedRef, <-chan error) {
	var e blobserver.BlobEnumerator
	if doDest {
		e = sh.to
	} else {
		e = sh.from
	}
	c := make(chan blob.SizedRef, 64)
	errc := make(chan error, 1)
	go func() {
		defer close(c)
		var last string // last blobref seen; to double-check that storage's enumeration works correctly.
		err := blobserver.EnumerateAllFrom(ctx, e, pfx, func(sb blob.SizedRef) error {
			// Just double-check that the storage target is returning sorted results correctly.
			brStr := sb.Ref.String()
			if brStr < pfx {
				log.Fatalf("Storage target %T enumerate not behaving: %q < requested prefix %q", e, brStr, pfx)
			}
			if last != "" && last >= brStr {
				log.Fatalf("Storage target %T enumerate not behaving: previous %q >= current %q", e, last, brStr)
			}
			last = brStr

			// TODO: could add a more efficient method on blob.Ref to do this,
			// one that doesn't involve calling String().
			if !strings.HasPrefix(brStr, pfx) {
				return errNotPrefix
			}
			select {
			case c <- sb:
				sh.mu.Lock()
				if doDest {
					sh.vdestCount++
					sh.vdestBytes += int64(sb.Size)
				} else {
					sh.vsrcCount++
					sh.vsrcBytes += int64(sb.Size)
				}
				sh.mu.Unlock()
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
		if err == errNotPrefix {
			err = nil
		}
		if err != nil {
			// Send a zero value to shut down ListMissingDestinationBlobs.
			c <- blob.SizedRef{}
		}
		errc <- err
	}()
	return c, errc
}
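A hedged sketch of draining the channel pair returned above; collectPrefix is an illustrative name, not the handler's API. It relies on the zero SizedRef sent on failure as a sentinel:

// collectPrefix gathers one prefix's worth of refs, then reports the
// enumeration error, if any. Illustrative only.
func collectPrefix(ctx context.Context, sh *SyncHandler, pfx string, doDest bool) ([]blob.SizedRef, error) {
	c, errc := sh.startValidatePrefix(ctx, pfx, doDest)
	var refs []blob.SizedRef
	for sb := range c {
		if !sb.Ref.Valid() {
			break // zero-value sentinel: the enumeration failed; see errc
		}
		refs = append(refs, sb)
	}
	return refs, <-errc
}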
Example #3
// StreamBlobs streams the loose (small) blobs.
func (st smallBlobStreamer) StreamBlobs(ctx context.Context, dest chan<- blobserver.BlobAndToken, contToken string) (err error) {
	small := st.sto.small
	if bs, ok := small.(blobserver.BlobStreamer); ok {
		return bs.StreamBlobs(ctx, dest, contToken)
	}
	defer close(dest)
	donec := ctx.Done()
	return blobserver.EnumerateAllFrom(ctx, small, contToken, func(sb blob.SizedRef) error {
		select {
		case dest <- blobserver.BlobAndToken{
			Blob: blob.NewBlob(sb.Ref, sb.Size, func() types.ReadSeekCloser {
				return blob.NewLazyReadSeekCloser(small, sb.Ref)
			}),
			Token: sb.Ref.StringMinusOne(), // streamer is >=, enumerate is >
		}:
			return nil
		case <-donec:
			return ctx.Err()
		}
	})
}
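The StringMinusOne token above bridges two cursor conventions: EnumerateAllFrom resumes strictly after its cursor (>), while a stream token must resume at the blob it names (>=). An illustrative sketch of the idea, not the actual blob.Ref method:

// stringMinusOne returns a string sorting immediately below ref, so that
// an exclusive (>) cursor still yields ref itself. Decrementing the last
// byte suffices because no valid blobref sorts strictly between the two
// strings. Assumes ref is non-empty. Illustrative only.
func stringMinusOne(ref string) string {
	b := []byte(ref)
	b[len(b)-1]-- // e.g. "sha1-...d0" becomes "sha1-...d/"
	return string(b)
}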
Example #4
// doDest is false for source and true for dest.
func (sh *SyncHandler) startValidatePrefix(ctx context.Context, pfx string, doDest bool) (<-chan blob.SizedRef, <-chan error) {
	var e blobserver.BlobEnumerator
	if doDest {
		e = sh.to
	} else {
		e = sh.from
	}
	c := make(chan blob.SizedRef, 64)
	errc := make(chan error, 1)
	go func() {
		defer close(c)
		err := blobserver.EnumerateAllFrom(ctx, e, pfx, func(sb blob.SizedRef) error {
			// Check the prefix before sending, so a blob outside the
			// requested prefix never reaches the consumer.
			// TODO: could add a more efficient method on blob.Ref to do this,
			// one that doesn't involve calling String().
			if !strings.HasPrefix(sb.Ref.String(), pfx) {
				return errNotPrefix
			}
			select {
			case c <- sb:
				sh.mu.Lock()
				if doDest {
					sh.vdestCount++
					sh.vdestBytes += int64(sb.Size)
				} else {
					sh.vsrcCount++
					sh.vsrcBytes += int64(sb.Size)
				}
				sh.mu.Unlock()
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
		if err == errNotPrefix {
			err = nil
		}
		errc <- err
	}()
	return c, errc
}
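Both channels arrive in sorted order (the invariant Example #2 log.Fatals on), so a consumer can diff a source/dest pair in a single merge pass. A hedged sketch of that downstream step, with illustrative names and the zero-value error sentinel ignored for brevity:

// missingOnDest reports refs present on the source channel but absent
// from the destination channel, both assumed sorted. Illustrative only,
// not the package's own helper.
func missingOnDest(srcc, dstc <-chan blob.SizedRef, missing func(blob.SizedRef)) {
	dst, dstOK := <-dstc
	for src := range srcc {
		for dstOK && dst.Ref.Less(src.Ref) {
			dst, dstOK = <-dstc
		}
		if !dstOK || src.Ref.Less(dst.Ref) {
			missing(src) // no matching ref on dest
		}
	}
}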
Example #5
// reindex rebuilds the meta index for packed blobs. It calls newMeta to create
// a new KeyValue on which to write the index, and replaces s.meta with it. There
// is no locking whatsoever, so it should not be called while the storage is
// already in use. Its signature might change if/when it gets exported.
func (s *storage) reindex(ctx context.Context, newMeta func() (sorted.KeyValue, error)) error {
	meta, err := newMeta()
	if err != nil {
		return fmt.Errorf("failed to create new blobpacked meta index: %v", err)
	}

	wholeMetaByWholeRef := make(map[blob.Ref][]wholeMetaPrefixInfo)

	if err := blobserver.EnumerateAllFrom(ctx, s.large, "", func(sb blob.SizedRef) error {
		zipRef := sb.Ref
		zr, err := zip.NewReader(blob.ReaderAt(s.large, zipRef), int64(sb.Size))
		if err != nil {
			return zipOpenError{zipRef, err}
		}
		var maniFile *zip.File
		var firstOff int64 // offset of first file (the packed data chunks)
		for i, f := range zr.File {
			if i == 0 {
				firstOff, err = f.DataOffset()
				if err != nil {
					return err
				}
			}
			if f.Name == zipManifestPath {
				maniFile = f
				break
			}
		}
		if maniFile == nil {
			return fmt.Errorf("no camlistore manifest file found in zip %v", zipRef)
		}
		maniRC, err := maniFile.Open()
		if err != nil {
			return err
		}
		defer maniRC.Close()
		var mf Manifest
		if err := json.NewDecoder(maniRC).Decode(&mf); err != nil {
			return err
		}
		if !mf.WholeRef.Valid() || mf.WholeSize == 0 || !mf.DataBlobsOrigin.Valid() {
			return fmt.Errorf("incomplete blobpack manifest JSON in %v", zipRef)
		}

		bm := meta.BeginBatch()
		// In this loop, we write all the blobMetaPrefix entries for the
		// data blobs in this zip, and we also compute the dataBytesWritten, for later.
		var dataBytesWritten int64
		for _, bp := range mf.DataBlobs {
			bm.Set(blobMetaPrefix+bp.SizedRef.Ref.String(), fmt.Sprintf("%d %v %d", bp.SizedRef.Size, zipRef, firstOff+bp.Offset))
			dataBytesWritten += int64(bp.SizedRef.Size)
		}

		// In this loop, we write all the blobMetaPrefix entries for the schema blobs in this zip.
		for _, f := range zr.File {
			if !(strings.HasPrefix(f.Name, "camlistore/") && strings.HasSuffix(f.Name, ".json")) ||
				f.Name == zipManifestPath {
				continue
			}
			br, ok := blob.Parse(strings.TrimSuffix(strings.TrimPrefix(f.Name, "camlistore/"), ".json"))
			if !ok {
				return fmt.Errorf("schema file in zip %v does not have blobRef as name: %v", zipRef, f.Name)
			}
			offset, err := f.DataOffset()
			if err != nil {
				return err
			}
			bm.Set(blobMetaPrefix+br.String(), fmt.Sprintf("%d %v %d", f.UncompressedSize64, zipRef, offset))
		}
		if err := meta.CommitBatch(bm); err != nil {
			return err
		}

		// Record that info for later; once we have them all, we can
		// write the wholeMetaPrefix entries.
		wholeMetas := wholeMetaByWholeRef[mf.WholeRef]
		wholeMetas = append(wholeMetas, wholeMetaPrefixInfo{
			wholePartIndex:   mf.WholePartIndex,
			zipRef:           zipRef,
			firstOffset:      firstOff,
			dataBytesWritten: dataBytesWritten,
			wholeSize:        mf.WholeSize,
		})
		wholeMetaByWholeRef[mf.WholeRef] = wholeMetas
		return nil

	}); err != nil {
		return err
	}

	// Finally, write the wholeMetaPrefix entries.
	bm := meta.BeginBatch()
	for wholeRef, wholeMetas := range wholeMetaByWholeRef {
		wm := wholeMetaPrefixInfos(wholeMetas)
		sort.Sort(wm)
		var wholeBytesWritten int64
		for _, w := range wm {
			bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, wholeRef, w.wholePartIndex),
				fmt.Sprintf("%s %d %d %d", w.zipRef, w.firstOffset, wholeBytesWritten, w.dataBytesWritten))
			wholeBytesWritten += w.dataBytesWritten
		}
		if wm[0].wholeSize != wholeBytesWritten {
			return fmt.Errorf("sum of all zips (%d bytes) does not match manifest's WholeSize (%d bytes) for %v",
				wholeBytesWritten, wm[0].wholeSize, wholeRef)
		}
		bm.Set(fmt.Sprintf("%s%s", wholeMetaPrefix, wholeRef),
			fmt.Sprintf("%d %d", wholeBytesWritten, wm[len(wm)-1].wholePartIndex+1))
	}

	if err := meta.CommitBatch(bm); err != nil {
		return err
	}

	// TODO(mpl): take into account removed blobs. It can't be done for now
	// (2015-01-29) because RemoveBlobs currently only updates the meta index.
	// So if the index was lost, all information about removals was lost too.

	s.meta = meta
	return nil
}
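Every blobMetaPrefix row written above shares the "<size> <zipRef> <offset>" value format. A sketch of the matching decoder, assuming only that format; parsedMetaRow and parseMetaRow are illustrative names, not the package's own:

// parsedMetaRow is an illustrative holder for one decoded meta row.
type parsedMetaRow struct {
	size   uint32
	zipRef blob.Ref
	offset int64
}

// parseMetaRow decodes a "<size> <zipRef> <offset>" value as written by
// reindex's bm.Set calls above. Illustrative only, not the package's API.
func parseMetaRow(v string) (parsedMetaRow, error) {
	var p parsedMetaRow
	f := strings.Fields(v)
	if len(f) != 3 {
		return p, fmt.Errorf("malformed meta row %q", v)
	}
	size, err := strconv.ParseUint(f[0], 10, 32)
	if err != nil {
		return p, err
	}
	ref, ok := blob.Parse(f[1])
	if !ok {
		return p, fmt.Errorf("bad zip ref in meta row %q", v)
	}
	offset, err := strconv.ParseInt(f[2], 10, 64)
	if err != nil {
		return p, err
	}
	p.size = uint32(size)
	p.zipRef = ref
	p.offset = offset
	return p, nil
}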