func (pt *packTest) testOpenWholeRef(t *testing.T, wholeRef blob.Ref, wantSize int64) {
	rc, gotSize, err := pt.sto.OpenWholeRef(wholeRef, 0)
	if err != nil {
		t.Errorf("OpenWholeRef = %v", err)
		return
	}
	defer rc.Close()
	if gotSize != wantSize {
		t.Errorf("OpenWholeRef size = %v; want %v", gotSize, wantSize)
		return
	}
	h := blob.NewHash()
	n, err := io.Copy(h, rc)
	if err != nil {
		t.Errorf("OpenWholeRef read error: %v", err)
		return
	}
	if n != wantSize {
		t.Errorf("OpenWholeRef read %v bytes; want %v", n, wantSize)
		return
	}
	gotRef := blob.RefFromHash(h)
	if gotRef != wholeRef {
		t.Errorf("OpenWholeRef read contents = %v; want %v", gotRef, wholeRef)
	}
}
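Every example on this page follows the same core pattern: create a hash with blob.NewHash, feed it bytes, then derive the content-addressed ref with blob.RefFromHash. A minimal standalone sketch of just that pattern (assuming the camlistore.org/pkg/blob import path these projects use):

package main

import (
	"fmt"
	"io"
	"strings"

	"camlistore.org/pkg/blob"
)

func main() {
	h := blob.NewHash() // a hash.Hash for the currently preferred digest
	if _, err := io.Copy(h, strings.NewReader("hello world")); err != nil {
		panic(err)
	}
	// With SHA-1 as the default, prints "sha1-2aae6c35c94fcfb415dbe95f408b9ce91ee846ed".
	fmt.Println(blob.RefFromHash(h))
}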
Example #2
// This is the simple 1MB chunk version. The rolling checksum version is below.
func writeFileMapOld(bs blobserver.StatReceiver, file *Builder, r io.Reader) (blob.Ref, error) {
	parts, size := []BytesPart{}, int64(0)

	var buf bytes.Buffer
	for {
		buf.Reset()
		n, err := io.Copy(&buf, io.LimitReader(r, maxBlobSize))
		if err != nil {
			return blob.Ref{}, err
		}
		if n == 0 {
			break
		}

		hash := blob.NewHash()
		io.Copy(hash, bytes.NewReader(buf.Bytes()))
		br := blob.RefFromHash(hash)
		hasBlob, err := serverHasBlob(bs, br)
		if err != nil {
			return blob.Ref{}, err
		}
		if !hasBlob {
			sb, err := bs.ReceiveBlob(br, &buf)
			if err != nil {
				return blob.Ref{}, err
			}
			if want := (blob.SizedRef{br, uint32(n)}); sb != want {
				return blob.Ref{}, fmt.Errorf("schema/filewriter: wrote %s, expect", sb, want)
			}
		}

		size += n
		parts = append(parts, BytesPart{
			BlobRef: br,
			Size:    uint64(n),
			Offset:  0, // into BlobRef to read from (not of dest)
		})
	}

	err := file.PopulateParts(size, parts)
	if err != nil {
		return blob.Ref{}, err
	}

	json := file.Blob().JSON()
	br := blob.SHA1FromString(json)
	sb, err := bs.ReceiveBlob(br, strings.NewReader(json))
	if err != nil {
		return blob.Ref{}, err
	}
	if expect := (blob.SizedRef{br, uint32(len(json))}); expect != sb {
		return blob.Ref{}, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb)
	}

	return br, nil
}
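A hedged usage sketch for the function above: it lives in the schema package, so NewFileMap from the same package can supply the *Builder. The wrapper name is illustrative.

// writeSmallFile is a hypothetical caller, written as if inside package schema.
func writeSmallFile(bs blobserver.StatReceiver, name string, r io.Reader) (blob.Ref, error) {
	fileMap := NewFileMap(name) // *Builder for a "file" schema blob
	return writeFileMapOld(bs, fileMap, r)
}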
Example #3
func (pk *packer) pack() error {
	if err := pk.scanChunks(); err != nil {
		return err
	}

	// TODO: decide as a function of schemaRefs and dataRefs
	// already in s.large whether it makes sense to still compact
	// this from a savings standpoint. For now we just always do.
	// Maybe we'd have knobs in the future. Ideally not.

	// Don't pack a file if we already have its wholeref stored
	// otherwise (perhaps under a different filename). But that
	// means we have to compute its wholeref first. We assume the
	// blob source will cache these lookups so it's not too
	// expensive to do two passes over the input.
	h := blob.NewHash()
	var err error
	pk.wholeSize, err = io.Copy(h, pk.fr)
	if err != nil {
		return err
	}
	pk.wholeRef = blob.RefFromHash(h)
	wholeKey := wholeMetaPrefix + pk.wholeRef.String()
	_, err = pk.s.meta.Get(wholeKey)
	if err == nil {
		// Nil error means there was some knowledge of this wholeref.
		return fmt.Errorf("already have wholeref %v packed; not packing again", pk.wholeRef)
	} else if err != sorted.ErrNotFound {
		return err
	}

	pk.chunksRemain = pk.dataRefs
	var trunc blob.Ref
MakingZips:
	for len(pk.chunksRemain) > 0 {
		if err := pk.writeAZip(trunc); err != nil {
			if needTrunc, ok := err.(needsTruncatedAfterError); ok {
				trunc = needTrunc.Ref
				if fn := testHookSawTruncate; fn != nil {
					fn(trunc)
				}
				continue MakingZips
			}
			return err
		}
		trunc = blob.Ref{}
	}

	// Record the final wholeMetaPrefix record:
	err = pk.s.meta.Set(wholeKey, fmt.Sprintf("%d %d", pk.wholeSize, len(pk.zips)))
	if err != nil {
		return fmt.Errorf("Error setting %s: %v", wholeKey, err)
	}

	return nil
}
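The final meta row's value has the shape "<wholeSize> <numZips>"; the TestPackLarge example further down checks for exactly this (e.g. "17825792 2"). A small sketch of decoding such a value (the function name is illustrative):

// parseWholeMeta decodes a "w:<wholeRef>" meta value such as "17825792 2".
func parseWholeMeta(v string) (wholeSize int64, numZips int, err error) {
	_, err = fmt.Sscanf(v, "%d %d", &wholeSize, &numZips)
	return
}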
Example #4
func (h *DeployHandler) storeInstanceConf(conf *InstanceConf) (blob.Ref, error) {
	contents, err := json.Marshal(conf)
	if err != nil {
		return blob.Ref{}, fmt.Errorf("could not json encode instance config: %v", err)
	}
	hash := blob.NewHash()
	_, err = io.Copy(hash, bytes.NewReader(contents))
	if err != nil {
		return blob.Ref{}, fmt.Errorf("could not hash blob contents: %v", err)
	}
	br := blob.RefFromHash(hash)
	if _, err := blobserver.Receive(h.instConf, br, bytes.NewReader(contents)); err != nil {
		return blob.Ref{}, fmt.Errorf("could not store instance config blob: %v", err)
	}
	return br, nil
}
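Since contents is already an in-memory []byte here, the NewHash/io.Copy/RefFromHash dance can be collapsed. A sketch, assuming blob.RefFromString is available in this version of the blob package (older trees spell it blob.SHA1FromString, as Example #2 above does):

// storeJSONBlob is a hypothetical helper with the hashing collapsed.
func storeJSONBlob(dst blobserver.BlobReceiver, contents []byte) (blob.Ref, error) {
	br := blob.RefFromString(string(contents))
	if _, err := blobserver.Receive(dst, br, bytes.NewReader(contents)); err != nil {
		return blob.Ref{}, fmt.Errorf("could not store blob: %v", err)
	}
	return br, nil
}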
Example #5
// Blob builds the Blob. The builder continues to be usable after a call to Blob.
func (bb *Builder) Blob() *Blob {
	json, err := mapJSON(bb.m)
	if err != nil {
		panic(err)
	}
	ss, err := parseSuperset(strings.NewReader(json))
	if err != nil {
		panic(err)
	}
	h := blob.NewHash()
	h.Write([]byte(json))
	return &Blob{
		str: json,
		ss:  ss,
		br:  blob.RefFromHash(h),
	}
}
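A usage sketch for Blob, assuming the schema package's NewFileMap constructor used elsewhere on this page:

// printFileMapRef is a hypothetical snippet showing Blob in use.
func printFileMapRef() {
	bb := schema.NewFileMap("example.txt")
	b := bb.Blob()                      // bb stays usable afterwards
	fmt.Println(b.BlobRef())            // ref of the serialized schema blob
	fmt.Println(len(b.JSON()), "bytes") // the JSON that was hashed
}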
func TestPackLarge(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}
	const fileSize = 17 << 20 // more than 16 MB, so more than one zip
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	hash := blob.NewHash()
	hash.Write(fileContents)
	wholeRef := blob.RefFromHash(hash)

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(2),
		wantNumSmallBlobs(0),
	)

	// Verify we wrote the correct "w:*" meta rows.
	got := map[string]string{}
	want := map[string]string{
		"w:" + wholeRef.String():        "17825792 2",
		"w:" + wholeRef.String() + ":0": "sha1-9b4a3d114c059988075c87293c86ee7cbc6f4af5 37 0 16709479",
		"w:" + wholeRef.String() + ":1": "sha1-fe6326ac6b389ffe302623e4a501bfc8c6272e8e 37 16709479 1116313",
	}
	if err := sorted.Foreach(pt.sto.meta, func(key, value string) error {
		if strings.HasPrefix(key, "b:") {
			return nil
		}
		got[key] = value
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("'w:*' meta rows = %v; want %v", got, want)
	}

	// And verify we can read it back out.
	pt.testOpenWholeRef(t, wholeRef, fileSize)
}
Example #7
func stdinBlobHandle() (uh *client.UploadHandle, err error) {
	var buf bytes.Buffer
	size, err := io.Copy(&buf, cmdmain.Stdin)
	if err != nil {
		return
	}
	// TODO(bradfitz,mpl): limit this buffer size?
	file := buf.Bytes()
	h := blob.NewHash()
	size, err = io.Copy(h, bytes.NewReader(file))
	if err != nil {
		return
	}
	return &client.UploadHandle{
		BlobRef:  blob.RefFromHash(h),
		Size:     size,
		Contents: io.LimitReader(bytes.NewReader(file), size),
	}, nil
}
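The TODO above is what the variant in Example #9 further down addresses: it bounds the read with io.CopyN and rejects input larger than constants.MaxBlobSize.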
func TestPackNormal(t *testing.T) {
	const fileSize = 5 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	hash := blob.NewHash()
	hash.Write(fileContents)
	wholeRef := blob.RefFromHash(hash)

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(0),
	)
	// And verify we can read it back out.
	pt.testOpenWholeRef(t, wholeRef, fileSize)
}
Example #9
func stdinBlobHandle() (uh *client.UploadHandle, err error) {
	var buf bytes.Buffer
	size, err := io.CopyN(&buf, cmdmain.Stdin, constants.MaxBlobSize+1)
	if err == io.EOF {
		// EOF just means stdin ended before the limit; not an error.
		err = nil
	} else if err != nil {
		return
	}
	if size > constants.MaxBlobSize {
		err = fmt.Errorf("blob size cannot be bigger than %d", constants.MaxBlobSize)
		return
	}
	file := buf.Bytes()
	h := blob.NewHash()
	size, err = io.Copy(h, bytes.NewReader(file))
	if err != nil {
		return
	}
	return &client.UploadHandle{
		BlobRef:  blob.RefFromHash(h),
		Size:     uint32(size),
		Contents: io.LimitReader(bytes.NewReader(file), size),
	}, nil
}
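The CopyN limit of constants.MaxBlobSize+1 is the load-bearing detail: managing to read one byte more than the maximum is how the code distinguishes "exactly at the limit" from "over it". The same trick in isolation (limit value illustrative):

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// readAtMost reads up to max bytes from r, erroring if the input is longer.
func readAtMost(r io.Reader, max int64) ([]byte, error) {
	var buf bytes.Buffer
	n, err := io.CopyN(&buf, r, max+1) // the extra byte detects overflow
	if err != nil && err != io.EOF {
		return nil, err
	}
	if n > max {
		return nil, fmt.Errorf("input bigger than %d bytes", max)
	}
	return buf.Bytes(), nil
}

func main() {
	b, err := readAtMost(strings.NewReader("hello"), 16)
	fmt.Println(string(b), err) // hello <nil>
}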
Example #10
func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) {
	if photo.ID == "" {
		return errors.New("photo has no ID")
	}

	getMediaBytes := func() (io.ReadCloser, error) {
		log.Printf("Importing media from %v", photo.URL)
		resp, err := ctxutil.Client(ctx).Get(photo.URL)
		if err != nil {
			return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err)
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode)
		}
		return resp.Body, nil
	}

	var fileRefStr string
	idFilename := photo.ID + "-" + photo.Filename
	photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) {
		h := blob.NewHash()
		rc, err := getMediaBytes()
		if err != nil {
			return nil, err
		}
		fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, io.TeeReader(rc, h))
		if err != nil {
			return nil, err
		}
		fileRefStr = fileRef.String()
		wholeRef := blob.RefFromHash(h)
		if pn, err := findExistingPermanode(r.Host.Searcher(), wholeRef); err == nil {
			return r.Host.ObjectFromRef(pn)
		}
		return r.Host.NewObject()
	})
	if err != nil {
		return err
	}

	const attrMediaURL = "picasaMediaURL"
	if fileRefStr == "" {
		fileRefStr = photoNode.Attr(nodeattr.CamliContent)
		// Only re-download the source photo if its URL has changed.
		// Empirically this seems to work: cropping a photo in the
		// photos.google.com UI causes its URL to change. And it makes
		// sense, looking at the ugliness of the URLs with all their
		// encoded/signed state.
		if !mediaURLsEqual(photoNode.Attr(attrMediaURL), photo.URL) {
			rc, err := getMediaBytes()
			if err != nil {
				return err
			}
			fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, rc)
			rc.Close()
			if err != nil {
				return err
			}
			fileRefStr = fileRef.String()
		}
	}

	title := strings.TrimSpace(photo.Description)
	if strings.Contains(title, "\n") {
		title = title[:strings.Index(title, "\n")]
	}
	if title == "" && schema.IsInterestingTitle(photo.Filename) {
		title = photo.Filename
	}

	// TODO(tgulacsi): add more attrs (comments ?)
	// for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork
	attrs := []string{
		nodeattr.CamliContent, fileRefStr,
		attrPicasaId, photo.ID,
		nodeattr.Title, title,
		nodeattr.Description, photo.Description,
		nodeattr.LocationText, photo.Location,
		nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated),
		nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published),
		nodeattr.URL, photo.PageURL,
	}
	if photo.Latitude != 0 || photo.Longitude != 0 {
		attrs = append(attrs,
			nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude),
			nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude),
		)
	}
	if err := photoNode.SetAttrs(attrs...); err != nil {
		return err
	}
	if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil {
		return err
	}
	if photo.Position > 0 {
		if err := albumNode.SetAttr(
			nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1),
			photoNode.PermanodeRef().String()); err != nil {
			return err
		}
	}

	// Do this last, after we're sure the "camliContent" attribute
	// has been saved successfully, because this is the one that
	// causes us to do it again in the future or not.
	if err := photoNode.SetAttrs(attrMediaURL, photo.URL); err != nil {
		return err
	}
	return nil
}
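The io.TeeReader inside the ChildPathObjectOrFunc callback is worth calling out: it computes the whole-file hash in the same pass that streams the media into storage, so nothing is read twice. The pattern in isolation (a sketch; names are illustrative):

// writeAndHash stores a file and learns its whole-file ref in a single pass.
func writeAndHash(dst blobserver.StatReceiver, name string, r io.Reader) (fileRef, wholeRef blob.Ref, err error) {
	h := blob.NewHash()
	fileRef, err = schema.WriteFileFromReader(dst, name, io.TeeReader(r, h))
	if err != nil {
		return
	}
	wholeRef = blob.RefFromHash(h) // hash of exactly the bytes WriteFileFromReader consumed
	return
}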
Example #11
// trunc is a hint about which blob to truncate after. It may be zero.
// If the returned error is of type 'needsTruncatedAfterError', then
// the zip should be attempted to be written again, but truncating the
// data after the listed blob.
func (pk *packer) writeAZip(trunc blob.Ref) (err error) {
	defer func() {
		if e := recover(); e != nil {
			if v, ok := e.(error); ok && err == nil {
				err = v
			} else {
				panic(e)
			}
		}
	}()
	mf := Manifest{
		WholeRef:       pk.wholeRef,
		WholeSize:      pk.wholeSize,
		WholePartIndex: len(pk.zips),
	}
	var zbuf bytes.Buffer
	cw := &countWriter{w: &zbuf}
	zw := zip.NewWriter(cw)

	var approxSize = zipFixedOverhead // can't use zbuf.Len because zw buffers
	var dataRefsWritten []blob.Ref
	var dataBytesWritten int64
	var schemaBlobSeen = map[blob.Ref]bool{}
	var schemaBlobs []blob.Ref // to add after the main file

	baseFileName := pk.fr.FileName()
	if strings.Contains(baseFileName, "/") || strings.Contains(baseFileName, "\\") {
		return fmt.Errorf("File schema blob %v filename had a slash in it: %q", pk.fr.SchemaBlobRef(), baseFileName)
	}
	fh := &zip.FileHeader{
		Name:   baseFileName,
		Method: zip.Store, // uncompressed
	}
	fh.SetModTime(pk.fr.ModTime())
	fh.SetMode(0644)
	fw, err := zw.CreateHeader(fh)
	check(err)
	check(zw.Flush())
	dataStart := cw.n
	approxSize += zipPerEntryOverhead // for the first FileHeader w/ the data

	zipMax := pk.s.maxZipBlobSize()
	chunks := pk.chunksRemain
	chunkWholeHash := blob.NewHash()
	for len(chunks) > 0 {
		dr := chunks[0] // the next chunk to maybe write

		if trunc.Valid() && trunc == dr {
			if approxSize == 0 {
				return errors.New("first blob is too large to pack, once you add the zip overhead")
			}
			break
		}

		schemaBlobsSave := schemaBlobs
		for _, parent := range pk.schemaParent[dr] {
			if !schemaBlobSeen[parent] {
				schemaBlobSeen[parent] = true
				schemaBlobs = append(schemaBlobs, parent)
				approxSize += int(pk.schemaBlob[parent].Size()) + zipPerEntryOverhead
			}
		}

		thisSize := pk.dataSize[dr]
		approxSize += int(thisSize)
		if approxSize+mf.approxSerializedSize() > zipMax {
			if fn := testHookStopBeforeOverflowing; fn != nil {
				fn()
			}
			schemaBlobs = schemaBlobsSave // restore it
			break
		}

		// Copy the data to the zip.
		rc, size, err := pk.s.Fetch(dr)
		check(err)
		if size != thisSize {
			rc.Close()
			return errors.New("unexpected size")
		}
		if n, err := io.Copy(io.MultiWriter(fw, chunkWholeHash), rc); err != nil || n != int64(size) {
			rc.Close()
			return fmt.Errorf("copy to zip = %v, %v; want %v bytes", n, err, size)
		}
		rc.Close()

		dataRefsWritten = append(dataRefsWritten, dr)
		dataBytesWritten += int64(size)
		chunks = chunks[1:]
	}
	mf.DataBlobsOrigin = blob.RefFromHash(chunkWholeHash)

	// zipBlobs is where a schema or data blob is relative to the beginning
	// of the zip file.
	var zipBlobs []BlobAndPos

	var dataOffset int64
	for _, br := range dataRefsWritten {
		size := pk.dataSize[br]
		mf.DataBlobs = append(mf.DataBlobs, BlobAndPos{blob.SizedRef{Ref: br, Size: size}, dataOffset})

		zipBlobs = append(zipBlobs, BlobAndPos{blob.SizedRef{Ref: br, Size: size}, dataStart + dataOffset})
		dataOffset += int64(size)
	}

	for _, br := range schemaBlobs {
		fw, err := zw.CreateHeader(&zip.FileHeader{
			Name:   "camlistore/" + br.String() + ".json",
			Method: zip.Store, // uncompressed
		})
		check(err)
		check(zw.Flush())
		b := pk.schemaBlob[br]
		zipBlobs = append(zipBlobs, BlobAndPos{blob.SizedRef{Ref: br, Size: b.Size()}, cw.n})
		rc := b.Open()
		n, err := io.Copy(fw, rc)
		rc.Close()
		check(err)
		if n != int64(b.Size()) {
			return fmt.Errorf("failed to write all of schema blob %v: %d bytes, not wanted %d", br, n, b.Size())
		}
	}

	// Manifest file
	fw, err = zw.Create(zipManifestPath)
	check(err)
	enc, err := json.MarshalIndent(mf, "", "  ")
	check(err)
	_, err = fw.Write(enc)
	check(err)
	err = zw.Close()
	check(err)

	if zbuf.Len() > zipMax {
		// We guessed wrong. Back up. Find out how many blobs we went over.
		overage := zbuf.Len() - zipMax
		for i := len(dataRefsWritten) - 1; i >= 0; i-- {
			dr := dataRefsWritten[i]
			if overage <= 0 {
				return needsTruncatedAfterError{dr}
			}
			overage -= int(pk.dataSize[dr])
		}
		return errors.New("file is unpackable; first blob is too big to fit")
	}

	zipRef := blob.SHA1FromBytes(zbuf.Bytes())
	zipSB, err := blobserver.ReceiveNoHash(pk.s.large, zipRef, bytes.NewReader(zbuf.Bytes()))
	if err != nil {
		return err
	}

	bm := pk.s.meta.BeginBatch()
	bm.Set(fmt.Sprintf("%s%s:%d", wholeMetaPrefix, pk.wholeRef, len(pk.zips)),
		fmt.Sprintf("%s %d %d %d",
			zipRef,
			dataStart,
			pk.wholeBytesWritten,
			dataBytesWritten))

	pk.wholeBytesWritten += dataBytesWritten
	pk.zips = append(pk.zips, writtenZip{
		SizedRef: zipSB,
		dataRefs: dataRefsWritten,
	})

	for _, zb := range zipBlobs {
		bm.Set(blobMetaPrefix+zb.Ref.String(), fmt.Sprintf("%d %v %d", zb.Size, zipRef, zb.Offset))
	}
	if err := pk.s.meta.CommitBatch(bm); err != nil {
		return err
	}

	// Delete from small
	if !pk.s.skipDelete {
		toDelete := make([]blob.Ref, 0, len(dataRefsWritten)+len(schemaBlobs))
		toDelete = append(toDelete, dataRefsWritten...)
		toDelete = append(toDelete, schemaBlobs...)
		if err := pk.s.small.RemoveBlobs(toDelete); err != nil {
			// Can't really do anything about it and doesn't really matter, so
			// just log for now.
			pk.s.Logf("Error removing blobs from %s: %v", pk.s.small, err)
		}
	}

	// On success, consume the chunks we wrote from pk.chunksRemain.
	pk.chunksRemain = pk.chunksRemain[len(dataRefsWritten):]
	return nil
}
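writeAZip leans on a countWriter helper (not shown on this page) to learn byte offsets inside the zip as it writes. A minimal definition consistent with how cw.n is used above (a sketch; the real one may differ):

// countWriter forwards writes to w and tracks how many bytes have passed through.
type countWriter struct {
	w io.Writer
	n int64
}

func (cw *countWriter) Write(p []byte) (int, error) {
	n, err := cw.w.Write(p)
	cw.n += int64(n)
	return n, err
}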
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	//    - 1st chunk is 6 MB. ("blobA")
	//    - 2nd chunk is 6 MB. ("blobB")
	//    - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	//      which size causes the packer to write two zip files.

	// During the test we set zip overhead boundaries to 0, to
	// force the test into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload first two chunks
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload the third chunk (the variable-sized one)
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}
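The binary search above is sort.Search from the standard library: given a predicate that flips from false to true at some point, it returns the smallest value in [0, n) where the predicate is true, or n if it never is, which is exactly what the smallestC == maxC check detects. A standalone illustration:

package main

import (
	"fmt"
	"sort"
)

func main() {
	// Smallest n in [0, 100) with n*n >= 300.
	n := sort.Search(100, func(n int) bool { return n*n >= 300 })
	fmt.Println(n) // 18 (17*17 = 289 < 300, 18*18 = 324 >= 300)
}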
func TestReindex(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	type file struct {
		size     int64
		name     string
		contents []byte
	}
	files := []file{
		{17 << 20, "foo.dat", randBytesSrc(17<<20, 42)},
		{10 << 20, "bar.dat", randBytesSrc(10<<20, 43)},
		{5 << 20, "baz.dat", randBytesSrc(5<<20, 44)},
	}

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			for _, f := range files {
				if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil {
					return err
				}
			}
			return nil
		},
		wantNumLargeBlobs(4),
		wantNumSmallBlobs(0),
	)

	// back up the meta that is supposed to be lost/erased.
	// pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called.
	meta := pt.sto.meta

	// and build new meta index
	if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	}); err != nil {
		t.Fatal(err)
	}

	validBlobKey := func(key, value string) error {
		if !strings.HasPrefix(key, "b:") {
			return errors.New("not a blob meta key")
		}
		wantRef, ok := blob.Parse(key[2:])
		if !ok {
			return errors.New("bogus blobref in key")
		}
		m, err := parseMetaRow([]byte(value))
		if err != nil {
			return err
		}
		rc, err := pt.large.SubFetch(m.largeRef, int64(m.largeOff), int64(m.size))
		if err != nil {
			return err
		}
		defer rc.Close()
		h := wantRef.Hash()
		n, err := io.Copy(h, rc)
		if err != nil {
			return err
		}

		if !wantRef.HashMatches(h) {
			return errors.New("content doesn't match")
		}
		if n != int64(m.size) {
			return errors.New("size doesn't match")
		}
		return nil
	}

	// check that new meta is identical to "lost" one
	newRows := 0
	if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error {
		oldValue, err := meta.Get(key)
		if err != nil {
			t.Fatalf("Could not get value for %v in old meta: %v", key, err)
		}
		newRows++
		// Exact match is fine.
		if oldValue == newValue {
			return nil
		}
		// If it differs, it should at least be correct. (blob metadata
		// can now point to different packed zips, depending on sorting)
		err = validBlobKey(key, newValue)
		if err == nil {
			return nil
		}
		t.Errorf("Reindexing error: for key %v: %v\n got: %q\nwant: %q", key, err, newValue, oldValue)
		return nil // keep enumerating, regardless of errors
	}); err != nil {
		t.Fatal(err)
	}

	// make sure they have the same number of entries too, to be sure that the reindexing
	// did not miss entries that the old meta had.
	oldRows := countSortedRows(t, meta)
	if oldRows != newRows {
		t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows)
	}

	// And verify we can read one of the files back out.
	hash := blob.NewHash()
	hash.Write(files[0].contents)
	pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size)
}
func TestPackLarge(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}
	const fileSize = 17 << 20 // more than 16 MB, so more than one zip
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	hash := blob.NewHash()
	hash.Write(fileContents)
	wholeRef := blob.RefFromHash(hash)

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(2),
		wantNumSmallBlobs(0),
	)

	// Gather the "w:*" meta rows we wrote.
	got := map[string]string{}
	if err := sorted.Foreach(pt.sto.meta, func(key, value string) error {
		if strings.HasPrefix(key, "b:") {
			return nil
		}
		got[key] = value
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Verify the two zips are correctly described.

	// There should be one row saying that we have two zips, and
	// that the overall file is 17MB:
	keyBase := "w:" + wholeRef.String()
	if g, w := got[keyBase], "17825792 2"; g != w {
		t.Fatalf("meta row for key %q = %q; want %q", keyBase, g, w)
	}

	// ... (and a little helper) ...
	parseMeta := func(n int) (zipOff, dataOff, dataLen int64) {
		key := keyBase + ":" + strconv.Itoa(n)
		v := got[key]
		f := strings.Fields(v)
		if len(f) != 4 {
			t.Fatalf("meta for key %q = %q; expected 4 space-separated fields", key, v)
		}
		i64 := func(n int) int64 {
			i, err := strconv.ParseInt(f[n], 10, 64)
			if err != nil {
				t.Fatalf("error parsing int64 %q in field index %d of meta key %q (value %q): %v", f[n], n, key, v, err)
			}
			return i
		}
		zipOff, dataOff, dataLen = i64(1), i64(2), i64(3)
		return
	}

	// And then verify that we have the two "w:<wholeref>:0" and
	// "w:<wholeref>:1" rows and that they're consistent.
	z0, d0, l0 := parseMeta(0)
	z1, d1, l1 := parseMeta(1)
	if z0 != z1 {
		t.Errorf("expected zip offset in zip0 and zip1 to match. got %d and %d", z0, z0)
	}
	if d0 != 0 {
		t.Errorf("zip0's data offset = %d; want 0", d0)
	}
	if d1 != l0 {
		t.Errorf("zip1 data offset %d != zip0 data length %d", d1, l0)
	}
	if d1+l1 != fileSize {
		t.Errorf("zip1's offset %d + length %d = %d; want %d (fileSize)", d1, l1, d1+l1, fileSize)
	}

	// And verify we can read it back out.
	pt.testOpenWholeRef(t, wholeRef, fileSize)
}
func TestReindex(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	type file struct {
		size     int64
		name     string
		contents []byte
	}
	files := []file{
		{17 << 20, "foo.dat", randBytesSrc(17<<20, 42)},
		{10 << 20, "bar.dat", randBytesSrc(10<<20, 43)},
		{5 << 20, "baz.dat", randBytesSrc(5<<20, 44)},
	}

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			for _, f := range files {
				if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil {
					return err
				}
			}
			return nil
		},
		wantNumLargeBlobs(4),
		wantNumSmallBlobs(0),
	)

	// back up the meta that is supposed to be lost/erased.
	// pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called.
	meta := pt.sto.meta

	// and build new meta index
	if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	}); err != nil {
		t.Fatal(err)
	}

	// check that new meta is identical to "lost" one
	newRows := 0
	if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error {
		oldValue, err := meta.Get(key)
		if err != nil {
			t.Fatalf("Could not get value for %v in old meta: %v", key, err)
		}
		if oldValue != newValue {
			t.Fatalf("Reindexing error: for key %v, got %v, want %v", key, newValue, oldValue)
		}
		newRows++
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// make sure they have the same number of entries too, to be sure that the reindexing
	// did not miss entries that the old meta had.
	oldRows := countSortedRows(t, meta)
	if oldRows != newRows {
		t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows)
	}

	// And verify we can read one of the files back out.
	hash := blob.NewHash()
	hash.Write(files[0].contents)
	pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size)
}