Exemple #1
0
// "wholetofile|sha1-17b53c7c3e664d3613dfdce50ef1f2a09e8f04b5|sha1-fb88f3eab3acfcf3cfc8cd77ae4366f6f975d227" -> "1"
//
// mergeWholeToFileRow decodes a "wholetofile|<wholeRef>|<fileRef>" key and
// records the fileRef -> wholeRef association in c.fileWholeRef. The value v
// is not interpreted; it only appears in error messages.
func (c *Corpus) mergeWholeToFileRow(k, v []byte) error {
	refs := k[len("wholetofile|"):]
	sep := bytes.IndexByte(refs, '|')
	if sep < 0 {
		return fmt.Errorf("bogus row %q = %q", k, v)
	}
	whole, wholeOK := blob.ParseBytes(refs[:sep])
	file, fileOK := blob.ParseBytes(refs[sep+1:])
	if !(wholeOK && fileOK) {
		return fmt.Errorf("bogus row %q = %q", k, v)
	}
	c.fileWholeRef[file] = whole
	return nil
}
Exemple #2
0
// EnumerateBlobs iterates over ns.inventory starting from the key after and
// sends at most limit entries to dest as blob.SizedRef values.
//
// If after is non-empty and the first row's key equals it, that row is
// skipped. Rows whose key is not a valid blobref or whose value is not a
// base-10 32-bit unsigned size are logged and skipped; they do not count
// against limit.
//
// dest is always closed before EnumerateBlobs returns. If ctx is done while a
// send is pending, the iterator is closed and context.ErrCanceled is
// returned. Otherwise the error from closing the iterator, if any, is
// returned.
func (ns *nsto) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	done := ctx.Done()

	it := ns.inventory.Find(after, "")
	first := true
	for limit > 0 && it.Next() {
		if first {
			first = false
			if after != "" && it.Key() == after {
				continue
			}
		}
		br, ok := blob.ParseBytes(it.KeyBytes())
		size, err := strutil.ParseUintBytes(it.ValueBytes(), 10, 32)
		if !ok || err != nil {
			log.Printf("Bogus namespace key %q / value %q", it.Key(), it.Value())
			continue
		}
		select {
		case dest <- blob.SizedRef{br, uint32(size)}:
		case <-done:
			// Release the iterator before bailing out. Its Close error is
			// intentionally dropped: the cancellation is what we report.
			it.Close()
			return context.ErrCanceled
		}
		limit--
	}
	if err := it.Close(); err != nil {
		return err
	}
	return nil
}
Exemple #3
0
// EnumerateBlobs scans the meta index from blobMetaPrefix+after up to
// blobMetaPrefixLimit and sends the blobs found to dest, stopping after limit
// rows have been processed. A leading row equal to after is skipped.
//
// dest is closed on return. The iterator is closed on return as well, and its
// Close error is reported when no earlier error occurred. An unparsable key
// or value aborts the enumeration with an error, as does ctx being done.
func (s enumerator) EnumerateBlobs(ctx context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)
	it := s.meta.Find(blobMetaPrefix+after, blobMetaPrefixLimit)
	defer func() {
		if cerr := it.Close(); err == nil {
			err = cerr
		}
	}()
	skip := []byte(after)
	seen := 0
	for seen < limit && it.Next() {
		refBytes := it.KeyBytes()[len(blobMetaPrefix):]
		if seen == 0 && bytes.Equal(refBytes, skip) {
			// Resume strictly past the "after" marker.
			continue
		}
		seen++
		ref, valid := blob.ParseBytes(refBytes)
		if !valid {
			return fmt.Errorf("unknown key %q in meta index", it.Key())
		}
		sz, perr := parseMetaRowSizeOnly(it.ValueBytes())
		if perr != nil {
			return perr
		}
		select {
		case dest <- blob.SizedRef{Ref: ref, Size: sz}:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}
Exemple #4
0
// parseMetaRow decodes a meta row value of the form:
//
//	"<size_u32> <big-blobref> <big-offset>"
//
// Both the size and the offset must be base-10 unsigned integers that fit in
// 32 bits, and exactly two single spaces must separate the three fields. On
// success the returned meta has exists set to true; on any parse failure a
// zero meta and a descriptive error are returned.
func parseMetaRow(v []byte) (m meta, err error) {
	whole := v
	first := bytes.IndexByte(v, ' ')
	if first < 1 || first == len(v)-1 {
		return meta{}, fmt.Errorf("invalid metarow %q", v)
	}
	size, err := strutil.ParseUintBytes(v[:first], 10, 32)
	if err != nil {
		return meta{}, fmt.Errorf("invalid metarow size %q", v)
	}

	// What is left is "<big-blobref> <big-offset>".
	rest := v[first+1:]
	if bytes.Count(rest, singleSpace) != 1 {
		return meta{}, fmt.Errorf("invalid metarow %q: wrong number of spaces", whole)
	}
	second := bytes.IndexByte(rest, ' ')
	refBytes, offBytes := rest[:second], rest[second+1:]

	largeRef, ok := blob.ParseBytes(refBytes)
	if !ok {
		return meta{}, fmt.Errorf("invalid metarow %q: bad blobref %q", whole, refBytes)
	}
	off, err := strutil.ParseUintBytes(offBytes, 10, 32)
	if err != nil {
		return meta{}, fmt.Errorf("invalid metarow %q: bad offset: %v", whole, err)
	}
	return meta{
		exists:   true,
		size:     uint32(size),
		largeRef: largeRef,
		largeOff: uint32(off),
	}, nil
}
Exemple #5
0
// mergeFileInfoRow merges one fileinfo index row into the corpus.
//
// The key has the form "fileinfo|<blobref>" and the value the form
// "<size>|<filename>|<mimetype>", for example:
//
//	"fileinfo|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" -> "5|some-stuff.txt|"
//
// The size is parsed as a base-10 int64; filename and MIME type are passed
// through urld before being stored on the blob's FileInfo. An error is
// returned if the key has no '|', the blobref is invalid, the value does not
// split into exactly three fields, or the size is not an integer.
func (c *Corpus) mergeFileInfoRow(k, v []byte) error {
	pipe := bytes.IndexByte(k, '|')
	if pipe < 0 {
		return fmt.Errorf("unexpected fileinfo key %q", k)
	}
	br, ok := blob.ParseBytes(k[pipe+1:])
	if !ok {
		return fmt.Errorf("unexpected fileinfo blobref in key %q", k)
	}

	// TODO: could at least use strutil.ParseUintBytes to not stringify and retain
	// the length bytes of v.
	c.ss = strutil.AppendSplitN(c.ss[:0], string(v), "|", 3)
	if len(c.ss) != 3 {
		// Report the value (not the key): it is the value that is malformed.
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	size, err := strconv.ParseInt(c.ss[0], 10, 64)
	if err != nil {
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	c.mutateFileInfo(br, func(fi *camtypes.FileInfo) {
		fi.Size = size
		fi.FileName = c.str(urld(c.ss[1]))
		fi.MIMEType = c.str(urld(c.ss[2]))
	})
	return nil
}
Exemple #6
0
// mergeSignerKeyIdRow records, for the blobref that follows the
// "signerkeyid:" prefix in k, the key id carried in v. It returns an error
// when that blobref cannot be parsed.
func (c *Corpus) mergeSignerKeyIdRow(k, v []byte) error {
	const prefix = "signerkeyid:"
	if signer, valid := blob.ParseBytes(k[len(prefix):]); valid {
		c.keyId[signer] = string(v)
		return nil
	}
	return fmt.Errorf("bogus signerid row: %q -> %q", k, v)
}
Exemple #7
0
// mergeImageSizeRow stores the image info decoded from v under the blobref
// that follows the "imagesize|" prefix in k. The blobref is passed through
// c.br before being used as the map key. An error is returned if either the
// key's blobref or the value fails to parse.
func (c *Corpus) mergeImageSizeRow(k, v []byte) error {
	ref, refOK := blob.ParseBytes(k[len("imagesize|"):])
	info, infoOK := kvImageInfo(v)
	if !(refOK && infoOK) {
		return fmt.Errorf("bogus row %q = %q", k, v)
	}
	c.imageInfo[c.br(ref)] = info
	return nil
}
Exemple #8
0
// initNeededMaps initializes x.needs and x.neededBy on start-up.
//
// It resets x.deletes to a fresh deletion cache, then walks every row under
// the keyMissing prefix. Each key is expected to look like
// "missing|<have>|<missing>"; both blobrefs are parsed and handed to
// x.noteNeededMemory. A malformed key stops the scan with an error. The
// iterator is closed through closeIterator, which is given a pointer to the
// returned error.
func (x *Index) initNeededMaps() (err error) {
	x.deletes = newDeletionCache()
	it := x.queryPrefix(keyMissing)
	defer closeIterator(it, &err)

	const prefix = "missing|"
	for it.Next() {
		k := it.KeyBytes()
		refs := k[len(prefix):]
		sep := bytes.IndexByte(refs, '|')
		if sep < 0 {
			return fmt.Errorf("Bogus missing key %q", k)
		}
		haveRef, haveOK := blob.ParseBytes(refs[:sep])
		missingRef, missingOK := blob.ParseBytes(refs[sep+1:])
		if !(haveOK && missingOK) {
			return fmt.Errorf("Bogus missing key %q", k)
		}
		x.noteNeededMemory(haveRef, missingRef)
	}
	return nil
}
Exemple #9
0
// "exifgps|sha1-17b53c7c3e664d3613dfdce50ef1f2a09e8f04b5" -> "-122.39897155555556|37.61952208333334"
//
// mergeEXIFGPSRow parses the blobref that follows the "exifgps|" prefix in k
// and the "<latitude>|<longitude>" pair in v, then stores the coordinates in
// c.gps under that blobref. An error is returned if the blobref is invalid,
// v has no '|', or either coordinate is not a valid 64-bit float.
func (c *Corpus) mergeEXIFGPSRow(k, v []byte) error {
	ref, refOK := blob.ParseBytes(k[len("exifgps|"):])
	sep := bytes.IndexByte(v, '|')
	if !refOK || sep < 0 {
		return fmt.Errorf("bogus row %q = %q", k, v)
	}
	latitude, latErr := strconv.ParseFloat(string(v[:sep]), 64)
	longitude, longErr := strconv.ParseFloat(string(v[sep+1:]), 64)
	if latErr != nil || longErr != nil {
		return fmt.Errorf("bogus row %q = %q", k, v)
	}
	c.gps[ref] = latLong{latitude, longitude}
	return nil
}
Exemple #10
0
// ParseFields splits v on single spaces into len(dst) fields and parses each
// field into the corresponding destination pointer.
//
// Every field but the last ends at the next space; the last field receives
// whatever remains of v. Supported destination types are *blob.Ref, *uint32,
// *uint64 and *int64. Numbers are parsed as base-10 unsigned integers; an
// *int64 destination rejects values that would be negative once converted.
//
// An error is returned if a separating space is missing, a field fails to
// parse, or a destination has an unsupported type. Destinations preceding the
// failing field may already have been written.
func ParseFields(v []byte, dst ...interface{}) error {
	for i, dv := range dst {
		thisv := v
		if i < len(dst)-1 {
			sp := bytes.IndexByte(v, ' ')
			if sp == -1 {
				return fmt.Errorf("missing space following field index %d", i)
			}
			thisv = v[:sp]
			v = v[sp+1:]
		}
		switch dv := dv.(type) {
		case *blob.Ref:
			br, ok := blob.ParseBytes(thisv)
			if !ok {
				// Previously this failure was silently ignored, leaving the
				// caller with a zero blob.Ref and a nil error.
				return fmt.Errorf("conv: invalid blobref %q for field index %d", thisv, i)
			}
			*dv = br
		case *uint32:
			n, err := strutil.ParseUintBytes(thisv, 10, 32)
			if err != nil {
				return err
			}
			*dv = uint32(n)
		case *uint64:
			n, err := strutil.ParseUintBytes(thisv, 10, 64)
			if err != nil {
				return err
			}
			*dv = n
		case *int64:
			n, err := strutil.ParseUintBytes(thisv, 10, 64)
			if err != nil {
				return err
			}
			// Values above math.MaxInt64 wrap to negative when converted.
			if int64(n) < 0 {
				return errors.New("conv: negative numbers not accepted with int64 dest type")
			}
			*dv = int64(n)
		default:
			return fmt.Errorf("conv: unsupported target pointer type %T", dv)
		}
	}
	return nil
}
Exemple #11
0
// mergeFileTimesRow merges one filetimes index row into the corpus.
//
// The key has the form "filetimes|<blobref>" and the value is a URL-encoded,
// comma-separated list of times, for example:
//
//	"filetimes|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" -> "1970-01-01T00%3A02%3A03Z"
//
// An empty value is a no-op. Otherwise the value is decoded with urld, split
// on "," and passed to updateFileInfoTimes for the blob's FileInfo. An error
// is returned if the key has no '|' or its blobref is invalid.
func (c *Corpus) mergeFileTimesRow(k, v []byte) error {
	if len(v) == 0 {
		return nil
	}
	pipe := bytes.IndexByte(k, '|')
	if pipe < 0 {
		// This is the filetimes merger; the message used to say "fileinfo".
		return fmt.Errorf("unexpected filetimes key %q", k)
	}
	br, ok := blob.ParseBytes(k[pipe+1:])
	if !ok {
		return fmt.Errorf("unexpected filetimes blobref in key %q", k)
	}
	c.ss = strutil.AppendSplitN(c.ss[:0], urld(string(v)), ",", -1)
	times := c.ss
	c.mutateFileInfo(br, func(fi *camtypes.FileInfo) {
		updateFileInfoTimes(fi, times)
	})
	return nil
}
Exemple #12
0
// kvBlobMeta_bytes decodes a meta index row into a camtypes.BlobMeta.
//
// k is expected to start with "meta:" followed by a blobref, and v to have
// the form "<size>|<mimetype>", where size is a base-10 unsigned integer that
// fits in 32 bits. The part of v after the '|' is mapped to a camli type by
// camliTypeFromMIME_bytes.
//
// ok is false, and bm is the zero value, if the blobref is invalid, v has no
// '|', or the size cannot be parsed.
func kvBlobMeta_bytes(k, v []byte) (bm camtypes.BlobMeta, ok bool) {
	ref := k[len("meta:"):]
	// Note: this := reuses the named result ok, so every failure path below
	// must return false explicitly rather than rely on a naked return, which
	// would report ok == true with a zero BlobMeta.
	br, ok := blob.ParseBytes(ref)
	if !ok {
		return camtypes.BlobMeta{}, false
	}
	pipe := bytes.IndexByte(v, '|')
	if pipe < 0 {
		return camtypes.BlobMeta{}, false
	}
	size, err := strutil.ParseUintBytes(v[:pipe], 10, 32)
	if err != nil {
		return camtypes.BlobMeta{}, false
	}
	return camtypes.BlobMeta{
		Ref:       br,
		Size:      uint32(size),
		CamliType: camliTypeFromMIME_bytes(v[pipe+1:]),
	}, true
}
Exemple #13
0
// mergeFileInfoRow merges one fileinfo index row into the corpus.
//
// The key has the form "fileinfo|<blobref>". The value has three or four
// '|'-separated fields, "<size>|<filename>|<mimetype>[|<wholeRef>]", e.g.:
//
//	"fileinfo|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" -> "5|some-stuff.txt|"
//
// filename, MIME type and wholeRef are decoded with urld. The wholeRef field
// may be absent or empty, in which case the FileInfo's WholeRef is set to the
// zero blob.Ref.
func (c *Corpus) mergeFileInfoRow(k, v []byte) error {
	sep := bytes.IndexByte(k, '|')
	if sep < 0 {
		return fmt.Errorf("unexpected fileinfo key %q", k)
	}
	fileRef, valid := blob.ParseBytes(k[sep+1:])
	if !valid {
		return fmt.Errorf("unexpected fileinfo blobref in key %q", k)
	}

	// TODO: could at least use strutil.ParseUintBytes to not stringify and retain
	// the length bytes of v.
	c.ss = strutil.AppendSplitN(c.ss[:0], string(v), "|", 4)
	switch len(c.ss) {
	case 3, 4:
		// Both the legacy and the wholeRef-carrying layouts are accepted.
	default:
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	size, err := strconv.ParseInt(c.ss[0], 10, 64)
	if err != nil {
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}

	var wholeRef blob.Ref
	// An empty fourth field occurs for special files such as symlinks.
	if len(c.ss) == 4 && c.ss[3] != "" {
		parsed, parsedOK := blob.Parse(urld(c.ss[3]))
		if !parsedOK {
			return fmt.Errorf("invalid wholeRef blobref in value %q for fileinfo key %q", v, k)
		}
		wholeRef = parsed
	}

	apply := func(fi *camtypes.FileInfo) {
		fi.Size = size
		fi.FileName = c.str(urld(c.ss[1]))
		fi.MIMEType = c.str(urld(c.ss[2]))
		fi.WholeRef = wholeRef
	}
	c.mutateFileInfo(fileRef, apply)
	return nil
}
Exemple #14
0
// fixMissingWholeRef appends the wholeRef to all the keyFileInfo rows values. It should
// only be called to upgrade a version 4 index schema to version 5.
//
// It works in three phases:
//
//  1. Scan the keyWholeToFileRef rows ("wholetofile|wholeRef|fileRef") to build
//     a fileRef -> wholeRef map.
//  2. Scan the keyFileInfo rows and, for each one whose fileRef is in that map,
//     record a rewritten value that includes the wholeRef. Rows without a known
//     wholeRef are counted and logged; rows whose mimetype field already
//     contains a '|' are logged and left alone.
//  3. Commit all recorded mutations, plus the schema version bump to "5", in
//     a single batch.
//
// It panics if the current schema version is not 4 or the required version is
// not 5. Any malformed row aborts the upgrade with an error before anything is
// committed. The fetcher parameter is not used by this function.
func (x *Index) fixMissingWholeRef(fetcher blob.Fetcher) (err error) {
	// We did that check from the caller, but double-check again to prevent from misuse
	// of that function.
	if x.schemaVersion() != 4 || requiredSchemaVersion != 5 {
		panic("fixMissingWholeRef should only be used when upgrading from v4 to v5 of the index schema")
	}
	log.Println("index: fixing the missing wholeRef in the fileInfo rows...")
	defer func() {
		if err != nil {
			log.Printf("index: fixing the fileInfo rows failed: %v", err)
			return
		}
		log.Print("index: successfully fixed wholeRef in FileInfo rows.")
	}()

	// first build a reverted keyWholeToFileRef map, so we can get the wholeRef from the fileRef easily.
	fileRefToWholeRef := make(map[blob.Ref]blob.Ref)
	it := x.queryPrefix(keyWholeToFileRef)
	var keyA [3]string
	for it.Next() {
		keyPart := strutil.AppendSplitN(keyA[:0], it.Key(), "|", 3)
		if len(keyPart) != 3 {
			// Build the error while the iterator is still open, then release
			// the iterator so it is not leaked on this early return.
			err := fmt.Errorf("bogus keyWholeToFileRef key: got %q, wanted \"wholetofile|wholeRef|fileRef\"", it.Key())
			it.Close()
			return err
		}
		wholeRef, ok1 := blob.Parse(keyPart[1])
		fileRef, ok2 := blob.Parse(keyPart[2])
		if !ok1 || !ok2 {
			err := fmt.Errorf("bogus part in keyWholeToFileRef key: %q", it.Key())
			it.Close()
			return err
		}
		fileRefToWholeRef[fileRef] = wholeRef
	}
	if err := it.Close(); err != nil {
		return err
	}

	var fixedEntries, missedEntries int
	// The ticker only paces the progress log line below.
	t := time.NewTicker(5 * time.Second)
	defer t.Stop()
	// We record the mutations and set them all after the iteration because of the sqlite locking:
	// since BeginBatch takes a lock, and Find too, we would deadlock at queryPrefix if we
	// started a batch mutation before.
	mutations := make(map[string]string)
	keyPrefix := keyFileInfo.name + "|"
	it = x.queryPrefix(keyFileInfo)
	defer it.Close()
	var valA [3]string
	for it.Next() {
		// Non-blocking check: log progress at most once per tick.
		select {
		case <-t.C:
			log.Printf("Recorded %d missing wholeRef that we'll try to fix, and %d that we can't fix.", fixedEntries, missedEntries)
		default:
		}
		br, ok := blob.ParseBytes(it.KeyBytes()[len(keyPrefix):])
		if !ok {
			return fmt.Errorf("invalid blobRef %q", it.KeyBytes()[len(keyPrefix):])
		}
		wholeRef, ok := fileRefToWholeRef[br]
		if !ok {
			missedEntries++
			log.Printf("WARNING: wholeRef for %v not found in index. You should probably rebuild the whole index.", br)
			continue
		}
		valPart := strutil.AppendSplitN(valA[:0], it.Value(), "|", 3)
		// The old format we're fixing should be: size|filename|mimetype
		if len(valPart) != 3 {
			return fmt.Errorf("bogus keyFileInfo value: got %q, wanted \"size|filename|mimetype\"", it.Value())
		}
		size_s, filename, mimetype := valPart[0], valPart[1], urld(valPart[2])
		if strings.Contains(mimetype, "|") {
			// I think this can only happen for people migrating from a commit at least as recent as
			// 8229c1985079681a652cb65551b4e80a10d135aa, when wholeRef was introduced to keyFileInfo
			// but there was no migration code yet.
			// For the "production" migrations between 0.8 and 0.9, the index should not have any wholeRef
			// in the keyFileInfo entries. So if something goes wrong and is somehow linked to that happening,
			// I'd like to know about it, hence the logging.
			log.Printf("%v: %v already has a wholeRef, not fixing it", it.Key(), it.Value())
			continue
		}
		size, err := strconv.Atoi(size_s)
		if err != nil {
			return fmt.Errorf("bogus size in keyFileInfo value %v: %v", it.Value(), err)
		}
		mutations[keyFileInfo.Key(br)] = keyFileInfo.Val(size, filename, mimetype, wholeRef)
		fixedEntries++
	}
	if err := it.Close(); err != nil {
		return err
	}
	log.Printf("Starting to commit the missing wholeRef fixes (%d entries) now, this can take a while.", fixedEntries)
	bm := x.s.BeginBatch()
	for k, v := range mutations {
		bm.Set(k, v)
	}
	// The schema version bump goes in the same batch as the row rewrites.
	bm.Set(keySchemaVersion.name, "5")
	if err := x.s.CommitBatch(bm); err != nil {
		return err
	}
	if missedEntries > 0 {
		log.Printf("Some missing wholeRef entries were not fixed (%d), you should do a full reindex.", missedEntries)
	}
	return nil
}
Exemple #15
0
// StreamBlobs implements the blobserver.StreamBlobs interface.
//
// It reads blobs sequentially from the pack files, starting at the pack file
// number and offset decoded from contToken by parseContToken, and sends each
// one to dest together with a continuation token of the form
// "<fileNum> <offset>", where offset is the position of that blob's header
// within its pack. When the end of a pack is reached, streaming continues at
// offset 0 of the next pack number; it ends successfully when that next pack
// file does not exist. Entries whose digest matches deletedBlobRef are
// skipped.
//
// dest is always closed before returning. If ctx is done while a send is
// pending, context.ErrCanceled is returned. An invalid contToken, an I/O
// error, a header that readHeader rejects, or a digest that is not a valid
// blobref also ends the stream with an error.
func (s *storage) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	defer close(dest)

	fileNum, offset, err := parseContToken(contToken)
	if err != nil {
		return errors.New("diskpacked: invalid continuation token")
	}
	debug.Printf("Continuing blob streaming from pack %s, offset %d",
		s.filename(fileNum), offset)

	fd, err := os.Open(s.filename(fileNum))
	if err != nil {
		return err
	}
	// fd will change over time; Close whichever is current when we exit.
	defer func() {
		if fd != nil { // may be nil on os.Open error below
			fd.Close()
		}
	}()

	// ContToken always refers to the exact next place we will read from.
	// Note that seeking past the end is legal on Unix and for io.Seeker,
	// but that will just result in a mostly harmless EOF.
	//
	// TODO: probably be stricter here and don't allow seek past
	// the end, since we know the size of closed files and the
	// size of the file diskpacked currently still writing.
	_, err = fd.Seek(offset, os.SEEK_SET)
	if err != nil {
		return err
	}

	const ioBufSize = 256 * 1024

	// We'll use bufio to avoid read system call overhead.
	r := bufio.NewReaderSize(fd, ioBufSize)

	for {
		// Are we at the EOF of this pack? Peek does not consume any input,
		// so offset stays in sync with the reader's position.
		if _, err := r.Peek(1); err != nil {
			if err != io.EOF {
				return err
			}
			// EOF case; continue to the next pack, if any.
			fileNum += 1
			offset = 0
			fd.Close() // Close the previous pack
			fd, err = os.Open(s.filename(fileNum))
			if os.IsNotExist(err) {
				// We reached the end.
				return nil
			} else if err != nil {
				return err
			}
			// Reuse the same buffered reader for the newly opened pack.
			r.Reset(fd)
			continue
		}

		thisOffset := offset // of current blob's header
		consumed, digest, size, err := readHeader(r)
		if err != nil {
			return err
		}

		// Advance past the header, then past the payload further below.
		offset += int64(consumed)
		if deletedBlobRef.Match(digest) {
			// Skip over deletion padding
			if _, err := io.CopyN(ioutil.Discard, r, int64(size)); err != nil {
				return err
			}
			offset += int64(size)
			continue
		}

		// Finally, read and send the blob.

		// TODO: remove this allocation per blob. We can make one instead
		// outside of the loop, guarded by a mutex, and re-use it, only to
		// lock the mutex and clone it if somebody actually calls Open
		// on the *blob.Blob. Otherwise callers just scanning all the blobs
		// to see if they have everything incur lots of garbage if they
		// don't open any blobs.
		data := make([]byte, size)
		if _, err := io.ReadFull(r, data); err != nil {
			return err
		}
		offset += int64(size)
		ref, ok := blob.ParseBytes(digest)
		if !ok {
			return fmt.Errorf("diskpacked: Invalid blobref %q", digest)
		}
		// Each call hands out an independent reader over the in-memory data.
		newReader := func() types.ReadSeekCloser {
			return newReadSeekNopCloser(bytes.NewReader(data))
		}
		// NOTE: from here to the end of this iteration the local variable
		// shadows the blob package.
		blob := blob.NewBlob(ref, size, newReader)
		select {
		case dest <- blobserver.BlobAndToken{
			Blob:  blob,
			Token: fmt.Sprintf("%d %d", fileNum, thisOffset),
		}:
			// Nothing.
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
}