// "wholetofile|sha1-17b53c7c3e664d3613dfdce50ef1f2a09e8f04b5|sha1-fb88f3eab3acfcf3cfc8cd77ae4366f6f975d227" -> "1" func (c *Corpus) mergeWholeToFileRow(k, v []byte) error { pair := k[len("wholetofile|"):] pipe := bytes.IndexByte(pair, '|') if pipe < 0 { return fmt.Errorf("bogus row %q = %q", k, v) } wholeRef, ok1 := blob.ParseBytes(pair[:pipe]) fileRef, ok2 := blob.ParseBytes(pair[pipe+1:]) if !ok1 || !ok2 { return fmt.Errorf("bogus row %q = %q", k, v) } c.fileWholeRef[fileRef] = wholeRef return nil }
func (ns *nsto) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
	defer close(dest)
	done := ctx.Done()

	it := ns.inventory.Find(after, "")
	first := true
	for limit > 0 && it.Next() {
		if first {
			first = false
			if after != "" && it.Key() == after {
				continue
			}
		}
		br, ok := blob.ParseBytes(it.KeyBytes())
		size, err := strutil.ParseUintBytes(it.ValueBytes(), 10, 32)
		if !ok || err != nil {
			log.Printf("Bogus namespace key %q / value %q", it.Key(), it.Value())
			continue
		}
		select {
		case dest <- blob.SizedRef{br, uint32(size)}:
		case <-done:
			return context.ErrCanceled
		}
		limit--
	}
	if err := it.Close(); err != nil {
		return err
	}
	return nil
}
func (s enumerator) EnumerateBlobs(ctx context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
	defer close(dest)
	t := s.meta.Find(blobMetaPrefix+after, blobMetaPrefixLimit)
	defer func() {
		closeErr := t.Close()
		if err == nil {
			err = closeErr
		}
	}()
	n := 0
	afterb := []byte(after)
	for n < limit && t.Next() {
		key := t.KeyBytes()[len(blobMetaPrefix):]
		if n == 0 && bytes.Equal(key, afterb) {
			continue
		}
		n++
		br, ok := blob.ParseBytes(key)
		if !ok {
			return fmt.Errorf("unknown key %q in meta index", t.Key())
		}
		size, err := parseMetaRowSizeOnly(t.ValueBytes())
		if err != nil {
			return err
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case dest <- blob.SizedRef{Ref: br, Size: size}:
		}
	}
	return nil
}
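// Illustrative sketch (assumption, not from the original source): how a caller
// typically consumes the EnumerateBlobs channel contract shown above. The
// blobEnumerator interface and enumerateAllExample function are declared here
// only for the example; the real code uses the blobserver package's interfaces.
// The callee closes dest when it is done, so the caller ranges over the channel
// and then collects the returned error.
type blobEnumerator interface {
	EnumerateBlobs(ctx context.Context, dest chan<- blob.SizedRef, after string, limit int) error
}

func enumerateAllExample(ctx context.Context, bs blobEnumerator) error {
	dest := make(chan blob.SizedRef, 16)
	errc := make(chan error, 1)
	go func() {
		// after == "" starts at the beginning; limit bounds this page of results.
		errc <- bs.EnumerateBlobs(ctx, dest, "", 1000)
	}()
	for sb := range dest {
		log.Printf("have %v (%d bytes)", sb.Ref, sb.Size)
	}
	return <-errc
}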
// parses:
// "<size_u32> <big-blobref> <big-offset>"
func parseMetaRow(v []byte) (m meta, err error) {
	row := v
	sp := bytes.IndexByte(v, ' ')
	if sp < 1 || sp == len(v)-1 {
		return meta{}, fmt.Errorf("invalid metarow %q", v)
	}
	m.exists = true
	size, err := strutil.ParseUintBytes(v[:sp], 10, 32)
	if err != nil {
		return meta{}, fmt.Errorf("invalid metarow size %q", v)
	}
	m.size = uint32(size)
	v = v[sp+1:]

	// remains: "<big-blobref> <big-offset>"
	if bytes.Count(v, singleSpace) != 1 {
		return meta{}, fmt.Errorf("invalid metarow %q: wrong number of spaces", row)
	}
	sp = bytes.IndexByte(v, ' ')
	largeRef, ok := blob.ParseBytes(v[:sp])
	if !ok {
		return meta{}, fmt.Errorf("invalid metarow %q: bad blobref %q", row, v[:sp])
	}
	m.largeRef = largeRef
	off, err := strutil.ParseUintBytes(v[sp+1:], 10, 32)
	if err != nil {
		return meta{}, fmt.Errorf("invalid metarow %q: bad offset: %v", row, err)
	}
	m.largeOff = uint32(off)
	return m, nil
}
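// Illustrative sketch (assumption, not from the original source): what a meta
// row value of the "<size_u32> <big-blobref> <big-offset>" form handled by
// parseMetaRow above looks like, decomposed with only the standard library.
// The sample blobref and numbers are made up and assumed well-formed.
func exampleMetaRowParse() {
	v := "5 sha1-0000000000000000000000000000000000000000 1234"
	parts := strings.SplitN(v, " ", 3)
	size, _ := strconv.ParseUint(parts[0], 10, 32) // 5: size of the small blob
	largeRef := parts[1]                           // blobref of the large blob containing it
	off, _ := strconv.ParseUint(parts[2], 10, 32)  // 1234: byte offset inside the large blob
	fmt.Println(size, largeRef, off)
}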
func (c *Corpus) mergeFileInfoRow(k, v []byte) error {
	// "fileinfo|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" "5|some-stuff.txt|"
	pipe := bytes.IndexByte(k, '|')
	if pipe < 0 {
		return fmt.Errorf("unexpected fileinfo key %q", k)
	}
	br, ok := blob.ParseBytes(k[pipe+1:])
	if !ok {
		return fmt.Errorf("unexpected fileinfo blobref in key %q", k)
	}
	// TODO: could at least use strutil.ParseUintBytes to not stringify and retain
	// the length bytes of v.
	c.ss = strutil.AppendSplitN(c.ss[:0], string(v), "|", 3)
	if len(c.ss) != 3 {
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	size, err := strconv.ParseInt(c.ss[0], 10, 64)
	if err != nil {
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	c.mutateFileInfo(br, func(fi *camtypes.FileInfo) {
		fi.Size = size
		fi.FileName = c.str(urld(c.ss[1]))
		fi.MIMEType = c.str(urld(c.ss[2]))
	})
	return nil
}
func (c *Corpus) mergeSignerKeyIdRow(k, v []byte) error {
	br, ok := blob.ParseBytes(k[len("signerkeyid:"):])
	if !ok {
		return fmt.Errorf("bogus signerid row: %q -> %q", k, v)
	}
	c.keyId[br] = string(v)
	return nil
}
func (c *Corpus) mergeImageSizeRow(k, v []byte) error {
	br, okk := blob.ParseBytes(k[len("imagesize|"):])
	ii, okv := kvImageInfo(v)
	if !okk || !okv {
		return fmt.Errorf("bogus row %q = %q", k, v)
	}
	br = c.br(br)
	c.imageInfo[br] = ii
	return nil
}
// initNeededMaps initializes x.needs and x.neededBy on start-up.
func (x *Index) initNeededMaps() (err error) {
	x.deletes = newDeletionCache()
	it := x.queryPrefix(keyMissing)
	defer closeIterator(it, &err)
	for it.Next() {
		key := it.KeyBytes()
		pair := key[len("missing|"):]
		pipe := bytes.IndexByte(pair, '|')
		if pipe < 0 {
			return fmt.Errorf("bogus missing key %q", key)
		}
		have, ok1 := blob.ParseBytes(pair[:pipe])
		missing, ok2 := blob.ParseBytes(pair[pipe+1:])
		if !ok1 || !ok2 {
			return fmt.Errorf("bogus missing key %q", key)
		}
		x.noteNeededMemory(have, missing)
	}
	return
}
// "exifgps|sha1-17b53c7c3e664d3613dfdce50ef1f2a09e8f04b5" -> "-122.39897155555556|37.61952208333334" func (c *Corpus) mergeEXIFGPSRow(k, v []byte) error { wholeRef, ok := blob.ParseBytes(k[len("exifgps|"):]) pipe := bytes.IndexByte(v, '|') if pipe < 0 || !ok { return fmt.Errorf("bogus row %q = %q", k, v) } lat, err := strconv.ParseFloat(string(v[:pipe]), 64) long, err1 := strconv.ParseFloat(string(v[pipe+1:]), 64) if err != nil || err1 != nil { return fmt.Errorf("bogus row %q = %q", k, v) } c.gps[wholeRef] = latLong{lat, long} return nil }
func ParseFields(v []byte, dst ...interface{}) error {
	for i, dv := range dst {
		thisv := v
		if i < len(dst)-1 {
			sp := bytes.IndexByte(v, ' ')
			if sp == -1 {
				return fmt.Errorf("missing space following field index %d", i)
			}
			thisv = v[:sp]
			v = v[sp+1:]
		}
		switch dv := dv.(type) {
		case *blob.Ref:
			br, ok := blob.ParseBytes(thisv)
			if !ok {
				return fmt.Errorf("conv: invalid blobref %q", thisv)
			}
			*dv = br
		case *uint32:
			n, err := strutil.ParseUintBytes(thisv, 10, 32)
			if err != nil {
				return err
			}
			*dv = uint32(n)
		case *uint64:
			n, err := strutil.ParseUintBytes(thisv, 10, 64)
			if err != nil {
				return err
			}
			*dv = n
		case *int64:
			n, err := strutil.ParseUintBytes(thisv, 10, 64)
			if err != nil {
				return err
			}
			if int64(n) < 0 {
				return errors.New("conv: negative numbers not accepted with int64 dest type")
			}
			*dv = int64(n)
		default:
			return fmt.Errorf("conv: unsupported target pointer type %T", dv)
		}
	}
	return nil
}
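// Illustrative sketch (assumption, not from the original source): the intended
// call shape for ParseFields above, decoding a space-separated row value into
// typed destinations. The sample value is made up; blob.Ref is the blob
// package's ref type already used throughout this code.
func exampleParseFields() {
	var (
		ref  blob.Ref
		size uint32
	)
	v := []byte("sha1-0000000000000000000000000000000000000000 777")
	if err := ParseFields(v, &ref, &size); err != nil {
		log.Fatal(err)
	}
	fmt.Println(ref, size)
}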
func (c *Corpus) mergeFileTimesRow(k, v []byte) error {
	if len(v) == 0 {
		return nil
	}
	// "filetimes|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" "1970-01-01T00%3A02%3A03Z"
	pipe := bytes.IndexByte(k, '|')
	if pipe < 0 {
		return fmt.Errorf("unexpected filetimes key %q", k)
	}
	br, ok := blob.ParseBytes(k[pipe+1:])
	if !ok {
		return fmt.Errorf("unexpected filetimes blobref in key %q", k)
	}
	c.ss = strutil.AppendSplitN(c.ss[:0], urld(string(v)), ",", -1)
	times := c.ss
	c.mutateFileInfo(br, func(fi *camtypes.FileInfo) {
		updateFileInfoTimes(fi, times)
	})
	return nil
}
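// Illustrative sketch (assumption, not from the original source): the filetimes
// value above is a comma-separated list of URL-escaped RFC 3339 times, and urld
// is assumed to be a query-unescape helper. With only the standard library, a
// single escaped time like the sample in the comment decodes as follows.
func exampleFileTimesDecode() {
	v := "1970-01-01T00%3A02%3A03Z"
	decoded, err := url.QueryUnescape(v) // "1970-01-01T00:02:03Z"
	if err != nil {
		log.Fatal(err)
	}
	t, err := time.Parse(time.RFC3339, decoded)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(t.Unix()) // 123
}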
func kvBlobMeta_bytes(k, v []byte) (bm camtypes.BlobMeta, ok bool) {
	ref := k[len("meta:"):]
	br, ok := blob.ParseBytes(ref)
	if !ok {
		return
	}
	pipe := bytes.IndexByte(v, '|')
	if pipe < 0 {
		return
	}
	size, err := strutil.ParseUintBytes(v[:pipe], 10, 32)
	if err != nil {
		return
	}
	return camtypes.BlobMeta{
		Ref:       br,
		Size:      uint32(size),
		CamliType: camliTypeFromMIME_bytes(v[pipe+1:]),
	}, true
}
func (c *Corpus) mergeFileInfoRow(k, v []byte) error {
	// "fileinfo|sha1-579f7f246bd420d486ddeb0dadbb256cfaf8bf6b" "5|some-stuff.txt|"
	pipe := bytes.IndexByte(k, '|')
	if pipe < 0 {
		return fmt.Errorf("unexpected fileinfo key %q", k)
	}
	br, ok := blob.ParseBytes(k[pipe+1:])
	if !ok {
		return fmt.Errorf("unexpected fileinfo blobref in key %q", k)
	}
	// TODO: could at least use strutil.ParseUintBytes to not stringify and retain
	// the length bytes of v.
	c.ss = strutil.AppendSplitN(c.ss[:0], string(v), "|", 4)
	if len(c.ss) != 3 && len(c.ss) != 4 {
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	size, err := strconv.ParseInt(c.ss[0], 10, 64)
	if err != nil {
		return fmt.Errorf("unexpected fileinfo value %q", v)
	}
	var wholeRef blob.Ref
	if len(c.ss) == 4 && c.ss[3] != "" { // checking for "" because of special files such as symlinks.
		var ok bool
		wholeRef, ok = blob.Parse(urld(c.ss[3]))
		if !ok {
			return fmt.Errorf("invalid wholeRef blobref in value %q for fileinfo key %q", v, k)
		}
	}
	c.mutateFileInfo(br, func(fi *camtypes.FileInfo) {
		fi.Size = size
		fi.FileName = c.str(urld(c.ss[1]))
		fi.MIMEType = c.str(urld(c.ss[2]))
		fi.WholeRef = wholeRef
	})
	return nil
}
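// Illustrative sketch (assumption, not from the original source): the two
// fileinfo value layouts handled above and migrated by fixMissingWholeRef
// below: the old "size|filename|mimetype" form and the newer form with a
// trailing wholeRef field. The sample values are made up; the filename and
// mimetype fields are stored URL-escaped.
func exampleFileInfoValues() {
	oldVal := "5|some-stuff.txt|text%2Fplain"
	newVal := "5|some-stuff.txt|text%2Fplain|sha1-0000000000000000000000000000000000000000"
	for _, v := range []string{oldVal, newVal} {
		parts := strings.SplitN(v, "|", 4)
		fmt.Println(len(parts), parts) // 3 fields for the old form, 4 for the new one
	}
}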
// fixMissingWholeRef appends the wholeRef to all the keyFileInfo rows values. It should
// only be called to upgrade a version 4 index schema to version 5.
func (x *Index) fixMissingWholeRef(fetcher blob.Fetcher) (err error) {
	// We did that check from the caller, but double-check again to prevent from misuse
	// of that function.
	if x.schemaVersion() != 4 || requiredSchemaVersion != 5 {
		panic("fixMissingWholeRef should only be used when upgrading from v4 to v5 of the index schema")
	}
	log.Println("index: fixing the missing wholeRef in the fileInfo rows...")
	defer func() {
		if err != nil {
			log.Printf("index: fixing the fileInfo rows failed: %v", err)
			return
		}
		log.Print("index: successfully fixed wholeRef in FileInfo rows.")
	}()

	// first build a reverted keyWholeToFileRef map, so we can get the wholeRef from the fileRef easily.
	fileRefToWholeRef := make(map[blob.Ref]blob.Ref)
	it := x.queryPrefix(keyWholeToFileRef)
	var keyA [3]string
	for it.Next() {
		keyPart := strutil.AppendSplitN(keyA[:0], it.Key(), "|", 3)
		if len(keyPart) != 3 {
			return fmt.Errorf("bogus keyWholeToFileRef key: got %q, wanted \"wholetofile|wholeRef|fileRef\"", it.Key())
		}
		wholeRef, ok1 := blob.Parse(keyPart[1])
		fileRef, ok2 := blob.Parse(keyPart[2])
		if !ok1 || !ok2 {
			return fmt.Errorf("bogus part in keyWholeToFileRef key: %q", it.Key())
		}
		fileRefToWholeRef[fileRef] = wholeRef
	}
	if err := it.Close(); err != nil {
		return err
	}

	var fixedEntries, missedEntries int
	t := time.NewTicker(5 * time.Second)
	defer t.Stop()
	// We record the mutations and set them all after the iteration because of the sqlite locking:
	// since BeginBatch takes a lock, and Find too, we would deadlock at queryPrefix if we
	// started a batch mutation before.
	mutations := make(map[string]string)
	keyPrefix := keyFileInfo.name + "|"
	it = x.queryPrefix(keyFileInfo)
	defer it.Close()
	var valA [3]string
	for it.Next() {
		select {
		case <-t.C:
			log.Printf("Recorded %d missing wholeRef that we'll try to fix, and %d that we can't fix.", fixedEntries, missedEntries)
		default:
		}
		br, ok := blob.ParseBytes(it.KeyBytes()[len(keyPrefix):])
		if !ok {
			return fmt.Errorf("invalid blobRef %q", it.KeyBytes()[len(keyPrefix):])
		}
		wholeRef, ok := fileRefToWholeRef[br]
		if !ok {
			missedEntries++
			log.Printf("WARNING: wholeRef for %v not found in index. You should probably rebuild the whole index.", br)
			continue
		}
		valPart := strutil.AppendSplitN(valA[:0], it.Value(), "|", 3)
		// The old format we're fixing should be: size|filename|mimetype
		if len(valPart) != 3 {
			return fmt.Errorf("bogus keyFileInfo value: got %q, wanted \"size|filename|mimetype\"", it.Value())
		}
		size_s, filename, mimetype := valPart[0], valPart[1], urld(valPart[2])
		if strings.Contains(mimetype, "|") {
			// I think this can only happen for people migrating from a commit at least as recent as
			// 8229c1985079681a652cb65551b4e80a10d135aa, when wholeRef was introduced to keyFileInfo
			// but there was no migration code yet.
			// For the "production" migrations between 0.8 and 0.9, the index should not have any wholeRef
			// in the keyFileInfo entries. So if something goes wrong and is somehow linked to that happening,
			// I'd like to know about it, hence the logging.
			log.Printf("%v: %v already has a wholeRef, not fixing it", it.Key(), it.Value())
			continue
		}
		size, err := strconv.Atoi(size_s)
		if err != nil {
			return fmt.Errorf("bogus size in keyFileInfo value %v: %v", it.Value(), err)
		}
		mutations[keyFileInfo.Key(br)] = keyFileInfo.Val(size, filename, mimetype, wholeRef)
		fixedEntries++
	}
	if err := it.Close(); err != nil {
		return err
	}
	log.Printf("Starting to commit the missing wholeRef fixes (%d entries) now, this can take a while.", fixedEntries)
	bm := x.s.BeginBatch()
	for k, v := range mutations {
		bm.Set(k, v)
	}
	bm.Set(keySchemaVersion.name, "5")
	if err := x.s.CommitBatch(bm); err != nil {
		return err
	}
	if missedEntries > 0 {
		log.Printf("Some missing wholeRef entries were not fixed (%d), you should do a full reindex.", missedEntries)
	}
	return nil
}
// StreamBlobs implements the blobserver.StreamBlobs interface.
func (s *storage) StreamBlobs(ctx *context.Context, dest chan<- blobserver.BlobAndToken, contToken string) error {
	defer close(dest)
	fileNum, offset, err := parseContToken(contToken)
	if err != nil {
		return errors.New("diskpacked: invalid continuation token")
	}
	debug.Printf("Continuing blob streaming from pack %s, offset %d", s.filename(fileNum), offset)

	fd, err := os.Open(s.filename(fileNum))
	if err != nil {
		return err
	}
	// fd will change over time; Close whichever is current when we exit.
	defer func() {
		if fd != nil { // may be nil on os.Open error below
			fd.Close()
		}
	}()

	// ContToken always refers to the exact next place we will read from.
	// Note that seeking past the end is legal on Unix and for io.Seeker,
	// but that will just result in a mostly harmless EOF.
	//
	// TODO: probably be stricter here and don't allow seek past
	// the end, since we know the size of closed files and the
	// size of the file diskpacked currently still writing.
	_, err = fd.Seek(offset, os.SEEK_SET)
	if err != nil {
		return err
	}

	const ioBufSize = 256 * 1024

	// We'll use bufio to avoid read system call overhead.
	r := bufio.NewReaderSize(fd, ioBufSize)

	for {
		// Are we at the EOF of this pack?
		if _, err := r.Peek(1); err != nil {
			if err != io.EOF {
				return err
			}
			// EOF case; continue to the next pack, if any.
			fileNum += 1
			offset = 0
			fd.Close() // Close the previous pack
			fd, err = os.Open(s.filename(fileNum))
			if os.IsNotExist(err) {
				// We reached the end.
				return nil
			} else if err != nil {
				return err
			}
			r.Reset(fd)
			continue
		}

		thisOffset := offset // of current blob's header
		consumed, digest, size, err := readHeader(r)
		if err != nil {
			return err
		}

		offset += int64(consumed)
		if deletedBlobRef.Match(digest) {
			// Skip over deletion padding
			if _, err := io.CopyN(ioutil.Discard, r, int64(size)); err != nil {
				return err
			}
			offset += int64(size)
			continue
		}

		// Finally, read and send the blob.

		// TODO: remove this allocation per blob. We can make one instead
		// outside of the loop, guarded by a mutex, and re-use it, only to
		// lock the mutex and clone it if somebody actually calls Open
		// on the *blob.Blob. Otherwise callers just scanning all the blobs
		// to see if they have everything incur lots of garbage if they
		// don't open any blobs.
		data := make([]byte, size)
		if _, err := io.ReadFull(r, data); err != nil {
			return err
		}
		offset += int64(size)
		ref, ok := blob.ParseBytes(digest)
		if !ok {
			return fmt.Errorf("diskpacked: invalid blobref %q", digest)
		}
		newReader := func() types.ReadSeekCloser {
			return newReadSeekNopCloser(bytes.NewReader(data))
		}
		blob := blob.NewBlob(ref, size, newReader)
		select {
		case dest <- blobserver.BlobAndToken{
			Blob:  blob,
			Token: fmt.Sprintf("%d %d", fileNum, thisOffset),
		}:
			// Nothing.
		case <-ctx.Done():
			return context.ErrCanceled
		}
	}
}
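// Illustrative sketch (assumption, not from the original source): the
// continuation token emitted above is "fileNum offset" (see the
// fmt.Sprintf("%d %d", ...) call), so a matching parser along the lines of
// parseContToken could look like the following. The real parseContToken may
// differ, e.g. in how it validates an empty token meaning "start of stream".
func parseContTokenExample(contToken string) (fileNum int, offset int64, err error) {
	if contToken == "" {
		return 0, 0, nil // empty token: start from the first pack file
	}
	if _, err := fmt.Sscanf(contToken, "%d %d", &fileNum, &offset); err != nil {
		return 0, 0, fmt.Errorf("invalid continuation token %q: %v", contToken, err)
	}
	return fileNum, offset, nil
}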