// readPrefixOrFile executes a given func with a reader on the passed prefix
// and falls back to passing a reader on the whole file if the func returns
// io.EOF or io.ErrUnexpectedEOF.
func readPrefixOrFile(prefix []byte, fetcher blob.Fetcher, b *schema.Blob, fn func(filePrefixReader) error) (err error) {
	pr := bytes.NewReader(prefix)
	err = fn(pr)
	if err == io.EOF || err == io.ErrUnexpectedEOF {
		var fr *schema.FileReader
		fr, err = b.NewFileReader(fetcher)
		if err == nil {
			err = fn(fr)
			fr.Close()
		}
	}
	return err
}
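// filePrefixReader is referenced above but not defined in this section. A
// minimal sketch, assuming it is the common interface satisfied by both
// *bytes.Reader (the in-memory prefix) and *schema.FileReader (the
// whole-file fallback); io.Reader plus io.ReaderAt covers the callbacks
// seen later (e.g. schema.FileTime reads at offsets). The package's exact
// definition may differ.
type filePrefixReader interface {
	io.Reader
	io.ReaderAt
}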
func setFileMeta(name string, blob *schema.Blob) error {
	err1 := os.Chmod(name, blob.FileMode())
	var err2 error
	if mt := blob.ModTime(); !mt.IsZero() {
		err2 = os.Chtimes(name, mt, mt)
	}
	// TODO: we previously did os.Chown here, but it's rarely wanted,
	// then the schema.Blob refactor broke it, so it's gone.
	// Add it back later once we care?
	for _, err := range []error{err1, err2} {
		if err != nil {
			return err
		}
	}
	return nil
}
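// Hypothetical call site for setFileMeta, for illustration only: after
// writing a fetched file's contents to disk, restore its permissions and
// mtime from the schema blob. outPath and fileBlob are invented names.
func restoreFileMeta(outPath string, fileBlob *schema.Blob) {
	if err := setFileMeta(outPath, fileBlob); err != nil {
		log.Printf("restoring metadata for %s: %v", outPath, err)
	}
}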
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateDir(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) error {
	blobRef := b.BlobRef()
	// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto
	// StaticFile/StaticDirectory or something.
	dr, err := b.NewDirReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		log.Printf("index: error indexing directory, creating NewDirReader %s: %v", blobRef, err)
		return nil
	}
	sts, err := dr.StaticSet()
	if err != nil {
		log.Printf("index: error indexing directory: can't get StaticSet: %v\n", err)
		return nil
	}

	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(len(sts), b.FileName(), "", blob.Ref{}))
	for _, br := range sts {
		mm.Set(keyStaticDirChild.Key(blobRef, br.String()), "1")
	}
	return nil
}
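// The *mutationMap type used throughout these newer revisions is not
// defined in this section. A plausible sketch, inferred from its use
// (mm.Set for key/value rows, mm.noteDelete for delete claims); the real
// type may carry more state.
type mutationMap struct {
	kv      map[string]string // key/value rows to batch into the index
	deletes []schema.Claim    // delete claims noted during population
}

func (mm *mutationMap) Set(k, v string) {
	if mm.kv == nil {
		mm.kv = make(map[string]string)
	}
	mm.kv[k] = v
}

func (mm *mutationMap) noteDelete(deleteClaim schema.Claim) {
	mm.deletes = append(mm.deletes, deleteClaim)
}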
// b: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateDir(b *schema.Blob, bm BatchMutation) error {
	blobRef := b.BlobRef()
	// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto
	// StaticFile/StaticDirectory or something.
	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	dr, err := b.NewDirReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		log.Printf("index: error indexing directory, creating NewDirReader %s: %v", blobRef, err)
		return nil
	}
	sts, err := dr.StaticSet()
	if err != nil {
		log.Printf("index: error indexing directory: can't get StaticSet: %v\n", err)
		return nil
	}

	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(len(sts), b.FileName(), ""))
	for _, br := range sts {
		bm.Set(keyStaticDirChild.Key(blobRef, br.String()), "1")
	}
	return nil
}
func (ix *Index) populateClaim(fetcher *missTrackFetcher, b *schema.Blob, mm *mutationMap) error {
	br := b.BlobRef()

	claim, ok := b.AsClaim()
	if !ok {
		// Skip bogus claim with malformed permanode.
		return nil
	}

	vr := jsonsign.NewVerificationRequest(b.JSON(), blob.NewSerialFetcher(ix.KeyFetcher, fetcher))
	if !vr.Verify() {
		// TODO(bradfitz): ask if the vr.Err.(jsonsign.Error).IsPermanent() and retry
		// later if it's not permanent? or maybe do this up a level?
		if vr.Err != nil {
			return vr.Err
		}
		return errors.New("index: populateClaim verification failure")
	}
	verifiedKeyId := vr.SignerKeyId
	mm.Set("signerkeyid:"+vr.CamliSigner.String(), verifiedKeyId)

	if claim.ClaimType() == string(schema.DeleteClaim) {
		if err := ix.populateDeleteClaim(claim, vr, mm); err != nil {
			return err
		}
		mm.noteDelete(claim)
		return nil
	}

	pnbr := claim.ModifiedPermanode()
	if !pnbr.Valid() {
		// A different type of claim; not modifying a permanode.
		return nil
	}

	attr, value := claim.Attribute(), claim.Value()

	recentKey := keyRecentPermanode.Key(verifiedKeyId, claim.ClaimDateString(), br)
	mm.Set(recentKey, pnbr.String())
	claimKey := keyPermanodeClaim.Key(pnbr, verifiedKeyId, claim.ClaimDateString(), br)
	mm.Set(claimKey, keyPermanodeClaim.Val(claim.ClaimType(), attr, value, vr.CamliSigner))

	if strings.HasPrefix(attr, "camliPath:") {
		targetRef, ok := blob.Parse(value)
		if ok {
			// TODO: deal with set-attribute vs. del-attribute
			// properly? I think we get it for free when
			// del-attribute has no Value, but we need to deal
			// with the case where they explicitly delete the
			// current value.
			suffix := attr[len("camliPath:"):]
			active := "Y"
			if claim.ClaimType() == "del-attribute" {
				active = "N"
			}
			baseRef := pnbr
			claimRef := br

			key := keyPathBackward.Key(verifiedKeyId, targetRef, claimRef)
			val := keyPathBackward.Val(claim.ClaimDateString(), baseRef, active, suffix)
			mm.Set(key, val)

			key = keyPathForward.Key(verifiedKeyId, baseRef, suffix, claim.ClaimDateString(), claimRef)
			val = keyPathForward.Val(active, targetRef)
			mm.Set(key, val)
		}
	}

	if claim.ClaimType() != string(schema.DelAttributeClaim) && IsIndexedAttribute(attr) {
		key := keySignerAttrValue.Key(verifiedKeyId, attr, value, claim.ClaimDateString(), br)
		mm.Set(key, keySignerAttrValue.Val(pnbr))
	}

	if IsBlobReferenceAttribute(attr) {
		targetRef, ok := blob.Parse(value)
		if ok {
			key := keyEdgeBackward.Key(targetRef, pnbr, br)
			mm.Set(key, keyEdgeBackward.Val("permanode", ""))
		}
	}

	return nil
}
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 512 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes))
		// If our optimistic 512KB in-memory prefix from above was too short
		// to get the dimensions, pass the whole thing instead and try again.
		if err == io.ErrUnexpectedEOF {
			var fr *schema.FileReader
			fr, err = b.NewFileReader(fetcher)
			if err == nil {
				conf, err = images.DecodeConfig(fr)
				fr.Close()
			}
		}
		if err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}

		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		// TODO(mpl): find (generate?) more broken EXIF images to experiment with.
		err = indexEXIF(wholeRef, bytes.NewReader(imageBuf.Bytes), mm)
		if err == io.EOF {
			var fr *schema.FileReader
			fr, err = b.NewFileReader(fetcher)
			if err == nil {
				err = indexEXIF(wholeRef, fr, mm)
				fr.Close()
			}
		}
		if err != nil {
			log.Printf("error parsing EXIF: %v", err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime, wholeRef))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
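// keepFirstN is used by the populateFile variants but not defined in this
// section. A minimal sketch consistent with that use: an io.Writer that
// buffers only the first N bytes written through it (the image prefix
// handed to DecodeConfig, FileTime, and the EXIF indexer) and silently
// discards the rest, so it can sit inside an io.MultiWriter without
// short-circuiting the SHA-1 copy.
type keepFirstN struct {
	N     int    // maximum number of bytes to retain
	Bytes []byte // the first N (or fewer) bytes seen so far
}

func (w *keepFirstN) Write(p []byte) (int, error) {
	if remain := w.N - len(w.Bytes); remain > 0 {
		if remain > len(p) {
			remain = len(p)
		}
		w.Bytes = append(w.Bytes, p[:remain]...)
	}
	return len(p), nil // report the full write so io.Copy keeps streaming
}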
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Empirically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		// https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
// b: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, bm BatchMutation) error {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := b.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	wholeRef := blob.RefFromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		tag, err := taglib.Decode(fr, fr.Size())
		if err == nil {
			indexMusic(tag, wholeRef, bm)
		} else {
			log.Print("index: error parsing tag: ", err)
		}
	}

	return nil
}
func (st *shareTester) put(blob *schema.Blob) {
	st.putRaw(blob.BlobRef(), blob.JSON())
}
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fetcher := &seekFetcherMissTracker{
		// TODO(bradfitz): cache this SeekFetcher on ix so it
		// doesn't have to be re-made each time? Probably small.
		src: blob.SeekerFromStreamingFetcher(ix.BlobSource),
	}
	defer func() {
		if err == nil {
			return
		}
		fetcher.mu.Lock()
		defer fetcher.mu.Unlock()
		if len(fetcher.missing) == 0 {
			return
		}
		// TODO(bradfitz): there was an error indexing this file, and
		// we failed to load the blobs in f.missing. Add those as dependencies
		// somewhere so when we get one of those missing blobs, we kick off
		// a re-index of this file for whenever the indexer is idle.
	}()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		//
		// We'll also want to bump the schemaVersion after this,
		// to fix anybody's index which is only partial due to
		// this old bug where it would return nil instead of doing
		// the necessary work.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Empirically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		// https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		//
		// See TODOs above, and the fetcher.missing stuff.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
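// seekFetcherMissTracker is not defined in this section. Its assumed
// shape, inferred from the deferred func above: a fetcher wrapper that
// delegates to src and records the refs of blobs it failed to fetch, so
// the caller can distinguish "index failed because blobs are missing"
// from other errors. The fetcher method set itself is omitted here.
type seekFetcherMissTracker struct {
	src blob.SeekFetcher // the underlying fetcher being wrapped

	mu      sync.Mutex // guards missing
	missing []blob.Ref // blobs that could not be fetched
}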
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, mr := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 512 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, mr)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		var conf images.Config
		decodeConfig := func(r filePrefixReader) error {
			conf, err = images.DecodeConfig(r)
			return err
		}
		if err := readPrefixOrFile(imageBuf.Bytes, fetcher, b, decodeConfig); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}

		var ft time.Time
		fileTime := func(r filePrefixReader) error {
			ft, err = schema.FileTime(r)
			return err
		}
		if err = readPrefixOrFile(imageBuf.Bytes, fetcher, b, fileTime); err == nil {
			times = append(times, ft)
		}
		if exifDebug {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		// TODO(mpl): find (generate?) more broken EXIF images to experiment with.
		indexEXIFData := func(r filePrefixReader) error {
			return indexEXIF(wholeRef, r, mm)
		}
		if err = readPrefixOrFile(imageBuf.Bytes, fetcher, b, indexEXIFData); err != nil {
			if exifDebug {
				log.Printf("error parsing EXIF: %v", err)
			}
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime, wholeRef))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
func indexSchemaBlob(fsck *db.DB, s *schema.Blob) (needs []string) {
	camliType := s.Type()
	switch camliType {
	case "static-set":
		for _, r := range s.StaticSetMembers() {
			needs = append(needs, r.String())
		}
	case "bytes":
		fallthrough
	case "file":
		for i, bp := range s.ByteParts() {
			ok := false
			if r := bp.BlobRef; r.Valid() {
				needs = append(needs, r.String())
				ok = true
			}
			if r := bp.BytesRef; r.Valid() {
				needs = append(needs, r.String())
				ok = true
			}
			if !ok {
				log.Printf("%s (%s): no valid ref in part %d", s.BlobRef(), camliType, i)
			}
		}
	case "directory":
		switch r, ok := s.DirectoryEntries(); {
		case !ok:
			log.Printf("%s (%s): bad entries", s.BlobRef(), camliType)
		case !r.Valid():
			log.Printf("%s (%s): invalid entries", s.BlobRef(), camliType)
		default:
			needs = append(needs, r.String())
		}
	}
	return
}
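// Illustrative use of indexSchemaBlob: report which blobs a schema blob
// still depends on before it can be fully indexed. fsckDB and sb are
// hypothetical variables standing in for a *db.DB and a *schema.Blob.
if needs := indexSchemaBlob(fsckDB, sb); len(needs) > 0 {
	log.Printf("%s: still needs %d blobs: %q", sb.BlobRef(), len(needs), needs)
}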
// blob: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
	// TODO: move the NewFileReader off of blob.
	blobRef := blob.BlobRef()
	seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := blob.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MimeTypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var withCopyErr func(error) // or nil
	if strings.HasPrefix(mime, "image/") {
		pr, pw := io.Pipe()
		copyDest = io.MultiWriter(copyDest, pw)
		confc := make(chan *image.Config, 1)
		go func() {
			conf, _, err := image.DecodeConfig(pr)
			defer io.Copy(ioutil.Discard, pr)
			if err == nil {
				confc <- &conf
			} else {
				confc <- nil
			}
		}()
		withCopyErr = func(err error) {
			pw.CloseWithError(err)
			if conf := <-confc; conf != nil {
				bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
			}
		}
	}

	size, err := io.Copy(copyDest, reader)
	if f := withCopyErr; f != nil {
		f(err)
	}
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	wholeRef := blobref.FromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, blob.FileName(), mime))
	return nil
}
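// Standalone sketch of the pipe pattern used in the revision above: hash a
// stream while a goroutine concurrently decodes the image header from the
// same bytes. All names here are illustrative; the key points are draining
// pr so the writer side never blocks, and closing pw with the copy's error
// so the decoder goroutine always terminates.
package main

import (
	"crypto/sha1"
	"fmt"
	"image"
	_ "image/jpeg" // register a decoder for image.DecodeConfig
	"io"
	"io/ioutil"
	"os"
)

func main() {
	f, err := os.Open("photo.jpg") // hypothetical input file
	if err != nil {
		return
	}
	defer f.Close()

	h := sha1.New()
	pr, pw := io.Pipe()
	confc := make(chan *image.Config, 1)
	go func() {
		conf, _, err := image.DecodeConfig(pr)
		defer io.Copy(ioutil.Discard, pr) // drain so the io.Copy below never blocks
		if err == nil {
			confc <- &conf
		} else {
			confc <- nil
		}
	}()

	n, err := io.Copy(io.MultiWriter(h, pw), f)
	pw.CloseWithError(err) // a nil err closes cleanly; the decoder sees EOF
	if conf := <-confc; conf != nil {
		fmt.Printf("%d bytes, %dx%d, sha1=%x\n", n, conf.Width, conf.Height, h.Sum(nil))
	}
}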