// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 512 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes))
		// If our optimistic 512KB in-memory prefix from above was too short
		// to get the dimensions, pass the whole thing instead and try again.
		if err == io.ErrUnexpectedEOF {
			var fr *schema.FileReader
			fr, err = b.NewFileReader(fetcher)
			if err == nil {
				conf, err = images.DecodeConfig(fr)
				fr.Close()
			}
		}
		if err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}

		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		// TODO(mpl): find (generate?) more broken EXIF images to experiment with.
		err = indexEXIF(wholeRef, bytes.NewReader(imageBuf.Bytes), mm)
		if err == io.EOF {
			var fr *schema.FileReader
			fr, err = b.NewFileReader(fetcher)
			if err == nil {
				err = indexEXIF(wholeRef, fr, mm)
				fr.Close()
			}
		}
		if err != nil {
			log.Printf("error parsing EXIF: %v", err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime, wholeRef))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Empirically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		// https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
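// The image branches above tee the file bytes into a keepFirstN so that image-config
// and EXIF sniffing can run against an in-memory prefix instead of re-reading the blob.
// Below is a minimal sketch of such a prefix buffer: an io.Writer that keeps only the
// first N bytes it sees and discards the rest, while always reporting a full write so
// the surrounding io.Copy/io.MultiWriter keep going. Only the N and Bytes fields are
// visible at the call sites; the Write logic here is an assumption inferred from them,
// not necessarily the exact implementation.
type keepFirstN struct {
	N     int
	Bytes []byte
}

func (w *keepFirstN) Write(p []byte) (int, error) {
	// Append at most N-len(Bytes) more bytes, then silently drop the remainder.
	if n := w.N - len(w.Bytes); n > 0 {
		if n > len(p) {
			n = len(p)
		}
		w.Bytes = append(w.Bytes, p[:n]...)
	}
	return len(p), nil
}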
func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	var filebb *schema.Builder
	if up.fileOpts.contentsOnly {
		filebb = schema.NewFileMap("")
	} else {
		filebb = schema.NewCommonFileMap(n.fullPath, n.fi)
	}
	filebb.SetType("file")

	up.fdGate.Start()
	defer up.fdGate.Done()

	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	if !up.fileOpts.contentsOnly {
		if up.fileOpts.exifTime {
			ra, ok := file.(io.ReaderAt)
			if !ok {
				return nil, errors.New("Error asserting local file to io.ReaderAt")
			}
			modtime, err := schema.FileTime(ra)
			if err != nil {
				log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
			} else {
				filebb.SetModTime(modtime)
			}
		}
		if up.fileOpts.wantCapCtime() {
			filebb.CapCreationTime()
		}
	}

	var (
		size                   = n.fi.Size()
		fileContents io.Reader = io.LimitReader(file, size)
		br           blob.Ref          // of file schemaref
		sum          string            // sha1 hashsum of the file to upload
		pr           *client.PutResult // of the final "file" schema blob
	)

	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				android.NoteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// we can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blob.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     uint32(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		android.NoteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if !br.Valid() {
		// br still zero means fileMapFromDuplicate did not find the file on the server,
		// and the file has not just been uploaded subsequently to a vivify request.
		// So we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.noStatReceiver(up.statReceiver(n)), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// The work for those planned permanodes (and the claims) is redone
	// every time we get here (i.e. past the stat cache). However, they're
	// caught by the have cache, so they won't be reuploaded for nothing
	// at least.
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
		}
		err = up.uploadFilePermanode(sum, br, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross. should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this? or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: uint32(len(json)), Skipped: false}
	return pr, nil
}
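// When wantFilePermanode is set and the whole-file digest wasn't computed up front,
// the uploaders above wrap fileContents in a trackDigestReader and later call Sum()
// to recover the "sha1-..." string used to name the planned permanode. A minimal
// sketch under that assumption follows; the hash choice and the exact Sum format are
// inferred from the surrounding code (wholeFileDigest's ref String() has the same
// "sha1-<hex>" shape), not confirmed against the real helper.
type trackDigestReader struct {
	r io.Reader
	h hash.Hash
}

func (t *trackDigestReader) Read(p []byte) (n int, err error) {
	if t.h == nil {
		t.h = sha1.New()
	}
	n, err = t.r.Read(p)
	t.h.Write(p[:n])
	return
}

// Sum reports the digest of everything read so far in blobref form.
func (t *trackDigestReader) Sum() string {
	return fmt.Sprintf("sha1-%x", t.h.Sum(nil))
}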
// blobref: of the file or schema blob
// blob: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, bm BatchMutation) error {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := b.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	wholeRef := blob.RefFromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		tag, err := taglib.Decode(fr, fr.Size())
		if err == nil {
			indexMusic(tag, wholeRef, bm)
		} else {
			log.Print("index: error parsing tag: ", err)
		}
	}

	return nil
}
func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	// TODO(mpl): maybe break this func into more maintainable pieces?
	filebb := schema.NewCommonFileMap(n.fullPath, n.fi)
	filebb.SetType("file")

	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	if up.fileOpts.exifTime {
		ra, ok := file.(io.ReaderAt)
		if !ok {
			return nil, errors.New("Error asserting local file to io.ReaderAt")
		}
		modtime, err := schema.FileTime(ra)
		if err != nil {
			log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
		} else {
			filebb.SetModTime(modtime)
		}
	}

	var (
		size                   = n.fi.Size()
		fileContents io.Reader = io.LimitReader(file, size)
		br           *blobref.BlobRef  // of file schemaref
		sum          string            // sha1 hashsum of the file to upload
		pr           *client.PutResult // of the final "file" schema blob
	)

	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				noteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// we can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.statReceiver(n), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blobref.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     int64(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		noteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if br == nil {
		// br still nil means fileMapFromDuplicate did not find the file on the server,
		// and the file has not just been uploaded subsequently to a vivify request.
		// So we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.statReceiver(n), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// TODO(mpl): test that none of these claims get uploaded if they've already been done
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// Use a fixed time value for signing; not using modtime
		// so two identical files don't have different modtimes?
		// TODO(bradfitz): consider this more?
		permaNodeSigTime := time.Unix(0, 0)
		permaNode, err := up.UploadPlannedPermanode(sum, permaNodeSigTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
		handleResult("node-permanode", permaNode, nil)

		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime back for file %v", n.fullPath)
		}
		contentAttr := schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", br.String())
		contentAttr.SetClaimDate(claimTime)
		signed, err := up.SignBlob(contentAttr, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Failed to sign content claim for node %v: %v", n, err)
		}
		put, err := up.uploadString(signed)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode's attribute for node %v: %v", n, err)
		}
		handleResult("node-permanode-contentattr", put, nil)
		if tags := up.fileOpts.tags(); len(tags) > 0 {
			errch := make(chan error)
			for _, tag := range tags {
				go func(tag string) {
					m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
					m.SetClaimDate(claimTime)
					signed, err := up.SignBlob(m, claimTime)
					if err != nil {
						errch <- fmt.Errorf("Failed to sign tag claim for node %v: %v", n, err)
						return
					}
					put, err := up.uploadString(signed)
					if err != nil {
						errch <- fmt.Errorf("Error uploading permanode's tag attribute %v for node %v: %v", tag, n, err)
						return
					}
					handleResult("node-permanode-tag", put, nil)
					errch <- nil
				}(tag)
			}

			for _ = range tags {
				if e := <-errch; e != nil && err == nil {
					err = e
				}
			}
			if err != nil {
				return nil, err
			}
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross. should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this? or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: int64(len(json)), Skipped: false}
	return pr, nil
}
func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	m := schema.NewCommonFileMap(n.fullPath, n.fi)
	m["camliType"] = "file"
	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	if up.fileOpts.exifTime {
		ra, ok := file.(io.ReaderAt)
		if !ok {
			return nil, errors.New("Error asserting local file to io.ReaderAt")
		}
		modtime, err := schema.FileTime(ra)
		if err != nil {
			log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
		} else {
			m["unixMtime"] = schema.RFC3339FromTime(modtime)
		}
	}

	size := n.fi.Size()
	var fileContents io.Reader = io.LimitReader(file, size)

	if up.fileOpts.wantVivify() {
		err := schema.WriteFileChunks(up.statReceiver(), m, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := m.JSON()
		if err != nil {
			return nil, err
		}
		bref := blobref.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  bref,
			Size:     int64(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		return up.Upload(h)
	}

	var (
		blobref *blobref.BlobRef // of file schemaref
		sum     string           // "sha1-xxxxx"
	)
	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			if ref, ok := up.fileMapFromDuplicate(up.statReceiver(), m, sum); ok {
				blobref = ref
			}
		}
	}

	if blobref == nil {
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		blobref, err = schema.WriteFileMap(up.statReceiver(), m, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// TODO(mpl): test that none of these claims get uploaded if they've already been done
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// Use a fixed time value for signing; not using modtime
		// so two identical files don't have different modtimes?
		// TODO(bradfitz): consider this more?
		permaNodeSigTime := time.Unix(0, 0)
		permaNode, err := up.UploadPlannedPermanode(sum, permaNodeSigTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
		handleResult("node-permanode", permaNode, nil)

		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, err := time.Parse(time.RFC3339, m["unixMtime"].(string))
		if err != nil {
			return nil, fmt.Errorf("While parsing modtime for file %v: %v", n.fullPath, err)
		}
		contentAttr := schema.NewSetAttributeClaim(permaNode.BlobRef, "camliContent", blobref.String())
		contentAttr.SetClaimDate(claimTime)
		signed, err := up.SignMap(contentAttr, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Failed to sign content claim for node %v: %v", n, err)
		}
		put, err := up.uploadString(signed)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode's attribute for node %v: %v", n, err)
		}
		handleResult("node-permanode-contentattr", put, nil)
		if tags := up.fileOpts.tags(); len(tags) > 0 {
			// TODO(mpl): do these claims concurrently, not in series
			for _, tag := range tags {
				m := schema.NewAddAttributeClaim(permaNode.BlobRef, "tag", tag)
				m.SetClaimDate(claimTime)
				// TODO(mpl): verify that SetClaimDate does modify the GPG signature date of the claim
				signed, err := up.SignMap(m, claimTime)
				if err != nil {
					return nil, fmt.Errorf("Failed to sign tag claim for node %v: %v", n, err)
				}
				put, err := up.uploadString(signed)
				if err != nil {
					return nil, fmt.Errorf("Error uploading permanode's tag attribute %v for node %v: %v", tag, n, err)
				}
				handleResult("node-permanode-tag", put, nil)
			}
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross. should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this? or make
	// schemaWriteFileMap return it?
	json, _ := m.JSON()
	pr := &client.PutResult{BlobRef: blobref, Size: int64(len(json)), Skipped: false}
	return pr, nil
}
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fetcher := &seekFetcherMissTracker{
		// TODO(bradfitz): cache this SeekFetcher on ix so it
		// doesn't have to be re-made each time? Probably small.
		src: blob.SeekerFromStreamingFetcher(ix.BlobSource),
	}
	defer func() {
		if err == nil {
			return
		}
		fetcher.mu.Lock()
		defer fetcher.mu.Unlock()
		if len(fetcher.missing) == 0 {
			return
		}
		// TODO(bradfitz): there was an error indexing this file, and
		// we failed to load the blobs in f.missing. Add those as dependencies
		// somewhere so when we get one of those missing blobs, we kick off
		// a re-index of this file for whenever the indexer is idle.
	}()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		//
		// We'll also want to bump the schemaVersion after this,
		// to fix anybody's index which is only partial due to
		// this old bug where it would return nil instead of doing
		// the necessary work.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Empirically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		// https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		//
		// See TODOs above, and the fetcher.missing stuff.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, mr := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 512 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, mr)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		var conf images.Config
		decodeConfig := func(r filePrefixReader) error {
			conf, err = images.DecodeConfig(r)
			return err
		}
		if err := readPrefixOrFile(imageBuf.Bytes, fetcher, b, decodeConfig); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}

		var ft time.Time
		fileTime := func(r filePrefixReader) error {
			ft, err = schema.FileTime(r)
			return err
		}
		if err = readPrefixOrFile(imageBuf.Bytes, fetcher, b, fileTime); err == nil {
			times = append(times, ft)
		}
		if exifDebug {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		// TODO(mpl): find (generate?) more broken EXIF images to experiment with.
		indexEXIFData := func(r filePrefixReader) error {
			return indexEXIF(wholeRef, r, mm)
		}
		if err = readPrefixOrFile(imageBuf.Bytes, fetcher, b, indexEXIFData); err != nil {
			if exifDebug {
				log.Printf("error parsing EXIF: %v", err)
			}
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime, wholeRef))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
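// This last populateFile variant funnels every image-config/EXIF pass through
// readPrefixOrFile: try the cheap in-memory prefix first, and only re-open the whole
// file via NewFileReader when the prefix was too short. Below is a sketch of that
// helper, assuming the fallback is triggered by io.EOF/io.ErrUnexpectedEOF as in the
// earlier inline version of this retry logic; the exact trigger set and interface
// shape are assumptions drawn from the call sites, not the confirmed implementation.
type filePrefixReader interface {
	io.Reader
	io.ReaderAt
}

// readPrefixOrFile runs fn on a reader over prefix, and if fn fails because the
// prefix ended too early, re-runs fn on a reader over the whole file.
func readPrefixOrFile(prefix []byte, fetcher blob.Fetcher, b *schema.Blob, fn func(filePrefixReader) error) (err error) {
	err = fn(bytes.NewReader(prefix))
	if err == io.EOF || err == io.ErrUnexpectedEOF {
		var fr *schema.FileReader
		fr, err = b.NewFileReader(fetcher)
		if err == nil {
			err = fn(fr)
			fr.Close()
		}
	}
	return err
}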