// blobref: of the file or schema blob
// b: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateDir(b *schema.Blob, bm BatchMutation) error {
	blobRef := b.BlobRef()
	// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto
	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	dr, err := b.NewDirReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		log.Printf("index: error indexing directory, creating NewDirReader %s: %v", blobRef, err)
		return nil
	}
	sts, err := dr.StaticSet()
	if err != nil {
		log.Printf("index: error indexing directory: can't get StaticSet: %v\n", err)
		return nil
	}
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(len(sts), b.FileName(), ""))
	for _, br := range sts {
		bm.Set(keyStaticDirChild.Key(blobRef, br.String()), "1")
	}
	return nil
}
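// For testing or illustration: a minimal in-memory sink for the keys
// populateDir emits. This is a sketch and an assumption — it implements only
// the Set method used above; the real BatchMutation interface in pkg/index
// may carry more methods, which would then need stubs as well.
type memBatch struct {
	kv map[string]string // key -> value, as written by bm.Set
}

func (m *memBatch) Set(key, value string) {
	if m.kv == nil {
		m.kv = make(map[string]string)
	}
	m.kv[key] = value
}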
// cached returns a FileReader for the given file schema blobref.
// The FileReader should be closed when done reading.
func (ih *ImageHandler) cached(fileRef blob.Ref) (*schema.FileReader, error) {
	fetchSeeker := blob.SeekerFromStreamingFetcher(ih.Cache)
	fr, err := schema.NewFileReader(fetchSeeker, fileRef)
	if err != nil {
		return nil, err
	}
	if imageDebug {
		log.Printf("Image Cache: hit: %v\n", fileRef)
	}
	return fr, nil
}
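// Example use of cached (a hypothetical helper, not part of the original
// handler): stream the cached file to any io.Writer, closing the FileReader
// when done, as the doc comment above requires.
func writeCached(ih *ImageHandler, fileRef blob.Ref, w io.Writer) error {
	fr, err := ih.cached(fileRef)
	if err != nil {
		return err
	}
	defer fr.Close()
	_, err = io.Copy(w, fr)
	return err
}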
// ServeBlobRef serves a blob.
func ServeBlobRef(rw http.ResponseWriter, req *http.Request, blobRef blob.Ref, fetcher blob.StreamingFetcher) {
	if w, ok := fetcher.(blobserver.ContextWrapper); ok {
		fetcher = w.WrapContext(req)
	}
	seekFetcher := blob.SeekerFromStreamingFetcher(fetcher)
	file, size, err := seekFetcher.Fetch(blobRef)
	switch err {
	case nil:
		break
	case os.ErrNotExist:
		rw.WriteHeader(http.StatusNotFound)
		fmt.Fprintf(rw, "Blob %q not found", blobRef)
		return
	default:
		httputil.ServeError(rw, req, err)
		return
	}
	defer file.Close()
	var content io.ReadSeeker = file

	rw.Header().Set("Content-Type", "application/octet-stream")
	if req.Header.Get("Range") == "" {
		// If it's small and all UTF-8, assume it's text and
		// just render it in the browser. This is more for
		// demos/debuggability than anything else. It isn't
		// part of the spec.
		if size <= 32<<10 {
			var buf bytes.Buffer
			_, err := io.Copy(&buf, file)
			if err != nil {
				httputil.ServeError(rw, req, err)
				return
			}
			if utf8.Valid(buf.Bytes()) {
				rw.Header().Set("Content-Type", "text/plain; charset=utf-8")
			}
			content = bytes.NewReader(buf.Bytes())
		}
	}
	http.ServeContent(rw, req, "", dummyModTime, content)
}
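// A minimal way to mount ServeBlobRef on a path like /blob/sha1-xxxx. The
// URL layout and fetcher wiring here are illustrative assumptions, not the
// real server's handler registration.
func blobHandler(fetcher blob.StreamingFetcher) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
		br, ok := blob.Parse(strings.TrimPrefix(req.URL.Path, "/blob/"))
		if !ok {
			http.Error(rw, "invalid blobref", http.StatusBadRequest)
			return
		}
		ServeBlobRef(rw, req, br, fetcher)
	})
}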
// cached returns a FileReader for the given blobref, which may
// point to either a blob representing the entire thumbnail (max
// 16MB) or a file schema blob.
//
// The ReadCloser should be closed when done reading.
func (ih *ImageHandler) cached(br blob.Ref) (io.ReadCloser, error) {
	rsc, _, err := ih.Cache.FetchStreaming(br)
	if err != nil {
		return nil, err
	}
	slurp, err := ioutil.ReadAll(rsc)
	rsc.Close()
	if err != nil {
		return nil, err
	}
	// In the common case, when the scaled image itself is less than 16 MB, it's
	// all together in one blob.
	if strings.HasPrefix(magic.MIMEType(slurp), "image/") {
		thumbCacheHitFull.Add(1)
		if imageDebug {
			log.Printf("Image Cache: hit: %v\n", br)
		}
		return ioutil.NopCloser(bytes.NewReader(slurp)), nil
	}
	// For large scaled images, the cached blob is a file schema blob referencing
	// the sub-chunks.
	fileBlob, err := schema.BlobFromReader(br, bytes.NewReader(slurp))
	if err != nil {
		log.Printf("Failed to parse non-image thumbnail cache blob %v: %v", br, err)
		return nil, err
	}
	fetchSeeker := blob.SeekerFromStreamingFetcher(ih.Cache)
	fr, err := fileBlob.NewFileReader(fetchSeeker)
	if err != nil {
		log.Printf("cached(%v) NewFileReader = %v", br, err)
		return nil, err
	}
	thumbCacheHitFile.Add(1)
	if imageDebug {
		log.Printf("Image Cache: fileref hit: %v\n", br)
	}
	return fr, nil
}
func (ih *ImageHandler) storageSeekFetcher() blob.SeekFetcher {
	return blob.SeekerFromStreamingFetcher(ih.Fetcher) // TODO: pass ih.Cache?
}
// vivify verifies that all the chunks for the file described by fileblob are on the blobserver.
// It makes a planned permanode, signs it, and uploads it. It finally makes a camliContent claim
// on that permanode for fileblob, signs it, and uploads it to the blobserver.
func vivify(blobReceiver blobserver.BlobReceiveConfiger, fileblob blob.SizedRef) error {
	sf, ok := blobReceiver.(blob.StreamingFetcher)
	if !ok {
		return fmt.Errorf("BlobReceiver is not a StreamingFetcher")
	}
	fetcher := blob.SeekerFromStreamingFetcher(sf)
	fr, err := schema.NewFileReader(fetcher, fileblob.Ref)
	if err != nil {
		return fmt.Errorf("Filereader error for blobref %v: %v", fileblob.Ref.String(), err)
	}
	defer fr.Close()

	h := sha1.New()
	n, err := io.Copy(h, fr)
	if err != nil {
		return fmt.Errorf("Could not read all file of blobref %v: %v", fileblob.Ref.String(), err)
	}
	if n != fr.Size() {
		return fmt.Errorf("Could not read all file of blobref %v. Wanted %v, got %v", fileblob.Ref.String(), fr.Size(), n)
	}

	config := blobReceiver.Config()
	if config == nil {
		return errors.New("blobReceiver has no config")
	}
	hf := config.HandlerFinder
	if hf == nil {
		return errors.New("blobReceiver config has no HandlerFinder")
	}
	JSONSignRoot, sh, err := hf.FindHandlerByType("jsonsign")
	if err != nil || sh == nil {
		return errors.New("jsonsign handler not found")
	}
	sigHelper, ok := sh.(*signhandler.Handler)
	if !ok {
		return errors.New("handler is not a JSON signhandler")
	}
	discoMap := sigHelper.DiscoveryMap(JSONSignRoot)
	publicKeyBlobRef, ok := discoMap["publicKeyBlobRef"].(string)
	if !ok {
		return errors.New("Discovery: publicKeyBlobRef is missing or not a string")
	}

	// The file schema must have a modtime to vivify, as the modtime is used for all three of:
	// 1) the permanode's signature
	// 2) the camliContent attribute claim's "claimDate"
	// 3) the signature time of 2)
	claimDate, err := time.Parse(time.RFC3339, fr.FileSchema().UnixMtime)
	if err != nil {
		return fmt.Errorf("While parsing modtime for file %v: %v", fr.FileSchema().FileName, err)
	}

	permanodeBB := schema.NewHashPlannedPermanode(h)
	permanodeBB.SetSigner(blob.MustParse(publicKeyBlobRef))
	permanodeBB.SetClaimDate(claimDate)
	permanodeSigned, err := sigHelper.Sign(permanodeBB)
	if err != nil {
		return fmt.Errorf("Signing permanode %v: %v", permanodeSigned, err)
	}
	permanodeRef := blob.SHA1FromString(permanodeSigned)
	_, err = blobserver.ReceiveNoHash(blobReceiver, permanodeRef, strings.NewReader(permanodeSigned))
	if err != nil {
		return fmt.Errorf("While uploading signed permanode %v, %v: %v", permanodeRef, permanodeSigned, err)
	}

	contentClaimBB := schema.NewSetAttributeClaim(permanodeRef, "camliContent", fileblob.Ref.String())
	contentClaimBB.SetSigner(blob.MustParse(publicKeyBlobRef))
	contentClaimBB.SetClaimDate(claimDate)
	contentClaimSigned, err := sigHelper.Sign(contentClaimBB)
	if err != nil {
		return fmt.Errorf("Signing camliContent claim: %v", err)
	}
	contentClaimRef := blob.SHA1FromString(contentClaimSigned)
	_, err = blobserver.ReceiveNoHash(blobReceiver, contentClaimRef, strings.NewReader(contentClaimSigned))
	if err != nil {
		return fmt.Errorf("While uploading signed camliContent claim %v, %v: %v", contentClaimRef, contentClaimSigned, err)
	}
	return nil
}
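// Sketch of a call site (hypothetical; in reality the upload handler decides
// when a client has requested vivification): after a file schema blob is
// received, vivify is invoked with the receiver and the just-received ref,
// and a failure is logged rather than failing the upload.
func maybeVivify(receiver blobserver.BlobReceiveConfiger, sb blob.SizedRef, vivifyRequested bool) {
	if !vivifyRequested {
		return
	}
	if err := vivify(receiver, sb); err != nil {
		log.Printf("vivify of %v failed: %v", sb.Ref, err)
	}
}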
// blobref: of the file or schema blob
// b: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, bm BatchMutation) error {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := b.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	wholeRef := blob.RefFromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		tag, err := taglib.Decode(fr, fr.Size())
		if err == nil {
			indexMusic(tag, wholeRef, bm)
		} else {
			log.Print("index: error parsing tag: ", err)
		}
	}
	return nil
}
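// keepFirstN is the bounded prefix buffer used above for image sniffing
// (dimensions via images.DecodeConfig, EXIF time via schema.FileTime). A
// minimal sketch of an implementation consistent with that use: it retains
// the first N bytes and always reports a full write so the surrounding
// io.MultiWriter keeps copying.
type keepFirstN struct {
	N     int    // maximum number of bytes to retain
	Bytes []byte // the retained prefix
}

func (w *keepFirstN) Write(p []byte) (int, error) {
	if remain := w.N - len(w.Bytes); remain > 0 {
		if remain > len(p) {
			remain = len(p)
		}
		w.Bytes = append(w.Bytes, p[:remain]...)
	}
	return len(p), nil
}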
func (zh *zipHandler) storageSeekFetcher() blob.SeekFetcher {
	return blob.SeekerFromStreamingFetcher(zh.fetcher)
}
// smartFetch the things that blobs point to, not just blobs.
func smartFetch(src blob.StreamingFetcher, targ string, br blob.Ref) error {
	rc, err := fetch(src, br)
	if err != nil {
		return err
	}
	defer rc.Close()

	sniffer := index.NewBlobSniffer(br)
	_, err = io.CopyN(sniffer, rc, sniffSize)
	if err != nil && err != io.EOF {
		return err
	}
	sniffer.Parse()
	b, ok := sniffer.SchemaBlob()
	if !ok {
		if *flagVerbose {
			log.Printf("Fetching opaque data %v into %q", br, targ)
		}
		// opaque data - put it in a file
		f, err := os.Create(targ)
		if err != nil {
			return fmt.Errorf("opaque: %v", err)
		}
		defer f.Close()
		body, _ := sniffer.Body()
		r := io.MultiReader(bytes.NewReader(body), rc)
		_, err = io.Copy(f, r)
		return err
	}

	switch b.Type() {
	case "directory":
		dir := filepath.Join(targ, b.FileName())
		if *flagVerbose {
			log.Printf("Fetching directory %v into %s", br, dir)
		}
		if err := os.MkdirAll(dir, b.FileMode()); err != nil {
			return err
		}
		if err := setFileMeta(dir, b); err != nil {
			log.Print(err)
		}
		entries, ok := b.DirectoryEntries()
		if !ok {
			return fmt.Errorf("bad entries blobref in dir %v", b.BlobRef())
		}
		return smartFetch(src, dir, entries)
	case "static-set":
		if *flagVerbose {
			log.Printf("Fetching directory entries %v into %s", br, targ)
		}
		// directory entries
		const numWorkers = 10
		type work struct {
			br   blob.Ref
			errc chan<- error
		}
		members := b.StaticSetMembers()
		workc := make(chan work, len(members))
		defer close(workc)
		for i := 0; i < numWorkers; i++ {
			go func() {
				for wi := range workc {
					wi.errc <- smartFetch(src, targ, wi.br)
				}
			}()
		}
		var errcs []<-chan error
		for _, mref := range members {
			errc := make(chan error, 1)
			errcs = append(errcs, errc)
			workc <- work{mref, errc}
		}
		for _, errc := range errcs {
			if err := <-errc; err != nil {
				return err
			}
		}
		return nil
	case "file":
		seekFetcher := blob.SeekerFromStreamingFetcher(src)
		fr, err := schema.NewFileReader(seekFetcher, br)
		if err != nil {
			return fmt.Errorf("NewFileReader: %v", err)
		}
		fr.LoadAllChunks()
		defer fr.Close()

		name := filepath.Join(targ, b.FileName())
		if fi, err := os.Stat(name); err == nil && fi.Size() == fr.Size() {
			if *flagVerbose {
				log.Printf("Skipping %s; already exists.", name)
			}
			return nil
		}
		if *flagVerbose {
			log.Printf("Writing %s to %s ...", br, name)
		}
		f, err := os.Create(name)
		if err != nil {
			return fmt.Errorf("file type: %v", err)
		}
		defer f.Close()
		if _, err := io.Copy(f, fr); err != nil {
			return fmt.Errorf("Copying %s to %s: %v", br, name, err)
		}
		if err := setFileMeta(name, b); err != nil {
			log.Print(err)
		}
		return nil
	default:
		return errors.New("unknown blob type: " + b.Type())
	}
	panic("unreachable")
}
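// fetch is the small helper smartFetch opens with. A minimal sketch, assuming
// the StreamingFetcher shape used elsewhere in this code (FetchStreaming
// returning a reader, a size, and an error):
func fetch(src blob.StreamingFetcher, br blob.Ref) (io.ReadCloser, error) {
	rc, _, err := src.FetchStreaming(br)
	if err != nil {
		return nil, fmt.Errorf("Failed to fetch %v: %v", br, err)
	}
	return rc, nil
}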
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fetcher := &seekFetcherMissTracker{
		// TODO(bradfitz): cache this SeekFetcher on ix so it
		// doesn't have to be re-made each time? Probably small.
		src: blob.SeekerFromStreamingFetcher(ix.BlobSource),
	}
	defer func() {
		if err == nil {
			return
		}
		fetcher.mu.Lock()
		defer fetcher.mu.Unlock()
		if len(fetcher.missing) == 0 {
			return
		}
		// TODO(bradfitz): there was an error indexing this file, and
		// we failed to load the blobs in f.missing. Add those as dependencies
		// somewhere so when we get one of those missing blobs, we kick off
		// a re-index of this file for whenever the indexer is idle.
	}()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		//
		// We'll also want to bump the schemaVersion after this,
		// to fix anybody's index which is only partial due to
		// this old bug where it would return nil instead of doing
		// the necessary work.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Empirically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		// https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		//
		// See TODOs above, and the fetcher.missing stuff.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}
	return nil
}
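// seekFetcherMissTracker, as used above, wraps the index's SeekFetcher and
// records which blobs failed to load, so the deferred block can one day turn
// them into re-index dependencies. A sketch consistent with that usage; the
// exact Fetch signature of blob.SeekFetcher is an assumption here.
type seekFetcherMissTracker struct {
	src blob.SeekFetcher

	mu      sync.Mutex // guards missing
	missing []blob.Ref
}

func (f *seekFetcherMissTracker) Fetch(br blob.Ref) (rsc types.ReadSeekCloser, size int64, err error) {
	rsc, size, err = f.src.Fetch(br)
	if err == os.ErrNotExist {
		f.mu.Lock()
		f.missing = append(f.missing, br)
		f.mu.Unlock()
	}
	return
}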
func (dh *DownloadHandler) storageSeekFetcher() blob.SeekFetcher {
	return blob.SeekerFromStreamingFetcher(dh.Fetcher) // TODO: pass dh.Cache?
}
// smartFetch the things that blobs point to, not just blobs.
func smartFetch(src blob.StreamingFetcher, targ string, br blob.Ref) error {
	rc, err := fetch(src, br)
	if err != nil {
		return err
	}
	defer rc.Close()

	sniffer := index.NewBlobSniffer(br)
	_, err = io.CopyN(sniffer, rc, sniffSize)
	if err != nil && err != io.EOF {
		return err
	}
	sniffer.Parse()
	b, ok := sniffer.SchemaBlob()
	if !ok {
		if *flagVerbose {
			log.Printf("Fetching opaque data %v into %q", br, targ)
		}
		// opaque data - put it in a file
		f, err := os.Create(targ)
		if err != nil {
			return fmt.Errorf("opaque: %v", err)
		}
		defer f.Close()
		body, _ := sniffer.Body()
		r := io.MultiReader(bytes.NewReader(body), rc)
		_, err = io.Copy(f, r)
		return err
	}

	switch b.Type() {
	case "directory":
		dir := filepath.Join(targ, b.FileName())
		if *flagVerbose {
			log.Printf("Fetching directory %v into %s", br, dir)
		}
		if err := os.MkdirAll(dir, b.FileMode()); err != nil {
			return err
		}
		if err := setFileMeta(dir, b); err != nil {
			log.Print(err)
		}
		entries, ok := b.DirectoryEntries()
		if !ok {
			return fmt.Errorf("bad entries blobref in dir %v", b.BlobRef())
		}
		return smartFetch(src, dir, entries)
	case "static-set":
		if *flagVerbose {
			log.Printf("Fetching directory entries %v into %s", br, targ)
		}
		// directory entries
		const numWorkers = 10
		type work struct {
			br   blob.Ref
			errc chan<- error
		}
		members := b.StaticSetMembers()
		workc := make(chan work, len(members))
		defer close(workc)
		for i := 0; i < numWorkers; i++ {
			go func() {
				for wi := range workc {
					wi.errc <- smartFetch(src, targ, wi.br)
				}
			}()
		}
		var errcs []<-chan error
		for _, mref := range members {
			errc := make(chan error, 1)
			errcs = append(errcs, errc)
			workc <- work{mref, errc}
		}
		for _, errc := range errcs {
			if err := <-errc; err != nil {
				return err
			}
		}
		return nil
	case "file":
		seekFetcher := blob.SeekerFromStreamingFetcher(src)
		fr, err := schema.NewFileReader(seekFetcher, br)
		if err != nil {
			return fmt.Errorf("NewFileReader: %v", err)
		}
		fr.LoadAllChunks()
		defer fr.Close()

		name := filepath.Join(targ, b.FileName())
		if fi, err := os.Stat(name); err == nil && fi.Size() == fr.Size() {
			if *flagVerbose {
				log.Printf("Skipping %s; already exists.", name)
			}
			return nil
		}
		if *flagVerbose {
			log.Printf("Writing %s to %s ...", br, name)
		}
		f, err := os.Create(name)
		if err != nil {
			return fmt.Errorf("file type: %v", err)
		}
		defer f.Close()
		if _, err := io.Copy(f, fr); err != nil {
			return fmt.Errorf("Copying %s to %s: %v", br, name, err)
		}
		if err := setFileMeta(name, b); err != nil {
			log.Print(err)
		}
		return nil
	case "symlink":
		sf, ok := b.AsStaticFile()
		if !ok {
			return errors.New("blob is not a static file")
		}
		sl, ok := sf.AsStaticSymlink()
		if !ok {
			return errors.New("blob is not a symlink")
		}
		name := filepath.Join(targ, sl.FileName())
		if _, err := os.Lstat(name); err == nil {
			if *flagVerbose {
				log.Printf("Skipping creating symbolic link %s: A file with that name exists", name)
			}
			return nil
		}
		target := sl.SymlinkTargetString()
		if target == "" {
			return errors.New("symlink without target")
		}
		// TODO (marete): The Go docs promise that everything
		// in pkg os should work the same everywhere. Not true
		// for os.Symlink() at the moment. See what to do for
		// Windows here.
		err := os.Symlink(target, name)
		// We won't call setFileMeta for a symlink because:
		// the permissions of a symlink do not matter and Go's
		// os.Chtimes always dereferences (does not act on the
		// symlink but its target).
		return err
	default:
		return errors.New("unknown blob type: " + b.Type())
	}
	panic("unreachable")
}
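// setFileMeta, called by smartFetch for files and directories above, restores
// schema metadata onto the just-created path. A minimal sketch, assuming mode
// and modtime are all that needs applying:
func setFileMeta(name string, b *schema.Blob) error {
	if err := os.Chmod(name, b.FileMode()); err != nil {
		return err
	}
	if mt := b.ModTime(); !mt.IsZero() {
		return os.Chtimes(name, mt, mt)
	}
	return nil
}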