// blobref: of the file or schema blob // ss: the parsed file schema blob // bm: keys to populate func (ix *Index) populateFile(blobRef *blobref.BlobRef, ss *schema.Superset, bm BatchMutation) error { seekFetcher, err := blobref.SeekerFromStreamingFetcher(ix.BlobSource) if err != nil { return err } sha1 := sha1.New() fr, err := ss.NewFileReader(seekFetcher) if err != nil { // TODO(bradfitz): propagate up a transient failure // error type, so we can retry indexing files in the // future if blobs are only temporarily unavailable. // Basically the same as the TODO just below. log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err) return nil } mime, reader := magic.MimeTypeFromReader(fr) size, err := io.Copy(sha1, reader) if err != nil { // TODO: job scheduling system to retry this spaced // out max n times. Right now our options are // ignoring this error (forever) or returning the // error and making the indexing try again (likely // forever failing). Both options suck. For now just // log and act like all's okay. log.Printf("index: error indexing file %s: %v", blobRef, err) return nil } wholeRef := blobref.FromHash("sha1", sha1) bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1") bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, ss.FileName, mime)) return nil }
func showMIME(file string) { f, err := os.Open(file) if err != nil { log.Fatal(err) } mime, _ := magic.MimeTypeFromReader(f) fmt.Println(mime) }
// populateFile indexes the file described by the schema blob ss,
// writing the resulting rows into bm. For image files it additionally
// records the image dimensions.
//
// blobRef: of the file or schema blob
// ss: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blobRef *blobref.BlobRef, ss *schema.Superset, bm BatchMutation) error {
	seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := ss.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	// reader replays the bytes consumed while sniffing, so the copy
	// below hashes the entire file.
	mime, reader := magic.MimeTypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var withCopyErr func(error) // or nil
	if strings.HasPrefix(mime, "image/") {
		// Tee the file bytes into a pipe so image.DecodeConfig can
		// read the header concurrently while we hash the stream.
		pr, pw := io.Pipe()
		copyDest = io.MultiWriter(copyDest, pw)
		// Buffered (cap 1) so the goroutine's send never blocks,
		// even if withCopyErr is never called.
		confc := make(chan *image.Config, 1)
		go func() {
			conf, _, err := image.DecodeConfig(pr)
			// Keep draining pr after the header is decoded so the
			// MultiWriter copy in the main flow doesn't block on pw.
			defer io.Copy(ioutil.Discard, pr)
			if err == nil {
				confc <- &conf
			} else {
				confc <- nil
			}
		}()
		withCopyErr = func(err error) {
			// Unblock the goroutine's reads (and its drain), then
			// collect the decoded config, if any.
			pw.CloseWithError(err)
			if conf := <-confc; conf != nil {
				bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
			}
		}
	}

	size, err := io.Copy(copyDest, reader)
	// Must run even on success (with err == nil) to close the pipe
	// and harvest the image config.
	if f := withCopyErr; f != nil {
		f(err)
	}
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	wholeRef := blobref.FromHash("sha1", sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, ss.FileName, mime))
	return nil
}
func showMIME() { file := flag.Arg(0) f, err := os.Open(file) if err != nil { log.Fatal(err) } mime, _ := magic.MimeTypeFromReader(f) fmt.Println(mime) }
// ServeHTTP serves the contents of the file whose schema blob is file.
// GET streams the file body; HEAD optionally verifies the contents
// against the "verifycontents" blobref form value and reports a match
// via the X-Camli-Contents response header.
func (dh *DownloadHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request, file *blobref.BlobRef) {
	if req.Method != "GET" && req.Method != "HEAD" {
		http.Error(rw, "Invalid download method", 400)
		return
	}
	fr, err := schema.NewFileReader(dh.storageSeekFetcher(), file)
	if err != nil {
		http.Error(rw, "Can't serve file: "+err.Error(), 500)
		return
	}
	defer fr.Close()
	// NOTE: shadows the schema package for the rest of this function.
	schema := fr.FileSchema()
	// All headers must be set before any body bytes are written below.
	rw.Header().Set("Content-Length", fmt.Sprintf("%d", schema.SumPartsSize()))

	// Sniffing consumes a prefix of fr; reader replays the full stream.
	mimeType, reader := magic.MimeTypeFromReader(fr)
	if dh.ForceMime != "" {
		mimeType = dh.ForceMime
	}
	if mimeType == "" {
		mimeType = "application/octet-stream"
	}
	rw.Header().Set("Content-Type", mimeType)
	if mimeType == "application/octet-stream" {
		// Chrome seems to silently do nothing on
		// application/octet-stream unless this is set.
		// Maybe it's confused by lack of URL it recognizes
		// along with lack of mime type?
		rw.Header().Set("Content-Disposition", "attachment; filename=file-"+file.String()+".dat")
	}

	if req.Method == "HEAD" {
		// Optional content verification: hash the whole file with the
		// algorithm implied by the supplied blobref and compare.
		vbr := blobref.Parse(req.FormValue("verifycontents"))
		if vbr == nil {
			return
		}
		hash := vbr.Hash()
		if hash == nil {
			return
		}
		io.Copy(hash, reader) // ignore errors, caught later
		if vbr.HashMatches(hash) {
			rw.Header().Set("X-Camli-Contents", vbr.String())
		}
		return
	}

	n, err := io.Copy(rw, reader)
	log.Printf("For %q request of %s: copied %d, %v", req.Method, req.URL.Path, n, err)
	if err != nil {
		// Headers are already sent; nothing to do but log.
		log.Printf("error serving download of file schema %s: %v", file, err)
		return
	}
	// Sanity check: bytes sent should equal the advertised size.
	if size := schema.SumPartsSize(); n != int64(size) {
		log.Printf("error serving download of file schema %s: sent %d, expected size of %d", file, n, size)
		return
	}
}
// populateFile indexes the file described by blob, writing rows into bm.
// For image files it additionally records the image dimensions and any
// EXIF-derived file time.
//
// blob: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, blob.ModTime())

	blobRef := blob.BlobRef()
	seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := blob.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	// reader replays the sniffed prefix, so the hash sees every byte.
	mime, reader := magic.MimeTypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Buffer the first 256 KiB so the image header / EXIF data can
		// be re-parsed after the streaming copy, without a pipe.
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		// NOTE(review): both branches log the same message (success and
		// failure) — looks like debug logging left in; confirm intent.
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err)
		}
	}

	// Collect the non-zero times, sort them, and encode as either a
	// single RFC 3339 time or an "oldest,newest" pair.
	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	wholeRef := blobref.FromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, blob.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))
	return nil
}