// This is the simple 1MB chunk version. The rolling checksum version is below.
func WriteFileMap(bs blobserver.StatReceiver, fileMap map[string]interface{}, r io.Reader) (*blobref.BlobRef, error) {
	parts, size := []BytesPart{}, int64(0)

	buf := new(bytes.Buffer)
	for {
		buf.Reset()
		n, err := io.Copy(buf, io.LimitReader(r, 1<<20))
		if err != nil {
			return nil, err
		}
		if n == 0 {
			break
		}

		hash := crypto.SHA1.New()
		io.Copy(hash, bytes.NewBuffer(buf.Bytes()))
		br := blobref.FromHash("sha1", hash)
		hasBlob, err := serverHasBlob(bs, br)
		if err != nil {
			return nil, err
		}
		if !hasBlob {
			sb, err := bs.ReceiveBlob(br, buf)
			if err != nil {
				return nil, err
			}
			if expect := (blobref.SizedBlobRef{br, n}); !expect.Equal(sb) {
				return nil, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb)
			}
		}

		size += n
		parts = append(parts, BytesPart{
			BlobRef: br,
			Size:    uint64(n),
			Offset:  0, // into BlobRef to read from (not of dest)
		})
	}

	err := PopulateParts(fileMap, size, parts)
	if err != nil {
		return nil, err
	}

	json, err := MapToCamliJSON(fileMap)
	if err != nil {
		return nil, err
	}
	br := blobref.SHA1FromString(json)
	sb, err := bs.ReceiveBlob(br, strings.NewReader(json))
	if err != nil {
		return nil, err
	}
	if expect := (blobref.SizedBlobRef{br, int64(len(json))}); !expect.Equal(sb) {
		return nil, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb)
	}

	return br, nil
}
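// Usage sketch (not from the original source): feeding a local file through
// WriteFileMap. The fileMap literal below is only an illustrative seed; real
// callers build it via the schema package's helpers.
func uploadFileSketch(bs blobserver.StatReceiver, path string) (*blobref.BlobRef, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	fileMap := map[string]interface{}{
		"camliVersion": 1,
		"camliType":    "file",
		"fileName":     filepath.Base(path),
	}
	return WriteFileMap(bs, fileMap, f)
}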
// blobref: of the file or schema blob
// ss: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blobRef *blobref.BlobRef, ss *schema.Superset, bm BatchMutation) error {
	seekFetcher, err := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	if err != nil {
		return err
	}

	sha1 := sha1.New()
	fr, err := ss.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	mime, reader := magic.MimeTypeFromReader(fr)

	size, err := io.Copy(sha1, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	wholeRef := blobref.FromHash("sha1", sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, ss.FileName, mime))
	return nil
}
// This is the simple 1MB chunk version. The rolling checksum version is below.
func writeFileMapOld(bs blobserver.StatReceiver, file *Builder, r io.Reader) (*blobref.BlobRef, error) {
	parts, size := []BytesPart{}, int64(0)

	var buf bytes.Buffer
	for {
		buf.Reset()
		n, err := io.Copy(&buf, io.LimitReader(r, maxBlobSize))
		if err != nil {
			return nil, err
		}
		if n == 0 {
			break
		}

		hash := blobref.NewHash()
		io.Copy(hash, bytes.NewReader(buf.Bytes()))
		br := blobref.FromHash(hash)
		hasBlob, err := serverHasBlob(bs, br)
		if err != nil {
			return nil, err
		}
		if !hasBlob {
			sb, err := bs.ReceiveBlob(br, &buf)
			if err != nil {
				return nil, err
			}
			if expect := (blobref.SizedBlobRef{br, n}); !expect.Equal(sb) {
				return nil, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb)
			}
		}

		size += n
		parts = append(parts, BytesPart{
			BlobRef: br,
			Size:    uint64(n),
			Offset:  0, // into BlobRef to read from (not of dest)
		})
	}

	err := file.PopulateParts(size, parts)
	if err != nil {
		return nil, err
	}

	json := file.Blob().JSON()
	br := blobref.SHA1FromString(json)
	sb, err := bs.ReceiveBlob(br, strings.NewReader(json))
	if err != nil {
		return nil, err
	}
	if expect := (blobref.SizedBlobRef{br, int64(len(json))}); !expect.Equal(sb) {
		return nil, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb)
	}

	return br, nil
}
// blobref: of the file or schema blob
// ss: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blobRef *blobref.BlobRef, ss *schema.Superset, bm BatchMutation) error {
	seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := ss.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MimeTypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var withCopyErr func(error) // or nil
	if strings.HasPrefix(mime, "image/") {
		pr, pw := io.Pipe()
		copyDest = io.MultiWriter(copyDest, pw)
		confc := make(chan *image.Config, 1)
		go func() {
			conf, _, err := image.DecodeConfig(pr)
			defer io.Copy(ioutil.Discard, pr)
			if err == nil {
				confc <- &conf
			} else {
				confc <- nil
			}
		}()
		withCopyErr = func(err error) {
			pw.CloseWithError(err)
			if conf := <-confc; conf != nil {
				bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
			}
		}
	}

	size, err := io.Copy(copyDest, reader)
	if f := withCopyErr; f != nil {
		f(err)
	}
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	wholeRef := blobref.FromHash("sha1", sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, ss.FileName, mime))
	return nil
}
func blobDetails(contents io.ReadSeeker) (bref *blobref.BlobRef, size int64, err error) {
	s1 := sha1.New()
	contents.Seek(0, 0)
	size, err = io.Copy(s1, contents)
	if err == nil {
		bref = blobref.FromHash(s1)
	}
	contents.Seek(0, 0)
	return
}
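// Usage sketch (not from the original source): bytes.Reader satisfies
// io.ReadSeeker, so blobDetails can hash an in-memory payload directly.
func blobDetailsExample() {
	br, size, err := blobDetails(bytes.NewReader([]byte("hello, camli\n")))
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("blob %v, %d bytes", br, size)
}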
func (d *defaultStatHasher) Hash(fileName string) (*blobref.BlobRef, error) {
	s1 := sha1.New()
	file, err := os.Open(fileName)
	if err != nil {
		return nil, err
	}
	defer file.Close()
	_, err = io.Copy(s1, file)
	if err != nil {
		return nil, err
	}
	return blobref.FromHash("sha1", s1), nil
}
// Blob builds the Blob. The builder continues to be usable after a call to Blob.
func (bb *Builder) Blob() *Blob {
	json, err := mapJSON(bb.m)
	if err != nil {
		panic(err)
	}
	ss, err := parseSuperset(strings.NewReader(json))
	if err != nil {
		panic(err)
	}
	h := blobref.NewHash()
	h.Write([]byte(json))
	return &Blob{
		str: json,
		ss:  ss,
		br:  blobref.FromHash(h),
	}
}
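// Usage sketch (not from the original source): illustrates the doc comment
// above. Takes an already-populated *Builder; since Blob does not consume the
// builder, calling it twice over unchanged content yields the same ref.
func blobTwiceSketch(bb *Builder) {
	first := bb.Blob()
	second := bb.Blob() // bb remains usable between and after calls
	log.Printf("stable ref: %v == %v", first.BlobRef(), second.BlobRef())
}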
func stdinBlobHandle() (uh *client.UploadHandle, err error) {
	var buf bytes.Buffer
	size, err := io.Copy(&buf, cmdmain.Stdin)
	if err != nil {
		return
	}
	// TODO(bradfitz,mpl): limit this buffer size?
	file := buf.Bytes()
	h := blobref.NewHash()
	size, err = io.Copy(h, bytes.NewReader(file))
	if err != nil {
		return
	}
	return &client.UploadHandle{
		BlobRef:  blobref.FromHash(h),
		Size:     size,
		Contents: io.LimitReader(bytes.NewReader(file), size),
	}, nil
}
func stdinBlobHandle() (uh *client.UploadHandle, err error) {
	var buf bytes.Buffer
	size, err := io.Copy(&buf, stdin)
	if err != nil {
		return
	}
	// TODO(bradfitz,mpl): limit this buffer size?
	file := buf.Bytes()
	s1 := sha1.New()
	// Note: this drains buf, but file still references the same backing
	// array, so its bytes remain readable below.
	size, err = io.Copy(s1, &buf)
	if err != nil {
		return
	}
	return &client.UploadHandle{
		BlobRef:  blobref.FromHash("sha1", s1),
		Size:     size,
		Contents: io.LimitReader(bytes.NewReader(file), size),
	}, nil
}
func (tb *Blob) BlobRef() *blobref.BlobRef {
	h := sha1.New()
	h.Write([]byte(tb.Contents))
	return blobref.FromHash(h)
}
// blobref: of the file or schema blob
// blob: the parsed file schema blob
// bm: keys to populate
func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, blob.ModTime())

	blobRef := blob.BlobRef()
	seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := blob.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", blob.FileName(), ft, err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	wholeRef := blobref.FromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, blob.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))
	return nil
}
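// keepFirstN is referenced above but not defined in this excerpt. A minimal
// sketch of what it plausibly looks like: an io.Writer that retains only the
// first N bytes written through it (enough for image config and EXIF headers)
// while reporting every write as fully consumed, so the MultiWriter copy
// above is never short-circuited.
type keepFirstN struct {
	N     int    // maximum number of bytes to retain
	Bytes []byte // the first N (or fewer) bytes seen so far
}

func (w *keepFirstN) Write(p []byte) (int, error) {
	if remain := w.N - len(w.Bytes); remain > 0 {
		if remain > len(p) {
			remain = len(p)
		}
		w.Bytes = append(w.Bytes, p[:remain]...)
	}
	return len(p), nil // never a short write; extra bytes are discarded
}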
func (tb *testBlob) BlobRef() *blobref.BlobRef {
	h := sha1.New()
	h.Write([]byte(tb.val))
	return blobref.FromHash(h)
}