Пример #1
0
// readPrefixOrFile executes a given func with a reader on the passed prefix and
// falls back to passing a reader on the whole file if the func returns an error.
func readPrefixOrFile(prefix []byte, fetcher blob.Fetcher, b *schema.Blob, fn func(filePrefixReader) error) (err error) {
	pr := bytes.NewReader(prefix)
	err = fn(pr)
	if err == io.EOF || err == io.ErrUnexpectedEOF {
		var fr *schema.FileReader
		fr, err = b.NewFileReader(fetcher)
		if err == nil {
			err = fn(fr)
			fr.Close()
		}
	}
	return err
}
Пример #2
0
func setFileMeta(name string, blob *schema.Blob) error {
	err1 := os.Chmod(name, blob.FileMode())
	var err2 error
	if mt := blob.ModTime(); !mt.IsZero() {
		err2 = os.Chtimes(name, mt, mt)
	}
	// TODO: we previously did os.Chown here, but it's rarely wanted,
	// then the schema.Blob refactor broke it, so it's gone.
	// Add it back later once we care?
	for _, err := range []error{err1, err2} {
		if err != nil {
			return err
		}
	}
	return nil
}
Пример #3
0
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateDir(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) error {
	blobRef := b.BlobRef()
	// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto
	// StaticFile/StaticDirectory or something.

	dr, err := b.NewDirReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		log.Printf("index: error indexing directory, creating NewDirReader %s: %v", blobRef, err)
		return nil
	}
	sts, err := dr.StaticSet()
	if err != nil {
		log.Printf("index: error indexing directory: can't get StaticSet: %v\n", err)
		return nil
	}

	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(len(sts), b.FileName(), "", blob.Ref{}))
	for _, br := range sts {
		mm.Set(keyStaticDirChild.Key(blobRef, br.String()), "1")
	}
	return nil
}
Пример #4
0
// blobref: of the file or schema blob
//      ss: the parsed file schema blob
//      bm: keys to populate
func (ix *Index) populateDir(b *schema.Blob, bm BatchMutation) error {
	blobRef := b.BlobRef()
	// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto

	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	dr, err := b.NewDirReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		log.Printf("index: error indexing directory, creating NewDirReader %s: %v", blobRef, err)
		return nil
	}
	sts, err := dr.StaticSet()
	if err != nil {
		log.Printf("index: error indexing directory: can't get StaticSet: %v\n", err)
		return nil
	}

	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(len(sts), b.FileName(), ""))
	for _, br := range sts {
		bm.Set(keyStaticDirChild.Key(blobRef, br.String()), "1")
	}
	return nil
}
Пример #5
0
func (ix *Index) populateClaim(fetcher *missTrackFetcher, b *schema.Blob, mm *mutationMap) error {
	br := b.BlobRef()

	claim, ok := b.AsClaim()
	if !ok {
		// Skip bogus claim with malformed permanode.
		return nil
	}

	vr := jsonsign.NewVerificationRequest(b.JSON(), blob.NewSerialFetcher(ix.KeyFetcher, fetcher))
	if !vr.Verify() {
		// TODO(bradfitz): ask if the vr.Err.(jsonsign.Error).IsPermanent() and retry
		// later if it's not permanent? or maybe do this up a level?
		if vr.Err != nil {
			return vr.Err
		}
		return errors.New("index: populateClaim verification failure")
	}
	verifiedKeyId := vr.SignerKeyId
	mm.Set("signerkeyid:"+vr.CamliSigner.String(), verifiedKeyId)

	if claim.ClaimType() == string(schema.DeleteClaim) {
		if err := ix.populateDeleteClaim(claim, vr, mm); err != nil {
			return err
		}
		mm.noteDelete(claim)
		return nil
	}

	pnbr := claim.ModifiedPermanode()
	if !pnbr.Valid() {
		// A different type of claim; not modifying a permanode.
		return nil
	}

	attr, value := claim.Attribute(), claim.Value()
	recentKey := keyRecentPermanode.Key(verifiedKeyId, claim.ClaimDateString(), br)
	mm.Set(recentKey, pnbr.String())
	claimKey := keyPermanodeClaim.Key(pnbr, verifiedKeyId, claim.ClaimDateString(), br)
	mm.Set(claimKey, keyPermanodeClaim.Val(claim.ClaimType(), attr, value, vr.CamliSigner))

	if strings.HasPrefix(attr, "camliPath:") {
		targetRef, ok := blob.Parse(value)
		if ok {
			// TODO: deal with set-attribute vs. del-attribute
			// properly? I think we get it for free when
			// del-attribute has no Value, but we need to deal
			// with the case where they explicitly delete the
			// current value.
			suffix := attr[len("camliPath:"):]
			active := "Y"
			if claim.ClaimType() == "del-attribute" {
				active = "N"
			}
			baseRef := pnbr
			claimRef := br

			key := keyPathBackward.Key(verifiedKeyId, targetRef, claimRef)
			val := keyPathBackward.Val(claim.ClaimDateString(), baseRef, active, suffix)
			mm.Set(key, val)

			key = keyPathForward.Key(verifiedKeyId, baseRef, suffix, claim.ClaimDateString(), claimRef)
			val = keyPathForward.Val(active, targetRef)
			mm.Set(key, val)
		}
	}

	if claim.ClaimType() != string(schema.DelAttributeClaim) && IsIndexedAttribute(attr) {
		key := keySignerAttrValue.Key(verifiedKeyId, attr, value, claim.ClaimDateString(), br)
		mm.Set(key, keySignerAttrValue.Val(pnbr))
	}

	if IsBlobReferenceAttribute(attr) {
		targetRef, ok := blob.Parse(value)
		if ok {
			key := keyEdgeBackward.Key(targetRef, pnbr, br)
			mm.Set(key, keyEdgeBackward.Val("permanode", ""))
		}
	}

	return nil
}
Пример #6
0
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 512 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes))
		// If our optimistic 512KB in-memory prefix from above was too short to get the dimensions, pass the whole thing instead and try again.
		if err == io.ErrUnexpectedEOF {
			var fr *schema.FileReader
			fr, err = b.NewFileReader(fetcher)
			if err == nil {
				conf, err = images.DecodeConfig(fr)
				fr.Close()
			}
		}
		if err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		// TODO(mpl): find (generate?) more broken EXIF images to experiment with.
		err = indexEXIF(wholeRef, bytes.NewReader(imageBuf.Bytes), mm)
		if err == io.EOF {
			var fr *schema.FileReader
			fr, err = b.NewFileReader(fetcher)
			if err == nil {
				err = indexEXIF(wholeRef, fr, mm)
				fr.Close()
			}
		}
		if err != nil {
			log.Printf("error parsing EXIF: %v", err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime, wholeRef))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
Пример #7
0
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Emperically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		//   https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
Пример #8
0
// blobref: of the file or schema blob
//      blob: the parsed file schema blob
//      bm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, bm BatchMutation) error {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	seekFetcher := blob.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := b.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times.  Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing).  Both options suck.  For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	wholeRef := blob.RefFromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		tag, err := taglib.Decode(fr, fr.Size())
		if err == nil {
			indexMusic(tag, wholeRef, bm)
		} else {
			log.Print("index: error parsing tag: ", err)
		}
	}

	return nil
}
Пример #9
0
func (st *shareTester) put(blob *schema.Blob) {
	st.putRaw(blob.BlobRef(), blob.JSON())
}
Пример #10
0
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fetcher := &seekFetcherMissTracker{
		// TODO(bradfitz): cache this SeekFetcher on ix so it
		// it's have to be re-made each time? Probably small.
		src: blob.SeekerFromStreamingFetcher(ix.BlobSource),
	}
	defer func() {
		if err == nil {
			return
		}
		fetcher.mu.Lock()
		defer fetcher.mu.Unlock()
		if len(fetcher.missing) == 0 {
			return
		}
		// TODO(bradfitz): there was an error indexing this file, and
		// we failed to load the blobs in f.missing.  Add those as dependencies
		// somewhere so when we get one of those missing blobs, we kick off
		// a re-index of this file for whenever the indexer is idle.
	}()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		//
		// We'll also want to bump the schemaVersion after this,
		// to fix anybody's index which is only partial due to
		// this old bug where it would return nil instead of doing
		// the necessary work.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Emperically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		//   https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times.  Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing).  Both options suck.  For now just
		// log and act like all's okay.
		//
		// See TODOs above, and the fetcher.missing stuff.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
Пример #11
0
// b: the parsed file schema blob
// mm: keys to populate
func (ix *Index) populateFile(fetcher blob.Fetcher, b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		return err
	}
	defer fr.Close()
	mime, mr := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 512 << 10}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, mr)
	if err != nil {
		return err
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		var conf images.Config
		decodeConfig := func(r filePrefixReader) error {
			conf, err = images.DecodeConfig(r)
			return err
		}
		if err := readPrefixOrFile(imageBuf.Bytes, fetcher, b, decodeConfig); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}

		var ft time.Time
		fileTime := func(r filePrefixReader) error {
			ft, err = schema.FileTime(r)
			return err
		}
		if err = readPrefixOrFile(imageBuf.Bytes, fetcher, b, fileTime); err == nil {
			times = append(times, ft)
		}
		if exifDebug {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		// TODO(mpl): find (generate?) more broken EXIF images to experiment with.
		indexEXIFData := func(r filePrefixReader) error {
			return indexEXIF(wholeRef, r, mm)
		}
		if err = readPrefixOrFile(imageBuf.Bytes, fetcher, b, indexEXIFData); err != nil {
			if exifDebug {
				log.Printf("error parsing EXIF: %v", err)
			}
		}
	}

	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime, wholeRef))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
Пример #12
0
func indexSchemaBlob(fsck *db.DB, s *schema.Blob) (needs []string) {
	camliType := s.Type()
	switch camliType {
	case "static-set":
		for _, r := range s.StaticSetMembers() {
			needs = append(needs, r.String())
		}
	case "bytes":
		fallthrough
	case "file":
		for i, bp := range s.ByteParts() {
			ok := false
			if r := bp.BlobRef; r.Valid() {
				needs = append(needs, r.String())
				ok = true
			}
			if r := bp.BytesRef; r.Valid() {
				needs = append(needs, r.String())
				ok = true
			}
			if !ok {
				log.Printf("%s (%s): no valid ref in part %d", s.BlobRef(), camliType, i)
			}
		}
	case "directory":
		switch r, ok := s.DirectoryEntries(); {
		case !ok:
			log.Printf("%s (%s): bad entries", s.BlobRef(), camliType)
		case !r.Valid():
			log.Printf("%s (%s): invalid entries", s.BlobRef(), camliType)
		default:
			needs = append(needs, r.String())
		}
	}
	return
}
Пример #13
0
// blobref: of the file or schema blob
//      ss: the parsed file schema blob
//      bm: keys to populate
func (ix *Index) populateFile(blob *schema.Blob, bm BatchMutation) error {
	// TODO: move the NewFileReader off of blob.

	blobRef := blob.BlobRef()
	seekFetcher := blobref.SeekerFromStreamingFetcher(ix.BlobSource)
	fr, err := blob.NewFileReader(seekFetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MimeTypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var withCopyErr func(error) // or nil
	if strings.HasPrefix(mime, "image/") {
		pr, pw := io.Pipe()
		copyDest = io.MultiWriter(copyDest, pw)
		confc := make(chan *image.Config, 1)
		go func() {
			conf, _, err := image.DecodeConfig(pr)
			defer io.Copy(ioutil.Discard, pr)
			if err == nil {
				confc <- &conf
			} else {
				confc <- nil
			}
		}()
		withCopyErr = func(err error) {
			pw.CloseWithError(err)
			if conf := <-confc; conf != nil {
				bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
			}
		}
	}

	size, err := io.Copy(copyDest, reader)
	if f := withCopyErr; f != nil {
		f(err)
	}
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times.  Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing).  Both options suck.  For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	wholeRef := blobref.FromHash(sha1)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, blob.FileName(), mime))
	return nil
}