Exemple #1
0
// populateDir adds index rows for the directory schema blob b to bm.
//
// It writes one keyFileInfo row for the directory (built from the number of
// static-set members, the directory's file name and an empty third value)
// and one keyStaticDirChild row per member of the directory's static set.
//
// Failing to create the directory reader or to load its static set is only
// logged; the function returns nil on every path.
func (ix *Index) populateDir(b *schema.Blob, bm BatchMutation) error {
	dirRef := b.BlobRef()
	// TODO(bradfitz): move the NewDirReader and FileName method off *schema.Blob and onto

	dirReader, err := b.NewDirReader(blob.SeekerFromStreamingFetcher(ix.BlobSource))
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		log.Printf("index: error indexing directory, creating NewDirReader %s: %v", dirRef, err)
		return nil
	}

	children, err := dirReader.StaticSet()
	if err != nil {
		log.Printf("index: error indexing directory: can't get StaticSet: %v\n", err)
		return nil
	}

	infoKey := keyFileInfo.Key(dirRef)
	bm.Set(infoKey, keyFileInfo.Val(len(children), b.FileName(), ""))
	for i := range children {
		bm.Set(keyStaticDirChild.Key(dirRef, children[i].String()), "1")
	}
	return nil
}
Exemple #2
0
// cached opens the file schema blob fileRef through ih.Cache and returns a
// *schema.FileReader for it. Any error from schema.NewFileReader is returned
// unchanged. A hit is logged when imageDebug is set.
//
// The caller should close the returned FileReader when done reading.
func (ih *ImageHandler) cached(fileRef blob.Ref) (*schema.FileReader, error) {
	reader, err := schema.NewFileReader(blob.SeekerFromStreamingFetcher(ih.Cache), fileRef)
	if err != nil {
		return nil, err
	}

	if imageDebug {
		log.Printf("Image Cache: hit: %v\n", fileRef)
	}
	return reader, nil
}
Exemple #3
0
// ServeBlobRef writes the blob identified by blobRef to rw.
//
// If fetcher implements blobserver.ContextWrapper it is first wrapped with
// the request. A fetch error equal to os.ErrNotExist yields a 404 response;
// any other fetch error is passed to httputil.ServeError. The response is
// served through http.ServeContent with Content-Type
// application/octet-stream, except that a blob of at most 32 KiB requested
// without a Range header and consisting of valid UTF-8 is served as
// text/plain.
func ServeBlobRef(rw http.ResponseWriter, req *http.Request, blobRef blob.Ref, fetcher blob.StreamingFetcher) {
	if wrapper, ok := fetcher.(blobserver.ContextWrapper); ok {
		fetcher = wrapper.WrapContext(req)
	}

	file, size, err := blob.SeekerFromStreamingFetcher(fetcher).Fetch(blobRef)
	if err == os.ErrNotExist {
		rw.WriteHeader(http.StatusNotFound)
		fmt.Fprintf(rw, "Blob %q not found", blobRef)
		return
	}
	if err != nil {
		httputil.ServeError(rw, req, err)
		return
	}
	defer file.Close()

	var content io.ReadSeeker = file
	rw.Header().Set("Content-Type", "application/octet-stream")

	// If it's small and all UTF-8, assume it's text and just render it in
	// the browser. This is more for demos/debuggability than anything
	// else; it isn't part of the spec.
	if req.Header.Get("Range") == "" && size <= 32<<10 {
		var body bytes.Buffer
		if _, copyErr := io.Copy(&body, file); copyErr != nil {
			httputil.ServeError(rw, req, copyErr)
			return
		}
		data := body.Bytes()
		if utf8.Valid(data) {
			rw.Header().Set("Content-Type", "text/plain; charset=utf-8")
		}
		// The file has been consumed, so serve from the in-memory copy.
		content = bytes.NewReader(data)
	}

	http.ServeContent(rw, req, "", dummyModTime, content)
}
Exemple #4
0
// cached returns a reader for the cached thumbnail identified by br, which
// may point to either a blob holding the entire thumbnail (max 16MB) or a
// file schema blob referencing the thumbnail's chunks.
//
// The blob is fetched from ih.Cache and read fully into memory. If its
// sniffed MIME type starts with "image/", the bytes are returned directly.
// Otherwise the bytes are parsed as a schema blob and a file reader over
// ih.Cache is returned. Fetch, read, parse and reader-creation errors are
// returned to the caller.
//
// The ReadCloser should be closed when done reading.
func (ih *ImageHandler) cached(br blob.Ref) (io.ReadCloser, error) {
	rsc, _, err := ih.Cache.FetchStreaming(br)
	if err != nil {
		return nil, err
	}
	slurp, err := ioutil.ReadAll(rsc)
	// Close before checking the read error so the blob is released on
	// both the success and the failure path.
	rsc.Close()
	if err != nil {
		return nil, err
	}
	// In the common case, when the scaled image itself is less than 16 MB, it's
	// all together in one blob.
	if strings.HasPrefix(magic.MIMEType(slurp), "image/") {
		thumbCacheHitFull.Add(1)
		if imageDebug {
			log.Printf("Image Cache: hit: %v\n", br)
		}
		return ioutil.NopCloser(bytes.NewReader(slurp)), nil
	}

	// For large scaled images, the cached blob is a file schema blob referencing
	// the sub-chunks.
	fileBlob, err := schema.BlobFromReader(br, bytes.NewReader(slurp))
	if err != nil {
		log.Printf("Failed to parse non-image thumbnail cache blob %v: %v", br, err)
		return nil, err
	}
	fetchSeeker := blob.SeekerFromStreamingFetcher(ih.Cache)
	fr, err := fileBlob.NewFileReader(fetchSeeker)
	if err != nil {
		// br is a blob ref, not an integer: format it with %v like the
		// other log lines in this function.
		log.Printf("cached(%v) NewFileReader = %v", br, err)
		return nil, err
	}
	thumbCacheHitFile.Add(1)
	if imageDebug {
		log.Printf("Image Cache: fileref hit: %v\n", br)
	}
	return fr, nil
}
Exemple #5
0
// storageSeekFetcher returns a blob.SeekFetcher built from ih.Fetcher.
func (ih *ImageHandler) storageSeekFetcher() blob.SeekFetcher {
	// TODO: pass ih.Cache?
	seeker := blob.SeekerFromStreamingFetcher(ih.Fetcher)
	return seeker
}
Exemple #6
0
// vivify verifies that all the chunks for the file described by fileblob are on the blobserver.
// It makes a planned permanode, signs it, and uploads it. It finally makes a camliContent claim
// on that permanode for fileblob, signs it, and uploads it to the blobserver.
//
// blobReceiver must also implement blob.StreamingFetcher, and its config
// must expose a HandlerFinder that can locate a "jsonsign" handler of type
// *signhandler.Handler. The file schema's UnixMtime must parse as RFC 3339.
// An error is returned as soon as any of these requirements, the chunk
// verification, a signing step or an upload fails.
func vivify(blobReceiver blobserver.BlobReceiveConfiger, fileblob blob.SizedRef) error {
	sf, ok := blobReceiver.(blob.StreamingFetcher)
	if !ok {
		return fmt.Errorf("BlobReceiver is not a StreamingFetcher")
	}
	fetcher := blob.SeekerFromStreamingFetcher(sf)
	fr, err := schema.NewFileReader(fetcher, fileblob.Ref)
	if err != nil {
		return fmt.Errorf("Filereader error for blobref %v: %v", fileblob.Ref.String(), err)
	}
	defer fr.Close()

	// Reading the whole file both checks that every chunk is fetchable
	// and feeds the hash used for the planned permanode.
	h := sha1.New()
	n, err := io.Copy(h, fr)
	if err != nil {
		return fmt.Errorf("Could not read all file of blobref %v: %v", fileblob.Ref.String(), err)
	}
	if n != fr.Size() {
		return fmt.Errorf("Could not read all file of blobref %v. Wanted %v, got %v", fileblob.Ref.String(), fr.Size(), n)
	}

	config := blobReceiver.Config()
	if config == nil {
		return errors.New("blobReceiver has no config")
	}
	hf := config.HandlerFinder
	if hf == nil {
		return errors.New("blobReceiver config has no HandlerFinder")
	}
	JSONSignRoot, sh, err := hf.FindHandlerByType("jsonsign")
	if err != nil || sh == nil {
		return errors.New("jsonsign handler not found")
	}
	sigHelper, ok := sh.(*signhandler.Handler)
	if !ok {
		return errors.New("handler is not a JSON signhandler")
	}
	discoMap := sigHelper.DiscoveryMap(JSONSignRoot)
	publicKeyBlobRef, ok := discoMap["publicKeyBlobRef"].(string)
	if !ok {
		// err is always nil at this point, so report what is actually
		// wrong instead of formatting a nil error.
		return errors.New("Discovery: publicKeyBlobRef not found or not a string")
	}

	// The file schema must have a modtime to vivify, as the modtime is used for all three of:
	// 1) the permanode's signature
	// 2) the camliContent attribute claim's "claimDate"
	// 3) the signature time of 2)
	claimDate, err := time.Parse(time.RFC3339, fr.FileSchema().UnixMtime)
	if err != nil {
		return fmt.Errorf("While parsing modtime for file %v: %v", fr.FileSchema().FileName, err)
	}

	permanodeBB := schema.NewHashPlannedPermanode(h)
	permanodeBB.SetSigner(blob.MustParse(publicKeyBlobRef))
	permanodeBB.SetClaimDate(claimDate)
	permanodeSigned, err := sigHelper.Sign(permanodeBB)
	if err != nil {
		// The signed string is not meaningful when Sign fails, so it
		// is left out of the message.
		return fmt.Errorf("Signing permanode: %v", err)
	}
	permanodeRef := blob.SHA1FromString(permanodeSigned)
	_, err = blobserver.ReceiveNoHash(blobReceiver, permanodeRef, strings.NewReader(permanodeSigned))
	if err != nil {
		return fmt.Errorf("While uploading signed permanode %v, %v: %v", permanodeRef, permanodeSigned, err)
	}

	contentClaimBB := schema.NewSetAttributeClaim(permanodeRef, "camliContent", fileblob.Ref.String())
	contentClaimBB.SetSigner(blob.MustParse(publicKeyBlobRef))
	contentClaimBB.SetClaimDate(claimDate)
	contentClaimSigned, err := sigHelper.Sign(contentClaimBB)
	if err != nil {
		return fmt.Errorf("Signing camliContent claim: %v", err)
	}
	contentClaimRef := blob.SHA1FromString(contentClaimSigned)
	_, err = blobserver.ReceiveNoHash(blobReceiver, contentClaimRef, strings.NewReader(contentClaimSigned))
	if err != nil {
		return fmt.Errorf("While uploading signed camliContent claim %v, %v: %v", contentClaimRef, contentClaimSigned, err)
	}
	return nil
}
Exemple #7
0
// populateFile adds index rows for the file schema blob b to bm.
//
// The file contents are streamed once through a SHA-1 hash. For content
// whose sniffed MIME type starts with "image/", the first 256 KiB are also
// kept so the image dimensions and an EXIF-derived file time can be
// extracted. The rows written are keyWholeToFileRef, keyFileInfo,
// keyFileTimes and, for decodable images, keyImageSize. Content with an
// "audio/" MIME type is additionally handed to indexMusic.
//
// Failures to open or read the file are only logged; the function returns
// nil on every path.
func (ix *Index) populateFile(b *schema.Blob, bm BatchMutation) error {
	// All creation or mod times seen; entries may be zero.
	times := []time.Time{b.ModTime()}

	blobRef := b.BlobRef()
	fr, err := b.NewFileReader(blob.SeekerFromStreamingFetcher(ix.BlobSource))
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	hasher := sha1.New()
	var copyDest io.Writer = hasher
	var imageBuf *keepFirstN // stays nil for non-image content
	if strings.HasPrefix(mime, "image/") {
		imageBuf = &keepFirstN{N: 256 << 10}
		copyDest = io.MultiWriter(hasher, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times.  Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing).  Both options suck.  For now just
		// log and act like all's okay.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}

	if imageBuf != nil {
		conf, confErr := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes))
		if confErr == nil {
			bm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		ft, ftErr := schema.FileTime(bytes.NewReader(imageBuf.Bytes))
		log.Printf("filename %q exif = %v, %v", b.FileName(), ft, ftErr)
		if ftErr == nil {
			times = append(times, ft)
		}
	}

	// Drop zero times, then record the oldest and newest of what is left.
	var nonZero []time.Time
	for i := range times {
		if times[i].IsZero() {
			continue
		}
		nonZero = append(nonZero, times[i])
	}
	sort.Sort(types.ByTime(nonZero))
	var time3339s string
	if n := len(nonZero); n == 1 {
		time3339s = types.Time3339(nonZero[0]).String()
	} else if n >= 2 {
		time3339s = types.Time3339(nonZero[0]).String() + "," + types.Time3339(nonZero[n-1]).String()
	}

	wholeRef := blob.RefFromHash(hasher)
	bm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	bm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	bm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if !strings.HasPrefix(mime, "audio/") {
		return nil
	}
	tag, err := taglib.Decode(fr, fr.Size())
	if err != nil {
		log.Print("index: error parsing tag: ", err)
		return nil
	}
	indexMusic(tag, wholeRef, bm)
	return nil
}
Exemple #8
0
// storageSeekFetcher returns a blob.SeekFetcher built from zh.fetcher.
func (zh *zipHandler) storageSeekFetcher() blob.SeekFetcher {
	seeker := blob.SeekerFromStreamingFetcher(zh.fetcher)
	return seeker
}
Exemple #9
0
// smartFetch fetches the things that blobs point to, not just blobs.
//
// It fetches br from src and sniffs its first sniffSize bytes. A blob that
// is not a schema blob is written verbatim to the file targ. A schema blob
// is handled by type:
//
//   - "directory": the directory is created under targ and its entries
//     blob is fetched recursively into it.
//   - "static-set": every member is fetched into targ using a pool of
//     worker goroutines; the first error seen, in member order, is returned.
//   - "file": the file contents are written under targ, unless a file of
//     the same name and size already exists there.
//
// Any other schema type is an error. Failures of setFileMeta are logged
// and do not fail the fetch.
func smartFetch(src blob.StreamingFetcher, targ string, br blob.Ref) error {
	rc, err := fetch(src, br)
	if err != nil {
		return err
	}
	defer rc.Close()

	sniffer := index.NewBlobSniffer(br)
	_, err = io.CopyN(sniffer, rc, sniffSize)
	// io.EOF only means the blob is shorter than sniffSize.
	if err != nil && err != io.EOF {
		return err
	}

	sniffer.Parse()
	b, ok := sniffer.SchemaBlob()

	if !ok {
		if *flagVerbose {
			log.Printf("Fetching opaque data %v into %q", br, targ)
		}

		// opaque data - put it in a file
		f, err := os.Create(targ)
		if err != nil {
			return fmt.Errorf("opaque: %v", err)
		}
		defer f.Close()
		// Replay the bytes already consumed by the sniffer, then stream
		// the rest of the blob.
		body, _ := sniffer.Body()
		r := io.MultiReader(bytes.NewReader(body), rc)
		_, err = io.Copy(f, r)
		return err
	}

	switch b.Type() {
	case "directory":
		dir := filepath.Join(targ, b.FileName())
		if *flagVerbose {
			log.Printf("Fetching directory %v into %s", br, dir)
		}
		if err := os.MkdirAll(dir, b.FileMode()); err != nil {
			return err
		}
		if err := setFileMeta(dir, b); err != nil {
			log.Print(err)
		}
		entries, ok := b.DirectoryEntries()
		if !ok {
			return fmt.Errorf("bad entries blobref in dir %v", b.BlobRef())
		}
		return smartFetch(src, dir, entries)
	case "static-set":
		if *flagVerbose {
			log.Printf("Fetching directory entries %v into %s", br, targ)
		}

		// directory entries
		const numWorkers = 10
		type work struct {
			br   blob.Ref
			errc chan<- error
		}
		members := b.StaticSetMembers()
		// Buffered to len(members) so queueing the work never blocks.
		workc := make(chan work, len(members))
		defer close(workc)
		for i := 0; i < numWorkers; i++ {
			go func() {
				for wi := range workc {
					wi.errc <- smartFetch(src, targ, wi.br)
				}
			}()
		}
		var errcs []<-chan error
		for _, mref := range members {
			// Capacity 1 lets a worker deliver its result even if
			// nobody reads it after an earlier error is returned.
			errc := make(chan error, 1)
			errcs = append(errcs, errc)
			workc <- work{mref, errc}
		}
		for _, errc := range errcs {
			if err := <-errc; err != nil {
				return err
			}
		}
		return nil
	case "file":
		seekFetcher := blob.SeekerFromStreamingFetcher(src)
		fr, err := schema.NewFileReader(seekFetcher, br)
		if err != nil {
			return fmt.Errorf("NewFileReader: %v", err)
		}
		fr.LoadAllChunks()
		defer fr.Close()

		name := filepath.Join(targ, b.FileName())

		// Skip the download when a file of the expected size is already
		// present, whether or not verbose logging is enabled.
		if fi, err := os.Stat(name); err == nil && fi.Size() == fr.Size() {
			if *flagVerbose {
				log.Printf("Skipping %s; already exists.", name)
			}
			return nil
		}

		if *flagVerbose {
			log.Printf("Writing %s to %s ...", br, name)
		}

		f, err := os.Create(name)
		if err != nil {
			return fmt.Errorf("file type: %v", err)
		}
		defer f.Close()
		if _, err := io.Copy(f, fr); err != nil {
			return fmt.Errorf("Copying %s to %s: %v", br, name, err)
		}
		if err := setFileMeta(name, b); err != nil {
			log.Print(err)
		}
		return nil
	default:
		return errors.New("unknown blob type: " + b.Type())
	}
}
Exemple #10
0
// populateFile adds index rows for the file schema blob b to mm.
//
// b: the parsed file schema blob
// mm: keys to populate
//
// The file contents are streamed once through a SHA-1 hash. For content
// whose sniffed MIME type starts with "image/", the first 1 MiB is also kept
// for image-size, file-time and EXIF indexing. Content with an "audio/" MIME
// type is additionally handed to indexMusic.
//
// NOTE(review): every return statement in this function is "return nil", so
// the named result err is nil when the deferred function runs and its
// missing-blob branch is never reached as written.
func (ix *Index) populateFile(b *schema.Blob, mm *mutationMap) (err error) {
	var times []time.Time // all creation or mod times seen; may be zero
	times = append(times, b.ModTime())

	blobRef := b.BlobRef()
	fetcher := &seekFetcherMissTracker{
		// TODO(bradfitz): cache this SeekFetcher on ix so it
		// doesn't have to be re-made each time? Probably small.
		src: blob.SeekerFromStreamingFetcher(ix.BlobSource),
	}
	// On a non-nil result, inspect which blobs the fetcher failed to load.
	// The body is currently a placeholder: it takes the lock and checks
	// fetcher.missing, but does nothing with it yet (see TODO).
	defer func() {
		if err == nil {
			return
		}
		fetcher.mu.Lock()
		defer fetcher.mu.Unlock()
		if len(fetcher.missing) == 0 {
			return
		}
		// TODO(bradfitz): there was an error indexing this file, and
		// we failed to load the blobs in fetcher.missing.  Add those as dependencies
		// somewhere so when we get one of those missing blobs, we kick off
		// a re-index of this file for whenever the indexer is idle.
	}()
	fr, err := b.NewFileReader(fetcher)
	if err != nil {
		// TODO(bradfitz): propagate up a transient failure
		// error type, so we can retry indexing files in the
		// future if blobs are only temporarily unavailable.
		// Basically the same as the TODO just below.
		//
		// We'll also want to bump the schemaVersion after this,
		// to fix anybody's index which is only partial due to
		// this old bug where it would return nil instead of doing
		// the necessary work.
		log.Printf("index: error indexing file, creating NewFileReader %s: %v", blobRef, err)
		return nil
	}
	defer fr.Close()
	mime, reader := magic.MIMETypeFromReader(fr)

	sha1 := sha1.New()
	var copyDest io.Writer = sha1
	var imageBuf *keepFirstN // or nil
	if strings.HasPrefix(mime, "image/") {
		// Empirically derived 1MiB assuming CR2 images require more than any
		// other filetype we support:
		//   https://gist.github.com/wathiede/7982372
		imageBuf = &keepFirstN{N: 1 << 20}
		copyDest = io.MultiWriter(copyDest, imageBuf)
	}
	size, err := io.Copy(copyDest, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times.  Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing).  Both options suck.  For now just
		// log and act like all's okay.
		//
		// See TODOs above, and the fetcher.missing stuff.
		log.Printf("index: error indexing file %s: %v", blobRef, err)
		return nil
	}
	wholeRef := blob.RefFromHash(sha1)

	if imageBuf != nil {
		// Each step below is best-effort: a failure skips only that step.
		if conf, err := images.DecodeConfig(bytes.NewReader(imageBuf.Bytes)); err == nil {
			mm.Set(keyImageSize.Key(blobRef), keyImageSize.Val(fmt.Sprint(conf.Width), fmt.Sprint(conf.Height)))
		}
		if ft, err := schema.FileTime(bytes.NewReader(imageBuf.Bytes)); err == nil {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
			times = append(times, ft)
		} else {
			log.Printf("filename %q exif = %v, %v", b.FileName(), ft, err)
		}

		indexEXIF(wholeRef, imageBuf.Bytes, mm)
	}

	// Keep only the non-zero times and sort them so the oldest and newest
	// can be picked from the ends.
	var sortTimes []time.Time
	for _, t := range times {
		if !t.IsZero() {
			sortTimes = append(sortTimes, t)
		}
	}
	sort.Sort(types.ByTime(sortTimes))
	// time3339s is empty with no times, a single time when there is one,
	// or "oldest,newest" when there are two or more.
	var time3339s string
	switch {
	case len(sortTimes) == 1:
		time3339s = types.Time3339(sortTimes[0]).String()
	case len(sortTimes) >= 2:
		oldest, newest := sortTimes[0], sortTimes[len(sortTimes)-1]
		time3339s = types.Time3339(oldest).String() + "," + types.Time3339(newest).String()
	}

	mm.Set(keyWholeToFileRef.Key(wholeRef, blobRef), "1")
	mm.Set(keyFileInfo.Key(blobRef), keyFileInfo.Val(size, b.FileName(), mime))
	mm.Set(keyFileTimes.Key(blobRef), keyFileTimes.Val(time3339s))

	if strings.HasPrefix(mime, "audio/") {
		// fr was already read sequentially above; the section reader
		// gives indexMusic its own view starting at offset 0.
		indexMusic(io.NewSectionReader(fr, 0, fr.Size()), wholeRef, mm)
	}

	return nil
}
Exemple #11
0
// storageSeekFetcher returns a blob.SeekFetcher built from dh.Fetcher.
func (dh *DownloadHandler) storageSeekFetcher() blob.SeekFetcher {
	// TODO: pass dh.Cache?
	seeker := blob.SeekerFromStreamingFetcher(dh.Fetcher)
	return seeker
}
Exemple #12
0
// smartFetch fetches the things that blobs point to, not just blobs.
//
// It fetches br from src and sniffs its first sniffSize bytes. A blob that
// is not a schema blob is written verbatim to the file targ. A schema blob
// is handled by type:
//
//   - "directory": the directory is created under targ and its entries
//     blob is fetched recursively into it.
//   - "static-set": every member is fetched into targ using a pool of
//     worker goroutines; the first error seen, in member order, is returned.
//   - "file": the file contents are written under targ, unless a file of
//     the same name and size already exists there.
//   - "symlink": a symbolic link is created under targ, unless something
//     with that name already exists.
//
// Any other schema type is an error. Failures of setFileMeta are logged
// and do not fail the fetch.
func smartFetch(src blob.StreamingFetcher, targ string, br blob.Ref) error {
	rc, err := fetch(src, br)
	if err != nil {
		return err
	}
	defer rc.Close()

	sniffer := index.NewBlobSniffer(br)
	_, err = io.CopyN(sniffer, rc, sniffSize)
	// io.EOF only means the blob is shorter than sniffSize.
	if err != nil && err != io.EOF {
		return err
	}

	sniffer.Parse()
	b, ok := sniffer.SchemaBlob()

	if !ok {
		if *flagVerbose {
			log.Printf("Fetching opaque data %v into %q", br, targ)
		}

		// opaque data - put it in a file
		f, err := os.Create(targ)
		if err != nil {
			return fmt.Errorf("opaque: %v", err)
		}
		defer f.Close()
		// Replay the bytes already consumed by the sniffer, then stream
		// the rest of the blob.
		body, _ := sniffer.Body()
		r := io.MultiReader(bytes.NewReader(body), rc)
		_, err = io.Copy(f, r)
		return err
	}

	switch b.Type() {
	case "directory":
		dir := filepath.Join(targ, b.FileName())
		if *flagVerbose {
			log.Printf("Fetching directory %v into %s", br, dir)
		}
		if err := os.MkdirAll(dir, b.FileMode()); err != nil {
			return err
		}
		if err := setFileMeta(dir, b); err != nil {
			log.Print(err)
		}
		entries, ok := b.DirectoryEntries()
		if !ok {
			return fmt.Errorf("bad entries blobref in dir %v", b.BlobRef())
		}
		return smartFetch(src, dir, entries)
	case "static-set":
		if *flagVerbose {
			log.Printf("Fetching directory entries %v into %s", br, targ)
		}

		// directory entries
		const numWorkers = 10
		type work struct {
			br   blob.Ref
			errc chan<- error
		}
		members := b.StaticSetMembers()
		// Buffered to len(members) so queueing the work never blocks.
		workc := make(chan work, len(members))
		defer close(workc)
		for i := 0; i < numWorkers; i++ {
			go func() {
				for wi := range workc {
					wi.errc <- smartFetch(src, targ, wi.br)
				}
			}()
		}
		var errcs []<-chan error
		for _, mref := range members {
			// Capacity 1 lets a worker deliver its result even if
			// nobody reads it after an earlier error is returned.
			errc := make(chan error, 1)
			errcs = append(errcs, errc)
			workc <- work{mref, errc}
		}
		for _, errc := range errcs {
			if err := <-errc; err != nil {
				return err
			}
		}
		return nil
	case "file":
		seekFetcher := blob.SeekerFromStreamingFetcher(src)
		fr, err := schema.NewFileReader(seekFetcher, br)
		if err != nil {
			return fmt.Errorf("NewFileReader: %v", err)
		}
		fr.LoadAllChunks()
		defer fr.Close()

		name := filepath.Join(targ, b.FileName())

		// Skip the download when a file of the expected size is already
		// present, whether or not verbose logging is enabled.
		if fi, err := os.Stat(name); err == nil && fi.Size() == fr.Size() {
			if *flagVerbose {
				log.Printf("Skipping %s; already exists.", name)
			}
			return nil
		}

		if *flagVerbose {
			log.Printf("Writing %s to %s ...", br, name)
		}

		f, err := os.Create(name)
		if err != nil {
			return fmt.Errorf("file type: %v", err)
		}
		defer f.Close()
		if _, err := io.Copy(f, fr); err != nil {
			return fmt.Errorf("Copying %s to %s: %v", br, name, err)
		}
		if err := setFileMeta(name, b); err != nil {
			log.Print(err)
		}
		return nil
	case "symlink":
		sf, ok := b.AsStaticFile()
		if !ok {
			return errors.New("blob is not a static file")
		}
		sl, ok := sf.AsStaticSymlink()
		if !ok {
			return errors.New("blob is not a symlink")
		}
		name := filepath.Join(targ, sl.FileName())
		// Lstat rather than Stat so an existing dangling symlink is
		// also detected.
		if _, err := os.Lstat(name); err == nil {
			if *flagVerbose {
				log.Printf("Skipping creating symbolic link %s: A file with that name exists", name)
			}
			return nil
		}
		target := sl.SymlinkTargetString()
		if target == "" {
			return errors.New("symlink without target")
		}

		// TODO (marete): The Go docs promise that everything
		// in pkg os should work the same everywhere. Not true
		// for os.Symlink() at the moment. See what to do for
		// windows here.
		err := os.Symlink(target, name)
		// We won't call setFileMeta for a symlink because:
		// the permissions of a symlink do not matter and Go's
		// os.Chtimes always dereferences (does not act on the
		// symlink but its target).
		return err

	default:
		return errors.New("unknown blob type: " + b.Type())
	}
}