Example #1
func populatePacked2(t *testing.T, s *storage) (wants []storagetest.StreamerTestOpt) {
	const fileSize = 1 << 20
	data := randBytes(fileSize)
	_, err := schema.WriteFileFromReader(s, "first-half.dat", bytes.NewReader(data[:fileSize/2]))
	if err != nil {
		t.Fatalf("WriteFileFromReader: %v", err)
	}
	_, err = schema.WriteFileFromReader(s, "second-half.dat", bytes.NewReader(data[fileSize/2:]))
	if err != nil {
		t.Fatalf("WriteFileFromReader: %v", err)
	}
	return nil
}
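Every example on this page exercises schema.WriteFileFromReader from camlistore.org/pkg/schema: it chunks the reader's contents into blobs, writes them to the given storage, and returns the blob.Ref of the resulting file schema blob. For orientation, a minimal self-contained sketch of that call pattern follows; it reuses the in-memory test.Fetcher storage that Example #3 and others use, while the file name and contents here are invented for illustration.

package main

import (
	"log"
	"strings"

	"camlistore.org/pkg/schema"
	"camlistore.org/pkg/test"
)

func main() {
	// In-memory blob storage, as in Example #3.
	dst := new(test.Fetcher)
	// WriteFileFromReader stores the content chunks plus a file schema
	// blob describing them, and returns the schema blob's ref.
	br, err := schema.WriteFileFromReader(dst, "hello.txt", strings.NewReader("hello, world\n"))
	if err != nil {
		log.Fatalf("WriteFileFromReader: %v", err)
	}
	log.Printf("wrote %v (%d blobs stored)", br, dst.NumBlobs())
}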
Example #2
// urlFileRef slurps urlstr from the net, writes to a file and returns its
// fileref or "" on error
func (r *run) urlFileRef(urlstr, filename string) string {
	im := r.im
	im.mu.Lock()
	if br, ok := im.imageFileRef[urlstr]; ok {
		im.mu.Unlock()
		return br.String()
	}
	im.mu.Unlock()

	res, err := r.HTTPClient().Get(urlstr)
	if err != nil {
		log.Printf("couldn't get image: %v", err)
		return ""
	}
	defer res.Body.Close()

	fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body)
	if err != nil {
		r.errorf("couldn't write file: %v", err)
		return ""
	}

	im.mu.Lock()
	defer im.mu.Unlock()
	im.imageFileRef[urlstr] = fileRef
	return fileRef.String()
}
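A note on the locking in urlFileRef above (the same pattern recurs in Examples #4 and #6): im.mu is held only for the cache lookup and for the final insert, never across the network fetch. Two goroutines requesting the same URL can therefore both miss the cache and download it twice, but the worst case is a redundant fetch; the refs are content-addressed, so the last writer leaves the map in a consistent state.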
Example #3
func TestHandlerRightRef(t *testing.T) {
	b := test.Blob{Contents: "Foo"}
	storage := new(test.Fetcher)
	ref, err := schema.WriteFileFromReader(storage, "", b.Reader())
	if err != nil {
		t.Fatal(err)
	}

	ts := httptest.NewServer(createVideothumbnailHandler(ref, storage))
	defer ts.Close()

	resp, err := http.Get(ts.URL + "/" + ref.String())

	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 200 {
		t.Fatalf("expected 200 status: %v", resp)
	}
	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	if string(content) != b.Contents {
		t.Errorf("excepted handler to serve data")
	}
}
Example #4
// urlFileRef slurps urlstr from the net, writes to a file and returns its
// fileref or "" on error
func (r *run) urlFileRef(urlstr string) string {
	if urlstr == "" {
		return ""
	}
	im := r.im
	im.mu.Lock()
	if br, ok := im.urlFileRef[urlstr]; ok {
		im.mu.Unlock()
		return br.String()
	}
	im.mu.Unlock()

	res, err := r.HTTPClient().Get(urlstr)
	if err != nil {
		log.Printf("couldn't get file: %v", err)
		return ""
	}
	defer res.Body.Close()

	filename := urlstr[strings.LastIndex(urlstr, "/")+1:]
	fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body)
	if err != nil {
		log.Printf("couldn't write file: %v", err)
		return ""
	}

	im.mu.Lock()
	defer im.mu.Unlock()
	im.urlFileRef[urlstr] = fileRef
	return fileRef.String()
}
Example #5
func (h *mutFileHandle) Release(req *fuse.ReleaseRequest, intr fuse.Intr) fuse.Error {
	if h.tmp == nil {
		log.Printf("Release called on camli mutFileHandle without a tempfile set")
		return fuse.EIO
	}
	log.Printf("mutFileHandle release.")
	_, err := h.tmp.Seek(0, 0)
	if err != nil {
		log.Println("mutFileHandle.Release:", err)
		return fuse.EIO
	}
	var n int64
	br, err := schema.WriteFileFromReader(h.f.fs.client, h.f.name, readerutil.CountingReader{Reader: h.tmp, N: &n})
	if err != nil {
		log.Println("mutFileHandle.Release:", err)
		return fuse.EIO
	}
	h.f.setContent(br, n)

	h.tmp.Close()
	os.Remove(h.tmp.Name())
	h.tmp = nil

	return nil
}
Example #6
// urlFileRef slurps urlstr from the net, writes to a file and returns its
// fileref or "" on error or if urlstr was empty.
func (r *run) urlFileRef(urlstr, filename string) string {
	im := r.im
	im.mu.Lock()
	if br, ok := im.imageFileRef[urlstr]; ok {
		im.mu.Unlock()
		return br.String()
	}
	im.mu.Unlock()

	if urlstr == "" {
		return ""
	}
	res, err := ctxutil.Client(r.Context()).Get(urlstr)
	if err != nil {
		log.Printf("foursquare: couldn't fetch image %q: %v", urlstr, err)
		return ""
	}
	defer res.Body.Close()

	fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body)
	if err != nil {
		r.errorf("couldn't write file: %v", err)
		return ""
	}

	im.mu.Lock()
	defer im.mu.Unlock()
	im.imageFileRef[urlstr] = fileRef
	return fileRef.String()
}
Example #7
func (ih *ImageHandler) writeToCache(tr io.Reader, name string) (blob.Ref, error) {
	br, err := schema.WriteFileFromReader(ih.Cache, name, tr)
	if err != nil {
		return br, errors.New("failed to cache " + name + ": " + err.Error())
	}
	if imageDebug {
		log.Printf("Image Cache: saved as %v\n", br)
	}
	return br, nil
}
Example #8
func populatePacked(t *testing.T, s *storage) (wants []storagetest.StreamerTestOpt) {
	const fileSize = 5 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)
	_, err := schema.WriteFileFromReader(s, fileName, bytes.NewReader(fileContents))
	if err != nil {
		t.Fatalf("WriteFileFromReader: %v", err)
	}
	return nil
}
Example #9
func TestPackTwoIdenticalfiles(t *testing.T) {
	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	testPack(t,
		func(sto blobserver.Storage) (err error) {
			if _, err = schema.WriteFileFromReader(sto, "a.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			if _, err = schema.WriteFileFromReader(sto, "b.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			return
		},
		func(pt *packTest) { pt.sto.packGate = syncutil.NewGate(1) }, // one pack at a time
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(1), // just the "b.txt" file schema blob
		okayWithoutMeta("sha1-cb4399f6b3b31ace417e1ec9326f9818bb3f8387"),
	)
}
Example #10
func storageAndBlobRef(t *testing.T) (blobserver.Storage, blob.Ref) {
	storage := new(test.Fetcher)
	inFile, err := os.Open(testFilepath)
	if err != nil {
		t.Fatal(err)
	}
	ref, err := schema.WriteFileFromReader(storage, "small.webm", inFile)
	if err != nil {
		t.Fatal(err)
	}
	return storage, ref
}
Example #11
func TestPackNoDelete(t *testing.T) {
	const fileSize = 1 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)
	testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		func(pt *packTest) { pt.sto.skipDelete = true },
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(15), // empirically
	)
}
Example #12
func (ui *UIHandler) serveUploadHelper(rw http.ResponseWriter, req *http.Request) {
	ret := make(map[string]interface{})
	defer httputil.ReturnJSON(rw, ret)

	if ui.root.Storage == nil {
		ret["error"] = "No BlobRoot configured"
		ret["errorType"] = "server"
		return
	}

	mr, err := req.MultipartReader()
	if err != nil {
		ret["error"] = "reading body: " + err.Error()
		ret["errorType"] = "server"
		return
	}

	got := make([]map[string]interface{}, 0)
	for {
		part, err := mr.NextPart()
		if err == io.EOF {
			break
		}
		if err != nil {
			ret["error"] = "reading body: " + err.Error()
			ret["errorType"] = "server"
			break
		}
		fileName := part.FileName()
		if fileName == "" {
			continue
		}
		br, err := schema.WriteFileFromReader(ui.root.Storage, fileName, part)

		if err == nil {
			got = append(got, map[string]interface{}{
				"filename": part.FileName(),
				"formname": part.FormName(),
				"fileref":  br.String(),
			})
		} else {
			ret["error"] = "writing to blobserver: " + err.Error()
			return
		}
	}
	ret["got"] = got
}
Example #13
func writeToCache(cache blobserver.Storage, thumbBytes []byte, name string) (br blob.Ref, err error) {
	tr := bytes.NewReader(thumbBytes)
	if len(thumbBytes) < constants.MaxBlobSize {
		br = blob.SHA1FromBytes(thumbBytes)
		_, err = blobserver.Receive(cache, br, tr)
	} else {
		// TODO: don't use rolling checksums when writing this. Tell
		// the filewriter to use 16 MB chunks instead.
		br, err = schema.WriteFileFromReader(cache, name, tr)
	}
	if err != nil {
		return br, errors.New("failed to cache " + name + ": " + err.Error())
	}
	if imageDebug {
		log.Printf("Image Cache: saved as %v\n", br)
	}
	return br, nil
}
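The size check above is worth noting: a thumbnail smaller than constants.MaxBlobSize (16 MB in Camlistore) fits in a single blob, so it is uploaded directly via blobserver.Receive under a ref precomputed with blob.SHA1FromBytes, and only oversized payloads fall back to schema.WriteFileFromReader's rolling-checksum chunking, which is what the TODO about 16 MB chunks refers to.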
Example #14
func TestPackLarge(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}
	const fileSize = 17 << 20 // more than 16 MB, so more than one zip
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	hash := blob.NewHash()
	hash.Write(fileContents)
	wholeRef := blob.RefFromHash(hash)

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(2),
		wantNumSmallBlobs(0),
	)

	// Verify we wrote the correct "w:*" meta rows.
	got := map[string]string{}
	want := map[string]string{
		"w:" + wholeRef.String():        "17825792 2",
		"w:" + wholeRef.String() + ":0": "sha1-9b4a3d114c059988075c87293c86ee7cbc6f4af5 37 0 16709479",
		"w:" + wholeRef.String() + ":1": "sha1-fe6326ac6b389ffe302623e4a501bfc8c6272e8e 37 16709479 1116313",
	}
	if err := sorted.Foreach(pt.sto.meta, func(key, value string) error {
		if strings.HasPrefix(key, "b:") {
			return nil
		}
		got[key] = value
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("'w:*' meta rows = %v; want %v", got, want)
	}

	// And verify we can read it back out.
	pt.testOpenWholeRef(t, wholeRef, fileSize)
}
Example #15
func (im *imp) Run(intr importer.Interrupt) (err error) {
	log.Printf("Running dummy importer.")
	defer func() {
		log.Printf("Dummy importer returned: %v", err)
	}()
	root, err := im.host.RootObject()
	if err != nil {
		return err
	}
	fileRef, err := schema.WriteFileFromReader(im.host.Target(), "foo.txt", strings.NewReader("Some file.\n"))
	if err != nil {
		return err
	}
	obj, err := root.ChildPathObject("foo.txt")
	if err != nil {
		return err
	}
	return obj.SetAttr("camliContent", fileRef.String())
}
Example #16
// urlFileRef slurps urlstr from the net, writes to a file and returns its
// fileref or "" on error
func (im *imp) urlFileRef(urlstr string) string {
	if br, ok := im.imageFileRef[urlstr]; ok {
		return br.String()
	}
	res, err := im.host.HTTPClient().Get(urlstr)
	if err != nil {
		log.Printf("couldn't get image: %v", err)
		return ""
	}
	defer res.Body.Close()

	fileRef, err := schema.WriteFileFromReader(im.host.Target(), "category.png", res.Body)
	if err != nil {
		log.Printf("couldn't write file: %v", err)
		return ""
	}

	im.imageFileRef[urlstr] = fileRef
	return fileRef.String()
}
Example #17
func TestPackNormal(t *testing.T) {
	const fileSize = 5 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	hash := blob.NewHash()
	hash.Write(fileContents)
	wholeRef := blob.RefFromHash(hash)

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(0),
	)
	// And verify we can read it back out.
	pt.testOpenWholeRef(t, wholeRef, fileSize)
}
Example #18
// TODO(aa):
// * Parallelize: http://golang.org/doc/effective_go.html#concurrency
// * Do more than one "page" worth of results
// * Report progress and errors back through host interface
// * All the rest of the metadata (see photoMeta)
// * Conflicts: For all metadata changes, prefer any non-imported claims
// * Test!
func (im *imp) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
	filename := fmt.Sprintf("%s.%s", photo.Id, photo.Originalformat)
	photoNode, err := parent.ChildPathObject(filename)
	if err != nil {
		return err
	}

	// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
	// And this way if we add more things to import, they will get picked up.
	if err := photoNode.SetAttrs(
		"flickrId", photo.Id,
		"title", photo.Title,
		"description", photo.Description.Content); err != nil {
		return err
	}

	// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
	if photoNode.Attr("flickrLastupdate") == photo.Lastupdate {
		return nil
	}
	res, err := im.flickrRequest(photo.URL, url.Values{})
	if err != nil {
		log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err)
		return err
	}
	defer res.Body.Close()

	fileRef, err := schema.WriteFileFromReader(im.host.Target(), filename, res.Body)
	if err != nil {
		return err
	}
	if err := photoNode.SetAttr("camliContent", fileRef.String()); err != nil {
		return err
	}
	// Write lastupdate last, so that if any of the preceding fails, we will try again next time.
	if err := photoNode.SetAttr("flickrLastupdate", photo.Lastupdate); err != nil {
		return err
	}

	return nil
}
Example #19
func storePhoto(p photo) (string, error) {
	srcFile := localPathOf(p)

	f, err := os.Open(srcFile)
	if err != nil {
		return "", err
	}
	defer f.Close()

	fileRef, err := schema.WriteFileFromReader(camliClient, p.Id+"."+p.Extension, f)
	if err != nil {
		return "", err
	}

	res, err := camliClient.UploadNewPermanode()
	if err != nil {
		return "", err
	}
	perma := res.BlobRef

	p.Description = cleanHTML(p.Description)

	claims := []*schema.Builder{}
	claims = append(claims, schema.NewSetAttributeClaim(perma, "camliContent", fileRef.String()))
	claims = append(claims, schema.NewSetAttributeClaim(perma, "title", mkTitle(p.Description)))
	claims = append(claims, schema.NewSetAttributeClaim(perma, "description", p.Description))
	for _, t := range p.Tags {
		claims = append(claims, schema.NewAddAttributeClaim(perma, "tag", t))
	}
	if p.Cat == "Public" {
		claims = append(claims, schema.NewSetAttributeClaim(perma, "camliAccess", "public"))
	}

	grp := syncutil.Group{}
	for _, claimBuilder := range claims {
		claim := claimBuilder.Blob()
		grp.Go(func() error {
			_, err := camliClient.UploadAndSignBlob(claim)
			return err
		})
	}

	return perma.String(), grp.Err()
}
Example #20
func (ui *UIHandler) serveUploadHelper(rw http.ResponseWriter, req *http.Request) {
	if ui.root.Storage == nil {
		httputil.ServeJSONError(rw, httputil.ServerError("No BlobRoot configured"))
		return
	}

	mr, err := req.MultipartReader()
	if err != nil {
		httputil.ServeJSONError(rw, httputil.ServerError("reading body: "+err.Error()))
		return
	}

	var got []*uploadHelperGotItem
	for {
		part, err := mr.NextPart()
		if err == io.EOF {
			break
		}
		if err != nil {
			httputil.ServeJSONError(rw, httputil.ServerError("reading body: "+err.Error()))
			break
		}
		fileName := part.FileName()
		if fileName == "" {
			continue
		}
		br, err := schema.WriteFileFromReader(ui.root.Storage, fileName, part)
		if err != nil {
			httputil.ServeJSONError(rw, httputil.ServerError("writing to blobserver: "+err.Error()))
			return
		}
		got = append(got, &uploadHelperGotItem{
			FileName: part.FileName(),
			FormName: part.FormName(),
			FileRef:  br,
		})
	}

	httputil.ReturnJSON(rw, &uploadHelperResponse{Got: got})
}
Example #21
func (r *run) importItem(parent *importer.Object, item *item) error {
	itemNode, err := parent.ChildPathObject(item.ID)
	if err != nil {
		return err
	}
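	// The file name may be empty, as here; the content is still chunked
	// and stored, and the returned fileref works as camliContent.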
	fileRef, err := schema.WriteFileFromReader(r.Host.Target(), "", bytes.NewBufferString(item.Content))
	if err != nil {
		return err
	}
	if err := itemNode.SetAttrs(
		"feedItemId", item.ID,
		"camliNodeType", "feed:item",
		"title", item.Title,
		"link", item.Link,
		"author", item.Author,
		"camliContent", fileRef.String(),
		"feedMediaContentURL", item.MediaContent,
	); err != nil {
		return err
	}
	return nil
}
Example #22
func (im *imp) Run(ctx *importer.RunContext) (err error) {
	log.Printf("Running dummy importer.")
	defer func() {
		log.Printf("Dummy importer returned: %v", err)
	}()
	root := ctx.RootNode()
	fileRef, err := schema.WriteFileFromReader(ctx.Host.Target(), "foo.txt", strings.NewReader("Some file.\n"))
	if err != nil {
		return err
	}
	obj, err := root.ChildPathObject("foo.txt")
	if err != nil {
		return err
	}
	if err = obj.SetAttr("camliContent", fileRef.String()); err != nil {
		return err
	}
	n, _ := strconv.Atoi(ctx.AccountNode().Attr(acctAttrRunNumber))
	n++
	ctx.AccountNode().SetAttr(acctAttrRunNumber, fmt.Sprint(n))
	// Update the title each time, just to show it working. You
	// wouldn't actually do this:
	return root.SetAttr("title", fmt.Sprintf("dummy: %s import #%d", ctx.AccountNode().Attr(acctAttrUsername), n))
}
Example #23
// Flush is called to let the file system clean up any data buffers
// and to pass any errors in the process of closing a file to the user
// application.
//
// Flush *may* be called more than once in the case where a file is
// opened more than once, but it's not possible to detect from the
// call itself whether this is a final flush.
//
// This is generally the last opportunity to finalize data and the
// return value sets the return value of the Close that led to the
// calling of Flush.
//
// Note that this is distinct from Fsync -- which is a user-requested
// flush (fsync, etc...)
func (h *mutFileHandle) Flush(*fuse.FlushRequest, fuse.Intr) fuse.Error {
	if h.tmp == nil {
		log.Printf("Flush called on camli mutFileHandle without a tempfile set")
		return fuse.EIO
	}
	_, err := h.tmp.Seek(0, 0)
	if err != nil {
		log.Println("mutFileHandle.Flush:", err)
		return fuse.EIO
	}
	var n int64
	br, err := schema.WriteFileFromReader(h.f.fs.client, h.f.name, readerutil.CountingReader{Reader: h.tmp, N: &n})
	if err != nil {
		log.Println("mutFileHandle.Flush:", err)
		return fuse.EIO
	}
	err = h.f.setContent(br, n)
	if err != nil {
		log.Printf("mutFileHandle.Flush: %v", err)
		return fuse.EIO
	}

	return nil
}
Example #24
func TestReindex(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	type file struct {
		size     int64
		name     string
		contents []byte
	}
	files := []file{
		{17 << 20, "foo.dat", randBytesSrc(17<<20, 42)},
		{10 << 20, "bar.dat", randBytesSrc(10<<20, 43)},
		{5 << 20, "baz.dat", randBytesSrc(5<<20, 44)},
	}

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			for _, f := range files {
				if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil {
					return err
				}
			}
			return nil
		},
		wantNumLargeBlobs(4),
		wantNumSmallBlobs(0),
	)

	// backup the meta that is supposed to be lost/erased.
	// pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called.
	meta := pt.sto.meta

	// and build new meta index
	if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	}); err != nil {
		t.Fatal(err)
	}

	validBlobKey := func(key, value string) error {
		if !strings.HasPrefix(key, "b:") {
			return errors.New("not a blob meta key")
		}
		wantRef, ok := blob.Parse(key[2:])
		if !ok {
			return errors.New("bogus blobref in key")
		}
		m, err := parseMetaRow([]byte(value))
		if err != nil {
			return err
		}
		rc, err := pt.large.SubFetch(m.largeRef, int64(m.largeOff), int64(m.size))
		if err != nil {
			return err
		}
		defer rc.Close()
		h := wantRef.Hash()
		n, err := io.Copy(h, rc)
		if err != nil {
			return err
		}

		if !wantRef.HashMatches(h) {
			return errors.New("content doesn't match")
		}
		if n != int64(m.size) {
			return errors.New("size doesn't match")
		}
		return nil
	}

	// check that new meta is identical to "lost" one
	newRows := 0
	if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error {
		oldValue, err := meta.Get(key)
		if err != nil {
			t.Fatalf("Could not get value for %v in old meta: %v", key, err)
		}
		newRows++
		// Exact match is fine.
		if oldValue == newValue {
			return nil
		}
		// If it differs, it should at least be correct. (blob metadata
		// can now point to different packed zips, depending on sorting)
		err = validBlobKey(key, newValue)
		if err == nil {
			return nil
		}
		t.Errorf("Reindexing error: for key %v: %v\n got: %q\nwant: %q", key, err, newValue, oldValue)
		return nil // keep enumerating, regardless of errors
	}); err != nil {
		t.Fatal(err)
	}

	// make sure they have the same number of entries too, to be sure that the reindexing
	// did not miss entries that the old meta had.
	oldRows := countSortedRows(t, meta)
	if oldRows != newRows {
		t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows)
	}

	// And verify we can read one of the files back out.
	hash := blob.NewHash()
	hash.Write(files[0].contents)
	pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size)
}
Example #25
// Tests a bunch of rounds on a bunch of data.
func TestArchiverStress(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping in short mode")
	}
	src := new(test.Fetcher)
	fileRef, err := schema.WriteFileFromReader(src, "random", io.LimitReader(randReader{}, 10<<20))
	if err != nil {
		t.Fatal(err)
	}
	n0 := src.NumBlobs()
	t.Logf("Wrote %v in %d blobs", fileRef, n0)

	refs0 := src.BlobrefStrings()

	var zips [][]byte
	archived := map[blob.Ref]bool{}
	a := &Archiver{
		Source:                 src,
		MinZipSize:             1 << 20,
		DeleteSourceAfterStore: true,
		Store: func(zipd []byte, brs []blob.SizedRef) error {
			zips = append(zips, zipd)
			for _, sbr := range brs {
				if archived[sbr.Ref] {
					t.Error("duplicate archive of %v", sbr.Ref)
				}
				archived[sbr.Ref] = true
			}
			return nil
		},
	}
	for {
		err := a.RunOnce()
		if err == ErrSourceTooSmall {
			break
		}
		if err != nil {
			t.Fatal(err)
		}
	}

	if len(archived) == 0 {
		t.Errorf("unexpected small number of archived blobs = %d", len(archived))
	}
	if len(zips) < 2 {
		t.Errorf("unexpected small number of zip files = %d", len(zips))
	}
	if n1 := src.NumBlobs() + len(archived); n0 != n1 {
		t.Errorf("original %d blobs != %d after + %d archived (%d)", n0, src.NumBlobs(), len(archived), n1)
	}

	// And restore:
	for _, zipd := range zips {
		if err := foreachZipEntry(zipd, func(br blob.Ref, contents []byte) {
			tb := &test.Blob{Contents: string(contents)}
			if tb.BlobRef() != br {
				t.Fatal("corrupt zip callback")
			}
			src.AddBlob(tb)
		}); err != nil {
			t.Fatal(err)
		}
	}

	refs1 := src.BlobrefStrings()
	if !reflect.DeepEqual(refs0, refs1) {
		t.Error("Restore error.")
	}
}
Example #26
func TestForeachZipBlob(t *testing.T) {
	const fileSize = 2 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(0),
	)

	zipBlob, err := singleBlob(pt.large)
	if err != nil {
		t.Fatal(err)
	}
	zipBytes := slurpBlob(t, pt.large, zipBlob.Ref)
	zipSize := len(zipBytes)

	all := map[blob.Ref]blob.SizedRef{}
	if err := blobserver.EnumerateAll(ctx, pt.logical, func(sb blob.SizedRef) error {
		all[sb.Ref] = sb
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	foreachSaw := 0
	blobSizeSum := 0
	if err := pt.sto.foreachZipBlob(zipBlob.Ref, func(bap BlobAndPos) error {
		foreachSaw++
		blobSizeSum += int(bap.Size)
		want, ok := all[bap.Ref]
		if !ok {
			t.Errorf("unwanted blob ref returned from foreachZipBlob: %v", bap.Ref)
			return nil
		}
		delete(all, bap.Ref)
		if want.Size != bap.Size {
			t.Errorf("for %v, foreachZipBlob size = %d; want %d", bap.Ref, bap.Size, want.Size)
			return nil
		}

		// Verify the offset.
		h := bap.Ref.Hash()
		h.Write(zipBytes[bap.Offset : bap.Offset+int64(bap.Size)])
		if !bap.Ref.HashMatches(h) {
			return fmt.Errorf("foreachZipBlob returned blob %v at offset %d that failed validation", bap.Ref, bap.Offset)
		}

		return nil
	}); err != nil {
		t.Fatal(err)
	}

	t.Logf("foreachZipBlob enumerated %d blobs", foreachSaw)
	if len(all) > 0 {
		t.Errorf("foreachZipBlob forgot to enumerate %d blobs: %v", len(all), all)
	}
	// Calculate per-blobref zip overhead (zip file headers/TOC/manifest file, etc)
	zipOverhead := zipSize - blobSizeSum
	t.Logf("zip fixed overhead = %d bytes, for %d blobs (%d bytes each)", zipOverhead, foreachSaw, zipOverhead/foreachSaw)
}
Example #27
// TODO(aa):
// * Parallelize: http://golang.org/doc/effective_go.html#concurrency
// * Do more than one "page" worth of results
// * Report progress and errors back through host interface
// * All the rest of the metadata (see photoMeta)
// * Conflicts: For all metadata changes, prefer any non-imported claims
// * Test!
func (r *run) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
	filename := fmt.Sprintf("%s.%s", photo.Id, photo.OriginalFormat)
	photoNode, err := parent.ChildPathObject(filename)
	if err != nil {
		return err
	}

	// https://www.flickr.com/services/api/misc.dates.html
	dateTaken, err := time.ParseInLocation("2006-01-02 15:04:05", photo.DateTaken, schema.UnknownLocation)
	if err != nil {
		// default to the published date otherwise
		log.Printf("Flickr importer: problem with date taken of photo %v, defaulting to published date instead.", photo.Id)
		seconds, err := strconv.ParseInt(photo.DateUpload, 10, 64)
		if err != nil {
			return fmt.Errorf("could not parse date upload time %q for image %v: %v", photo.DateUpload, photo.Id, err)
		}
		dateTaken = time.Unix(seconds, 0)
	}

	attrs := []string{
		attrFlickrId, photo.Id,
		nodeattr.DateCreated, schema.RFC3339FromTime(dateTaken),
		nodeattr.Description, photo.Description.Content,
	}
	if schema.IsInterestingTitle(photo.Title) {
		attrs = append(attrs, nodeattr.Title, photo.Title)
	}
	// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
	// And this way if we add more things to import, they will get picked up.
	if err := photoNode.SetAttrs(attrs...); err != nil {
		return err
	}

	// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
	// lastupdate is a Unix timestamp according to https://www.flickr.com/services/api/flickr.photos.getInfo.html
	seconds, err := strconv.ParseInt(photo.LastUpdate, 10, 64)
	if err != nil {
		return fmt.Errorf("could not parse lastupdate time for image %v: %v", photo.Id, err)
	}
	lastUpdate := time.Unix(seconds, 0)
	if lastUpdateString := photoNode.Attr(nodeattr.DateModified); lastUpdateString != "" {
		oldLastUpdate, err := time.Parse(time.RFC3339, lastUpdateString)
		if err != nil {
			return fmt.Errorf("could not parse last stored update time for image %v: %v", photo.Id, err)
		}
		if lastUpdate.Equal(oldLastUpdate) {
			if err := r.updatePrimaryPhoto(photoNode); err != nil {
				return err
			}
			return nil
		}
	}
	form := url.Values{}
	form.Set("user_id", r.userID)
	res, err := r.fetch(photo.URL, form)
	if err != nil {
		log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err)
		return err
	}
	defer res.Body.Close()

	fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body)
	if err != nil {
		return err
	}
	if err := photoNode.SetAttr(nodeattr.CamliContent, fileRef.String()); err != nil {
		return err
	}
	if err := r.updatePrimaryPhoto(photoNode); err != nil {
		return err
	}
	// Write lastupdate last, so that if any of the preceding fails, we will try again next time.
	if err := photoNode.SetAttr(nodeattr.DateModified, schema.RFC3339FromTime(lastUpdate)); err != nil {
		return err
	}

	return nil
}
Example #28
func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) {
	if photo.ID == "" {
		return errors.New("photo has no ID")
	}

	getMediaBytes := func() (io.ReadCloser, error) {
		log.Printf("Importing media from %v", photo.URL)
		resp, err := ctxutil.Client(ctx).Get(photo.URL)
		if err != nil {
			return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err)
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode)
		}
		return resp.Body, nil
	}

	var fileRefStr string
	idFilename := photo.ID + "-" + photo.Filename
	photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) {
		h := blob.NewHash()
		rc, err := getMediaBytes()
		if err != nil {
			return nil, err
		}
		fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, io.TeeReader(rc, h))
		if err != nil {
			return nil, err
		}
		fileRefStr = fileRef.String()
		wholeRef := blob.RefFromHash(h)
		if pn, err := findExistingPermanode(r.Host.Searcher(), wholeRef); err == nil {
			return r.Host.ObjectFromRef(pn)
		}
		return r.Host.NewObject()
	})
	if err != nil {
		return err
	}

	const attrMediaURL = "picasaMediaURL"
	if fileRefStr == "" {
		fileRefStr = photoNode.Attr(nodeattr.CamliContent)
		// Only re-download the source photo if its URL has changed.
		// Empirically this seems to work: cropping a photo in the
		// photos.google.com UI causes its URL to change. And it makes
		// sense, looking at the ugliness of the URLs with all their
		// encoded/signed state.
		if !mediaURLsEqual(photoNode.Attr(attrMediaURL), photo.URL) {
			rc, err := getMediaBytes()
			if err != nil {
				return err
			}
			fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, rc)
			rc.Close()
			if err != nil {
				return err
			}
			fileRefStr = fileRef.String()
		}
	}

	title := strings.TrimSpace(photo.Description)
	if strings.Contains(title, "\n") {
		title = title[:strings.Index(title, "\n")]
	}
	if title == "" && schema.IsInterestingTitle(photo.Filename) {
		title = photo.Filename
	}

	// TODO(tgulacsi): add more attrs (comments ?)
	// for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork
	attrs := []string{
		nodeattr.CamliContent, fileRefStr,
		attrPicasaId, photo.ID,
		nodeattr.Title, title,
		nodeattr.Description, photo.Description,
		nodeattr.LocationText, photo.Location,
		nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated),
		nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published),
		nodeattr.URL, photo.PageURL,
	}
	if photo.Latitude != 0 || photo.Longitude != 0 {
		attrs = append(attrs,
			nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude),
			nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude),
		)
	}
	if err := photoNode.SetAttrs(attrs...); err != nil {
		return err
	}
	if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil {
		return err
	}
	if photo.Position > 0 {
		if err := albumNode.SetAttr(
			nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1),
			photoNode.PermanodeRef().String()); err != nil {
			return err
		}
	}

	// Do this last, after we're sure the "camliContent" attribute
	// has been saved successfully, because this is the one that
	// causes us to do it again in the future or not.
	if err := photoNode.SetAttrs(attrMediaURL, photo.URL); err != nil {
		return err
	}
	return nil
}
Example #29
// viaAPI is true if it came via the REST API, or false if it came via a zip file.
func (r *run) importTweet(parent *importer.Object, tweet tweetItem, viaAPI bool) (dup bool, err error) {
	select {
	case <-r.Context().Done():
		r.errorf("Twitter importer: interrupted")
		return false, r.Context().Err()
	default:
	}
	id := tweet.ID()
	tweetNode, err := parent.ChildPathObject(id)
	if err != nil {
		return false, err
	}

	// Because the zip format and the API format differ a bit, and
	// might diverge more in the future, never use the zip content
	// to overwrite data fetched via the API. If we add new
	// support for different fields in the future, we might want
	// to revisit this decision.  Be wary of flip/flopping data if
	// modifying this, though.
	if tweetNode.Attr(attrImportMethod) == "api" && !viaAPI {
		return true, nil
	}

	// e.g. "2014-06-12 19:11:51 +0000"
	createdTime, err := timeParseFirstFormat(tweet.CreatedAt(), time.RubyDate, "2006-01-02 15:04:05 -0700")
	if err != nil {
		return false, fmt.Errorf("could not parse time %q: %v", tweet.CreatedAt(), err)
	}

	url := fmt.Sprintf("https://twitter.com/%s/status/%v",
		r.AccountNode().Attr(importer.AcctAttrUserName),
		id)

	attrs := []string{
		"twitterId", id,
		nodeattr.Type, "twitter.com:tweet",
		nodeattr.StartDate, schema.RFC3339FromTime(createdTime),
		nodeattr.Content, tweet.Text(),
		nodeattr.URL, url,
	}
	if lat, long, ok := tweet.LatLong(); ok {
		attrs = append(attrs,
			nodeattr.Latitude, fmt.Sprint(lat),
			nodeattr.Longitude, fmt.Sprint(long),
		)
	}
	if viaAPI {
		attrs = append(attrs, attrImportMethod, "api")
	} else {
		attrs = append(attrs, attrImportMethod, "zip")
	}

	for i, m := range tweet.Media() {
		filename := m.BaseFilename()
		if tweetNode.Attr("camliPath:"+filename) != "" && (i > 0 || tweetNode.Attr("camliContentImage") != "") {
			// Don't re-import media we've already fetched.
			continue
		}
		tried, gotMedia := 0, false
		for _, mediaURL := range m.URLs() {
			tried++
			res, err := ctxutil.Client(r.Context()).Get(mediaURL)
			if err != nil {
				return false, fmt.Errorf("Error fetching %s for tweet %s : %v", mediaURL, url, err)
			}
			if res.StatusCode == http.StatusNotFound {
				continue
			}
			if res.StatusCode != 200 {
				return false, fmt.Errorf("HTTP status %d fetching %s for tweet %s", res.StatusCode, mediaURL, url)
			}
			if !viaAPI {
				log.Printf("For zip tweet %s, reading %v", url, mediaURL)
			}
			fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body)
			res.Body.Close()
			if err != nil {
				return false, fmt.Errorf("Error fetching media %s for tweet %s: %v", mediaURL, url, err)
			}
			attrs = append(attrs, "camliPath:"+filename, fileRef.String())
			if i == 0 {
				attrs = append(attrs, "camliContentImage", fileRef.String())
			}
			log.Printf("Slurped %s as %s for tweet %s (%v)", mediaURL, fileRef.String(), url, tweetNode.PermanodeRef())
			gotMedia = true
			break
		}
		if !gotMedia && tried > 0 {
			return false, fmt.Errorf("All media URLs 404s for tweet %s", url)
		}
	}

	changes, err := tweetNode.SetAttrs2(attrs...)
	if err == nil && changes {
		log.Printf("Imported tweet %s", url)
	}
	return !changes, err
}
Example #30
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.

	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}