Exemplo n.º 1
0
func TestS3(t *testing.T) {
	if *bucket == "" || *key == "" || *secret == "" {
		t.Skip("Skipping test because at least one of -s3_key, -s3_secret, or -s3_bucket flags has not been provided.")
	}
	if !strings.HasPrefix(*bucket, "camlistore-") || !strings.HasSuffix(*bucket, "-test") {
		t.Fatalf("bogus bucket name %q; must begin with 'camlistore-' and end in '-test'", *bucket)
	}
	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		sto, err := newFromConfig(nil, jsonconfig.Obj{
			"aws_access_key":        *key,
			"aws_secret_access_key": *secret,
			"bucket":                *bucket,
		})
		if err != nil {
			t.Fatalf("newFromConfig error: %v", err)
		}
		if !testing.Short() {
			log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
		}
		clearBucket := func() {
			var all []blob.Ref
			blobserver.EnumerateAll(context.New(), sto, func(sb blob.SizedRef) error {
				t.Logf("Deleting: %v", sb.Ref)
				all = append(all, sb.Ref)
				return nil
			})
			if err := sto.RemoveBlobs(all); err != nil {
				t.Fatalf("Error removing blobs during cleanup: %v", err)
			}
		}
		clearBucket()
		return sto, clearBucket
	})
}
Exemplo n.º 2
0
func TestIsolation(t *testing.T) {
	ld := test.NewLoader()
	master, _ := ld.GetStorage("/good-storage/")

	ns1 := newNamespace(t, ld)
	ns2 := newNamespace(t, ld)
	stoMap := map[string]blobserver.Storage{
		"ns1":    ns1,
		"ns2":    ns2,
		"master": master,
	}

	want := func(src string, want ...blob.Ref) {
		if _, ok := stoMap[src]; !ok {
			t.Fatalf("undefined storage %q", src)
		}
		sort.Sort(blob.ByRef(want))
		var got []blob.Ref
		if err := blobserver.EnumerateAll(context.TODO(), stoMap[src], func(sb blob.SizedRef) error {
			got = append(got, sb.Ref)
			return nil
		}); err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, want) {
			t.Errorf("server %q = %q; want %q", src, got, want)
		}
	}

	b1 := &test.Blob{Contents: "Blob 1"}
	b1r := b1.BlobRef()
	b2 := &test.Blob{Contents: "Blob 2"}
	b2r := b2.BlobRef()
	b3 := &test.Blob{Contents: "Shared Blob"}
	b3r := b3.BlobRef()

	b1.MustUpload(t, ns1)
	want("ns1", b1r)
	want("ns2")
	want("master", b1r)

	b2.MustUpload(t, ns2)
	want("ns1", b1r)
	want("ns2", b2r)
	want("master", b1r, b2r)

	b3.MustUpload(t, ns2)
	want("ns1", b1r)
	want("ns2", b2r, b3r)
	want("master", b1r, b2r, b3r)

	b3.MustUpload(t, ns1)
	want("ns1", b1r, b3r)
	want("ns2", b2r, b3r)
	want("master", b1r, b2r, b3r)

	if _, _, err := ns2.FetchStreaming(b1r); err == nil {
		t.Errorf("b1 shouldn't be accessible via ns2")
	}
}
Exemplo n.º 3
0
func blobserverEnumerator(ctx *context.Context, src blobserver.BlobEnumerator) func(chan<- blob.SizedRef, <-chan struct{}) error {
	return func(dst chan<- blob.SizedRef, intr <-chan struct{}) error {
		return blobserver.EnumerateAll(ctx, src, func(sb blob.SizedRef) error {
			select {
			case dst <- sb:
			case <-intr:
				return errors.New("interrupted")
			}
			return nil
		})
	}
}
Exemplo n.º 4
0
func enumerateAllBlobs(s blobserver.Storage, destc chan<- blob.SizedRef) error {
	// Use *client.Client's support for enumerating all blobs if
	// possible, since it could probably do a better job knowing
	// HTTP boundaries and such.
	if nh, ok := s.(noHub); ok {
		return nh.Client.SimpleEnumerateBlobs(destc)
	}

	defer close(destc)
	return blobserver.EnumerateAll(s, func(sb blob.SizedRef) error {
		destc <- sb
		return nil
	})
}
Exemplo n.º 5
0
// singleBlob assumes that sto contains a single blob and returns it.
// If there are more or fewer than one blob, it's an error.
func singleBlob(sto blobserver.BlobEnumerator) (ret blob.SizedRef, err error) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	n := 0
	if err = blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		ret = sb
		n++
		return nil
	}); err != nil {
		return blob.SizedRef{}, err
	}
	if n != 1 {
		return blob.SizedRef{}, fmt.Errorf("saw %d blobs; want 1", n)
	}
	return
}
Exemplo n.º 6
0
// see if storage proxies through to small for Fetch, Stat, and Enumerate.
func TestSmallFallback(t *testing.T) {
	small := new(test.Fetcher)
	s := &storage{
		small: small,
		large: new(test.Fetcher),
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	s.init()
	b1 := &test.Blob{"foo"}
	b1.MustUpload(t, small)
	wantSB := b1.SizedRef()

	// Fetch
	rc, _, err := s.Fetch(b1.BlobRef())
	if err != nil {
		t.Errorf("failed to Get blob: %v", err)
	} else {
		rc.Close()
	}

	// Stat.
	sb, err := blobserver.StatBlob(s, b1.BlobRef())
	if err != nil {
		t.Errorf("failed to Stat blob: %v", err)
	} else if sb != wantSB {
		t.Errorf("Stat = %v; want %v", sb, wantSB)
	}

	// Enumerate
	saw := false
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	if err := blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
		if sb != wantSB {
			return fmt.Errorf("saw blob %v; want %v", sb, wantSB)
		}
		saw = true
		return nil
	}); err != nil {
		t.Errorf("EnuerateAll: %v", err)
	}
	if !saw {
		t.Error("didn't see blob in Enumerate")
	}
}
Exemplo n.º 7
0
func enumerateAllBlobs(ctx context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error {
	// Use *client.Client's support for enumerating all blobs if
	// possible, since it could probably do a better job knowing
	// HTTP boundaries and such.
	if c, ok := s.(*client.Client); ok {
		return c.SimpleEnumerateBlobs(ctx, destc)
	}

	defer close(destc)
	return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
		select {
		case destc <- sb:
		case <-ctx.Done():
			return ctx.Err()
		}
		return nil
	})
}
Exemplo n.º 8
0
// RunOnce scans a.Source and conditionally creates a new zip.
// It returns ErrSourceTooSmall if there aren't enough blobs on Source.
func (a *Archiver) RunOnce() error {
	if a.Source == nil {
		return errors.New("archiver: nil Source")
	}
	if a.Store == nil {
		return errors.New("archiver: nil Store func")
	}
	pz := &potentialZip{a: a}
	err := blobserver.EnumerateAll(context.New(), a.Source, func(sb blob.SizedRef) error {
		if err := pz.addBlob(sb); err != nil {
			return err
		}
		if pz.bigEnough() {
			return errStopEnumerate
		}
		return nil
	})
	if err == errStopEnumerate {
		err = nil
	}
	if err != nil {
		return err
	}
	if err := pz.condClose(); err != nil {
		return err
	}
	if !pz.bigEnough() {
		return ErrSourceTooSmall
	}
	if err := a.Store(pz.buf.Bytes(), pz.blobs); err != nil {
		return err
	}
	if a.DeleteSourceAfterStore {
		blobs := make([]blob.Ref, 0, len(pz.blobs))
		for _, sb := range pz.blobs {
			blobs = append(blobs, sb.Ref)
		}
		if err := a.Source.RemoveBlobs(blobs); err != nil {
			return err
		}
	}
	return nil
}
Exemplo n.º 9
0
func testStorage(t *testing.T, bucketDir string) {
	if *bucket == "" || *key == "" || *secret == "" {
		t.Skip("Skipping test because at least one of -s3_key, -s3_secret, or -s3_bucket flags has not been provided.")
	}
	if !strings.HasPrefix(*bucket, "camlistore-") || !strings.HasSuffix(*bucket, "-test") {
		t.Fatalf("bogus bucket name %q; must begin with 'camlistore-' and end in '-test'", *bucket)
	}

	bucketWithDir := path.Join(*bucket, bucketDir)

	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		sto, err := newFromConfig(nil, jsonconfig.Obj{
			"aws_access_key":        *key,
			"aws_secret_access_key": *secret,
			"bucket":                bucketWithDir,
		})
		if err != nil {
			t.Fatalf("newFromConfig error: %v", err)
		}
		if !testing.Short() {
			log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
		}
		if bucketWithDir != *bucket {
			// Adding "a", and "c" objects in the bucket to make sure objects out of the
			// "directory" are not touched and have no influence.
			for _, key := range []string{"a", "c"} {
				var buf bytes.Buffer
				md5h := md5.New()
				size, err := io.Copy(io.MultiWriter(&buf, md5h), strings.NewReader(key))
				if err != nil {
					t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*s3Storage).bucket, err)
				}
				if err := sto.(*s3Storage).s3Client.PutObject(
					key, sto.(*s3Storage).bucket, md5h, size, &buf); err != nil {
					t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*s3Storage).bucket, err)
				}
			}
		}
		clearBucket := func(beforeTests bool) func() {
			return func() {
				var all []blob.Ref
				blobserver.EnumerateAll(context.TODO(), sto, func(sb blob.SizedRef) error {
					t.Logf("Deleting: %v", sb.Ref)
					all = append(all, sb.Ref)
					return nil
				})
				if err := sto.RemoveBlobs(all); err != nil {
					t.Fatalf("Error removing blobs during cleanup: %v", err)
				}
				if beforeTests {
					return
				}
				if bucketWithDir != *bucket {
					// checking that "a" and "c" at the root were left untouched.
					for _, key := range []string{"a", "c"} {
						if _, _, err := sto.(*s3Storage).s3Client.Get(sto.(*s3Storage).bucket, key); err != nil {
							t.Fatalf("could not find object %s after tests: %v", key, err)
						}
						if err := sto.(*s3Storage).s3Client.Delete(sto.(*s3Storage).bucket, key); err != nil {
							t.Fatalf("could not remove object %s after tests: %v", key, err)
						}
					}
				}
			}
		}
		clearBucket(true)()
		return sto, clearBucket(false)
	})
}
Exemplo n.º 10
0
func (s *storage) readAllMetaBlobs() error {
	type metaBlob struct {
		br  *blobref.BlobRef
		dat []byte // encrypted blob
		err error
	}
	metac := make(chan metaBlob, 16)

	const maxInFlight = 50
	var gate = make(chan bool, maxInFlight)

	var stopEnumerate = make(chan bool) // closed on error
	enumErrc := make(chan error, 1)
	go func() {
		var wg sync.WaitGroup
		enumErrc <- blobserver.EnumerateAll(s.meta, func(sb blobref.SizedBlobRef) error {
			select {
			case <-stopEnumerate:
				return errors.New("enumeration stopped")
			default:
			}

			wg.Add(1)
			gate <- true
			go func() {
				defer wg.Done()
				defer func() { <-gate }()
				rc, _, err := s.meta.FetchStreaming(sb.BlobRef)
				var all []byte
				if err == nil {
					all, err = ioutil.ReadAll(rc)
					rc.Close()
				}
				metac <- metaBlob{sb.BlobRef, all, err}
			}()
			return nil
		})
		wg.Wait()
		close(metac)
	}()

	for mi := range metac {
		err := mi.err
		if err == nil {
			err = s.processEncryptedMetaBlob(mi.br, mi.dat)
		}
		if err != nil {
			close(stopEnumerate)
			go func() {
				for _ = range metac {
				}
			}()
			// TODO: advertise in this error message a new option or environment variable
			// to skip a certain or all meta blobs, to allow partial recovery, if some
			// are corrupt. For now, require all to be correct.
			return fmt.Errorf("Error with meta blob %v: %v", mi.br, err)
		}
	}

	return <-enumErrc
}
Exemplo n.º 11
0
func testStorage(t *testing.T, bucketDir string) {
	if *bucket == "" && *configFile == "" {
		t.Skip("Skipping test without --bucket or --config flag")
	}
	var refreshToken string
	if *configFile != "" {
		data, err := ioutil.ReadFile(*configFile)
		if err != nil {
			t.Fatalf("Error reading config file %v: %v", *configFile, err)
		}
		var conf Config
		if err := json.Unmarshal(data, &conf); err != nil {
			t.Fatalf("Error decoding config file %v: %v", *configFile, err)
		}
		*clientID = conf.Auth.ClientID
		*clientSecret = conf.Auth.ClientSecret
		refreshToken = conf.Auth.RefreshToken
		*bucket = conf.Bucket
	}
	if *bucket == "" {
		t.Fatal("bucket not provided in config file or as a flag.")
	}
	if *clientID == "" {
		if !metadata.OnGCE() {
			if *clientSecret == "" {
				t.Fatal("client ID and client secret required. Obtain from https://console.developers.google.com/ > Project > APIs & Auth > Credentials. Should be a 'native' or 'Installed application'")
			}
		} else {
			*clientID = "auto"
		}
	}
	if *configFile == "" {
		config := &oauth2.Config{
			Scopes:       []string{storage.ScopeReadWrite},
			Endpoint:     google.Endpoint,
			ClientID:     *clientID,
			ClientSecret: *clientSecret,
			RedirectURL:  oauthutil.TitleBarRedirectURL,
		}
		if !metadata.OnGCE() {
			token, err := oauth2.ReuseTokenSource(nil,
				&oauthutil.TokenSource{
					Config:    config,
					CacheFile: *tokenCache,
					AuthCode: func() string {
						if *authCode == "" {
							t.Skipf("Re-run using --auth_code= with the value obtained from %s",
								config.AuthCodeURL("", oauth2.AccessTypeOffline, oauth2.ApprovalForce))
							return ""
						}
						return *authCode
					},
				}).Token()
			if err != nil {
				t.Fatalf("could not acquire token: %v", err)
			}
			refreshToken = token.RefreshToken
		}
	}

	bucketWithDir := path.Join(*bucket, bucketDir)

	storagetest.TestOpt(t, storagetest.Opts{
		New: func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
			sto, err := newFromConfig(nil, jsonconfig.Obj{
				"bucket": bucketWithDir,
				"auth": map[string]interface{}{
					"client_id":     *clientID,
					"client_secret": *clientSecret,
					"refresh_token": refreshToken,
				},
			})
			if err != nil {
				t.Fatal(err)
			}
			if !testing.Short() {
				log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
			}
			// Bail if bucket is not empty
			ctx := context.Background()
			stor := sto.(*Storage)
			objs, err := stor.client.Bucket(stor.bucket).List(ctx, nil)
			if err != nil {
				t.Fatalf("Error checking if bucket is empty: %v", err)
			}
			if len(objs.Results) != 0 {
				t.Fatalf("Refusing to run test: bucket %v is not empty", *bucket)
			}
			if bucketWithDir != *bucket {
				// Adding "a", and "c" objects in the bucket to make sure objects out of the
				// "directory" are not touched and have no influence.
				for _, key := range []string{"a", "c"} {
					w := stor.client.Bucket(stor.bucket).Object(key).NewWriter(ctx)
					if _, err := io.Copy(w, strings.NewReader(key)); err != nil {
						t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).bucket, err)
					}
					if err := w.Close(); err != nil {
						t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).bucket, err)
					}
				}
			}

			clearBucket := func(beforeTests bool) func() {
				return func() {
					var all []blob.Ref
					blobserver.EnumerateAll(context.TODO(), sto, func(sb blob.SizedRef) error {
						t.Logf("Deleting: %v", sb.Ref)
						all = append(all, sb.Ref)
						return nil
					})
					if err := sto.RemoveBlobs(all); err != nil {
						t.Fatalf("Error removing blobs during cleanup: %v", err)
					}
					if beforeTests {
						return
					}
					if bucketWithDir != *bucket {
						// checking that "a" and "c" at the root were left untouched.
						for _, key := range []string{"a", "c"} {
							rc, err := stor.client.Bucket(stor.bucket).Object(key).NewReader(ctx)
							if err != nil {
								t.Fatalf("could not find object %s after tests: %v", key, err)
							}
							if _, err := io.Copy(ioutil.Discard, rc); err != nil {
								t.Fatalf("could not find object %s after tests: %v", key, err)
							}
							if err := stor.client.Bucket(stor.bucket).Object(key).Delete(ctx); err != nil {
								t.Fatalf("could not remove object %s after tests: %v", key, err)
							}

						}
					}
				}
			}
			clearBucket(true)()
			return sto, clearBucket(false)
		},
	})
}
Exemplo n.º 12
0
// TestStreamer tests that the BlobStreamer bs implements all of the
// promised interface behavior and ultimately yields the provided
// blobs.
//
// If bs also implements BlobEnumerator, the two are compared for
// consistency.
func TestStreamer(t *testing.T, bs blobserver.BlobStreamer, opts ...StreamerTestOpt) {

	var sawEnum map[blob.SizedRef]bool
	if enumer, ok := bs.(blobserver.BlobEnumerator); ok {
		sawEnum = make(map[blob.SizedRef]bool)
		// First do an enumerate over all blobs as a baseline. The Streamer should
		// yield the same blobs, even if it's in a different order.
		enumCtx := context.New()
		defer enumCtx.Cancel()
		if err := blobserver.EnumerateAll(enumCtx, enumer, func(sb blob.SizedRef) error {
			sawEnum[sb] = true
			return nil
		}); err != nil {
			t.Fatalf("Enumerate: %v", err)
		}
	}

	// See if, without cancelation, it yields the right
	// result and without errors.
	ch := make(chan blobserver.BlobAndToken)
	errCh := make(chan error, 1)
	go func() {
		ctx := context.New()
		defer ctx.Cancel()
		errCh <- bs.StreamBlobs(ctx, ch, "")
	}()
	var gotRefs []blob.SizedRef
	sawStreamed := map[blob.Ref]int{}
	for b := range ch {
		sawStreamed[b.Ref()]++
		sbr := b.SizedRef()
		if sawEnum != nil {
			if _, ok := sawEnum[sbr]; ok {
				delete(sawEnum, sbr)
			} else {
				t.Errorf("Streamer yielded blob not returned by Enumerate: %v", sbr)
			}
		}
		gotRefs = append(gotRefs, sbr)
	}
	if err := <-errCh; err != nil {
		t.Errorf("initial uninterrupted StreamBlobs error: %v", err)
	}
	for br, n := range sawStreamed {
		if n > 1 {
			t.Errorf("Streamed returned duplicate %v, %d times", br, n)
		}
	}
	nMissing := 0
	for sbr := range sawEnum {
		t.Errorf("Enumerate found %v but Streamer didn't return it", sbr)
		nMissing++
		if nMissing == 10 && len(sawEnum) > 10 {
			t.Errorf("... etc ...")
			break
		}
	}
	for _, opt := range opts {
		if err := opt.verify(gotRefs); err != nil {
			t.Errorf("error after first uninterrupted StreamBlobs pass: %v", err)
		}
	}
	if t.Failed() {
		return
	}

	// Next, the "complex pass": test a cancelation at each point,
	// to test that resume works properly.
	//
	// Basic strategy:
	// -- receive 1 blob, note the blobref, cancel.
	// -- start again with that blobref, receive 2, cancel. first should be same,
	//    second should be new. note its blobref.
	// Each iteration should yield 1 new unique blob and all but
	// the first and last will return 2 blobs.
	wantRefs := append([]blob.SizedRef(nil), gotRefs...) // copy
	sawStreamed = map[blob.Ref]int{}
	gotRefs = gotRefs[:0]
	contToken := ""
	for i := 0; i < len(wantRefs); i++ {
		ctx := context.New()
		ch := make(chan blobserver.BlobAndToken)
		errc := make(chan error, 1)
		go func() {
			errc <- bs.StreamBlobs(ctx, ch, contToken)
		}()
		nrecv := 0
		nextToken := ""
		for bt := range ch {
			nrecv++
			sbr := bt.Blob.SizedRef()
			isNew := len(gotRefs) == 0 || sbr != gotRefs[len(gotRefs)-1]
			if isNew {
				if sawStreamed[sbr.Ref] > 0 {
					t.Fatalf("In complex pass, returned duplicate blob %v\n\nSo far, before interrupting:\n%v\n\nWant:\n%v", sbr, gotRefs, wantRefs)
				}
				sawStreamed[sbr.Ref]++
				gotRefs = append(gotRefs, sbr)
				nextToken = bt.Token
				ctx.Cancel()
				break
			} else if i == 0 {
				t.Fatalf("first iteration should receive a new value")
			} else if nrecv == 2 {
				t.Fatalf("at cut point %d of testStream, Streamer received 2 values, both not unique. Looping?", i)
			}
		}
		err := <-errc
		if err != nil && err != context.ErrCanceled {
			t.Fatalf("StreamBlobs on iteration %d (token %q) returned error: %v", i, contToken, err)
		}
		if err == nil {
			break
		}
		contToken = nextToken
	}
	if !reflect.DeepEqual(gotRefs, wantRefs) {
		t.Errorf("Mismatch on complex pass (got %d, want %d):\n got %q\nwant %q\n", len(gotRefs), len(wantRefs), gotRefs, wantRefs)
		wantMap := map[blob.SizedRef]bool{}
		for _, sbr := range wantRefs {
			wantMap[sbr] = true
		}
		for _, sbr := range gotRefs {
			if _, ok := wantMap[sbr]; ok {
				delete(wantMap, sbr)
			} else {
				t.Errorf("got has unwanted: %v", sbr)
			}
		}
		missing := wantMap // found stuff has been deleted
		for sbr := range missing {
			t.Errorf("got is missing: %v", sbr)
		}

		t.FailNow()
	}
}
Exemplo n.º 13
0
func (x *Index) Reindex() error {
	reindexMaxProcs.RLock()
	defer reindexMaxProcs.RUnlock()
	ctx := context.TODO()

	wiper, ok := x.s.(sorted.Wiper)
	if !ok {
		return fmt.Errorf("index's storage type %T doesn't support sorted.Wiper", x.s)
	}
	log.Printf("Wiping index storage type %T ...", x.s)
	if err := wiper.Wipe(); err != nil {
		return fmt.Errorf("error wiping index's sorted key/value type %T: %v", x.s, err)
	}
	log.Printf("Index wiped. Rebuilding...")

	reindexStart, _ := blob.Parse(os.Getenv("CAMLI_REINDEX_START"))

	err := x.s.Set(keySchemaVersion.name, fmt.Sprintf("%d", requiredSchemaVersion))
	if err != nil {
		return err
	}

	var nerrmu sync.Mutex
	nerr := 0

	blobc := make(chan blob.Ref, 32)

	enumCtx := context.TODO()
	enumErr := make(chan error, 1)
	go func() {
		defer close(blobc)
		donec := enumCtx.Done()
		var lastTick time.Time
		enumErr <- blobserver.EnumerateAll(enumCtx, x.blobSource, func(sb blob.SizedRef) error {
			now := time.Now()
			if lastTick.Before(now.Add(-1 * time.Second)) {
				log.Printf("Reindexing at %v", sb.Ref)
				lastTick = now
			}
			if reindexStart.Valid() && sb.Ref.Less(reindexStart) {
				return nil
			}
			select {
			case <-donec:
				return ctx.Err()
			case blobc <- sb.Ref:
				return nil
			}
		})
	}()
	var wg sync.WaitGroup
	for i := 0; i < reindexMaxProcs.v; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for br := range blobc {
				if err := x.indexBlob(br); err != nil {
					log.Printf("Error reindexing %v: %v", br, err)
					nerrmu.Lock()
					nerr++
					nerrmu.Unlock()
					// TODO: flag (or default?) to stop the EnumerateAll above once
					// there's any error with reindexing?
				}
			}
		}()
	}
	if err := <-enumErr; err != nil {
		return err
	}

	wg.Wait()

	x.mu.Lock()
	readyCount := len(x.readyReindex)
	x.mu.Unlock()
	if readyCount > 0 {
		return fmt.Errorf("%d blobs were ready to reindex in out-of-order queue, but not yet ran", readyCount)
	}

	log.Printf("Index rebuild complete.")
	nerrmu.Lock() // no need to unlock
	if nerr != 0 {
		return fmt.Errorf("%d blobs failed to re-index", nerr)
	}
	if err := x.initDeletesCache(); err != nil {
		return err
	}
	return nil
}
Exemplo n.º 14
0
func TestForeachZipBlob(t *testing.T) {
	const fileSize = 2 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(0),
	)

	zipBlob, err := singleBlob(pt.large)
	if err != nil {
		t.Fatal(err)
	}
	zipBytes := slurpBlob(t, pt.large, zipBlob.Ref)
	zipSize := len(zipBytes)

	all := map[blob.Ref]blob.SizedRef{}
	if err := blobserver.EnumerateAll(ctx, pt.logical, func(sb blob.SizedRef) error {
		all[sb.Ref] = sb
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	foreachSaw := 0
	blobSizeSum := 0
	if err := pt.sto.foreachZipBlob(zipBlob.Ref, func(bap BlobAndPos) error {
		foreachSaw++
		blobSizeSum += int(bap.Size)
		want, ok := all[bap.Ref]
		if !ok {
			t.Errorf("unwanted blob ref returned from foreachZipBlob: %v", bap.Ref)
			return nil
		}
		delete(all, bap.Ref)
		if want.Size != bap.Size {
			t.Errorf("for %v, foreachZipBlob size = %d; want %d", bap.Ref, bap.Size, want.Size)
			return nil
		}

		// Verify the offset.
		h := bap.Ref.Hash()
		h.Write(zipBytes[bap.Offset : bap.Offset+int64(bap.Size)])
		if !bap.Ref.HashMatches(h) {
			return fmt.Errorf("foreachZipBlob returned blob %v at offset %d that failed validation", bap.Ref, bap.Offset)
		}

		return nil
	}); err != nil {
		t.Fatal(err)
	}

	t.Logf("foreachZipBlob enumerated %d blobs", foreachSaw)
	if len(all) > 0 {
		t.Errorf("foreachZipBlob forgot to enumerate %d blobs: %v", len(all), all)
	}
	// Calculate per-blobref zip overhead (zip file headers/TOC/manifest file, etc)
	zipOverhead := zipSize - blobSizeSum
	t.Logf("zip fixed overhead = %d bytes, for %d blobs (%d bytes each)", zipOverhead, foreachSaw, zipOverhead/foreachSaw)
}
Exemplo n.º 15
0
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	})
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	// no dup for 100% identical file bytes, test that uploading a
	// 49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}
Exemplo n.º 16
0
func testStorage(t *testing.T, bucketDir string) {
	if *accountID == "" && *appKey == "" {
		t.Skip("Skipping test without --account-id or --application-key flag")
	}

	rn := rand.New(rand.NewSource(time.Now().UnixNano())).Intn(1000000)
	bucket := fmt.Sprintf("camli-test-%d", rn)
	bucketWithDir := path.Join(bucket, bucketDir)

	storagetest.TestOpt(t, storagetest.Opts{
		New: func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
			sto, err := newFromConfig(nil, jsonconfig.Obj{
				"bucket": bucketWithDir,
				"auth": map[string]interface{}{
					"account_id":      *accountID,
					"application_key": *appKey,
				},
			})
			if err != nil {
				t.Fatal(err)
			}
			if !testing.Short() {
				log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
			}
			if bucketWithDir != bucket {
				// Adding "a", and "c" objects in the bucket to make sure objects out of the
				// "directory" are not touched and have no influence.
				for _, key := range []string{"a", "c"} {
					if _, err := sto.(*Storage).b.Upload(strings.NewReader(key), key, ""); err != nil {
						t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).b.Name, err)
					}
				}
			}

			clearBucket := func(beforeTests bool) func() {
				return func() {
					var all []blob.Ref
					blobserver.EnumerateAll(context.TODO(), sto, func(sb blob.SizedRef) error {
						t.Logf("Deleting: %v", sb.Ref)
						all = append(all, sb.Ref)
						return nil
					})
					if err := sto.RemoveBlobs(all); err != nil {
						t.Fatalf("Error removing blobs during cleanup: %v", err)
					}
					if beforeTests {
						return
					}
					if bucketWithDir != bucket {
						// checking that "a" and "c" at the root were left untouched.
						for _, key := range []string{"a", "c"} {
							fi, err := sto.(*Storage).b.GetFileInfoByName(key)
							if err != nil {
								t.Fatalf("could not remove object %s after tests: %v", key, err)
							}
							if err := sto.(*Storage).cl.DeleteFile(fi.ID, fi.Name); err != nil {
								t.Fatalf("could not remove object %s after tests: %v", key, err)
							}

						}
					}
					if err := sto.(*Storage).b.Delete(); err != nil {
						t.Fatalf("could not remove bucket %s after tests: %v", sto.(*Storage).b.Name, err)
					}
				}
			}
			clearBucket(true)()
			return sto, clearBucket(false)
		},
	})
}
Exemplo n.º 17
0
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.

	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}