func TestS3(t *testing.T) {
	if *bucket == "" || *key == "" || *secret == "" {
		t.Skip("Skipping test because at least one of -s3_key, -s3_secret, or -s3_bucket flags has not been provided.")
	}
	if !strings.HasPrefix(*bucket, "camlistore-") || !strings.HasSuffix(*bucket, "-test") {
		t.Fatalf("bogus bucket name %q; must begin with 'camlistore-' and end in '-test'", *bucket)
	}
	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		sto, err := newFromConfig(nil, jsonconfig.Obj{
			"aws_access_key":        *key,
			"aws_secret_access_key": *secret,
			"bucket":                *bucket,
		})
		if err != nil {
			t.Fatalf("newFromConfig error: %v", err)
		}
		if !testing.Short() {
			log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
		}
		clearBucket := func() {
			var all []blob.Ref
			blobserver.EnumerateAll(context.New(), sto, func(sb blob.SizedRef) error {
				t.Logf("Deleting: %v", sb.Ref)
				all = append(all, sb.Ref)
				return nil
			})
			if err := sto.RemoveBlobs(all); err != nil {
				t.Fatalf("Error removing blobs during cleanup: %v", err)
			}
		}
		clearBucket()
		return sto, clearBucket
	})
}
func TestIsolation(t *testing.T) {
	ld := test.NewLoader()
	master, _ := ld.GetStorage("/good-storage/")
	ns1 := newNamespace(t, ld)
	ns2 := newNamespace(t, ld)
	stoMap := map[string]blobserver.Storage{
		"ns1":    ns1,
		"ns2":    ns2,
		"master": master,
	}
	want := func(src string, want ...blob.Ref) {
		if _, ok := stoMap[src]; !ok {
			t.Fatalf("undefined storage %q", src)
		}
		sort.Sort(blob.ByRef(want))
		var got []blob.Ref
		if err := blobserver.EnumerateAll(context.TODO(), stoMap[src], func(sb blob.SizedRef) error {
			got = append(got, sb.Ref)
			return nil
		}); err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, want) {
			t.Errorf("server %q = %q; want %q", src, got, want)
		}
	}

	b1 := &test.Blob{Contents: "Blob 1"}
	b1r := b1.BlobRef()
	b2 := &test.Blob{Contents: "Blob 2"}
	b2r := b2.BlobRef()
	b3 := &test.Blob{Contents: "Shared Blob"}
	b3r := b3.BlobRef()

	b1.MustUpload(t, ns1)
	want("ns1", b1r)
	want("ns2")
	want("master", b1r)

	b2.MustUpload(t, ns2)
	want("ns1", b1r)
	want("ns2", b2r)
	want("master", b1r, b2r)

	b3.MustUpload(t, ns2)
	want("ns1", b1r)
	want("ns2", b2r, b3r)
	want("master", b1r, b2r, b3r)

	b3.MustUpload(t, ns1)
	want("ns1", b1r, b3r)
	want("ns2", b2r, b3r)
	want("master", b1r, b2r, b3r)

	if _, _, err := ns2.FetchStreaming(b1r); err == nil {
		t.Errorf("b1 shouldn't be accessible via ns2")
	}
}
func blobserverEnumerator(ctx *context.Context, src blobserver.BlobEnumerator) func(chan<- blob.SizedRef, <-chan struct{}) error {
	return func(dst chan<- blob.SizedRef, intr <-chan struct{}) error {
		return blobserver.EnumerateAll(ctx, src, func(sb blob.SizedRef) error {
			select {
			case dst <- sb:
			case <-intr:
				return errors.New("interrupted")
			}
			return nil
		})
	}
}
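// NOTE (added example, not from the original source): a minimal sketch of how
// the enumerator returned by blobserverEnumerator might be consumed. The
// drainEnumerator name and its error handling are assumptions for illustration
// only; the real callers wire dst/intr into their own workers.
func drainEnumerator(ctx *context.Context, src blobserver.BlobEnumerator) ([]blob.SizedRef, error) {
	enum := blobserverEnumerator(ctx, src)
	dst := make(chan blob.SizedRef)
	intr := make(chan struct{}) // closing this would interrupt the enumeration early
	errc := make(chan error, 1)
	go func() {
		errc <- enum(dst, intr)
		close(dst)
	}()
	var all []blob.SizedRef
	for sb := range dst {
		all = append(all, sb)
	}
	return all, <-errc
}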
func enumerateAllBlobs(s blobserver.Storage, destc chan<- blob.SizedRef) error {
	// Use *client.Client's support for enumerating all blobs if
	// possible, since it could probably do a better job knowing
	// HTTP boundaries and such.
	if nh, ok := s.(noHub); ok {
		return nh.Client.SimpleEnumerateBlobs(destc)
	}

	defer close(destc)
	return blobserver.EnumerateAll(s, func(sb blob.SizedRef) error {
		destc <- sb
		return nil
	})
}
// singleBlob assumes that sto contains a single blob and returns it.
// If there are more or fewer than one blob, it's an error.
func singleBlob(sto blobserver.BlobEnumerator) (ret blob.SizedRef, err error) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	n := 0
	if err = blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		ret = sb
		n++
		return nil
	}); err != nil {
		return blob.SizedRef{}, err
	}
	if n != 1 {
		return blob.SizedRef{}, fmt.Errorf("saw %d blobs; want 1", n)
	}
	return
}
// see if storage proxies through to small for Fetch, Stat, and Enumerate.
func TestSmallFallback(t *testing.T) {
	small := new(test.Fetcher)
	s := &storage{
		small: small,
		large: new(test.Fetcher),
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	s.init()
	b1 := &test.Blob{Contents: "foo"}
	b1.MustUpload(t, small)
	wantSB := b1.SizedRef()

	// Fetch
	rc, _, err := s.Fetch(b1.BlobRef())
	if err != nil {
		t.Errorf("failed to Get blob: %v", err)
	} else {
		rc.Close()
	}

	// Stat.
	sb, err := blobserver.StatBlob(s, b1.BlobRef())
	if err != nil {
		t.Errorf("failed to Stat blob: %v", err)
	} else if sb != wantSB {
		t.Errorf("Stat = %v; want %v", sb, wantSB)
	}

	// Enumerate
	saw := false
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	if err := blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
		if sb != wantSB {
			return fmt.Errorf("saw blob %v; want %v", sb, wantSB)
		}
		saw = true
		return nil
	}); err != nil {
		t.Errorf("EnumerateAll: %v", err)
	}
	if !saw {
		t.Error("didn't see blob in Enumerate")
	}
}
func enumerateAllBlobs(ctx context.Context, s blobserver.Storage, destc chan<- blob.SizedRef) error {
	// Use *client.Client's support for enumerating all blobs if
	// possible, since it could probably do a better job knowing
	// HTTP boundaries and such.
	if c, ok := s.(*client.Client); ok {
		return c.SimpleEnumerateBlobs(ctx, destc)
	}

	defer close(destc)
	return blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error {
		select {
		case destc <- sb:
		case <-ctx.Done():
			return ctx.Err()
		}
		return nil
	})
}
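// NOTE (added example, not from the original source): a hedged usage sketch of
// enumerateAllBlobs above. It assumes destc is closed once enumeration is done
// (the non-client path does so explicitly). listAllBlobs is a hypothetical
// helper name, not part of the original code.
func listAllBlobs(ctx context.Context, s blobserver.Storage) ([]blob.SizedRef, error) {
	destc := make(chan blob.SizedRef, 16)
	errc := make(chan error, 1)
	go func() {
		errc <- enumerateAllBlobs(ctx, s, destc)
	}()
	var all []blob.SizedRef
	for sb := range destc {
		all = append(all, sb)
	}
	return all, <-errc
}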
// RunOnce scans a.Source and conditionally creates a new zip.
// It returns ErrSourceTooSmall if there aren't enough blobs on Source.
func (a *Archiver) RunOnce() error {
	if a.Source == nil {
		return errors.New("archiver: nil Source")
	}
	if a.Store == nil {
		return errors.New("archiver: nil Store func")
	}
	pz := &potentialZip{a: a}
	err := blobserver.EnumerateAll(context.New(), a.Source, func(sb blob.SizedRef) error {
		if err := pz.addBlob(sb); err != nil {
			return err
		}
		if pz.bigEnough() {
			return errStopEnumerate
		}
		return nil
	})
	if err == errStopEnumerate {
		err = nil
	}
	if err != nil {
		return err
	}
	if err := pz.condClose(); err != nil {
		return err
	}
	if !pz.bigEnough() {
		return ErrSourceTooSmall
	}
	if err := a.Store(pz.buf.Bytes(), pz.blobs); err != nil {
		return err
	}
	if a.DeleteSourceAfterStore {
		blobs := make([]blob.Ref, 0, len(pz.blobs))
		for _, sb := range pz.blobs {
			blobs = append(blobs, sb.Ref)
		}
		if err := a.Source.RemoveBlobs(blobs); err != nil {
			return err
		}
	}
	return nil
}
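// NOTE (added example, not from the original source): a sketch of driving
// RunOnce in a loop, based only on the contract stated in its doc comment
// above. runArchiverUntilDrained is a hypothetical helper name.
func runArchiverUntilDrained(a *Archiver) error {
	for {
		err := a.RunOnce()
		if err == ErrSourceTooSmall {
			// Not enough blobs on Source to justify a new zip; stop for now.
			return nil
		}
		if err != nil {
			return err
		}
	}
}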
func testStorage(t *testing.T, bucketDir string) {
	if *bucket == "" || *key == "" || *secret == "" {
		t.Skip("Skipping test because at least one of -s3_key, -s3_secret, or -s3_bucket flags has not been provided.")
	}
	if !strings.HasPrefix(*bucket, "camlistore-") || !strings.HasSuffix(*bucket, "-test") {
		t.Fatalf("bogus bucket name %q; must begin with 'camlistore-' and end in '-test'", *bucket)
	}
	bucketWithDir := path.Join(*bucket, bucketDir)
	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		sto, err := newFromConfig(nil, jsonconfig.Obj{
			"aws_access_key":        *key,
			"aws_secret_access_key": *secret,
			"bucket":                bucketWithDir,
		})
		if err != nil {
			t.Fatalf("newFromConfig error: %v", err)
		}
		if !testing.Short() {
			log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
		}
		if bucketWithDir != *bucket {
			// Adding "a", and "c" objects in the bucket to make sure objects out of the
			// "directory" are not touched and have no influence.
			for _, key := range []string{"a", "c"} {
				var buf bytes.Buffer
				md5h := md5.New()
				size, err := io.Copy(io.MultiWriter(&buf, md5h), strings.NewReader(key))
				if err != nil {
					t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*s3Storage).bucket, err)
				}
				if err := sto.(*s3Storage).s3Client.PutObject(
					key, sto.(*s3Storage).bucket, md5h, size, &buf); err != nil {
					t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*s3Storage).bucket, err)
				}
			}
		}
		clearBucket := func(beforeTests bool) func() {
			return func() {
				var all []blob.Ref
				blobserver.EnumerateAll(context.TODO(), sto, func(sb blob.SizedRef) error {
					t.Logf("Deleting: %v", sb.Ref)
					all = append(all, sb.Ref)
					return nil
				})
				if err := sto.RemoveBlobs(all); err != nil {
					t.Fatalf("Error removing blobs during cleanup: %v", err)
				}
				if beforeTests {
					return
				}
				if bucketWithDir != *bucket {
					// checking that "a" and "c" at the root were left untouched.
					for _, key := range []string{"a", "c"} {
						if _, _, err := sto.(*s3Storage).s3Client.Get(sto.(*s3Storage).bucket, key); err != nil {
							t.Fatalf("could not find object %s after tests: %v", key, err)
						}
						if err := sto.(*s3Storage).s3Client.Delete(sto.(*s3Storage).bucket, key); err != nil {
							t.Fatalf("could not remove object %s after tests: %v", key, err)
						}
					}
				}
			}
		}
		clearBucket(true)()
		return sto, clearBucket(false)
	})
}
func (s *storage) readAllMetaBlobs() error {
	type metaBlob struct {
		br  *blobref.BlobRef
		dat []byte // encrypted blob
		err error
	}
	metac := make(chan metaBlob, 16)

	const maxInFlight = 50
	var gate = make(chan bool, maxInFlight)

	var stopEnumerate = make(chan bool) // closed on error
	enumErrc := make(chan error, 1)
	go func() {
		var wg sync.WaitGroup
		enumErrc <- blobserver.EnumerateAll(s.meta, func(sb blobref.SizedBlobRef) error {
			select {
			case <-stopEnumerate:
				return errors.New("enumeration stopped")
			default:
			}

			wg.Add(1)
			gate <- true
			go func() {
				defer wg.Done()
				defer func() { <-gate }()
				rc, _, err := s.meta.FetchStreaming(sb.BlobRef)
				var all []byte
				if err == nil {
					all, err = ioutil.ReadAll(rc)
					rc.Close()
				}
				metac <- metaBlob{sb.BlobRef, all, err}
			}()
			return nil
		})
		wg.Wait()
		close(metac)
	}()

	for mi := range metac {
		err := mi.err
		if err == nil {
			err = s.processEncryptedMetaBlob(mi.br, mi.dat)
		}
		if err != nil {
			close(stopEnumerate)
			go func() {
				for _ = range metac {
				}
			}()
			// TODO: advertise in this error message a new option or environment variable
			// to skip a certain or all meta blobs, to allow partial recovery, if some
			// are corrupt. For now, require all to be correct.
			return fmt.Errorf("Error with meta blob %v: %v", mi.br, err)
		}
	}

	return <-enumErrc
}
func testStorage(t *testing.T, bucketDir string) {
	if *bucket == "" && *configFile == "" {
		t.Skip("Skipping test without --bucket or --config flag")
	}
	var refreshToken string
	if *configFile != "" {
		data, err := ioutil.ReadFile(*configFile)
		if err != nil {
			t.Fatalf("Error reading config file %v: %v", *configFile, err)
		}
		var conf Config
		if err := json.Unmarshal(data, &conf); err != nil {
			t.Fatalf("Error decoding config file %v: %v", *configFile, err)
		}
		*clientID = conf.Auth.ClientID
		*clientSecret = conf.Auth.ClientSecret
		refreshToken = conf.Auth.RefreshToken
		*bucket = conf.Bucket
	}
	if *bucket == "" {
		t.Fatal("bucket not provided in config file or as a flag.")
	}
	if *clientID == "" {
		if !metadata.OnGCE() {
			if *clientSecret == "" {
				t.Fatal("client ID and client secret required. Obtain from https://console.developers.google.com/ > Project > APIs & Auth > Credentials. Should be a 'native' or 'Installed application'")
			}
		} else {
			*clientID = "auto"
		}
	}
	if *configFile == "" {
		config := &oauth2.Config{
			Scopes:       []string{storage.ScopeReadWrite},
			Endpoint:     google.Endpoint,
			ClientID:     *clientID,
			ClientSecret: *clientSecret,
			RedirectURL:  oauthutil.TitleBarRedirectURL,
		}
		if !metadata.OnGCE() {
			token, err := oauth2.ReuseTokenSource(nil,
				&oauthutil.TokenSource{
					Config:    config,
					CacheFile: *tokenCache,
					AuthCode: func() string {
						if *authCode == "" {
							t.Skipf("Re-run using --auth_code= with the value obtained from %s",
								config.AuthCodeURL("", oauth2.AccessTypeOffline, oauth2.ApprovalForce))
							return ""
						}
						return *authCode
					},
				}).Token()
			if err != nil {
				t.Fatalf("could not acquire token: %v", err)
			}
			refreshToken = token.RefreshToken
		}
	}
	bucketWithDir := path.Join(*bucket, bucketDir)
	storagetest.TestOpt(t, storagetest.Opts{
		New: func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
			sto, err := newFromConfig(nil, jsonconfig.Obj{
				"bucket": bucketWithDir,
				"auth": map[string]interface{}{
					"client_id":     *clientID,
					"client_secret": *clientSecret,
					"refresh_token": refreshToken,
				},
			})
			if err != nil {
				t.Fatal(err)
			}
			if !testing.Short() {
				log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
			}
			// Bail if bucket is not empty
			ctx := context.Background()
			stor := sto.(*Storage)
			objs, err := stor.client.Bucket(stor.bucket).List(ctx, nil)
			if err != nil {
				t.Fatalf("Error checking if bucket is empty: %v", err)
			}
			if len(objs.Results) != 0 {
				t.Fatalf("Refusing to run test: bucket %v is not empty", *bucket)
			}
			if bucketWithDir != *bucket {
				// Adding "a", and "c" objects in the bucket to make sure objects out of the
				// "directory" are not touched and have no influence.
				for _, key := range []string{"a", "c"} {
					w := stor.client.Bucket(stor.bucket).Object(key).NewWriter(ctx)
					if _, err := io.Copy(w, strings.NewReader(key)); err != nil {
						t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).bucket, err)
					}
					if err := w.Close(); err != nil {
						t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).bucket, err)
					}
				}
			}
			clearBucket := func(beforeTests bool) func() {
				return func() {
					var all []blob.Ref
					blobserver.EnumerateAll(context.TODO(), sto, func(sb blob.SizedRef) error {
						t.Logf("Deleting: %v", sb.Ref)
						all = append(all, sb.Ref)
						return nil
					})
					if err := sto.RemoveBlobs(all); err != nil {
						t.Fatalf("Error removing blobs during cleanup: %v", err)
					}
					if beforeTests {
						return
					}
					if bucketWithDir != *bucket {
						// checking that "a" and "c" at the root were left untouched.
						for _, key := range []string{"a", "c"} {
							rc, err := stor.client.Bucket(stor.bucket).Object(key).NewReader(ctx)
							if err != nil {
								t.Fatalf("could not find object %s after tests: %v", key, err)
							}
							if _, err := io.Copy(ioutil.Discard, rc); err != nil {
								t.Fatalf("could not find object %s after tests: %v", key, err)
							}
							if err := stor.client.Bucket(stor.bucket).Object(key).Delete(ctx); err != nil {
								t.Fatalf("could not remove object %s after tests: %v", key, err)
							}
						}
					}
				}
			}
			clearBucket(true)()
			return sto, clearBucket(false)
		},
	})
}
// TestStreamer tests that the BlobStreamer bs implements all of the
// promised interface behavior and ultimately yields the provided
// blobs.
//
// If bs also implements BlobEnumerator, the two are compared for
// consistency.
func TestStreamer(t *testing.T, bs blobserver.BlobStreamer, opts ...StreamerTestOpt) {
	var sawEnum map[blob.SizedRef]bool
	if enumer, ok := bs.(blobserver.BlobEnumerator); ok {
		sawEnum = make(map[blob.SizedRef]bool)
		// First do an enumerate over all blobs as a baseline. The Streamer should
		// yield the same blobs, even if it's in a different order.
		enumCtx := context.New()
		defer enumCtx.Cancel()
		if err := blobserver.EnumerateAll(enumCtx, enumer, func(sb blob.SizedRef) error {
			sawEnum[sb] = true
			return nil
		}); err != nil {
			t.Fatalf("Enumerate: %v", err)
		}
	}

	// See if, without cancelation, it yields the right
	// result and without errors.
	ch := make(chan blobserver.BlobAndToken)
	errCh := make(chan error, 1)
	go func() {
		ctx := context.New()
		defer ctx.Cancel()
		errCh <- bs.StreamBlobs(ctx, ch, "")
	}()
	var gotRefs []blob.SizedRef
	sawStreamed := map[blob.Ref]int{}
	for b := range ch {
		sawStreamed[b.Ref()]++
		sbr := b.SizedRef()
		if sawEnum != nil {
			if _, ok := sawEnum[sbr]; ok {
				delete(sawEnum, sbr)
			} else {
				t.Errorf("Streamer yielded blob not returned by Enumerate: %v", sbr)
			}
		}
		gotRefs = append(gotRefs, sbr)
	}
	if err := <-errCh; err != nil {
		t.Errorf("initial uninterrupted StreamBlobs error: %v", err)
	}
	for br, n := range sawStreamed {
		if n > 1 {
			t.Errorf("Streamed returned duplicate %v, %d times", br, n)
		}
	}
	nMissing := 0
	for sbr := range sawEnum {
		t.Errorf("Enumerate found %v but Streamer didn't return it", sbr)
		nMissing++
		if nMissing == 10 && len(sawEnum) > 10 {
			t.Errorf("... etc ...")
			break
		}
	}
	for _, opt := range opts {
		if err := opt.verify(gotRefs); err != nil {
			t.Errorf("error after first uninterrupted StreamBlobs pass: %v", err)
		}
	}
	if t.Failed() {
		return
	}

	// Next, the "complex pass": test a cancelation at each point,
	// to test that resume works properly.
	//
	// Basic strategy:
	// -- receive 1 blob, note the blobref, cancel.
	// -- start again with that blobref, receive 2, cancel. first should be same,
	//    second should be new. note its blobref.
	// Each iteration should yield 1 new unique blob and all but
	// the first and last will return 2 blobs.
	wantRefs := append([]blob.SizedRef(nil), gotRefs...) // copy
	sawStreamed = map[blob.Ref]int{}
	gotRefs = gotRefs[:0]
	contToken := ""
	for i := 0; i < len(wantRefs); i++ {
		ctx := context.New()
		ch := make(chan blobserver.BlobAndToken)
		errc := make(chan error, 1)
		go func() {
			errc <- bs.StreamBlobs(ctx, ch, contToken)
		}()
		nrecv := 0
		nextToken := ""
		for bt := range ch {
			nrecv++
			sbr := bt.Blob.SizedRef()
			isNew := len(gotRefs) == 0 || sbr != gotRefs[len(gotRefs)-1]
			if isNew {
				if sawStreamed[sbr.Ref] > 0 {
					t.Fatalf("In complex pass, returned duplicate blob %v\n\nSo far, before interrupting:\n%v\n\nWant:\n%v", sbr, gotRefs, wantRefs)
				}
				sawStreamed[sbr.Ref]++
				gotRefs = append(gotRefs, sbr)
				nextToken = bt.Token
				ctx.Cancel()
				break
			} else if i == 0 {
				t.Fatalf("first iteration should receive a new value")
			} else if nrecv == 2 {
				t.Fatalf("at cut point %d of testStream, Streamer received 2 values, both not unique. Looping?", i)
			}
		}
		err := <-errc
		if err != nil && err != context.ErrCanceled {
			t.Fatalf("StreamBlobs on iteration %d (token %q) returned error: %v", i, contToken, err)
		}
		if err == nil {
			break
		}
		contToken = nextToken
	}
	if !reflect.DeepEqual(gotRefs, wantRefs) {
		t.Errorf("Mismatch on complex pass (got %d, want %d):\n got %q\nwant %q\n", len(gotRefs), len(wantRefs), gotRefs, wantRefs)
		wantMap := map[blob.SizedRef]bool{}
		for _, sbr := range wantRefs {
			wantMap[sbr] = true
		}
		for _, sbr := range gotRefs {
			if _, ok := wantMap[sbr]; ok {
				delete(wantMap, sbr)
			} else {
				t.Errorf("got has unwanted: %v", sbr)
			}
		}
		missing := wantMap // found stuff has been deleted
		for sbr := range missing {
			t.Errorf("got is missing: %v", sbr)
		}
		t.FailNow()
	}
}
func (x *Index) Reindex() error {
	reindexMaxProcs.RLock()
	defer reindexMaxProcs.RUnlock()
	ctx := context.TODO()

	wiper, ok := x.s.(sorted.Wiper)
	if !ok {
		return fmt.Errorf("index's storage type %T doesn't support sorted.Wiper", x.s)
	}
	log.Printf("Wiping index storage type %T ...", x.s)
	if err := wiper.Wipe(); err != nil {
		return fmt.Errorf("error wiping index's sorted key/value type %T: %v", x.s, err)
	}
	log.Printf("Index wiped. Rebuilding...")

	reindexStart, _ := blob.Parse(os.Getenv("CAMLI_REINDEX_START"))

	err := x.s.Set(keySchemaVersion.name, fmt.Sprintf("%d", requiredSchemaVersion))
	if err != nil {
		return err
	}

	var nerrmu sync.Mutex
	nerr := 0

	blobc := make(chan blob.Ref, 32)

	enumCtx := context.TODO()
	enumErr := make(chan error, 1)
	go func() {
		defer close(blobc)
		donec := enumCtx.Done()
		var lastTick time.Time
		enumErr <- blobserver.EnumerateAll(enumCtx, x.blobSource, func(sb blob.SizedRef) error {
			now := time.Now()
			if lastTick.Before(now.Add(-1 * time.Second)) {
				log.Printf("Reindexing at %v", sb.Ref)
				lastTick = now
			}
			if reindexStart.Valid() && sb.Ref.Less(reindexStart) {
				return nil
			}
			select {
			case <-donec:
				return ctx.Err()
			case blobc <- sb.Ref:
				return nil
			}
		})
	}()
	var wg sync.WaitGroup
	for i := 0; i < reindexMaxProcs.v; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for br := range blobc {
				if err := x.indexBlob(br); err != nil {
					log.Printf("Error reindexing %v: %v", br, err)
					nerrmu.Lock()
					nerr++
					nerrmu.Unlock()
					// TODO: flag (or default?) to stop the EnumerateAll above once
					// there's any error with reindexing?
				}
			}
		}()
	}
	if err := <-enumErr; err != nil {
		return err
	}

	wg.Wait()

	x.mu.Lock()
	readyCount := len(x.readyReindex)
	x.mu.Unlock()
	if readyCount > 0 {
		return fmt.Errorf("%d blobs were ready to reindex in out-of-order queue, but not yet ran", readyCount)
	}

	log.Printf("Index rebuild complete.")
	nerrmu.Lock() // no need to unlock
	if nerr != 0 {
		return fmt.Errorf("%d blobs failed to re-index", nerr)
	}
	if err := x.initDeletesCache(); err != nil {
		return err
	}
	return nil
}
func TestForeachZipBlob(t *testing.T) {
	const fileSize = 2 << 20
	const fileName = "foo.dat"
	fileContents := randBytes(fileSize)

	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	pt := testPack(t,
		func(sto blobserver.Storage) error {
			_, err := schema.WriteFileFromReader(sto, fileName, bytes.NewReader(fileContents))
			return err
		},
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(0),
	)

	zipBlob, err := singleBlob(pt.large)
	if err != nil {
		t.Fatal(err)
	}
	zipBytes := slurpBlob(t, pt.large, zipBlob.Ref)
	zipSize := len(zipBytes)

	all := map[blob.Ref]blob.SizedRef{}
	if err := blobserver.EnumerateAll(ctx, pt.logical, func(sb blob.SizedRef) error {
		all[sb.Ref] = sb
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	foreachSaw := 0
	blobSizeSum := 0
	if err := pt.sto.foreachZipBlob(zipBlob.Ref, func(bap BlobAndPos) error {
		foreachSaw++
		blobSizeSum += int(bap.Size)
		want, ok := all[bap.Ref]
		if !ok {
			t.Errorf("unwanted blob ref returned from foreachZipBlob: %v", bap.Ref)
			return nil
		}
		delete(all, bap.Ref)
		if want.Size != bap.Size {
			t.Errorf("for %v, foreachZipBlob size = %d; want %d", bap.Ref, bap.Size, want.Size)
			return nil
		}

		// Verify the offset.
		h := bap.Ref.Hash()
		h.Write(zipBytes[bap.Offset : bap.Offset+int64(bap.Size)])
		if !bap.Ref.HashMatches(h) {
			return fmt.Errorf("foreachZipBlob returned blob %v at offset %d that failed validation", bap.Ref, bap.Offset)
		}

		return nil
	}); err != nil {
		t.Fatal(err)
	}
	t.Logf("foreachZipBlob enumerated %d blobs", foreachSaw)
	if len(all) > 0 {
		t.Errorf("foreachZipBlob forgot to enumerate %d blobs: %v", len(all), all)
	}
	// Calculate per-blobref zip overhead (zip file headers/TOC/manifest file, etc)
	zipOverhead := zipSize - blobSizeSum
	t.Logf("zip fixed overhead = %d bytes, for %d blobs (%d bytes each)", zipOverhead, foreachSaw, zipOverhead/foreachSaw)
}
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	})
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	//    no dup for 100% identical file bytes, test that uploading a
	//    49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}
func testStorage(t *testing.T, bucketDir string) {
	if *accountID == "" && *appKey == "" {
		t.Skip("Skipping test without --account-id or --application-key flag")
	}
	rn := rand.New(rand.NewSource(time.Now().UnixNano())).Intn(1000000)
	bucket := fmt.Sprintf("camli-test-%d", rn)
	bucketWithDir := path.Join(bucket, bucketDir)

	storagetest.TestOpt(t, storagetest.Opts{
		New: func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
			sto, err := newFromConfig(nil, jsonconfig.Obj{
				"bucket": bucketWithDir,
				"auth": map[string]interface{}{
					"account_id":      *accountID,
					"application_key": *appKey,
				},
			})
			if err != nil {
				t.Fatal(err)
			}
			if !testing.Short() {
				log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
			}
			if bucketWithDir != bucket {
				// Adding "a", and "c" objects in the bucket to make sure objects out of the
				// "directory" are not touched and have no influence.
				for _, key := range []string{"a", "c"} {
					if _, err := sto.(*Storage).b.Upload(strings.NewReader(key), key, ""); err != nil {
						t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).b.Name, err)
					}
				}
			}
			clearBucket := func(beforeTests bool) func() {
				return func() {
					var all []blob.Ref
					blobserver.EnumerateAll(context.TODO(), sto, func(sb blob.SizedRef) error {
						t.Logf("Deleting: %v", sb.Ref)
						all = append(all, sb.Ref)
						return nil
					})
					if err := sto.RemoveBlobs(all); err != nil {
						t.Fatalf("Error removing blobs during cleanup: %v", err)
					}
					if beforeTests {
						return
					}
					if bucketWithDir != bucket {
						// checking that "a" and "c" at the root were left untouched.
						for _, key := range []string{"a", "c"} {
							fi, err := sto.(*Storage).b.GetFileInfoByName(key)
							if err != nil {
								t.Fatalf("could not remove object %s after tests: %v", key, err)
							}
							if err := sto.(*Storage).cl.DeleteFile(fi.ID, fi.Name); err != nil {
								t.Fatalf("could not remove object %s after tests: %v", key, err)
							}
						}
					}
					if err := sto.(*Storage).b.Delete(); err != nil {
						t.Fatalf("could not remove bucket %s after tests: %v", sto.(*Storage).b.Name, err)
					}
				}
			}
			clearBucket(true)()
			return sto, clearBucket(false)
		},
	})
}
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.
	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}