// TestBuffer exercises the buffered sorted.KeyValue wrapper: reads see the
// merged buffer-over-backing view immediately, sets are held in the buffer
// until Flush, and after Flush the backing store matches the merged view.
func TestBuffer(t *testing.T) {
	var (
		// Rows pre-populated directly in the backing store.
		toBack = []mod{
			{false, "b", "b1"},
			{false, "d", "d1"},
			{false, "f", "f1"},
		}
		// Mutations applied through the buffer: (isDelete, key, value).
		toBuf = []mod{
			{false, "a", "a2"},
			{false, "b", "b2"},
			{false, "c", "c2"},
			{false, "e", "e2"},
			{true, "f", ""},
			{false, "g", "g2"},
		}
		// Expected backing contents before Flush. Note the asymmetry the
		// expectations encode: the buffered Set of "b" has NOT reached
		// backing (still "b1"), but the Delete of "f" has.
		backBeforeFlush = []mod{
			{false, "b", "b1"},
			{false, "d", "d1"},
			// f deleted
		}
		// Expected merged view through the buffer, and the backing
		// contents after Flush.
		want = []mod{
			{false, "a", "a2"},
			{false, "b", "b2"},
			{false, "c", "c2"},
			{false, "d", "d1"},
			{false, "e", "e2"},
			// f deleted
			{false, "g", "g2"},
		}
	)

	// Populate backing storage.
	backing := sorted.NewMemoryKeyValue()
	for _, m := range toBack {
		backing.Set(m.key, m.value)
	}

	// Wrap with buffered storage, populate.
	buf := New(sorted.NewMemoryKeyValue(), backing, 1<<20)
	for _, m := range toBuf {
		if m.isDelete {
			buf.Delete(m.key)
		} else {
			buf.Set(m.key, m.value)
		}
	}

	// Check contents of buffered storage.
	check(t, buf, "buffered", want)
	check(t, backing, "backing before flush", backBeforeFlush)

	// Flush.
	if err := buf.Flush(); err != nil {
		t.Fatal("flush error: ", err)
	}

	// Check contents of backing storage.
	check(t, backing, "backing after flush", want)
}
// BenchmarkCorpusFromStorage measures building an in-memory corpus from an
// already-populated index KV. The KV is built exactly once (10 uploaded
// files, each pointed to by a planned permanode's camliContent attribute)
// and shared across benchmark iterations via buildKvOnce.
func BenchmarkCorpusFromStorage(b *testing.B) {
	defer test.TLog(b)()
	buildKvOnce.Do(func() {
		kvForBenchmark = sorted.NewMemoryKeyValue()
		idx := index.New(kvForBenchmark)
		id := indextest.NewIndexDeps(idx)
		id.Fataler = b
		for i := 0; i < 10; i++ {
			fileRef, _ := id.UploadFile("file.txt", fmt.Sprintf("some file %d", i), time.Unix(1382073153, 0))
			pn := id.NewPlannedPermanode(fmt.Sprint(i))
			id.SetAttribute(pn, "camliContent", fileRef.String())
		}
	})

	// Silence corpus logging for the timed loop; restore it afterwards.
	defer index.SetVerboseCorpusLogging(true)
	index.SetVerboseCorpusLogging(false)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, err := index.NewCorpusFromStorage(kvForBenchmark)
		if err != nil {
			b.Fatal(err)
		}
	}
}
// NewMemoryIndex returns an Index backed only by memory, for use in tests.
func NewMemoryIndex() *Index {
	ix, err := New(sorted.NewMemoryKeyValue())
	if err != nil {
		// Nothing to fail in memory, so worth panicking about
		// if we ever see something.
		panic(err)
	}
	return ix
}
// TODO(mpl): move this test into kvtest. But that might require
// kvtest taking a `func() sorted.KeyValue` constructor param,
// so kvtest can create several and close in different ways.

// TestMemoryKV_DoubleClose verifies that closing a memory KV iterator,
// or the store itself, twice in a row is a harmless no-op (no panic).
func TestMemoryKV_DoubleClose(t *testing.T) {
	kv := sorted.NewMemoryKeyValue()
	it := kv.Find("", "")
	it.Close()
	it.Close()
	kv.Close()
	kv.Close()
}
func TestStorage(t *testing.T) { storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) { s := &storage{ small: new(test.Fetcher), large: new(test.Fetcher), meta: sorted.NewMemoryKeyValue(), log: test.NewLogger(t, "blobpacked: "), } s.init() return s, func() {} }) }
// TestIndexingClaimMissingPubkey verifies out-of-order claim indexing: a
// claim received while its signer's public key is unfetchable gets a
// "missing|<claim>|<signer>" row, and once the key becomes available and
// reindexing completes, that row is removed.
func TestIndexingClaimMissingPubkey(t *testing.T) {
	s := sorted.NewMemoryKeyValue()
	idx, err := index.New(s)
	if err != nil {
		t.Fatal(err)
	}

	id := indextest.NewIndexDeps(idx)
	id.Fataler = t

	goodKeyFetcher := id.Index.KeyFetcher
	emptyFetcher := new(test.Fetcher)

	pn := id.NewPermanode()

	// Prevent the index from being able to find the public key:
	idx.KeyFetcher = emptyFetcher

	// This previously failed to index fully, since the signer's public key
	// was unavailable.
	claimRef := id.SetAttribute(pn, "tag", "foo")
	t.Logf(" Claim is %v", claimRef)
	t.Logf("Signer is %v", id.SignerBlobRef)

	// Verify that populateClaim noted the missing public key blob:
	{
		key := fmt.Sprintf("missing|%s|%s", claimRef, id.SignerBlobRef)
		if got, err := s.Get(key); got == "" || err != nil {
			t.Errorf("key %q missing (err: %v); want 1", key, err)
		}
	}

	// Now make it available again:
	idx.KeyFetcher = idx.Exp_BlobSource()

	if err := copyBlob(id.SignerBlobRef, idx.Exp_BlobSource().(*test.Fetcher), goodKeyFetcher); err != nil {
		t.Errorf("Error copying public key to BlobSource: %v", err)
	}
	if err := copyBlob(id.SignerBlobRef, idx, goodKeyFetcher); err != nil {
		t.Errorf("Error uploading public key to indexer: %v", err)
	}

	idx.Exp_AwaitReindexing(t)

	// Verify that reindexing removed the missing-pubkey row:
	{
		key := fmt.Sprintf("missing|%s|%s", claimRef, id.SignerBlobRef)
		if got, err := s.Get(key); got != "" || err == nil {
			t.Errorf("row %q still exists", key)
		}
	}
}
// TestOutOfOrderIndexing indexes a file schema blob before its data chunks
// exist, checks that "missing|<file>|<chunk>" rows get recorded, then adds
// the chunks and verifies that after reindexing no missing rows survive.
func TestOutOfOrderIndexing(t *testing.T) {
	tf := new(test.Fetcher)
	s := sorted.NewMemoryKeyValue()

	ix, err := index.New(s)
	if err != nil {
		t.Fatal(err)
	}
	ix.BlobSource = tf

	t.Logf("file ref = %v", fileBlobRef)
	t.Logf("missing data chunks = %v, %v, %v", chunk1ref, chunk2ref, chunk3ref)

	// add stores b in the blob source and feeds it to the index.
	add := func(b *test.Blob) {
		tf.AddBlob(b)
		if _, err := ix.ReceiveBlob(b.BlobRef(), b.Reader()); err != nil {
			t.Fatalf("ReceiveBlob(%v): %v", b.BlobRef(), err)
		}
	}

	add(fileBlob)

	// The file's chunks are absent, so a missing row must exist.
	{
		key := fmt.Sprintf("missing|%s|%s", fileBlobRef, chunk1ref)
		if got, err := s.Get(key); got == "" || err != nil {
			t.Errorf("key %q missing (err: %v); want 1", key, err)
		}
	}

	add(chunk1)
	add(chunk2)

	ix.Exp_AwaitReindexing(t)

	// chunk3 is still absent; its missing row should remain.
	{
		key := fmt.Sprintf("missing|%s|%s", fileBlobRef, chunk3ref)
		if got, err := s.Get(key); got == "" || err != nil {
			t.Errorf("key %q missing (err: %v); want 1", key, err)
		}
	}

	add(chunk3)

	ix.Exp_AwaitReindexing(t)

	// Everything arrived: there must be no missing rows left at all.
	foreachSorted(t, s, func(k, v string) {
		if strings.HasPrefix(k, "missing|") {
			t.Errorf("Shouldn't have missing key: %q", k)
		}
	})
}
// TestStorageNoSmallSubfetch runs the storage test suite with the small
// store's SubFetcher capability hidden, forcing *storage.SubFetch to use
// its own implementation instead of delegating to small.
func TestStorageNoSmallSubfetch(t *testing.T) {
	storagetest.Test(t, func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
		s := &storage{
			// We need to hide SubFetcher, to test *storage's SubFetch, as it delegates
			// to the underlying SubFetcher, if small implements that interface.
			small: hideSubFetcher(new(test.Fetcher)),
			large: new(test.Fetcher),
			meta:  sorted.NewMemoryKeyValue(),
			log:   test.NewLogger(t, "blobpacked: "),
		}
		s.init()
		return s, func() {}
	})
}
func testStreamBlobs(t *testing.T, small blobserver.Storage, large subFetcherStorage, populate func(*testing.T, *storage) []storagetest.StreamerTestOpt) { s := &storage{ small: small, large: large, meta: sorted.NewMemoryKeyValue(), log: test.NewLogger(t, "blobpacked: "), } s.init() wants := populate(t, s) storagetest.TestStreamer(t, s, wants...) }
// see if storage proxies through to small for Fetch, Stat, and Enumerate. func TestSmallFallback(t *testing.T) { small := new(test.Fetcher) s := &storage{ small: small, large: new(test.Fetcher), meta: sorted.NewMemoryKeyValue(), log: test.NewLogger(t, "blobpacked: "), } s.init() b1 := &test.Blob{"foo"} b1.MustUpload(t, small) wantSB := b1.SizedRef() // Fetch rc, _, err := s.Fetch(b1.BlobRef()) if err != nil { t.Errorf("failed to Get blob: %v", err) } else { rc.Close() } // Stat. sb, err := blobserver.StatBlob(s, b1.BlobRef()) if err != nil { t.Errorf("failed to Stat blob: %v", err) } else if sb != wantSB { t.Errorf("Stat = %v; want %v", sb, wantSB) } // Enumerate saw := false ctx, cancel := context.WithCancel(context.TODO()) defer cancel() if err := blobserver.EnumerateAll(ctx, s, func(sb blob.SizedRef) error { if sb != wantSB { return fmt.Errorf("saw blob %v; want %v", sb, wantSB) } saw = true return nil }); err != nil { t.Errorf("EnuerateAll: %v", err) } if !saw { t.Error("didn't see blob in Enumerate") } }
// TestStreamBlobs uploads nBlobs small blobs, streams from the beginning
// token, and checks StreamBlobs delivers exactly that set; it then runs
// the generic streamer conformance suite.
func TestStreamBlobs(t *testing.T) {
	small := new(test.Fetcher)
	s := &storage{
		small: small,
		large: new(test.Fetcher),
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	s.init()

	all := map[blob.Ref]bool{}
	const nBlobs = 10
	for i := 0; i < nBlobs; i++ {
		b := &test.Blob{strconv.Itoa(i)}
		b.MustUpload(t, small)
		all[b.BlobRef()] = true
	}
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()
	token := "" // beginning

	got := map[blob.Ref]bool{}
	dest := make(chan blobserver.BlobAndToken, 16)
	done := make(chan bool)
	// Drain dest concurrently; StreamBlobs blocks until the stream ends.
	go func() {
		defer close(done)
		for bt := range dest {
			got[bt.Blob.Ref()] = true
		}
	}()
	err := s.StreamBlobs(ctx, dest, token)
	if err != nil {
		t.Fatalf("StreamBlobs = %v", err)
	}
	<-done
	if !reflect.DeepEqual(got, all) {
		t.Errorf("Got blobs %v; want %v", got, all)
	}
	storagetest.TestStreamer(t, s, storagetest.WantN(nBlobs))
}
// dataStores returns the blobserver that stores the instances configurations, and the kv // store for the instances states. func dataStores() (blobserver.Storage, sorted.KeyValue, error) { if DevHandler { return &memory.Storage{}, sorted.NewMemoryKeyValue(), nil } dataDir := os.Getenv("CAMLI_GCE_DATA") if dataDir == "" { dataDir = "camli-gce-data" log.Printf("data dir not provided as env var CAMLI_GCE_DATA, so defaulting to %v", dataDir) } blobsDir := filepath.Join(dataDir, "instance-conf") if err := os.MkdirAll(blobsDir, 0700); err != nil { return nil, nil, err } instConf, err := localdisk.New(blobsDir) if err != nil { return nil, nil, err } instState, err := leveldb.NewStorage(filepath.Join(dataDir, "instance-state")) if err != nil { return nil, nil, err } return instConf, instState, nil }
func main() { launchConfig.MaybeDeploy() addr := flag.String("addr", defaultListenAddr(), "specify address for server to listen on") flag.Parse() memkv := sorted.NewMemoryKeyValue() if err := memkv.Set("6401800c.camlistore.net.", "159.203.246.79"); err != nil { panic(err) } if err := memkv.Set("camlistore.net.", "104.154.231.160"); err != nil { panic(err) } if err := memkv.Set("www.camlistore.net.", "104.154.231.160"); err != nil { panic(err) } ds := NewDNSServer(memkv) log.Printf("serving DNS on %s\n", *addr) if err := dns.ListenAndServe(*addr, "udp", ds); err != nil { log.Fatal(err) } }
// newTestStorage returns a testStorage wrapping a storage whose blobs and
// meta both live in in-memory fetchers, with its key set from testKey and
// a deterministic IV source, so test runs are reproducible.
func newTestStorage() *testStorage {
	sto := &storage{
		index: sorted.NewMemoryKeyValue(),
	}
	if err := sto.setKey(testKey); err != nil {
		panic(err)
	}
	ts := &testStorage{
		sto:   sto,
		blobs: new(test.Fetcher),
		meta:  new(test.Fetcher),
	}
	sto.blobs = ts.blobs
	sto.meta = ts.meta
	// Replace the random IV source with a counter written into the last
	// 8 bytes of a 16-byte IV, guarded by ts.mu.
	sto.testRandIV = func() []byte {
		ts.mu.Lock()
		defer ts.mu.Unlock()
		var ret [16]byte
		ts.iv++
		binary.BigEndian.PutUint64(ret[8:], ts.iv)
		return ret[:]
	}
	return ts
}
// NewMemoryIndex returns an Index backed only by memory, for use in tests.
func NewMemoryIndex() *Index {
	// The memory KV needs no cleanup; callers may simply discard the index.
	return New(sorted.NewMemoryKeyValue())
}
// main runs the camnetdns server. Records are backed by Cloud Datastore
// (with an LRU cache) on GCE, or by a memory KV otherwise. It serves DNS
// over both TCP and UDP, exposes the gpgchallenge HTTP handler over TLS
// (on GCE only), and exits fatally if any server errors out.
func main() {
	launchConfig.MaybeDeploy()
	flag.Parse()

	var kv keyValue
	var httpsListenAddr string
	if metadata.OnGCE() {
		httpsListenAddr = ":443"
		dsClient, err := datastore.NewClient(context.Background(), GCEProjectID)
		if err != nil {
			log.Fatalf("Error creating datastore client for records: %v", err)
		}
		kv = cachedStore{
			dsClient: dsClient,
			cache:    lru.New(cacheSize),
		}
	} else {
		httpsListenAddr = ":4430"
		kv = memkv{skv: sorted.NewMemoryKeyValue()}
	}

	// Seed the static records.
	if err := kv.Set("6401800c.camlistore.net.", "159.203.246.79"); err != nil {
		log.Fatalf("Error adding %v:%v record: %v", "6401800c.camlistore.net.", "159.203.246.79", err)
	}
	if err := kv.Set(domain, *flagServerIP); err != nil {
		log.Fatalf("Error adding %v:%v record: %v", domain, *flagServerIP, err)
	}
	if err := kv.Set("www.camlistore.net.", *flagServerIP); err != nil {
		log.Fatalf("Error adding %v:%v record: %v", "www.camlistore.net.", *flagServerIP, err)
	}

	ds := newDNSServer(kv)
	cs := &gpgchallenge.Server{
		// On a successful challenge, publish <identity>.camlistore.net
		// pointing at the prover's address.
		OnSuccess: func(identity string, address string) error {
			log.Printf("Adding %v.camlistore.net. as %v", identity, address)
			return ds.dataSource.Set(strings.ToLower(identity+".camlistore.net."), address)
		},
	}

	tcperr := make(chan error, 1)
	udperr := make(chan error, 1)
	httperr := make(chan error, 1)
	log.Printf("serving DNS on %s\n", *addr)
	go func() {
		tcperr <- dns.ListenAndServe(*addr, "tcp", ds)
	}()
	go func() {
		udperr <- dns.ListenAndServe(*addr, "udp", ds)
	}()
	if metadata.OnGCE() {
		// TODO(mpl): if we want to get a cert for anything
		// *.camlistore.net, it's a bit of a chicken and egg problem, since
		// we need camnetdns itself to be already running and answering DNS
		// queries. It's probably doable, but easier for now to just ask
		// one for camnetdns.camlistore.org, since that name is not
		// resolved by camnetdns.
		hostname := strings.TrimSuffix(authorityNS, ".")
		m := autocert.Manager{
			Prompt:     autocert.AcceptTOS,
			HostPolicy: autocert.HostWhitelist(hostname),
			Cache:      autocert.DirCache(osutil.DefaultLetsEncryptCache()),
		}
		ln, err := tls.Listen("tcp", httpsListenAddr, &tls.Config{
			Rand:           rand.Reader,
			Time:           time.Now,
			NextProtos:     []string{http2.NextProtoTLS, "http/1.1"},
			MinVersion:     tls.VersionTLS12,
			GetCertificate: m.GetCertificate,
		})
		if err != nil {
			log.Fatalf("Error listening on %v: %v", httpsListenAddr, err)
		}
		go func() {
			httperr <- http.Serve(ln, cs)
		}()
	}
	// Block until any of the three servers fails.
	select {
	case err := <-tcperr:
		log.Fatalf("DNS over TCP error: %v", err)
	case err := <-udperr:
		log.Fatalf("DNS error: %v", err)
	case err := <-httperr:
		log.Fatalf("HTTP server error: %v", err)
	}
}
func TestInitNeededMaps(t *testing.T) { s := sorted.NewMemoryKeyValue() // Start unknowning that the data chunks are all gone: s.Set("schemaversion", fmt.Sprint(index.Exp_schemaVersion())) s.Set(index.Exp_missingKey(fileBlobRef, chunk1ref), "1") s.Set(index.Exp_missingKey(fileBlobRef, chunk2ref), "1") s.Set(index.Exp_missingKey(fileBlobRef, chunk3ref), "1") ix, err := index.New(s) if err != nil { t.Fatal(err) } { needs, neededBy, _ := ix.NeededMapsForTest() needsWant := map[blob.Ref][]blob.Ref{ fileBlobRef: []blob.Ref{chunk1ref, chunk2ref, chunk3ref}, } neededByWant := map[blob.Ref][]blob.Ref{ chunk1ref: []blob.Ref{fileBlobRef}, chunk2ref: []blob.Ref{fileBlobRef}, chunk3ref: []blob.Ref{fileBlobRef}, } if !reflect.DeepEqual(needs, needsWant) { t.Errorf("needs = %v; want %v", needs, needsWant) } if !reflect.DeepEqual(neededBy, neededByWant) { t.Errorf("neededBy = %v; want %v", neededBy, neededByWant) } } ix.Exp_noteBlobIndexed(chunk2ref) { needs, neededBy, ready := ix.NeededMapsForTest() needsWant := map[blob.Ref][]blob.Ref{ fileBlobRef: []blob.Ref{chunk1ref, chunk3ref}, } neededByWant := map[blob.Ref][]blob.Ref{ chunk1ref: []blob.Ref{fileBlobRef}, chunk3ref: []blob.Ref{fileBlobRef}, } if !reflect.DeepEqual(needs, needsWant) { t.Errorf("needs = %v; want %v", needs, needsWant) } if !reflect.DeepEqual(neededBy, neededByWant) { t.Errorf("neededBy = %v; want %v", neededBy, neededByWant) } if len(ready) != 0 { t.Errorf("ready = %v; want nothing", ready) } } ix.Exp_noteBlobIndexed(chunk1ref) { needs, neededBy, ready := ix.NeededMapsForTest() needsWant := map[blob.Ref][]blob.Ref{ fileBlobRef: []blob.Ref{chunk3ref}, } neededByWant := map[blob.Ref][]blob.Ref{ chunk3ref: []blob.Ref{fileBlobRef}, } if !reflect.DeepEqual(needs, needsWant) { t.Errorf("needs = %v; want %v", needs, needsWant) } if !reflect.DeepEqual(neededBy, neededByWant) { t.Errorf("neededBy = %v; want %v", neededBy, neededByWant) } if len(ready) != 0 { t.Errorf("ready = %v; want nothing", ready) } } 
ix.Exp_noteBlobIndexed(chunk3ref) { needs, neededBy, ready := ix.NeededMapsForTest() needsWant := map[blob.Ref][]blob.Ref{} neededByWant := map[blob.Ref][]blob.Ref{} if !reflect.DeepEqual(needs, needsWant) { t.Errorf("needs = %v; want %v", needs, needsWant) } if !reflect.DeepEqual(neededBy, neededByWant) { t.Errorf("neededBy = %v; want %v", neededBy, neededByWant) } if !ready[fileBlobRef] { t.Error("fileBlobRef not ready") } } dumpSorted(t, s) }
// Populates the bs, and the index at the same time through the sync handler func populate(b *testing.B, dbfile string, sortedProvider func(dbfile string) (sorted.KeyValue, error)) *index.Index { b.Logf("populating %v", dbfile) kv, err := sortedProvider(dbfile) if err != nil { b.Fatal(err) } bsRoot := filepath.Join(filepath.Dir(dbfile), "bs") if err := os.MkdirAll(bsRoot, 0700); err != nil { b.Fatal(err) } dataDir, err := os.Open("testdata") if err != nil { b.Fatal(err) } fis, err := dataDir.Readdir(-1) if err != nil { b.Fatal(err) } if len(fis) == 0 { b.Fatalf("no files in %s dir", "testdata") } ks := doKeyStuff(b) bs, err := localdisk.New(bsRoot) if err != nil { b.Fatal(err) } if _, err := blobserver.Receive(bs, ks.pubKeyRef, strings.NewReader(ks.pubKey)); err != nil { b.Fatal(err) } idx, err := index.New(kv) if err != nil { b.Fatal(err) } idx.InitBlobSource(bs) sh := server.NewSyncHandler("/bs/", "/index/", bs, idx, sorted.NewMemoryKeyValue()) b.ResetTimer() for _, v := range fis { f, err := os.Open(filepath.Join(dataDir.Name(), v.Name())) if err != nil { b.Fatal(err) } td := &trackDigestReader{r: f} fm := schema.NewFileMap(v.Name()) fm.SetModTime(v.ModTime()) fileRef, err := schema.WriteFileMap(bs, fm, td) if err != nil { b.Fatal(err) } f.Close() unsigned := schema.NewPlannedPermanode(td.Sum()) unsigned.SetSigner(ks.pubKeyRef) sr := &jsonsign.SignRequest{ UnsignedJSON: unsigned.Blob().JSON(), // TODO(mpl): if we make a bs that discards, replace this with a memory bs that has only the pubkey Fetcher: bs, EntityFetcher: ks.entityFetcher, SignatureTime: time.Unix(0, 0), } signed, err := sr.Sign() if err != nil { b.Fatal("problem signing: " + err.Error()) } pn := blob.SHA1FromString(signed) // N.B: use blobserver.Receive so that the blob hub gets notified, and the blob gets enqueued into the index if _, err := blobserver.Receive(bs, pn, strings.NewReader(signed)); err != nil { b.Fatal(err) } contentAttr := schema.NewSetAttributeClaim(pn, "camliContent", 
fileRef.String()) claimTime, ok := fm.ModTime() if !ok { b.Fatal(err) } contentAttr.SetClaimDate(claimTime) contentAttr.SetSigner(ks.pubKeyRef) sr = &jsonsign.SignRequest{ UnsignedJSON: contentAttr.Blob().JSON(), // TODO(mpl): if we make a bs that discards, replace this with a memory bs that has only the pubkey Fetcher: bs, EntityFetcher: ks.entityFetcher, SignatureTime: claimTime, } signed, err = sr.Sign() if err != nil { b.Fatal("problem signing: " + err.Error()) } cl := blob.SHA1FromString(signed) if _, err := blobserver.Receive(bs, cl, strings.NewReader(signed)); err != nil { b.Fatal(err) } } sh.IdleWait() return idx }
// tests that we add the missing wholeRef entries in FileInfo rows when going from
// a version 4 to a version 5 index.
func TestFixMissingWholeref(t *testing.T) {
	tf := new(test.Fetcher)
	s := sorted.NewMemoryKeyValue()
	ix, err := index.New(s)
	if err != nil {
		t.Fatal(err)
	}
	ix.InitBlobSource(tf)

	// populate with a file
	add := func(b *test.Blob) {
		tf.AddBlob(b)
		if _, err := ix.ReceiveBlob(b.BlobRef(), b.Reader()); err != nil {
			t.Fatalf("ReceiveBlob(%v): %v", b.BlobRef(), err)
		}
	}
	add(chunk1)
	add(chunk2)
	add(chunk3)
	add(fileBlob)

	// revert the row to the old form, by stripping the wholeRef suffix.
	// A v5 row has four '|'-separated fields, the last being the
	// wholeRef; keep the first three to fake a v4 row.
	key := "fileinfo|" + fileBlobRef.String()
	val5, err := s.Get(key)
	if err != nil {
		t.Fatalf("could not get %v: %v", key, err)
	}
	parts := strings.SplitN(val5, "|", 4)
	val4 := strings.Join(parts[:3], "|")
	if err := s.Set(key, val4); err != nil {
		t.Fatalf("could not set (%v, %v): %v", key, val4, err)
	}

	// revert index version at 4 to trigger the fix
	if err := s.Set("schemaversion", "4"); err != nil {
		t.Fatal(err)
	}

	// init broken index: New must report the missing-wholeRef condition.
	ix, err = index.New(s)
	if err != index.Exp_ErrMissingWholeRef {
		t.Fatalf("wrong error upon index initialization: got %v, wanted %v", err, index.Exp_ErrMissingWholeRef)
	}
	// and fix it
	if err := ix.Exp_FixMissingWholeRef(tf); err != nil {
		t.Fatal(err)
	}

	// init fixed index
	ix, err = index.New(s)
	if err != nil {
		t.Fatal(err)
	}
	// and check that the value is now actually fixed: the wholeRef field
	// must match what the v5 row originally had.
	fi, err := ix.GetFileInfo(fileBlobRef)
	if err != nil {
		t.Fatal(err)
	}
	if fi.WholeRef.String() != parts[3] {
		t.Fatalf("index fileInfo wholeref was not fixed: got %q, wanted %v", fi.WholeRef, parts[3])
	}
}
// TestReindex packs three files into blobpacked storage, rebuilds the meta
// index from scratch via reindex, and verifies the rebuilt index matches
// the original row for row (and in row count), then re-reads one file.
func TestReindex(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	type file struct {
		size     int64
		name     string
		contents []byte
	}
	// Seeded pseudo-random contents so runs are reproducible.
	files := []file{
		{17 << 20, "foo.dat", randBytesSrc(17<<20, 42)},
		{10 << 20, "bar.dat", randBytesSrc(10<<20, 43)},
		{5 << 20, "baz.dat", randBytesSrc(5<<20, 44)},
	}
	pt := testPack(t,
		func(sto blobserver.Storage) error {
			for _, f := range files {
				if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil {
					return err
				}
			}
			return nil
		},
		wantNumLargeBlobs(4),
		wantNumSmallBlobs(0),
	)

	// backup the meta that is supposed to be lost/erased.
	// pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called.
	meta := pt.sto.meta

	// and build new meta index
	if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	}); err != nil {
		t.Fatal(err)
	}

	// check that new meta is identical to "lost" one
	newRows := 0
	if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error {
		oldValue, err := meta.Get(key)
		if err != nil {
			t.Fatalf("Could not get value for %v in old meta: %v", key, err)
		}
		if oldValue != newValue {
			t.Fatalf("Reindexing error: for key %v, got %v, want %v", key, newValue, oldValue)
		}
		newRows++
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// make sure they have the same number of entries too, to be sure that the reindexing
	// did not miss entries that the old meta had.
	oldRows := countSortedRows(t, meta)
	if oldRows != newRows {
		t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows)
	}

	// And verify we can read one of the files back out.
	hash := blob.NewHash()
	hash.Write(files[0].contents)
	pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size)
}
// TestReindex packs three files, rebuilds the meta index from scratch via
// reindex, and verifies each rebuilt row either matches the old meta
// exactly or (for blob rows, which may point at differently-sorted packed
// zips) still describes valid content; row counts must match too.
func TestReindex(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping in short mode")
	}

	type file struct {
		size     int64
		name     string
		contents []byte
	}
	// Seeded pseudo-random contents so runs are reproducible.
	files := []file{
		{17 << 20, "foo.dat", randBytesSrc(17<<20, 42)},
		{10 << 20, "bar.dat", randBytesSrc(10<<20, 43)},
		{5 << 20, "baz.dat", randBytesSrc(5<<20, 44)},
	}
	pt := testPack(t,
		func(sto blobserver.Storage) error {
			for _, f := range files {
				if _, err := schema.WriteFileFromReader(sto, f.name, bytes.NewReader(f.contents)); err != nil {
					return err
				}
			}
			return nil
		},
		wantNumLargeBlobs(4),
		wantNumSmallBlobs(0),
	)

	// backup the meta that is supposed to be lost/erased.
	// pt.sto.reindex allocates a new pt.sto.meta, so meta != pt.sto.meta after it is called.
	meta := pt.sto.meta

	// and build new meta index
	if err := pt.sto.reindex(context.TODO(), func() (sorted.KeyValue, error) {
		return sorted.NewMemoryKeyValue(), nil
	}); err != nil {
		t.Fatal(err)
	}

	// validBlobKey reports (as an error) whether a "b:<ref>" meta row's
	// value points at a span of a packed zip whose bytes actually hash to
	// <ref> and have the recorded size.
	validBlobKey := func(key, value string) error {
		if !strings.HasPrefix(key, "b:") {
			return errors.New("not a blob meta key")
		}
		wantRef, ok := blob.Parse(key[2:])
		if !ok {
			return errors.New("bogus blobref in key")
		}
		m, err := parseMetaRow([]byte(value))
		if err != nil {
			return err
		}
		rc, err := pt.large.SubFetch(m.largeRef, int64(m.largeOff), int64(m.size))
		if err != nil {
			return err
		}
		defer rc.Close()
		h := wantRef.Hash()
		n, err := io.Copy(h, rc)
		if err != nil {
			return err
		}
		if !wantRef.HashMatches(h) {
			return errors.New("content doesn't match")
		}
		if n != int64(m.size) {
			return errors.New("size doesn't match")
		}
		return nil
	}

	// check that new meta is identical to "lost" one
	newRows := 0
	if err := sorted.Foreach(pt.sto.meta, func(key, newValue string) error {
		oldValue, err := meta.Get(key)
		if err != nil {
			t.Fatalf("Could not get value for %v in old meta: %v", key, err)
		}
		newRows++
		// Exact match is fine.
		if oldValue == newValue {
			return nil
		}
		// If it differs, it should at least be correct. (blob metadata
		// can now point to different packed zips, depending on sorting)
		err = validBlobKey(key, newValue)
		if err == nil {
			return nil
		}
		t.Errorf("Reindexing error: for key %v: %v\n got: %q\nwant: %q", key, err, newValue, oldValue)
		return nil // keep enumerating, regardless of errors
	}); err != nil {
		t.Fatal(err)
	}

	// make sure they have the same number of entries too, to be sure that the reindexing
	// did not miss entries that the old meta had.
	oldRows := countSortedRows(t, meta)
	if oldRows != newRows {
		t.Fatalf("index number of entries mismatch: got %d entries in new index, wanted %d (as in index before reindexing)", newRows, oldRows)
	}

	// And verify we can read one of the files back out.
	hash := blob.NewHash()
	hash.Write(files[0].contents)
	pt.testOpenWholeRef(t, blob.RefFromHash(hash), files[0].size)
}
// testPack writes the same logical blobs twice — once into a plain fetcher
// to establish a baseline, once through a fresh blobpacked storage — then
// validates the result: expected small/large blob counts (set via the
// checks options), well-formed zips whose manifest entries hash-match the
// zip bytes, and a meta row for every logical blob pointing into a seen
// zip at the correct offset. It returns the populated packTest for
// further assertions by the caller.
func testPack(t *testing.T,
	write func(sto blobserver.Storage) error,
	checks ...func(*packTest),
) *packTest {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	logical := new(test.Fetcher)
	small, large := new(test.Fetcher), new(test.Fetcher)
	pt := &packTest{
		logical: logical,
		small:   small,
		large:   large,
	}
	// Figure out the logical baseline blobs we'll later expect in the packed storage.
	if err := write(logical); err != nil {
		t.Fatal(err)
	}
	t.Logf("items in logical storage: %d", logical.NumBlobs())

	pt.sto = &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	pt.sto.init()

	// Apply caller-supplied expectations/options before writing.
	for _, setOpt := range checks {
		setOpt(pt)
	}

	if err := write(pt.sto); err != nil {
		t.Fatal(err)
	}

	t.Logf("items in small: %v", small.NumBlobs())
	t.Logf("items in large: %v", large.NumBlobs())

	if want, ok := pt.wantLargeBlobs.(int); ok && want != large.NumBlobs() {
		t.Fatalf("num large blobs = %d; want %d", large.NumBlobs(), want)
	}
	if want, ok := pt.wantSmallBlobs.(int); ok && want != small.NumBlobs() {
		t.Fatalf("num small blobs = %d; want %d", small.NumBlobs(), want)
	}

	var zipRefs []blob.Ref
	var zipSeen = map[blob.Ref]bool{}
	// NOTE(review): EnumerateAll's error is ignored here; the count check
	// just below would likely catch a truncated enumeration, though.
	blobserver.EnumerateAll(ctx, large, func(sb blob.SizedRef) error {
		zipRefs = append(zipRefs, sb.Ref)
		zipSeen[sb.Ref] = true
		return nil
	})
	if len(zipRefs) != large.NumBlobs() {
		t.Fatalf("Enumerated only %d zip files; expected %d", len(zipRefs), large.NumBlobs())
	}

	bytesOfZip := map[blob.Ref][]byte{}
	for _, zipRef := range zipRefs {
		rc, _, err := large.Fetch(zipRef)
		if err != nil {
			t.Fatal(err)
		}
		zipBytes, err := ioutil.ReadAll(rc)
		rc.Close()
		if err != nil {
			t.Fatalf("Error slurping %s: %v", zipRef, err)
		}
		if len(zipBytes) > constants.MaxBlobSize {
			t.Fatalf("zip is too large: %d > max %d", len(zipBytes), constants.MaxBlobSize)
		}
		bytesOfZip[zipRef] = zipBytes
		zr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
		if err != nil {
			t.Fatalf("Error reading resulting zip file: %v", err)
		}
		if len(zr.File) == 0 {
			t.Fatal("zip is empty")
		}
		nameSeen := map[string]bool{}
		for i, zf := range zr.File {
			if nameSeen[zf.Name] {
				t.Errorf("duplicate name %q seen", zf.Name)
			}
			nameSeen[zf.Name] = true
			t.Logf("zip[%d] size %d, %v", i, zf.UncompressedSize64, zf.Name)
		}
		// The manifest is the last entry in the zip.
		mfr, err := zr.File[len(zr.File)-1].Open()
		if err != nil {
			t.Fatalf("Error opening manifest JSON: %v", err)
		}
		maniJSON, err := ioutil.ReadAll(mfr)
		if err != nil {
			t.Fatalf("Error reading manifest JSON: %v", err)
		}
		var mf Manifest
		if err := json.Unmarshal(maniJSON, &mf); err != nil {
			t.Fatalf("invalid JSON: %v", err)
		}

		// Verify each chunk described in the manifest:
		baseOffset, err := zr.File[0].DataOffset()
		if err != nil {
			t.Fatal(err)
		}
		for _, bo := range mf.DataBlobs {
			h := bo.Ref.Hash()
			h.Write(zipBytes[baseOffset+bo.Offset : baseOffset+bo.Offset+int64(bo.Size)])
			if !bo.Ref.HashMatches(h) {
				t.Errorf("blob %+v didn't describe the actual data in the zip", bo)
			}
		}
		if debug {
			t.Logf("Manifest: %s", maniJSON)
		}
	}

	// Verify that each chunk in the logical mapping is in the meta.
	logBlobs := 0
	if err := blobserver.EnumerateAll(ctx, logical, func(sb blob.SizedRef) error {
		logBlobs++
		v, err := pt.sto.meta.Get(blobMetaPrefix + sb.Ref.String())
		if err == sorted.ErrNotFound && pt.okayNoMeta[sb.Ref] {
			return nil
		}
		if err != nil {
			return fmt.Errorf("error looking up logical blob %v in meta: %v", sb.Ref, err)
		}
		m, err := parseMetaRow([]byte(v))
		if err != nil {
			return fmt.Errorf("error parsing logical blob %v meta %q: %v", sb.Ref, v, err)
		}
		if !m.exists || m.size != sb.Size || !zipSeen[m.largeRef] {
			return fmt.Errorf("logical blob %v = %+v; want in zip", sb.Ref, m)
		}
		// Re-hash the referenced span of the zip to confirm it really is
		// this blob's bytes.
		h := sb.Ref.Hash()
		h.Write(bytesOfZip[m.largeRef][m.largeOff : m.largeOff+sb.Size])
		if !sb.Ref.HashMatches(h) {
			t.Errorf("blob %v not found matching in zip", sb.Ref)
		}
		return nil
	}); err != nil {
		t.Fatal(err)
	}
	if logBlobs != logical.NumBlobs() {
		t.Error("enumerate over logical blobs didn't work?")
	}

	// TODO, more tests:
	// -- like TestPackTwoIdenticalfiles, but instead of testing
	// no dup for 100% identical file bytes, test that uploading a
	// 49% identical one does not denormalize and repack.
	// -- test StreamBlobs in all its various flavours, and recovering from stream blobs.
	// -- overflowing the 16MB chunk size with huge initial chunks
	return pt
}
func TestMemoryKV(t *testing.T) { kv := sorted.NewMemoryKeyValue() kvtest.TestSorted(t, kv) }
// TestRemoveBlobs checks removal of packed blobs: deleting every blob in a
// packed zip leaves "d:" (deleted) rows in the meta, the zip itself can't
// be deleted while any of its blobs are live, and once all are removed
// deleteZipPack succeeds and the "d:" rows are cleared.
func TestRemoveBlobs(t *testing.T) {
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// The basic small cases are handled via storagetest in TestStorage,
	// so this only tests removing packed blobs.
	small := new(test.Fetcher)
	large := new(test.Fetcher)
	sto := &storage{
		small: small,
		large: large,
		meta:  sorted.NewMemoryKeyValue(),
		log:   test.NewLogger(t, "blobpacked: "),
	}
	sto.init()

	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	if _, err := schema.WriteFileFromReader(sto, "foo.dat", bytes.NewReader(fileContents)); err != nil {
		t.Fatal(err)
	}
	// Everything should have been packed into large; small stays empty.
	if small.NumBlobs() != 0 || large.NumBlobs() == 0 {
		t.Fatalf("small, large counts == %d, %d; want 0, non-zero", small.NumBlobs(), large.NumBlobs())
	}
	var all []blob.SizedRef
	if err := blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
		all = append(all, sb)
		return nil
	}); err != nil {
		t.Fatal(err)
	}

	// Find the zip
	zipBlob, err := singleBlob(sto.large)
	if err != nil {
		t.Fatalf("failed to find packed zip: %v", err)
	}

	// The zip file is in use, so verify we can't delete it.
	if err := sto.deleteZipPack(zipBlob.Ref); err == nil {
		t.Fatalf("zip pack blob deleted but it should not have been allowed")
	}

	// Delete everything, one blob at a time, re-checking the enumeration
	// after each removal.
	for len(all) > 0 {
		del := all[0].Ref
		all = all[1:]
		if err := sto.RemoveBlobs([]blob.Ref{del}); err != nil {
			t.Fatalf("RemoveBlobs: %v", err)
		}
		if err := storagetest.CheckEnumerate(sto, all); err != nil {
			t.Fatalf("After deleting %v, %v", del, err)
		}
	}

	// dRows counts the "d:" (deleted) rows currently in the meta.
	dRows := func() (n int) {
		if err := sorted.ForeachInRange(sto.meta, "d:", "", func(key, value string) error {
			if strings.HasPrefix(key, "d:") {
				n++
			}
			return nil
		}); err != nil {
			t.Fatalf("meta iteration error: %v", err)
		}
		return
	}

	if n := dRows(); n == 0 {
		t.Fatalf("expected a 'd:' row after deletes")
	}

	// TODO: test the background pack-deleter loop? figure out its design first.

	// With everything in the zip removed, deleting the zip pack should
	// now succeed and clear the "d:" rows.
	if err := sto.deleteZipPack(zipBlob.Ref); err != nil {
		t.Errorf("error deleting zip %v: %v", zipBlob.Ref, err)
	}
	if n := dRows(); n != 0 {
		t.Errorf("expected the 'd:' row to be deleted")
	}
}
// TestPackerBoundarySplits binary-searches the size of a third file chunk
// that pushes a packed zip over the 16 MB blob-size limit, verifying the
// packer's back-up-and-rewrite path (truncate after blobB) actually runs.
func TestPackerBoundarySplits(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping slow test")
	}
	// Test a file of three chunk sizes, totalling near the 16 MB
	// boundary:
	// - 1st chunk is 6 MB. ("blobA")
	// - 2nd chunk is 6 MB. ("blobB")
	// - 3rd chunk ("blobC") is binary-searched (up to 4MB) to find
	// which size causes the packer to write two zip files.

	// During the test we set zip overhead boundaries to 0, to
	// force the test to into its pathological misprediction code paths,
	// where it needs to back up and rewrite the zip with one part less.
	// That's why the test starts with two zip files: so there's at
	// least one that can be removed to make room.
	defer setIntTemporarily(&zipPerEntryOverhead, 0)()

	const sizeAB = 12 << 20
	const maxBlobSize = 16 << 20
	bytesAB := randBytes(sizeAB)
	blobA := &test.Blob{string(bytesAB[:sizeAB/2])}
	blobB := &test.Blob{string(bytesAB[sizeAB/2:])}
	refA := blobA.BlobRef()
	refB := blobB.BlobRef()
	bytesCFull := randBytes(maxBlobSize - sizeAB) // will be sliced down

	// Mechanism to verify we hit the back-up code path:
	var (
		mu                    sync.Mutex
		sawTruncate           blob.Ref
		stoppedBeforeOverflow bool
	)
	testHookSawTruncate = func(after blob.Ref) {
		if after != refB {
			t.Errorf("unexpected truncate point %v", after)
		}
		mu.Lock()
		defer mu.Unlock()
		sawTruncate = after
	}
	testHookStopBeforeOverflowing = func() {
		mu.Lock()
		defer mu.Unlock()
		stoppedBeforeOverflow = true
	}
	defer func() {
		testHookSawTruncate = nil
		testHookStopBeforeOverflowing = nil
	}()

	// generatesTwoZips reports whether packing A+B plus a C chunk of
	// sizeC bytes produces two zip files (used as the sort.Search
	// predicate below).
	generatesTwoZips := func(sizeC int) (ret bool) {
		large := new(test.Fetcher)
		s := &storage{
			small: new(test.Fetcher),
			large: large,
			meta:  sorted.NewMemoryKeyValue(),
			log: test.NewLogger(t, "blobpacked: ",
				// Ignore these phrases:
				"Packing file ",
				"Packed file ",
			),
		}
		s.init()

		// Upload first two chunks
		blobA.MustUpload(t, s)
		blobB.MustUpload(t, s)

		// Upload the third chunk (sizeC bytes of bytesCFull)
		bytesC := bytesCFull[:sizeC]
		h := blob.NewHash()
		h.Write(bytesC)
		refC := blob.RefFromHash(h)
		_, err := s.ReceiveBlob(refC, bytes.NewReader(bytesC))
		if err != nil {
			t.Fatal(err)
		}

		// Upload the file schema blob.
		m := schema.NewFileMap("foo.dat")
		m.PopulateParts(sizeAB+int64(sizeC), []schema.BytesPart{
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refA,
			},
			schema.BytesPart{
				Size:    sizeAB / 2,
				BlobRef: refB,
			},
			schema.BytesPart{
				Size:    uint64(sizeC),
				BlobRef: refC,
			},
		})
		fjson, err := m.JSON()
		if err != nil {
			t.Fatalf("schema filemap JSON: %v", err)
		}
		fb := &test.Blob{Contents: fjson}
		fb.MustUpload(t, s)
		num := large.NumBlobs()
		if num < 1 || num > 2 {
			t.Fatalf("for size %d, num packed zip blobs = %d; want 1 or 2", sizeC, num)
		}
		return num == 2
	}
	maxC := maxBlobSize - sizeAB
	smallestC := sort.Search(maxC, generatesTwoZips)
	if smallestC == maxC {
		t.Fatalf("never found a point at which we generated 2 zip files")
	}
	t.Logf("After 12 MB of data (in 2 chunks), the smallest blob that generates two zip files is %d bytes (%.03f MB)", smallestC, float64(smallestC)/(1<<20))
	t.Logf("Zip overhead (for this two chunk file) = %d bytes", maxBlobSize-1-smallestC-sizeAB)

	mu.Lock()
	if sawTruncate != refB {
		t.Errorf("truncate after = %v; want %v", sawTruncate, refB)
	}
	if !stoppedBeforeOverflow {
		t.Error("never hit the code path where it calculates that another data chunk would push it over the 16MB boundary")
	}
}