func runMVCCConditionalPut(valueSize int, createFirst bool, b *testing.B) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	stopper := stop.NewStopper()
	defer stopper.Stop()
	rocksdb := NewInMem(roachpb.Attributes{}, testCacheSize, stopper)

	b.SetBytes(int64(valueSize))

	var expected *roachpb.Value
	if createFirst {
		for i := 0; i < b.N; i++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
			ts := makeTS(timeutil.Now().UnixNano(), 0)
			if err := MVCCPut(rocksdb, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %s", err)
			}
		}
		expected = &value
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := makeTS(timeutil.Now().UnixNano(), 0)
		if err := MVCCConditionalPut(rocksdb, nil, key, ts, value, expected, nil); err != nil {
			b.Fatalf("failed put: %s", err)
		}
	}
	b.StopTimer()
}
func runMVCCConditionalPut(emk engineMaker, valueSize int, createFirst bool, b *testing.B) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng, stopper := emk(b, fmt.Sprintf("cput_%d", valueSize))
	defer stopper.Stop()

	b.SetBytes(int64(valueSize))

	var expected *roachpb.Value
	if createFirst {
		for i := 0; i < b.N; i++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
			ts := makeTS(timeutil.Now().UnixNano(), 0)
			if err := MVCCPut(context.Background(), eng, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %s", err)
			}
		}
		expected = &value
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := makeTS(timeutil.Now().UnixNano(), 0)
		if err := MVCCConditionalPut(context.Background(), eng, nil, key, ts, value, expected, nil); err != nil {
			b.Fatalf("failed put: %s", err)
		}
	}
	b.StopTimer()
}
// runClientScan first creates test data (and resets the benchmarking
// timer). It then performs b.N client scans in increments of numRows
// keys over all of the data, restarting at the beginning of the
// keyspace, as many times as necessary.
func runClientScan(useSSL bool, numRows, numVersions int, b *testing.B) {
	const numKeys = 100000

	s, db := setupClientBenchData(useSSL, numVersions, numKeys, b)
	defer s.Stop()

	b.SetBytes(int64(numRows * valueSize))
	b.ResetTimer()

	b.RunParallel(func(pb *testing.PB) {
		startKeyBuf := append(make([]byte, 0, 64), []byte("key-")...)
		endKeyBuf := append(make([]byte, 0, 64), []byte("key-")...)
		for pb.Next() {
			// Choose a random key to start scan.
			keyIdx := rand.Int31n(int32(numKeys - numRows))
			startKey := roachpb.Key(encoding.EncodeUvarintAscending(
				startKeyBuf, uint64(keyIdx)))
			endKey := roachpb.Key(encoding.EncodeUvarintAscending(
				endKeyBuf, uint64(keyIdx)+uint64(numRows)))
			rows, pErr := db.Scan(startKey, endKey, int64(numRows))
			if pErr != nil {
				b.Fatalf("failed scan: %s", pErr)
			}
			if len(rows) != numRows {
				b.Fatalf("failed to scan: %d != %d", len(rows), numRows)
			}
		}
	})

	b.StopTimer()
}
func sqlKV(tableID uint32, indexID, descriptorID uint64) roachpb.KeyValue {
	k := keys.MakeTablePrefix(tableID)
	k = encoding.EncodeUvarintAscending(k, indexID)
	k = encoding.EncodeUvarintAscending(k, descriptorID)
	k = encoding.EncodeUvarintAscending(k, 12345) // Column ID, but could be anything.
	return kv(k, nil)
}
// MakeColumnKey returns the key for the column in the given row.
func MakeColumnKey(rowKey []byte, colID uint32) []byte {
	key := append([]byte(nil), rowKey...)
	size := len(key)
	key = encoding.EncodeUvarintAscending(key, uint64(colID))
	// Note that we assume that `len(key)-size` will always be encoded to a
	// single byte by EncodeUvarint. This is currently always true because the
	// varint encoding will encode 1-9 bytes.
	return encoding.EncodeUvarintAscending(key, uint64(len(key)-size))
}
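// Example (a hedged sketch, not from the original source): making the
// single-byte suffix-length assumption above concrete. The column ID varint
// occupies 1-9 bytes, and the lengths 1-9 each encode into exactly one byte,
// so the last byte of every column key states how long the column-ID suffix is.
func exampleColumnKeyLayout() {
	rowKey := []byte("row-")
	key := MakeColumnKey(rowKey, 5)
	colIDLen := len(key) - len(rowKey) - 1 // bytes used by the column ID varint
	// key = rowKey + varint(5) + varint(colIDLen); the trailing length byte
	// lets readers strip the column suffix without decoding from the front.
	fmt.Printf("suffix = %d column-ID byte(s) + 1 length byte\n", colIDLen)
}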
// MakeIndexKeyPrefix returns the key prefix used for the index's data.
func MakeIndexKeyPrefix(desc *TableDescriptor, indexID IndexID) []byte {
	var key []byte
	if i, err := desc.FindIndexByID(indexID); err == nil && len(i.Interleave.Ancestors) > 0 {
		key = encoding.EncodeUvarintAscending(key, uint64(i.Interleave.Ancestors[0].TableID))
		key = encoding.EncodeUvarintAscending(key, uint64(i.Interleave.Ancestors[0].IndexID))
		return key
	}
	key = encoding.EncodeUvarintAscending(key, uint64(desc.ID))
	key = encoding.EncodeUvarintAscending(key, uint64(indexID))
	return key
}
// MakeNameMetadataKey returns the key for the name. Pass name == "" in order
// to generate the prefix key to use to scan over all of the names for the
// specified parentID.
func MakeNameMetadataKey(parentID ID, name string) roachpb.Key {
	name = NormalizeName(name)
	k := keys.MakeTablePrefix(uint32(namespaceTable.ID))
	k = encoding.EncodeUvarintAscending(k, uint64(namespaceTable.PrimaryIndex.ID))
	k = encoding.EncodeUvarintAscending(k, uint64(parentID))
	if name != "" {
		k = encoding.EncodeBytesAscending(k, []byte(name))
		k = keys.MakeColumnKey(k, uint32(namespaceTable.Columns[2].ID))
	}
	return k
}
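// Hypothetical usage sketch (not from the original source): the empty-name
// form of MakeNameMetadataKey yields a prefix, so listing every name under a
// parent database is a prefix scan. listNamesUnder is a made-up helper; the
// Scan call and its 0 maxRows argument follow the client API of the era shown
// elsewhere in this section.
func listNamesUnder(db *client.DB, parentID ID) ([]client.KeyValue, *roachpb.Error) {
	prefix := MakeNameMetadataKey(parentID, "")
	// PrefixEnd gives the first key past everything starting with prefix.
	return db.Scan(prefix, prefix.PrefixEnd(), 0)
}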
// MakeFamilyKey returns the key for the family in the given row by appending to
// the passed key. If SentinelFamilyID is passed, a sentinel key (which is the
// first key in a sql table row) is returned.
func MakeFamilyKey(key []byte, famID uint32) []byte {
	if famID == SentinelFamilyID {
		return encoding.EncodeUvarintAscending(key, 0)
	}
	size := len(key)
	key = encoding.EncodeUvarintAscending(key, uint64(famID))
	// Note that we assume that `len(key)-size` will always be encoded to a
	// single byte by EncodeUvarint. This is currently always true because the
	// varint encoding will encode 1-9 bytes.
	return encoding.EncodeUvarintAscending(key, uint64(len(key)-size))
}
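// Hedged sketch (not from the original source): the sentinel family encodes
// as a bare uvarint 0 with no length byte, so it sorts before every other
// family suffix in the row and marks the row's first KV pair.
func exampleSentinelKey(rowKey []byte) []byte {
	// rowKey + the one-byte encoding of uvarint 0: the first key of the row.
	return MakeFamilyKey(append([]byte(nil), rowKey...), SentinelFamilyID)
}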
// MakeRangeIDPrefix creates a range-local key prefix from
// rangeID.
func MakeRangeIDPrefix(rangeID roachpb.RangeID) roachpb.Key {
	// Size the key buffer so that it is large enough for most callers.
	key := make(roachpb.Key, 0, 32)
	key = append(key, LocalRangeIDPrefix...)
	key = encoding.EncodeUvarintAscending(key, uint64(rangeID))
	return key
}
func BenchmarkBatchBuilderPut(b *testing.B) {
	value := make([]byte, 10)
	for i := range value {
		value[i] = byte(i)
	}
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	b.ResetTimer()

	const batchSize = 1000
	batch := &rocksDBBatchBuilder{}
	for i := 0; i < b.N; i += batchSize {
		end := i + batchSize
		if end > b.N {
			end = b.N
		}

		for j := i; j < end; j++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(j)))
			ts := hlc.Timestamp{WallTime: int64(j)}
			batch.Put(MVCCKey{key, ts}, value)
		}
		batch.Finish()
	}

	b.StopTimer()
}
// runMVCCScan first creates test data (and resets the benchmarking
// timer). It then performs b.N MVCCScans in increments of numRows
// keys over all of the data in the rocksdb instance, restarting at
// the beginning of the keyspace, as many times as necessary.
func runMVCCScan(numRows, numVersions, valueSize int, b *testing.B) {
	// Use the same number of keys for all of the mvcc scan
	// benchmarks. Using a different number of keys per test gives
	// preferential treatment to tests with fewer keys. Note that the
	// datasets all fit in cache and the cache is pre-warmed.
	const numKeys = 100000

	rocksdb, stopper := setupMVCCData(numVersions, numKeys, valueSize, b)
	defer stopper.Stop()

	b.SetBytes(int64(numRows * valueSize))
	b.ResetTimer()

	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
	for i := 0; i < b.N; i++ {
		// Choose a random key to start scan.
		keyIdx := rand.Int31n(int32(numKeys - numRows))
		startKey := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(keyIdx)))
		walltime := int64(5 * (rand.Int31n(int32(numVersions)) + 1))
		ts := makeTS(walltime, 0)
		kvs, _, err := MVCCScan(rocksdb, startKey, keyMax, int64(numRows), ts, true, nil)
		if err != nil {
			b.Fatalf("failed scan: %s", err)
		}
		if len(kvs) != numRows {
			b.Fatalf("failed to scan: %d != %d", len(kvs), numRows)
		}
	}

	b.StopTimer()
}
// runMVCCGet first creates test data (and resets the benchmarking
// timer). It then performs b.N MVCCGets.
func runMVCCGet(numVersions, valueSize int, b *testing.B) {
	const overhead = 48          // Per key/value overhead (empirically determined)
	const targetSize = 512 << 20 // 512 MB
	// Adjust the number of keys so that each test has approximately the same
	// amount of data.
	numKeys := targetSize / ((overhead + valueSize) * (1 + (numVersions-1)/2))

	rocksdb, stopper := setupMVCCData(numVersions, numKeys, valueSize, b)
	defer stopper.Stop()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
	for i := 0; i < b.N; i++ {
		// Choose a random key to retrieve.
		keyIdx := rand.Int31n(int32(numKeys))
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(keyIdx)))
		walltime := int64(5 * (rand.Int31n(int32(numVersions)) + 1))
		ts := makeTS(walltime, 0)
		if v, _, err := MVCCGet(rocksdb, key, ts, true, nil); err != nil {
			b.Fatalf("failed get: %s", err)
		} else if v == nil {
			b.Fatalf("failed get (key not found): %d@%d", keyIdx, walltime)
		} else if valueBytes, err := v.GetBytes(); err != nil {
			b.Fatal(err)
		} else if len(valueBytes) != valueSize {
			b.Fatalf("unexpected value size: %d", len(valueBytes))
		}
	}

	b.StopTimer()
}
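// Worked example of the sizing formula above (parameters are made up): with a
// uniform 1..numVersions versions per key, the average is (1+numVersions)/2,
// which the integer expression 1+(numVersions-1)/2 approximates. For
// valueSize = 8 and numVersions = 10:
//
//	numKeys = (512 << 20) / ((48 + 8) * (1 + (10-1)/2))
//	        = 536870912 / (56 * 5)
//	        ≈ 1.9 million keys
//
// so each benchmark configuration touches roughly the same 512 MB of data.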
func makePrefixWithRangeID(prefix []byte, rangeID roachpb.RangeID, infix roachpb.RKey) roachpb.Key {
	// Size the key buffer so that it is large enough for most callers.
	key := make(roachpb.Key, 0, 32)
	key = append(key, prefix...)
	key = encoding.EncodeUvarintAscending(key, uint64(rangeID))
	key = append(key, infix...)
	return key
}
func (td *tableDeleter) deleteAllRowsFast(ctx context.Context) error {
	var tablePrefix []byte
	// TODO(dan): This should be moved into keys.MakeTablePrefix, but updating
	// all the uses of that will be a pain.
	if interleave := td.rd.helper.tableDesc.PrimaryIndex.Interleave; len(interleave.Ancestors) > 0 {
		tablePrefix = encoding.EncodeUvarintAscending(nil, uint64(interleave.Ancestors[0].TableID))
	}
	// Append to tablePrefix (rather than overwrite it) so that an interleaved
	// table's ancestor prefix is preserved.
	tablePrefix = encoding.EncodeUvarintAscending(tablePrefix, uint64(td.rd.helper.tableDesc.ID))

	// Delete rows and indexes starting with the table's prefix.
	tableStartKey := roachpb.Key(tablePrefix)
	tableEndKey := tableStartKey.PrefixEnd()
	if log.V(2) {
		log.Infof(ctx, "DelRange %s - %s", tableStartKey, tableEndKey)
	}
	td.b.DelRange(tableStartKey, tableEndKey, false)
	return td.finalize(ctx)
}
// EncodeIndexKey creates a key by concatenating keyPrefix with the encodings of
// the columns in the index.
//
// If a table or index is interleaved, `encoding.encodedNotNullDesc` is used in
// place of the family id (a varint) to signal the next component of the key.
// An example of one level of interleaving (a parent):
// /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NotNullDesc/<table_id>/<index_id>/<field_3>/<family>
//
// Returns the key and whether any of the encoded values were NULLs.
//
// Note that ImplicitColumnIDs are not encoded, so the result isn't always a
// full index key.
func EncodeIndexKey(
	tableDesc *TableDescriptor,
	index *IndexDescriptor,
	colMap map[ColumnID]int,
	values []parser.Datum,
	keyPrefix []byte,
) (key []byte, containsNull bool, err error) {
	key = keyPrefix
	colIDs := index.ColumnIDs
	dirs := directions(index.ColumnDirections)

	if len(index.Interleave.Ancestors) > 0 {
		for i, ancestor := range index.Interleave.Ancestors {
			// The first ancestor is assumed to already be encoded in keyPrefix.
			if i != 0 {
				key = encoding.EncodeUvarintAscending(key, uint64(ancestor.TableID))
				key = encoding.EncodeUvarintAscending(key, uint64(ancestor.IndexID))
			}

			length := int(ancestor.SharedPrefixLen)
			var n bool
			key, n, err = EncodeColumns(colIDs[:length], dirs[:length], colMap, values, key)
			if err != nil {
				return key, containsNull, err
			}
			colIDs, dirs = colIDs[length:], dirs[length:]
			containsNull = containsNull || n

			// We reuse NotNullDescending (0xfe) as the interleave sentinel.
			key = encoding.EncodeNotNullDescending(key)
		}

		key = encoding.EncodeUvarintAscending(key, uint64(tableDesc.ID))
		key = encoding.EncodeUvarintAscending(key, uint64(index.ID))
	}

	var n bool
	key, n, err = EncodeColumns(colIDs, dirs, colMap, values, key)
	containsNull = containsNull || n
	return key, containsNull, err
}
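// Hedged sketch (not from the original source): the interleaved layout from
// the comment above, spelled out with the encoding primitives. All of the IDs
// are made up; a real key would come from EncodeIndexKey itself.
func exampleInterleavedKey() []byte {
	key := encoding.EncodeUvarintAscending(nil, 53) // parent table ID
	key = encoding.EncodeUvarintAscending(key, 1)   // parent index ID
	key = encoding.EncodeUvarintAscending(key, 42)  // parent index column <field_1>
	key = encoding.EncodeNotNullDescending(key)     // 0xfe interleave sentinel
	key = encoding.EncodeUvarintAscending(key, 54)  // child table ID
	key = encoding.EncodeUvarintAscending(key, 1)   // child index ID
	key = encoding.EncodeUvarintAscending(key, 7)   // child index column <field_3>
	return key
}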
func TestMakeSplitKey(t *testing.T) {
	e := func(vals ...uint64) roachpb.Key {
		var k roachpb.Key
		for _, v := range vals {
			k = encoding.EncodeUvarintAscending(k, v)
		}
		return k
	}

	goodData := []struct {
		in       roachpb.Key
		expected roachpb.Key
	}{
		{e(1, 2, 0), e(1, 2)},          // /Table/1/2/0 -> /Table/1/2
		{e(1, 2, 1), e(1)},             // /Table/1/2/1 -> /Table/1
		{e(1, 2, 2), e()},              // /Table/1/2/2 -> /Table
		{e(1, 2, 3, 0), e(1, 2, 3)},    // /Table/1/2/3/0 -> /Table/1/2/3
		{e(1, 2, 3, 1), e(1, 2)},       // /Table/1/2/3/1 -> /Table/1/2
		{e(1, 2, 200, 2), e(1, 2)},     // /Table/1/2/200/2 -> /Table/1/2
		{e(1, 2, 3, 4, 1), e(1, 2, 3)}, // /Table/1/2/3/4/1 -> /Table/1/2/3
	}
	for i, d := range goodData {
		out, err := MakeSplitKey(d.in)
		if err != nil {
			t.Fatalf("%d: %s: unexpected error: %v", i, d.in, err)
		}
		if !d.expected.Equal(out) {
			t.Fatalf("%d: %s: expected %s, but got %s", i, d.in, d.expected, out)
		}
	}

	errorData := []struct {
		in  roachpb.Key
		err string
	}{
		// Column ID suffix size is too large.
		{e(1), "malformed table key"},
		{e(1, 2), "malformed table key"},
		// The table ID is invalid.
		{e(200)[:1], "insufficient bytes to decode uvarint value"},
		// The index ID is invalid.
		{e(1, 200)[:2], "insufficient bytes to decode uvarint value"},
		// The column ID suffix is invalid.
		{e(1, 2, 200)[:3], "insufficient bytes to decode uvarint value"},
	}
	for i, d := range errorData {
		_, err := MakeSplitKey(d.in)
		if !testutils.IsError(err, d.err) {
			t.Fatalf("%d: %s: expected %s, but got %v", i, d.in, d.err, err)
		}
	}
}
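// Hedged sketch (not from the original source) of the behavior the test above
// exercises: the trailing byte of a table key is a one-byte uvarint giving the
// length of the column-ID suffix, and MakeSplitKey strips that suffix plus the
// length byte itself. splitKeySketch is a made-up name; the real function may
// differ in its error handling.
func splitKeySketch(key roachpb.Key) (roachpb.Key, error) {
	if len(key) == 0 {
		return nil, fmt.Errorf("insufficient bytes to decode uvarint value")
	}
	// The suffix length always fits in one byte (see MakeColumnKey).
	_, colIDLen, err := encoding.DecodeUvarintAscending(key[len(key)-1:])
	if err != nil {
		return nil, err
	}
	if int(colIDLen)+1 > len(key) {
		return nil, fmt.Errorf("%s: malformed table key", key)
	}
	return key[:len(key)-int(colIDLen)-1], nil
}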
func runMVCCBatchPut(valueSize, batchSize int, b *testing.B) {
	defer tracing.Disable()()

	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	stopper := stop.NewStopper()
	defer stopper.Stop()
	rocksdb := NewInMem(roachpb.Attributes{}, testCacheSize, stopper)

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i += batchSize {
		end := i + batchSize
		if end > b.N {
			end = b.N
		}

		batch := rocksdb.NewBatch()

		for j := i; j < end; j++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(j)))
			ts := makeTS(time.Now().UnixNano(), 0)
			if err := MVCCPut(batch, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %s", err)
			}
		}

		if err := batch.Commit(); err != nil {
			b.Fatal(err)
		}
		batch.Close()
	}

	b.StopTimer()
}
func runMVCCBatchPut(emk engineMaker, valueSize, batchSize int, b *testing.B) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng, stopper := emk(b, fmt.Sprintf("batch_put_%d_%d", valueSize, batchSize))
	defer stopper.Stop()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i += batchSize {
		end := i + batchSize
		if end > b.N {
			end = b.N
		}

		batch := eng.NewBatch()

		for j := i; j < end; j++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(j)))
			ts := makeTS(timeutil.Now().UnixNano(), 0)
			if err := MVCCPut(context.Background(), batch, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %s", err)
			}
		}

		if err := batch.Commit(); err != nil {
			b.Fatal(err)
		}
		batch.Close()
	}

	b.StopTimer()
}
// setupMVCCData writes up to numVersions values at each of numKeys
// keys. The number of versions written for each key is chosen
// randomly according to a uniform distribution. Each successive
// version is written starting at 5ns and then in 5ns increments. This
// allows scans at various times, starting at t=5ns, and continuing to
// t=5ns*(numVersions+1). A version for each key will be read on every
// such scan, but the dynamics of the scan will change depending on
// the historical timestamp. Earlier timestamps mean scans which must
// skip more historical versions; later timestamps mean scans which
// skip fewer.
//
// The creation of the database is time consuming, especially for larger
// numbers of versions. The database is persisted between runs and stored in
// the current directory as "mvcc_data_<versions>_<keys>_<valueBytes>" (which
// is also returned).
func setupMVCCData(
	emk engineMaker, numVersions, numKeys, valueBytes int, b *testing.B,
) (Engine, string, *stop.Stopper) {
	loc := fmt.Sprintf("mvcc_data_%d_%d_%d", numVersions, numKeys, valueBytes)

	exists := true
	if _, err := os.Stat(loc); os.IsNotExist(err) {
		exists = false
	}

	eng, stopper := emk(b, loc)

	if exists {
		readAllFiles(filepath.Join(loc, "*"))
		return eng, loc, stopper
	}

	log.Infof(context.Background(), "creating mvcc data: %s", loc)

	// Generate the same data every time.
	rng := rand.New(rand.NewSource(1449168817))

	keys := make([]roachpb.Key, numKeys)
	var order []int
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
		keyVersions := rng.Intn(numVersions) + 1
		for j := 0; j < keyVersions; j++ {
			order = append(order, i)
		}
	}

	// Randomize the order in which the keys are written.
	for i, n := 0, len(order); i < n-1; i++ {
		j := i + rng.Intn(n-i)
		order[i], order[j] = order[j], order[i]
	}

	counts := make([]int, numKeys)
	batch := eng.NewBatch()
	for i, idx := range order {
		// Output the keys in ~20 batches. If we used a single batch to output all
		// of the keys rocksdb would create a single sstable. We want multiple
		// sstables in order to exercise filtering of which sstables are examined
		// during iterator seeking. We fix the number of batches we output so that
		// optimizations which change the data size result in the same number of
		// sstables.
		if scaled := len(order) / 20; i > 0 && (i%scaled) == 0 {
			log.Infof(context.Background(), "committing (%d/~%d)", i/scaled, 20)
			if err := batch.Commit(); err != nil {
				b.Fatal(err)
			}
			batch.Close()
			batch = eng.NewBatch()
			if err := eng.Flush(); err != nil {
				b.Fatal(err)
			}
		}

		key := keys[idx]
		ts := makeTS(int64(counts[idx]+1)*5, 0)
		counts[idx]++
		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
		value.InitChecksum(key)
		if err := MVCCPut(context.Background(), batch, nil, key, ts, value, nil); err != nil {
			b.Fatal(err)
		}
	}
	if err := batch.Commit(); err != nil {
		b.Fatal(err)
	}
	batch.Close()
	if err := eng.Flush(); err != nil {
		b.Fatal(err)
	}

	return eng, loc, stopper
}
// MakeTablePrefix returns the key prefix used for the table's data.
func MakeTablePrefix(tableID uint32) []byte {
	return encoding.EncodeUvarintAscending(nil, uint64(tableID))
}
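// Hypothetical usage sketch (not from the original source): because
// EncodeUvarintAscending is order-preserving, the prefix together with its
// PrefixEnd bounds every key in the table. This is the same pattern
// deleteAllRowsFast above uses to range over a table's data.
func exampleTableSpan(tableID uint32) (roachpb.Key, roachpb.Key) {
	start := roachpb.Key(MakeTablePrefix(tableID))
	// [start, start.PrefixEnd()) covers every /Table/<id>/... key.
	return start, start.PrefixEnd()
}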
// MakeNonColumnKey creates a non-column key for a row by appending a 0 column
// ID suffix size to rowKey.
func MakeNonColumnKey(rowKey []byte) []byte {
	return encoding.EncodeUvarintAscending(rowKey, 0)
}
func setupClientBenchData(
	useSSL bool, numVersions, numKeys int, b *testing.B,
) (*server.TestServer, *client.DB) {
	const cacheSize = 8 << 30        // 8 GB
	const memtableBudget = 512 << 20 // 512 MB
	loc := fmt.Sprintf("client_bench_%d_%d", numVersions, numKeys)

	exists := true
	if _, err := os.Stat(loc); os.IsNotExist(err) {
		exists = false
	}

	s := &server.TestServer{}
	s.Ctx = server.NewTestContext()
	s.SkipBootstrap = exists
	if !useSSL {
		s.Ctx.Insecure = true
	}
	stopper := stop.NewStopper()
	s.Ctx.Engines = []engine.Engine{
		engine.NewRocksDB(roachpb.Attributes{Attrs: []string{"ssd"}}, loc, cacheSize, memtableBudget, stopper),
	}
	if err := s.StartWithStopper(stopper); err != nil {
		b.Fatal(err)
	}

	db, err := client.Open(s.Stopper(), fmt.Sprintf("%s://%s@%s?certs=%s",
		s.Ctx.RPCRequestScheme(), security.NodeUser, s.ServingAddr(), s.Ctx.Certs))
	if err != nil {
		b.Fatal(err)
	}

	if exists {
		return s, db
	}

	rng, _ := randutil.NewPseudoRand()
	keys := make([]roachpb.Key, numKeys)
	nvs := make([]int, numKeys)
	for t := 1; t <= numVersions; t++ {
		batch := &client.Batch{}
		for i := 0; i < numKeys; i++ {
			if t == 1 {
				keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
				nvs[i] = int(rand.Int31n(int32(numVersions)) + 1)
			}
			// Only write a value if this iteration does not exceed the random
			// number of versions chosen for this key.
			if t <= nvs[i] {
				batch.Put(roachpb.Key(keys[i]), randutil.RandBytes(rng, valueSize))
			}
			if (i+1)%1000 == 0 {
				if pErr := db.Run(batch); pErr != nil {
					b.Fatal(pErr)
				}
				batch = &client.Batch{}
			}
		}
		if len(batch.Results) != 0 {
			if pErr := db.Run(batch); pErr != nil {
				b.Fatal(pErr)
			}
		}
	}

	if r, ok := s.Ctx.Engines[0].(*engine.RocksDB); ok {
		r.CompactRange(engine.NilKey, engine.NilKey)
	}

	return s, db
}
// MakeDescMetadataKey returns the key for the descriptor.
func MakeDescMetadataKey(descID ID) roachpb.Key {
	k := keys.MakeTablePrefix(uint32(descriptorTable.ID))
	k = encoding.EncodeUvarintAscending(k, uint64(descriptorTable.PrimaryIndex.ID))
	k = encoding.EncodeUvarintAscending(k, uint64(descID))
	return keys.MakeColumnKey(k, uint32(descriptorTable.Columns[1].ID))
}
// MakeAllDescsMetadataKey returns the key for all descriptors.
func MakeAllDescsMetadataKey() roachpb.Key {
	k := keys.MakeTablePrefix(uint32(DescriptorTable.ID))
	return encoding.EncodeUvarintAscending(k, uint64(DescriptorTable.PrimaryIndex.ID))
}
// MakeIndexKeyPrefix returns the key prefix used for the index's data.
func MakeIndexKeyPrefix(tableID ID, indexID IndexID) []byte {
	key := keys.MakeTablePrefix(uint32(tableID))
	key = encoding.EncodeUvarintAscending(key, uint64(indexID))
	return key
}
// NodeLastUsageReportKey returns the key for accessing the node last update check
// time (when version check or usage reporting was done).
func NodeLastUsageReportKey(nodeID int32) roachpb.Key {
	prefix := append([]byte(nil), UpdateCheckPrefix...)
	return encoding.EncodeUvarintAscending(prefix, uint64(nodeID))
}
// setupMVCCData writes up to numVersions values at each of numKeys
// keys. The number of versions written for each key is chosen
// randomly according to a uniform distribution. Each successive
// version is written starting at 5ns and then in 5ns increments. This
// allows scans at various times, starting at t=5ns, and continuing to
// t=5ns*(numVersions+1). A version for each key will be read on every
// such scan, but the dynamics of the scan will change depending on
// the historical timestamp. Earlier timestamps mean scans which must
// skip more historical versions; later timestamps mean scans which
// skip fewer.
//
// The creation of the rocksdb database is time consuming, especially
// for larger numbers of versions. The database is persisted between
// runs and stored in the current directory as
// "mvcc_data_<versions>_<keys>_<valueBytes>".
func setupMVCCData(numVersions, numKeys, valueBytes int, b *testing.B) (*RocksDB, *stop.Stopper) {
	loc := fmt.Sprintf("mvcc_data_%d_%d_%d", numVersions, numKeys, valueBytes)

	exists := true
	if _, err := os.Stat(loc); os.IsNotExist(err) {
		exists = false
	}

	const cacheSize = 0
	const memtableBudget = 512 << 20 // 512 MB

	stopper := stop.NewStopper()
	rocksdb := NewRocksDB(roachpb.Attributes{}, loc, cacheSize, memtableBudget, stopper)
	if err := rocksdb.Open(); err != nil {
		b.Fatalf("could not create new rocksdb db instance at %s: %v", loc, err)
	}

	if exists {
		readAllFiles(filepath.Join(loc, "*"))
		return rocksdb, stopper
	}

	log.Infof("creating mvcc data: %s", loc)

	// Generate the same data every time.
	rng := rand.New(rand.NewSource(1449168817))

	keys := make([]roachpb.Key, numKeys)
	var order []int
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
		keyVersions := rng.Intn(numVersions) + 1
		for j := 0; j < keyVersions; j++ {
			order = append(order, i)
		}
	}

	// Randomize the order in which the keys are written.
	for i, n := 0, len(order); i < n-1; i++ {
		j := i + rng.Intn(n-i)
		order[i], order[j] = order[j], order[i]
	}

	counts := make([]int, numKeys)
	batch := rocksdb.NewBatch()
	for i, idx := range order {
		// Output the keys in ~20 batches. If we used a single batch to output all
		// of the keys rocksdb would create a single sstable. We want multiple
		// sstables in order to exercise filtering of which sstables are examined
		// during iterator seeking. We fix the number of batches we output so that
		// optimizations which change the data size result in the same number of
		// sstables.
		if i > 0 && (i%(len(order)/20)) == 0 {
			if err := batch.Commit(); err != nil {
				b.Fatal(err)
			}
			batch.Close()
			batch = rocksdb.NewBatch()
			if err := rocksdb.Flush(); err != nil {
				b.Fatal(err)
			}
		}

		key := keys[idx]
		ts := makeTS(int64(counts[idx]+1)*5, 0)
		counts[idx]++
		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
		value.InitChecksum(key)
		if err := MVCCPut(batch, nil, key, ts, value, nil); err != nil {
			b.Fatal(err)
		}
	}
	if err := batch.Commit(); err != nil {
		b.Fatal(err)
	}
	batch.Close()
	if err := rocksdb.Flush(); err != nil {
		b.Fatal(err)
	}

	return rocksdb, stopper
}
// MakeDescMetadataKey returns the key for the descriptor.
func MakeDescMetadataKey(descID ID) roachpb.Key {
	k := MakeAllDescsMetadataKey()
	k = encoding.EncodeUvarintAscending(k, uint64(descID))
	return keys.MakeFamilyKey(k, uint32(DescriptorTable.Columns[1].ID))
}
// MakeZoneKey returns the key for 'id's entry in the system.zones table.
func MakeZoneKey(id ID) roachpb.Key {
	k := keys.MakeTablePrefix(uint32(zonesTable.ID))
	k = encoding.EncodeUvarintAscending(k, uint64(zonesTable.PrimaryIndex.ID))
	k = encoding.EncodeUvarintAscending(k, uint64(id))
	return keys.MakeColumnKey(k, uint32(zonesTable.Columns[1].ID))
}
// NodeStatusKey returns the key for accessing the node status for the
// specified node ID.
func NodeStatusKey(nodeID int32) roachpb.Key {
	key := make(roachpb.Key, 0, len(StatusNodePrefix)+9)
	key = append(key, StatusNodePrefix...)
	key = encoding.EncodeUvarintAscending(key, uint64(nodeID))
	return key
}