func BenchmarkSstRekey(b *testing.B) { // TODO(dan): DRY this with BenchmarkRocksDBSstFileReader. dir, cleanupFn := testutils.TempDir(b, 1) defer cleanupFn() sstPath := filepath.Join(dir, "sst") { const maxEntries = 100000 const keyLen = 10 const valLen = 100 b.SetBytes(keyLen + valLen) ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()} kv := engine.MVCCKeyValue{ Key: engine.MVCCKey{Key: roachpb.Key(make([]byte, keyLen)), Timestamp: ts}, Value: make([]byte, valLen), } sst := engine.MakeRocksDBSstFileWriter() if err := sst.Open(sstPath); err != nil { b.Fatal(sst) } var entries = b.N if entries > maxEntries { entries = maxEntries } for i := 0; i < entries; i++ { payload := []byte(fmt.Sprintf("%09d", i)) kv.Key.Key = kv.Key.Key[:0] kv.Key.Key = encoding.EncodeUvarintAscending(kv.Key.Key, uint64(i)) // tableID kv.Key.Key = encoding.EncodeUvarintAscending(kv.Key.Key, 0) // indexID kv.Key.Key = encoding.EncodeBytesAscending(kv.Key.Key, payload) kv.Key.Key = keys.MakeRowSentinelKey(kv.Key.Key) copy(kv.Value, payload) if err := sst.Add(kv); err != nil { b.Fatal(err) } } if err := sst.Close(); err != nil { b.Fatal(err) } } const newTableID = 100 b.ResetTimer() sst, err := engine.MakeRocksDBSstFileReader() if err != nil { b.Fatal(err) } if err := sst.AddFile(sstPath); err != nil { b.Fatal(err) } defer sst.Close() count := 0 iterateFn := sql.MakeRekeyMVCCKeyValFunc(newTableID, func(kv engine.MVCCKeyValue) (bool, error) { count++ if count >= b.N { return true, nil } return false, nil }) for { if err := sst.Iterate(engine.MVCCKey{Key: keys.MinKey}, engine.MVCCKey{Key: keys.MaxKey}, iterateFn); err != nil { b.Fatal(err) } if count >= b.N { break } } }
// Backup exports a snapshot of every kv entry into ranged sstables. // // The output is an sstable per range with files in the following locations: // - /<base>/<node_id>/<key_range>/data.sst // - <base> is given by the user and is expected to eventually be cloud storage // - The <key_range>s are non-overlapping. // // TODO(dan): Bikeshed this directory structure and naming. func Backup( ctx context.Context, db client.DB, base string, endTime hlc.Timestamp, ) (desc sqlbase.BackupDescriptor, retErr error) { // TODO(dan): Optionally take a start time for an incremental backup. // TODO(dan): Take a uri for the path prefix and support various cloud storages. // TODO(dan): Figure out how permissions should work. #6713 is tracking this // for grpc. var rangeDescs []roachpb.RangeDescriptor var sqlDescs []sqlbase.Descriptor opt := client.TxnExecOptions{ AutoRetry: true, AutoCommit: true, } { // TODO(dan): Pick an appropriate end time and set it in the txn. txn := client.NewTxn(ctx, db) err := txn.Exec(opt, func(txn *client.Txn, opt *client.TxnExecOptions) error { var err error SetTxnTimestamps(txn, endTime) rangeDescs, err = AllRangeDescriptors(txn) if err != nil { return err } sqlDescs, err = allSQLDescriptors(txn) return err }) if err != nil { return sqlbase.BackupDescriptor{}, err } } var dataSize int64 backupDescs := make([]sqlbase.BackupRangeDescriptor, len(rangeDescs)) crc := crc32.New(crc32.MakeTable(crc32.Castagnoli)) for i, rangeDesc := range rangeDescs { backupDescs[i] = sqlbase.BackupRangeDescriptor{ StartKey: rangeDesc.StartKey.AsRawKey(), EndKey: rangeDesc.EndKey.AsRawKey(), StartTime: hlc.Timestamp{}, } if backupDescs[i].StartKey.Compare(keys.LocalMax) < 0 { backupDescs[i].StartKey = keys.LocalMax } nodeID := 0 dir := filepath.Join(base, fmt.Sprintf("%03d", nodeID)) dir = filepath.Join(dir, fmt.Sprintf("%x-%x", rangeDesc.StartKey, rangeDesc.EndKey)) if err := os.MkdirAll(dir, 0700); err != nil { return sqlbase.BackupDescriptor{}, err } var kvs []client.KeyValue txn := client.NewTxn(ctx, db) err := txn.Exec(opt, func(txn *client.Txn, opt *client.TxnExecOptions) error { var err error SetTxnTimestamps(txn, endTime) // TODO(dan): Iterate with some batch size. kvs, err = txn.Scan(backupDescs[i].StartKey, backupDescs[i].EndKey, 0) return err }) if err != nil { return sqlbase.BackupDescriptor{}, err } if len(kvs) == 0 { if log.V(1) { log.Infof(ctx, "skipping backup of empty range %s-%s", backupDescs[i].StartKey, backupDescs[i].EndKey) } continue } backupDescs[i].Path = filepath.Join(dir, dataSSTableName) writeSST := func() (writeSSTErr error) { // This is a function so the defered Close (and resultant flush) is // called before the checksum is computed. sst := engine.MakeRocksDBSstFileWriter() if err := sst.Open(backupDescs[i].Path); err != nil { return err } defer func() { if closeErr := sst.Close(); closeErr != nil && writeSSTErr == nil { writeSSTErr = closeErr } }() // TODO(dan): Move all this iteration into cpp to avoid the cgo calls. for _, kv := range kvs { mvccKV := engine.MVCCKeyValue{ Key: engine.MVCCKey{Key: kv.Key, Timestamp: kv.Value.Timestamp}, Value: kv.Value.RawBytes, } if err := sst.Add(mvccKV); err != nil { return err } } dataSize += sst.DataSize return nil } if err := writeSST(); err != nil { return sqlbase.BackupDescriptor{}, err } crc.Reset() f, err := os.Open(backupDescs[i].Path) if err != nil { return sqlbase.BackupDescriptor{}, err } defer f.Close() if _, err := io.Copy(crc, f); err != nil { return sqlbase.BackupDescriptor{}, err } backupDescs[i].CRC = crc.Sum32() } desc = sqlbase.BackupDescriptor{ EndTime: endTime, Ranges: backupDescs, SQL: sqlDescs, DataSize: dataSize, } descBuf, err := desc.Marshal() if err != nil { return sqlbase.BackupDescriptor{}, err } if err = ioutil.WriteFile(filepath.Join(base, backupDescriptorName), descBuf, 0600); err != nil { return sqlbase.BackupDescriptor{}, err } return desc, nil }