// WriteStatusSummary generates a summary and immediately writes it to the
// given client.
func (mr *MetricsRecorder) WriteStatusSummary(ctx context.Context, db *client.DB) error {
	mr.writeSummaryMu.Lock()
	defer mr.writeSummaryMu.Unlock()

	nodeStatus := mr.GetStatusSummary()
	if nodeStatus != nil {
		key := keys.NodeStatusKey(nodeStatus.Desc.NodeID)
		// We use PutInline to store only a single version of the node status.
		// There's not much point in keeping the historical versions as we keep
		// all of the constituent data as timeseries. Further, due to the size
		// of the build info in the node status, writing one of these every 10s
		// will generate more versions than will easily fit into a range over
		// the course of a day.
		if err := db.PutInline(ctx, key, nodeStatus); err != nil {
			return err
		}
		if log.V(2) {
			statusJSON, err := json.Marshal(nodeStatus)
			if err != nil {
				log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err)
			}
			log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON)
		}
	}
	return nil
}
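
// The following is an illustrative sketch, not part of the source: one
// plausible way WriteStatusSummary might be driven on a timer. The 10-second
// interval mirrors the "every 10s" cadence mentioned above; the
// writeStatusLoop name and the stop channel are assumptions made for the
// example.
func writeStatusLoop(ctx context.Context, mr *MetricsRecorder, db *client.DB, stop <-chan struct{}) {
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			if err := mr.WriteStatusSummary(ctx, db); err != nil {
				log.Warningf(ctx, "error writing status summary: %s", err)
			}
		case <-stop:
			return
		}
	}
}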
func countRangeReplicas(db *client.DB) (int, error) {
	desc := &roachpb.RangeDescriptor{}
	if err := db.GetProto(context.TODO(), keys.RangeDescriptorKey(roachpb.RKeyMin), desc); err != nil {
		return 0, err
	}
	return len(desc.Replicas), nil
}
// allocateNodeID increments the node id generator key to allocate
// a new, unique node id.
func allocateNodeID(ctx context.Context, db *client.DB) (roachpb.NodeID, error) {
	r, err := db.Inc(ctx, keys.NodeIDGenerator, 1)
	if err != nil {
		return 0, errors.Errorf("unable to allocate node ID: %s", err)
	}
	return roachpb.NodeID(r.ValueInt()), nil
}
func checkKeyCount(t *testing.T, kvDB *client.DB, prefix roachpb.Key, numKeys int) {
	if kvs, err := kvDB.Scan(context.TODO(), prefix, prefix.PrefixEnd(), 0); err != nil {
		t.Fatal(err)
	} else if l := numKeys; len(kvs) != l {
		t.Fatalf("expected %d key value pairs, but got %d", l, len(kvs))
	}
}
// truncateTableInChunks truncates the data of a table in chunks. It deletes a
// range of data for the table, which includes the PK and all indexes.
func truncateTableInChunks(
	ctx context.Context, tableDesc *sqlbase.TableDescriptor, db *client.DB,
) error {
	const chunkSize = TableTruncateChunkSize
	var resume roachpb.Span
	for row, done := 0, false; !done; row += chunkSize {
		resumeAt := resume
		if log.V(2) {
			log.Infof(ctx, "table %s truncate at row: %d, span: %s", tableDesc.Name, row, resume)
		}
		if err := db.Txn(ctx, func(txn *client.Txn) error {
			rd, err := makeRowDeleter(txn, tableDesc, nil, nil, false)
			if err != nil {
				return err
			}
			td := tableDeleter{rd: rd}
			if err := td.init(txn); err != nil {
				return err
			}
			resume, err = td.deleteAllRows(txn.Context, resumeAt, chunkSize)
			return err
		}); err != nil {
			return err
		}
		done = resume.Key == nil
	}
	return nil
}
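
// Illustrative sketch (an assumption, not from the source): invoking
// truncateTableInChunks with a descriptor looked up via GetTableDescriptor
// (shown later in this section as sqlbase.GetTableDescriptor). The "t"/"test"
// database and table names and the truncateTestTable name are placeholders
// for the example.
func truncateTestTable(ctx context.Context, db *client.DB) error {
	tableDesc := sqlbase.GetTableDescriptor(db, "t", "test")
	return truncateTableInChunks(ctx, tableDesc, db)
}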
// pushTxn attempts to abort the txn via push.
func pushTxn(
	ctx context.Context,
	db *client.DB,
	now hlc.Timestamp,
	txn *roachpb.Transaction,
	typ roachpb.PushTxnType,
) {
	// Attempt to push the transaction which created the intent.
	pushArgs := &roachpb.PushTxnRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Now:       now,
		PusherTxn: roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Priority: math.MaxInt32}},
		PusheeTxn: txn.TxnMeta,
		PushType:  typ,
	}
	b := &client.Batch{}
	b.AddRawRequest(pushArgs)
	if err := db.Run(ctx, b); err != nil {
		log.Warningf(ctx, "push of txn %s failed: %s", txn, err)
		return
	}
	br := b.RawResponse()
	// Update the supplied txn on successful push.
	*txn = br.Responses[0].GetInner().(*roachpb.PushTxnResponse).PusheeTxn
}
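
// Illustrative sketch (an assumption, not from the source): aborting every
// transaction that holds an intent we want to clean up by calling pushTxn
// above with PUSH_ABORT. The abortIntentTxns name and the txns slice are
// hypothetical; the pusher's max priority is set inside pushTxn itself.
func abortIntentTxns(
	ctx context.Context, db *client.DB, now hlc.Timestamp, txns []*roachpb.Transaction,
) {
	for _, txn := range txns {
		pushTxn(ctx, db, now, txn, roachpb.PUSH_ABORT)
	}
}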
// allocateStoreIDs increments the store id generator key for the
// specified node to allocate "inc" new, unique store ids. The
// first ID in a contiguous range is returned on success.
func allocateStoreIDs(
	ctx context.Context, nodeID roachpb.NodeID, inc int64, db *client.DB,
) (roachpb.StoreID, error) {
	r, err := db.Inc(ctx, keys.StoreIDGenerator, inc)
	if err != nil {
		return 0, errors.Errorf("unable to allocate %d store IDs for node %d: %s", inc, nodeID, err)
	}
	return roachpb.StoreID(r.ValueInt() - inc + 1), nil
}
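
// Illustrative sketch (an assumption, not from the source): consuming the
// contiguous block returned by allocateStoreIDs. For example, if the
// generator key previously held 3 and inc is 2, Inc returns 5 and the
// function returns StoreID(4), so the caller owns IDs 4 and 5. The
// assignStoreIDs name is hypothetical.
func assignStoreIDs(
	ctx context.Context, nodeID roachpb.NodeID, numStores int, db *client.DB,
) ([]roachpb.StoreID, error) {
	first, err := allocateStoreIDs(ctx, nodeID, int64(numStores), db)
	if err != nil {
		return nil, err
	}
	ids := make([]roachpb.StoreID, numStores)
	for i := range ids {
		ids[i] = first + roachpb.StoreID(i)
	}
	return ids, nil
}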
func (hv *historyVerifier) runTxn(
	txnIdx int,
	priority int32,
	isolation enginepb.IsolationType,
	cmds []*cmd,
	db *client.DB,
	t *testing.T,
) error {
	var retry int
	txnName := fmt.Sprintf("txn %d", txnIdx+1)
	cmdIdx := -1

	err := db.Txn(context.TODO(), func(txn *client.Txn) error {
		// If this is the 2nd attempt and a retry wasn't expected, return a
		// retry error which results in further histories being enumerated.
		if retry++; retry > 1 {
			if !cmds[cmdIdx].expRetry {
				// Propagate retry error to history execution to enumerate all
				// histories where this txn retries at this command.
				return &retryError{txnIdx: txnIdx, cmdIdx: cmdIdx}
			}
			// We're expecting a retry, so just send nil down the done channel.
			cmds[cmdIdx].done(nil)
		}

		txn.SetDebugName(txnName, 0)
		if isolation == enginepb.SNAPSHOT {
			if err := txn.SetIsolation(enginepb.SNAPSHOT); err != nil {
				return err
			}
		}
		txn.InternalSetPriority(priority)

		env := map[string]int64{}
		for cmdIdx+1 < len(cmds) {
			cmdIdx++
			cmds[cmdIdx].env = env
			_, err := hv.runCmd(txn, txnIdx, retry, cmds[cmdIdx], t)
			if err != nil {
				if log.V(1) {
					log.Infof(context.Background(), "%s: failed running %s: %s", txnName, cmds[cmdIdx], err)
				}
				return err
			}
		}
		return nil
	})
	if err != nil {
		for _, c := range cmds[cmdIdx:] {
			c.done(err)
		}
	}
	return err
}
// getRangeKeys returns the end keys of all ranges.
func getRangeKeys(db *client.DB) ([]roachpb.Key, error) {
	rows, err := db.Scan(context.TODO(), keys.Meta2Prefix, keys.MetaMax, 0)
	if err != nil {
		return nil, err
	}
	ret := make([]roachpb.Key, len(rows))
	for i := 0; i < len(rows); i++ {
		ret[i] = bytes.TrimPrefix(rows[i].Key, keys.Meta2Prefix)
	}
	return ret, nil
}
// startTestWriter creates a writer which initiates a sequence of
// transactions, each of which writes up to 10 times to random keys with
// random values. If txnChannel is not nil, a value is sent to it (without
// blocking) every time a new transaction starts.
func startTestWriter(
	db *client.DB,
	i int64,
	valBytes int32,
	wg *sync.WaitGroup,
	retries *int32,
	txnChannel chan struct{},
	done <-chan struct{},
	t *testing.T,
) {
	src := rand.New(rand.NewSource(i))
	defer func() {
		if wg != nil {
			wg.Done()
		}
	}()

	for j := 0; ; j++ {
		select {
		case <-done:
			return
		default:
			first := true
			err := db.Txn(context.TODO(), func(txn *client.Txn) error {
				if first && txnChannel != nil {
					select {
					case txnChannel <- struct{}{}:
					default:
					}
				} else if !first && retries != nil {
					atomic.AddInt32(retries, 1)
				}
				first = false
				for j := 0; j <= int(src.Int31n(10)); j++ {
					key := randutil.RandBytes(src, 10)
					val := randutil.RandBytes(src, int(src.Int31n(valBytes)))
					if err := txn.Put(key, val); err != nil {
						log.Infof(context.Background(), "experienced an error in routine %d: %s", i, err)
						return err
					}
				}
				return nil
			})
			if err != nil {
				t.Error(err)
			} else {
				time.Sleep(1 * time.Millisecond)
			}
		}
	}
}
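
// Illustrative sketch (an assumption, not from the source): driving several
// concurrent writers with startTestWriter and shutting them down. The writer
// count, value size, and sleep duration are arbitrary choices made for the
// example.
func runTestWriters(db *client.DB, t *testing.T) {
	var wg sync.WaitGroup
	var retries int32
	done := make(chan struct{})
	txnCh := make(chan struct{}, 100)

	const numWriters = 3
	wg.Add(numWriters)
	for i := int64(0); i < numWriters; i++ {
		go startTestWriter(db, i, 1<<8, &wg, &retries, txnCh, done, t)
	}

	// Let the writers run briefly, then signal them to stop and wait.
	time.Sleep(50 * time.Millisecond)
	close(done)
	wg.Wait()
}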
// WaitForInitialSplits waits for the expected number of initial ranges to be
// populated in the meta2 table. If the expected range count is not reached
// within a configured timeout, an error is returned.
func WaitForInitialSplits(db *client.DB) error {
	expectedRanges := ExpectedInitialRangeCount()
	return util.RetryForDuration(initialSplitsTimeout, func() error {
		// Scan all keys in the Meta2Prefix; we only need a count.
		rows, err := db.Scan(context.TODO(), keys.Meta2Prefix, keys.MetaMax, 0)
		if err != nil {
			return err
		}
		if a, e := len(rows), expectedRanges; a != e {
			return errors.Errorf("had %d ranges at startup, expected %d", a, e)
		}
		return nil
	})
}
// purgeOldLeases refreshes the leases on a table. Unused leases older than
// minVersion will be released.
// If deleted is set, minVersion is ignored; no lease is acquired and all
// existing unused leases are released. The table is further marked for
// deletion, which will cause existing in-use leases to be eagerly released
// once they're not in use any more.
// If t has no active leases, nothing is done.
func (t *tableState) purgeOldLeases(
	db *client.DB, deleted bool, minVersion sqlbase.DescriptorVersion, store LeaseStore,
) error {
	t.mu.Lock()
	empty := len(t.active.data) == 0
	t.mu.Unlock()
	if empty {
		// We don't currently have a lease on this table, so no need to refresh
		// anything.
		return nil
	}

	// Acquire and release a lease on the table at a version >= minVersion.
	var lease *LeaseState
	err := db.Txn(context.TODO(), func(txn *client.Txn) error {
		var err error
		if !deleted {
			lease, err = t.acquire(txn, minVersion, store)
			if err == errTableDropped {
				deleted = true
			}
		}
		if err == nil || deleted {
			t.mu.Lock()
			defer t.mu.Unlock()
			var toRelease []*LeaseState
			if deleted {
				t.deleted = true
			}
			toRelease = append([]*LeaseState(nil), t.active.data...)
			t.releaseLeasesIfNotActive(toRelease, store)
			return nil
		}
		return err
	})
	if err != nil {
		return err
	}
	if lease == nil {
		return nil
	}
	return t.release(lease, store)
}
// LeaseInfo runs a LeaseInfoRequest against the specified DB with the given
// read consistency.
func LeaseInfo(
	t *testing.T,
	db *client.DB,
	rangeDesc roachpb.RangeDescriptor,
	readConsistency roachpb.ReadConsistencyType,
) roachpb.LeaseInfoResponse {
	leaseInfoReq := &roachpb.LeaseInfoRequest{
		Span: roachpb.Span{
			Key: rangeDesc.StartKey.AsRawKey(),
		},
	}
	reply, pErr := client.SendWrappedWith(context.Background(), db.GetSender(), roachpb.Header{
		ReadConsistency: readConsistency,
	}, leaseInfoReq)
	if pErr != nil {
		t.Fatal(pErr)
	}
	return *(reply.(*roachpb.LeaseInfoResponse))
}
// pruneTimeSeries will prune data for the supplied set of time series. Time
// series are identified by name and resolution.
//
// For each time series supplied, the pruning operation will delete all data
// older than a constant threshold. The threshold is different depending on the
// resolution; typically, lower-resolution time series data will be retained
// for a longer period.
//
// If data is stored at a resolution which is not known to the system, it is
// assumed that the resolution has been deprecated and all data for that time
// series at that resolution will be deleted.
//
// As range deletion of inline data is an idempotent operation, it is safe to
// run this operation concurrently on multiple nodes at the same time.
func pruneTimeSeries(
	ctx context.Context,
	db *client.DB,
	timeSeriesList []timeSeriesResolutionInfo,
	now hlc.Timestamp,
) error {
	thresholds := computeThresholds(now.WallTime)

	for _, timeSeries := range timeSeriesList {
		// Time series data for a specific resolution falls in a contiguous key
		// range, and can be deleted with a DelRange command.
		// The start key is the prefix unique to this name/resolution pair.
		start := makeDataKeySeriesPrefix(timeSeries.Name, timeSeries.Resolution)
		// The end key can be created by generating a time series key with the
		// threshold timestamp for the resolution. If the resolution is not
		// supported, the start key's PrefixEnd is used instead (which will
		// clear the time series entirely).
		var end roachpb.Key
		threshold, ok := thresholds[timeSeries.Resolution]
		if ok {
			end = MakeDataKey(timeSeries.Name, "", timeSeries.Resolution, threshold)
		} else {
			end = start.PrefixEnd()
		}

		// TODO(mrtracy): There is no reason not to execute the individual
		// deletes in parallel, although the best way to do that is not clear.
		// See the RFC PR #9343 for details.
		b := &client.Batch{}
		b.AddRawRequest(&roachpb.DeleteRangeRequest{
			Span: roachpb.Span{
				Key:    start,
				EndKey: end,
			},
			Inline: true,
		})
		if err := db.Run(ctx, b); err != nil {
			return err
		}
	}
	return nil
}
func (hv *historyVerifier) runCmds(
	cmds []*cmd, db *client.DB, t *testing.T,
) (string, map[string]int64, error) {
	var strs []string
	env := map[string]int64{}
	err := db.Txn(context.TODO(), func(txn *client.Txn) error {
		for _, c := range cmds {
			c.historyIdx = hv.idx
			c.env = env
			c.init(nil)
			fmtStr, err := c.execute(txn, t)
			if err != nil {
				return err
			}
			strs = append(strs, fmt.Sprintf(fmtStr, 0, 0))
		}
		return nil
	})
	return strings.Join(strs, " "), env, err
}
// GetTableDescriptor retrieves a table descriptor directly from the KV layer.
func GetTableDescriptor(kvDB *client.DB, database string, table string) *TableDescriptor {
	dbNameKey := MakeNameMetadataKey(keys.RootNamespaceID, database)
	gr, err := kvDB.Get(context.TODO(), dbNameKey)
	if err != nil {
		panic(err)
	}
	if !gr.Exists() {
		panic("database missing")
	}
	dbDescID := ID(gr.ValueInt())

	tableNameKey := MakeNameMetadataKey(dbDescID, table)
	gr, err = kvDB.Get(context.TODO(), tableNameKey)
	if err != nil {
		panic(err)
	}
	if !gr.Exists() {
		panic("table missing")
	}

	descKey := MakeDescMetadataKey(ID(gr.ValueInt()))
	desc := &Descriptor{}
	if err := kvDB.GetProto(context.TODO(), descKey, desc); err != nil {
		panic("proto missing")
	}
	return desc.GetTable()
}
// pruneTimeSeries will prune data for the supplied set of time series. Time
// series are identified by name and resolution.
//
// For each time series supplied, the pruning operation will delete all data
// older than a constant threshold. The threshold is different depending on the
// resolution; typically, lower-resolution time series data will be retained
// for a longer period.
//
// If data is stored at a resolution which is not known to the system, it is
// assumed that the resolution has been deprecated and all data for that time
// series at that resolution will be deleted.
//
// As range deletion of inline data is an idempotent operation, it is safe to
// run this operation concurrently on multiple nodes at the same time.
func pruneTimeSeries(
	ctx context.Context,
	db *client.DB,
	timeSeriesList []timeSeriesResolutionInfo,
	now hlc.Timestamp,
) error {
	thresholds := computeThresholds(now.WallTime)

	b := &client.Batch{}
	for _, timeSeries := range timeSeriesList {
		// Time series data for a specific resolution falls in a contiguous key
		// range, and can be deleted with a DelRange command.
		// The start key is the prefix unique to this name/resolution pair.
		start := makeDataKeySeriesPrefix(timeSeries.Name, timeSeries.Resolution)
		// The end key can be created by generating a time series key with the
		// threshold timestamp for the resolution. If the resolution is not
		// supported, the start key's PrefixEnd is used instead (which will
		// clear the time series entirely).
		var end roachpb.Key
		threshold, ok := thresholds[timeSeries.Resolution]
		if ok {
			end = MakeDataKey(timeSeries.Name, "", timeSeries.Resolution, threshold)
		} else {
			end = start.PrefixEnd()
		}

		b.AddRawRequest(&roachpb.DeleteRangeRequest{
			Span: roachpb.Span{
				Key:    start,
				EndKey: end,
			},
			Inline: true,
		})
	}
	return db.Run(ctx, b)
}
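
// Illustrative sketch (an assumption, not from the source): wiring
// pruneTimeSeries to a clock and a discovery step. findTimeSeries is a
// hypothetical helper standing in for however the caller enumerates the
// stored series; clock.Now() supplies the HLC timestamp used to compute the
// pruning thresholds.
func pruneAllTimeSeries(ctx context.Context, db *client.DB, clock *hlc.Clock) error {
	series, err := findTimeSeries(ctx, db) // hypothetical discovery helper
	if err != nil {
		return err
	}
	return pruneTimeSeries(ctx, db, series, clock.Now())
}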
// concurrentIncrements starts two goroutines in parallel, both of which
// read the integer stored at the other's key and add it onto their own.
// It checks that the outcome is serializable, i.e. exactly one of the
// two goroutines (the later writer) sees the other's previous write.
func concurrentIncrements(db *client.DB, t *testing.T) {
	// wgStart waits for all transactions to line up, wgEnd has the main
	// function wait for them to finish.
	var wgStart, wgEnd sync.WaitGroup
	wgStart.Add(2 + 1)
	wgEnd.Add(2)

	for i := 0; i < 2; i++ {
		go func(i int) {
			// Read the other key, write key i.
			readKey := []byte(fmt.Sprintf(testUser+"/value-%d", (i+1)%2))
			writeKey := []byte(fmt.Sprintf(testUser+"/value-%d", i))
			defer wgEnd.Done()
			wgStart.Done()
			// Wait until the other goroutines are running.
			wgStart.Wait()

			if err := db.Txn(context.TODO(), func(txn *client.Txn) error {
				txn.SetDebugName(fmt.Sprintf("test-%d", i), 0)
				// Retrieve the other key.
				gr, err := txn.Get(readKey)
				if err != nil {
					return err
				}

				otherValue := int64(0)
				if gr.Value != nil {
					otherValue = gr.ValueInt()
				}

				_, err = txn.Inc(writeKey, 1+otherValue)
				return err
			}); err != nil {
				t.Error(err)
			}
		}(i)
	}

	// Kick the goroutines loose.
	wgStart.Done()
	// Wait for the goroutines to finish.
	wgEnd.Wait()

	// Verify that both keys contain something and, more importantly, that
	// one key actually contains the value of the first writer and not only
	// its own.
	total := int64(0)
	results := []int64(nil)
	for i := 0; i < 2; i++ {
		readKey := []byte(fmt.Sprintf(testUser+"/value-%d", i))
		gr, err := db.Get(context.TODO(), readKey)
		if err != nil {
			t.Fatal(err)
		}
		if gr.Value == nil {
			t.Fatalf("unexpected empty key: %s=%v", readKey, gr.Value)
		}
		total += gr.ValueInt()
		results = append(results, gr.ValueInt())
	}

	// In a serializable execution the first writer reads nothing and writes 1,
	// and the second reads that 1 and writes 2, so the keys must sum to 3.
	if total != 3 {
		t.Fatalf("got unserializable values %v", results)
	}
}
// runSchemaChangeWithOperations runs a schema change and, as soon as the
// schema change starts executing its backfill, runs some OLTP operations in
// parallel.
func runSchemaChangeWithOperations(
	t *testing.T,
	sqlDB *gosql.DB,
	kvDB *client.DB,
	schemaChange string,
	maxValue int,
	keyMultiple int,
	backfillNotification chan bool,
) {
	tableDesc := sqlbase.GetTableDescriptor(kvDB, "t", "test")

	// Run the schema change in a separate goroutine.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		start := timeutil.Now()
		// Start schema change that eventually runs a backfill.
		if _, err := sqlDB.Exec(schemaChange); err != nil {
			t.Error(err)
		}
		t.Logf("schema change %s took %v", schemaChange, timeutil.Since(start))
		wg.Done()
	}()

	// Wait until the schema change backfill starts.
	<-backfillNotification

	// Run a variety of operations during the backfill.

	// Grabbing a schema change lease on the table will fail, disallowing
	// another schema change from being simultaneously executed.
	sc := csql.NewSchemaChangerForTesting(tableDesc.ID, 0, 0, *kvDB, nil)
	if l, err := sc.AcquireLease(); err == nil {
		t.Fatalf("schema change lease acquisition on table %d succeeded: %v", tableDesc.ID, l)
	}

	// Update some rows.
	var updatedKeys []int
	for i := 0; i < 10; i++ {
		k := rand.Intn(maxValue)
		v := maxValue + i + 1
		if _, err := sqlDB.Exec(`UPDATE t.test SET v = $1 WHERE k = $2`, v, k); err != nil {
			t.Error(err)
		}
		updatedKeys = append(updatedKeys, k)
	}

	// Reupdate updated values back to what they were before.
	for _, k := range updatedKeys {
		if _, err := sqlDB.Exec(`UPDATE t.test SET v = $1 WHERE k = $2`, maxValue-k, k); err != nil {
			t.Error(err)
		}
	}

	// Delete some rows.
	deleteStartKey := rand.Intn(maxValue - 10)
	for i := 0; i < 10; i++ {
		if _, err := sqlDB.Exec(`DELETE FROM t.test WHERE k = $1`, deleteStartKey+i); err != nil {
			t.Error(err)
		}
	}
	// Reinsert deleted rows.
	for i := 0; i < 10; i++ {
		k := deleteStartKey + i
		if _, err := sqlDB.Exec(`INSERT INTO t.test VALUES($1, $2)`, k, maxValue-k); err != nil {
			t.Error(err)
		}
	}

	// Insert some new rows.
	numInserts := 10
	for i := 0; i < numInserts; i++ {
		k := maxValue + i + 1
		if _, err := sqlDB.Exec(`INSERT INTO t.test VALUES($1, $1)`, k); err != nil {
			t.Error(err)
		}
	}

	wg.Wait() // for schema change to complete.

	// Verify the number of keys left behind in the table to validate schema
	// change operations.
	tablePrefix := roachpb.Key(keys.MakeTablePrefix(uint32(tableDesc.ID)))
	tableEnd := tablePrefix.PrefixEnd()
	if kvs, err := kvDB.Scan(context.TODO(), tablePrefix, tableEnd, 0); err != nil {
		t.Fatal(err)
	} else if e := keyMultiple * (maxValue + numInserts + 1); len(kvs) != e {
		for _, kv := range kvs {
			t.Errorf("key %s, value %s", kv.Key, kv.Value)
		}
		t.Fatalf("expected %d key value pairs, but got %d", e, len(kvs))
	}

	// Delete the rows inserted.
	for i := 0; i < numInserts; i++ {
		if _, err := sqlDB.Exec(`DELETE FROM t.test WHERE k = $1`, maxValue+i+1); err != nil {
			t.Error(err)
		}
	}
}
// restoreTable restores the given table into the database whose name matches
// the supplied DatabaseDescriptor. The table being restored is always rekeyed
// with a newly allocated ID, so it cannot conflict with an existing table.
func restoreTable(
	ctx context.Context,
	db client.DB,
	database sqlbase.DatabaseDescriptor,
	table *sqlbase.TableDescriptor,
	ranges []sqlbase.BackupRangeDescriptor,
) error {
	if log.V(1) {
		log.Infof(ctx, "Restoring Table %q", table.Name)
	}

	var newTableID sqlbase.ID
	if err := db.Txn(ctx, func(txn *client.Txn) error {
		// Make sure there's a database with a name that matches the original.
		if _, err := getDescriptorID(txn, tableKey{name: database.Name}); err != nil {
			return errors.Wrapf(err, "a database named %q needs to exist to restore table %q",
				database.Name, table.Name)
		}

		// Assign a new ID for the table. TODO(dan): For now, we're always
		// generating a new ID, but varints get longer as they get bigger and so
		// our keys will, too. We should someday figure out how to overwrite an
		// existing table and steal its ID.
		var err error
		newTableID, err = GenerateUniqueDescID(txn)
		return err
	}); err != nil {
		return err
	}

	// Create the iteration keys before we give the table its new ID.
	tableStartKeyOld := roachpb.Key(sqlbase.MakeIndexKeyPrefix(table, table.PrimaryIndex.ID))
	tableEndKeyOld := tableStartKeyOld.PrefixEnd()

	// This loop makes restoring multiple tables O(N*M), where N is the number
	// of tables and M is the number of ranges. We could reduce this using an
	// interval tree if necessary.
	var wg sync.WaitGroup
	result := struct {
		syncutil.Mutex
		firstErr error
		numErrs  int
	}{}
	for _, rangeDesc := range ranges {
		if len(rangeDesc.Path) == 0 {
			// Empty path means empty range.
			continue
		}

		intersectBegin, intersectEnd := IntersectHalfOpen(
			rangeDesc.StartKey, rangeDesc.EndKey, tableStartKeyOld, tableEndKeyOld)
		if intersectBegin != nil && intersectEnd != nil {
			// Write the data under the new ID.
			// TODO(dan): There's no SQL descriptors that point at this yet, so it
			// should be possible to remove it from the one txn this is all
			// currently run under. If we do that, make sure this data gets
			// cleaned up on errors.
			wg.Add(1)
			go func(desc sqlbase.BackupRangeDescriptor) {
				for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
					err := db.Txn(ctx, func(txn *client.Txn) error {
						return Ingest(ctx, txn, desc.Path, desc.CRC, intersectBegin, intersectEnd, newTableID)
					})
					if _, ok := err.(*client.AutoCommitError); ok {
						log.Errorf(ctx, "auto commit error during ingest: %s", err)
						// TODO(dan): Ingest currently does not rely on the
						// range being empty, but the plan is that it will. When
						// that change happens, this will have to delete any
						// partially ingested data or something.
						continue
					}

					if err != nil {
						log.Errorf(ctx, "%T %s", err, err)
						result.Lock()
						defer result.Unlock()
						// Record only the first error encountered.
						if result.firstErr == nil {
							result.firstErr = err
						}
						result.numErrs++
					}
					break
				}
				wg.Done()
			}(rangeDesc)
		}
	}
	wg.Wait()

	// All concurrent accesses have finished, we don't need the lock anymore.
	if result.firstErr != nil {
		// This leaves the data that did get imported in case the user wants to
		// retry.
		// TODO(dan): Build tooling to allow a user to restart a failed restore.
		return errors.Wrapf(result.firstErr, "ingest encountered %d errors", result.numErrs)
	}

	table.ID = newTableID
	return db.Txn(ctx, func(txn *client.Txn) error {
		// Pass the descriptors by value to keep this idempotent.
		return restoreTableDesc(ctx, txn, database, *table)
	})
}