// recordJoinEvent begins an asynchronous task which attempts to log a "node
// join" or "node restart" event. This query will retry until it succeeds or
// the server stops.
func (n *Node) recordJoinEvent() {
	if !n.storeCfg.LogRangeEvents {
		return
	}

	logEventType := sql.EventLogNodeRestart
	if n.initialBoot {
		logEventType = sql.EventLogNodeJoin
	}

	n.stopper.RunWorker(func() {
		ctx, span := n.AnnotateCtxWithSpan(context.Background(), "record-join-event")
		defer span.Finish()
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = n.stopper.ShouldStop()
		for r := retry.Start(retryOpts); r.Next(); {
			if err := n.storeCfg.DB.Txn(ctx, func(txn *client.Txn) error {
				return n.eventLogger.InsertEventRecord(txn,
					logEventType,
					int32(n.Descriptor.NodeID),
					int32(n.Descriptor.NodeID),
					struct {
						Descriptor roachpb.NodeDescriptor
						ClusterID  uuid.UUID
						StartedAt  int64
					}{n.Descriptor, n.ClusterID, n.startedAt},
				)
			}); err != nil {
				log.Warningf(ctx, "%s: unable to log %s event: %s", n, logEventType, err)
			} else {
				return
			}
		}
	})
}
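// The function above follows the retry idiom shared by every snippet in this
// section: build a retry.Options (or take base.DefaultRetryOptions()), optionally
// wire its Closer to a stopper channel, and loop with retry.Start/Next until the
// work succeeds or the loop is told to give up. Below is a minimal, hedged sketch
// of that shape; retryUntilSuccessSketch and doWork are illustrative names, not
// part of the codebase.
func retryUntilSuccessSketch(
	ctx context.Context, stopCh <-chan struct{}, doWork func(context.Context) error,
) error {
	opts := retry.Options{
		InitialBackoff: 10 * time.Millisecond,
		MaxBackoff:     1 * time.Second,
		Multiplier:     2,
		Closer:         stopCh, // stop retrying when the caller shuts down
	}
	var err error
	for r := retry.Start(opts); r.Next(); {
		if err = doWork(ctx); err == nil {
			// Success: exit the retry loop.
			return nil
		}
		log.Warningf(ctx, "operation failed, will retry: %s", err)
	}
	// Next() returned false: the Closer fired (or MaxRetries, if set, was exhausted).
	return err
}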
// get performs an HTTPS GET to the specified path for a specific node.
func get(ctx context.Context, t *testing.T, base, rel string) []byte {
	// TODO(bram) #2059: Remove retry logic.
	url := base + rel
	for r := retry.Start(retryOptions); r.Next(); {
		resp, err := cluster.HTTPClient.Get(url)
		if err != nil {
			log.Infof(ctx, "could not GET %s - %s", url, err)
			continue
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			log.Infof(ctx, "could not read body for %s - %s", url, err)
			continue
		}
		if resp.StatusCode != http.StatusOK {
			log.Infof(ctx, "could not GET %s - statuscode: %d - body: %s", url, resp.StatusCode, body)
			continue
		}
		if log.V(1) {
			log.Infof(ctx, "OK response from %s", url)
		}
		return body
	}
	t.Fatalf("There was an error retrieving %s", url)
	return []byte("")
}
// waitForFullReplication waits until all stores in the cluster
// have no ranges with replication pending.
func (tc *TestCluster) waitForFullReplication() error {
	opts := retry.Options{
		InitialBackoff: time.Millisecond * 10,
		MaxBackoff:     time.Millisecond * 100,
		Multiplier:     2,
	}

	notReplicated := true
	for r := retry.Start(opts); r.Next() && notReplicated; {
		notReplicated = false
		for _, s := range tc.Servers {
			err := s.Stores().VisitStores(func(s *storage.Store) error {
				if err := s.ComputeMetrics(0); err != nil {
					return err
				}
				if s.Metrics().ReplicaAllocatorAddCount.Value() > 0 {
					notReplicated = true
				}
				return nil
			})
			if err != nil {
				return err
			}
			if notReplicated {
				break
			}
		}
	}
	return nil
}
// waitForOneVersion returns once there are no unexpired leases on the
// previous version of the table descriptor. It returns the current version.
// After returning there can only be versions of the descriptor >= the
// returned version. Lease acquisition (see acquire()) maintains the
// invariant that no new leases for desc.Version-1 will be granted once
// desc.Version exists.
func (s LeaseStore) waitForOneVersion(
	tableID sqlbase.ID, retryOpts retry.Options,
) (sqlbase.DescriptorVersion, error) {
	desc := &sqlbase.Descriptor{}
	descKey := sqlbase.MakeDescMetadataKey(tableID)
	var tableDesc *sqlbase.TableDescriptor
	for r := retry.Start(retryOpts); r.Next(); {
		// Get the current version of the table descriptor non-transactionally.
		//
		// TODO(pmattis): Do an inconsistent read here?
		if err := s.db.GetProto(context.TODO(), descKey, desc); err != nil {
			return 0, err
		}
		tableDesc = desc.GetTable()
		if tableDesc == nil {
			return 0, errors.Errorf("ID %d is not a table", tableID)
		}
		// Check to see if there are any leases that still exist on the previous
		// version of the descriptor.
		now := s.clock.Now()
		count, err := s.countLeases(tableDesc.ID, tableDesc.Version-1, now.GoTime())
		if err != nil {
			return 0, err
		}
		if count == 0 {
			break
		}
		log.Infof(context.TODO(), "publish (count leases): descID=%d name=%s version=%d count=%d",
			tableDesc.ID, tableDesc.Name, tableDesc.Version-1, count)
	}
	return tableDesc.Version, nil
}
// Tests a batch of queries very similar to those that PGBench runs
// in its TPC-B(ish) mode.
func runPgbenchQueryParallel(b *testing.B, db *gosql.DB) {
	if err := pgbench.SetupBenchDB(db, 20000, true /*quiet*/); err != nil {
		b.Fatal(err)
	}

	retryOpts := retry.Options{
		InitialBackoff: 1 * time.Millisecond,
		MaxBackoff:     200 * time.Millisecond,
		Multiplier:     2,
	}

	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		src := rand.New(rand.NewSource(5432))
		r := retry.Start(retryOpts)
		var err error
		for pb.Next() {
			r.Reset()
			for r.Next() {
				err = pgbench.RunOne(db, src, 20000)
				if err == nil {
					break
				}
			}
			if err != nil {
				b.Fatal(err)
			}
		}
	})
	b.StopTimer()
}
// execSchemaChanges releases schema leases and runs the queued
// schema changers. This needs to be run after the transaction
// scheduling the schema change has finished.
//
// The list of closures is cleared after (attempting) execution.
//
// Args:
//  results: The results from all statements in the group that scheduled the
//    schema changes we're about to execute. Results corresponding to the
//    schema change statements will be changed in case an error occurs.
func (scc *schemaChangerCollection) execSchemaChanges(
	e *Executor, planMaker *planner, results ResultList,
) {
	if planMaker.txn != nil {
		panic("trying to execute schema changes while still in a transaction")
	}
	ctx := e.AnnotateCtx(context.TODO())
	// Release the leases once a transaction is complete.
	planMaker.releaseLeases()
	if e.cfg.SchemaChangerTestingKnobs.SyncFilter != nil {
		e.cfg.SchemaChangerTestingKnobs.SyncFilter(TestingSchemaChangerCollection{scc})
	}
	// Execute any schema changes that were scheduled, in the order of the
	// statements that scheduled them.
	for _, scEntry := range scc.schemaChangers {
		sc := &scEntry.sc
		sc.db = *e.cfg.DB
		sc.testingKnobs = e.cfg.SchemaChangerTestingKnobs
		for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
			if done, err := sc.IsDone(); err != nil {
				log.Warning(ctx, err)
				break
			} else if done {
				break
			}
			if err := sc.exec(); err != nil {
				if isSchemaChangeRetryError(err) {
					// Try again.
					continue
				}
				// All other errors can be reported; we report it as the result
				// corresponding to the statement that enqueued this changer.
				// There's some sketchiness here: we assume there's a single result
				// per statement and we clobber the result/error of the corresponding
				// statement.
				// There's also another subtlety: we can only report results for
				// statements in the current batch; we can't modify the results of
				// older statements.
				if scEntry.epoch == scc.curGroupNum {
					results[scEntry.idx] = Result{Err: err}
				}
				log.Warningf(ctx, "error executing schema change: %s", err)
			}
			break
		}
	}
	scc.schemaChangers = scc.schemaChangers[:0]
}
func (ia *idAllocator) start() {
	ia.stopper.RunWorker(func() {
		ctx := ia.AnnotateCtx(context.Background())
		defer close(ia.ids)

		for {
			var newValue int64
			for newValue <= int64(ia.minID) {
				var err error
				var res client.KeyValue
				for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
					idKey := ia.idKey.Load().(roachpb.Key)
					if err := ia.stopper.RunTask(func() {
						res, err = ia.db.Inc(ctx, idKey, int64(ia.blockSize))
					}); err != nil {
						log.Warning(ctx, err)
						return
					}
					if err == nil {
						newValue = res.ValueInt()
						break
					}
					log.Warningf(ctx, "unable to allocate %d ids from %s: %s", ia.blockSize, idKey, err)
				}
				if err != nil {
					panic(fmt.Sprintf("unexpectedly exited id allocation retry loop: %s", err))
				}
			}

			end := newValue + 1
			start := end - int64(ia.blockSize)

			if start < int64(ia.minID) {
				start = int64(ia.minID)
			}

			// Add all new ids to the channel for consumption.
			for i := start; i < end; i++ {
				select {
				case ia.ids <- uint32(i):
				case <-ia.stopper.ShouldStop():
					return
				}
			}
		}
	})
}
func eventlogUniqueIDDefault(ctx context.Context, r runner) error {
	const alterStmt = "ALTER TABLE system.eventlog ALTER COLUMN uniqueID SET DEFAULT uuid_v4();"

	// System tables can only be modified by a privileged internal user.
	session := sql.NewSession(ctx, sql.SessionArgs{User: security.NodeUser}, r.sqlExecutor, nil, nil)
	defer session.Finish(r.sqlExecutor)

	// Retry a limited number of times because returning an error and letting
	// the node kill itself is better than holding the migration lease for an
	// arbitrarily long time.
	var err error
	for retry := retry.Start(retry.Options{MaxRetries: 5}); retry.Next(); {
		res := r.sqlExecutor.ExecuteStatements(session, alterStmt, nil)
		err = checkQueryResults(res.ResultList, 1)
		if err == nil {
			break
		}
		log.Warningf(ctx, "failed attempt to update system.eventlog schema: %s", err)
	}
	return err
}
// WaitReady waits until the infrastructure is in a state that *should* allow
// for a healthy cluster. Currently, this means waiting for the load balancer
// to resolve from all nodes.
func (f *Farmer) WaitReady(d time.Duration) error {
	var rOpts = retry.Options{
		InitialBackoff: time.Second,
		MaxBackoff:     time.Minute,
		Multiplier:     1.5,
	}
	var err error
	for r := retry.Start(rOpts); r.Next(); {
		instance := f.FirstInstance()
		if err != nil || instance == "" {
			err = fmt.Errorf("no nodes found: %v", err)
			continue
		}
		for i := range f.Nodes() {
			if err = f.Exec(i, "nslookup "+instance); err != nil {
				break
			}
		}
		if err == nil {
			return nil
		}
	}
	return err
}
// waitForStoreFrozen polls the given stores until they all report having no
// unfrozen Replicas (or an error or timeout occurs).
func (s *adminServer) waitForStoreFrozen(
	stream serverpb.Admin_ClusterFreezeServer,
	stores map[roachpb.StoreID]roachpb.NodeID,
	wantFrozen bool,
) error {
	mu := struct {
		syncutil.Mutex
		oks map[roachpb.StoreID]bool
	}{
		oks: make(map[roachpb.StoreID]bool),
	}
	opts := base.DefaultRetryOptions()
	opts.Closer = s.server.stopper.ShouldQuiesce()
	opts.MaxRetries = 20
	sem := make(chan struct{}, 256)
	errChan := make(chan error, 1)
	sendErr := func(err error) {
		select {
		case errChan <- err:
		default:
		}
	}
	numWaiting := len(stores) // loop until this drops to zero
	var err error
	for r := retry.Start(opts); r.Next(); {
		mu.Lock()
		for storeID, nodeID := range stores {
			storeID, nodeID := storeID, nodeID // loop-local copies for goroutine
			var nodeDesc roachpb.NodeDescriptor
			if err := s.server.gossip.GetInfoProto(gossip.MakeNodeIDKey(nodeID), &nodeDesc); err != nil {
				sendErr(err)
				break
			}
			addr := nodeDesc.Address.String()

			if _, inflightOrSucceeded := mu.oks[storeID]; inflightOrSucceeded {
				continue
			}
			mu.oks[storeID] = false // mark as inflight
			action := func() (err error) {
				var resp *storage.PollFrozenResponse
				defer func() {
					message := fmt.Sprintf("node %d, store %d: ", nodeID, storeID)

					if err != nil {
						message += err.Error()
					} else {
						numMismatching := len(resp.Results)
						mu.Lock()
						if numMismatching == 0 {
							// If the Store is in the right state, mark it as such.
							// This means we won't try it again.
							message += "ready"
							mu.oks[storeID] = true
						} else {
							// Otherwise, forget that we tried the Store so that
							// the retry loop picks it up again.
							message += fmt.Sprintf("%d replicas report wrong status", numMismatching)
							if limit := 10; numMismatching > limit {
								message += " [truncated]: "
								resp.Results = resp.Results[:limit]
							} else {
								message += ": "
							}
							message += fmt.Sprintf("%+v", resp.Results)
							delete(mu.oks, storeID)
						}
						mu.Unlock()
					}
					err = stream.Send(&serverpb.ClusterFreezeResponse{
						Message: message,
					})
				}()
				conn, err := s.server.rpcContext.GRPCDial(addr)
				if err != nil {
					return err
				}
				client := storage.NewFreezeClient(conn)
				resp, err = client.PollFrozen(context.TODO(),
					&storage.PollFrozenRequest{
						StoreRequestHeader: storage.StoreRequestHeader{
							NodeID:  nodeID,
							StoreID: storeID,
						},
						// If we are looking to freeze everything, we want to
						// collect thawed Replicas, and vice versa.
						CollectFrozen: !wantFrozen,
					})
				return err
			}
			// Run a limited, non-blocking task. That means the task simply
			// won't run if the semaphore is full (or the node is draining).
			// Both are handled by the surrounding retry loop.
			if err := s.server.stopper.RunLimitedAsyncTask(
				context.TODO(), sem, true /* wait */, func(_ context.Context) {
					if err := action(); err != nil {
						sendErr(err)
					}
				}); err != nil {
				// Node draining.
				sendErr(err)
				break
			}
		}

		numWaiting = len(stores)
		for _, ok := range mu.oks {
			if ok {
				// Store has reported that it is frozen.
				numWaiting--
				continue
			}
		}
		mu.Unlock()

		select {
		case err = <-errChan:
		default:
		}

		// Keep going unless there's been an error or everyone's frozen.
		if err != nil || numWaiting == 0 {
			break
		}
		if err := stream.Send(&serverpb.ClusterFreezeResponse{
			Message: fmt.Sprintf("waiting for %d store%s to apply operation",
				numWaiting, util.Pluralize(int64(numWaiting))),
		}); err != nil {
			return err
		}
	}
	if err != nil {
		return err
	}
	if numWaiting > 0 {
		err = fmt.Errorf("timed out waiting for %d store%s to report freeze",
			numWaiting, util.Pluralize(int64(numWaiting)))
	}
	return err
}
func TestAsyncSchemaChanger(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// The descriptor changes made must have an immediate effect
	// so disable leases on tables.
	defer csql.TestDisableTableLeases()()
	// Disable synchronous schema change execution so the asynchronous schema
	// changer executes all schema changes.
	params, _ := createTestServerParams()
	params.Knobs = base.TestingKnobs{
		SQLSchemaChanger: &csql.SchemaChangerTestingKnobs{
			SyncFilter: func(tscc csql.TestingSchemaChangerCollection) {
				tscc.ClearSchemaChangers()
			},
			AsyncExecQuickly: true,
		},
	}
	s, sqlDB, kvDB := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	if _, err := sqlDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k CHAR PRIMARY KEY, v CHAR);
INSERT INTO t.test VALUES ('a', 'b'), ('c', 'd');
`); err != nil {
		t.Fatal(err)
	}

	// Read table descriptor for version.
	tableDesc := sqlbase.GetTableDescriptor(kvDB, "t", "test")

	// A long running schema change operation runs through
	// a state machine that increments the version by 3.
	expectedVersion := tableDesc.Version + 3

	// Run some schema change.
	if _, err := sqlDB.Exec(`
CREATE INDEX foo ON t.test (v)
`); err != nil {
		t.Fatal(err)
	}

	retryOpts := retry.Options{
		InitialBackoff: 20 * time.Millisecond,
		MaxBackoff:     200 * time.Millisecond,
		Multiplier:     2,
	}

	// Wait until index is created.
	for r := retry.Start(retryOpts); r.Next(); {
		tableDesc = sqlbase.GetTableDescriptor(kvDB, "t", "test")
		if len(tableDesc.Indexes) == 1 {
			break
		}
	}

	// Ensure that the indexes have been created.
	mTest := makeMutationTest(t, kvDB, sqlDB, tableDesc)
	indexQuery := `SELECT v FROM t.test@foo`
	mTest.CheckQueryResults(indexQuery, [][]string{{"b"}, {"d"}})

	// Ensure that the version has been incremented.
	tableDesc = sqlbase.GetTableDescriptor(kvDB, "t", "test")
	newVersion := tableDesc.Version
	if newVersion != expectedVersion {
		t.Fatalf("bad version; e = %d, v = %d", expectedVersion, newVersion)
	}

	// Apply a schema change that only sets the UpVersion bit.
	expectedVersion = newVersion + 1

	mTest.Exec(`ALTER INDEX t.test@foo RENAME TO ufo`)

	for r := retry.Start(retryOpts); r.Next(); {
		// Ensure that the version gets incremented.
		tableDesc = sqlbase.GetTableDescriptor(kvDB, "t", "test")
		name := tableDesc.Indexes[0].Name
		if name != "ufo" {
			t.Fatalf("bad index name %s", name)
		}
		newVersion = tableDesc.Version
		if newVersion == expectedVersion {
			break
		}
	}

	// Run many schema changes simultaneously and check
	// that they all get executed.
	count := 5
	for i := 0; i < count; i++ {
		mTest.Exec(fmt.Sprintf(`CREATE INDEX foo%d ON t.test (v)`, i))
	}
	// Wait until indexes are created.
	for r := retry.Start(retryOpts); r.Next(); {
		tableDesc = sqlbase.GetTableDescriptor(kvDB, "t", "test")
		if len(tableDesc.Indexes) == count+1 {
			break
		}
	}
	for i := 0; i < count; i++ {
		indexQuery := fmt.Sprintf(`SELECT v FROM t.test@foo%d`, i)
		mTest.CheckQueryResults(indexQuery, [][]string{{"b"}, {"d"}})
	}
}
// Exec executes fn in the context of a distributed transaction.
// Execution is controlled by opt (see comments in TxnExecOptions).
//
// opt is passed to fn, and it's valid for fn to modify opt as it sees
// fit during each execution attempt.
//
// It's valid for txn to be nil (meaning the txn has already aborted) if fn
// can handle that. This is useful for continuing transactions that have been
// aborted because of an error in a previous batch of statements in the hope
// that a ROLLBACK will reset the state. Neither opt.AutoRetry nor opt.AutoCommit
// can be set in this case.
//
// When this method returns, txn might be in any state; Exec does not attempt
// to clean up the transaction before returning an error. In case of
// TransactionAbortedError, txn is reset to a fresh transaction, ready to be
// used.
func (txn *Txn) Exec(opt TxnExecOptions, fn func(txn *Txn, opt *TxnExecOptions) error) (err error) {
	// Run fn in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var retryOptions retry.Options
	if txn == nil && (opt.AutoRetry || opt.AutoCommit) {
		panic("asked to retry or commit a txn that is already aborted")
	}

	// Ensure that a RetryableTxnError escaping this function is not used by
	// another (higher-level) Exec() invocation to restart its unrelated
	// transaction. Technically, setting TxnID to nil here is best-effort and
	// doesn't ensure that (the error will be wrongly used if the outer txn also
	// has a nil TxnID).
	// TODO(andrei): set TxnID to a bogus non-nil value once we get rid of the
	// retErr.Transaction field.
	defer func() {
		if retErr, ok := err.(*roachpb.RetryableTxnError); ok {
			retErr.TxnID = nil
			retErr.Transaction = nil
		}
	}()

	if opt.AutoRetry {
		retryOptions = txn.db.ctx.TxnRetryOptions
	}

	for r := retry.Start(retryOptions); r.Next(); {
		if txn != nil {
			// If we're looking at a brand new transaction, then communicate
			// what should be used as initial timestamp for the KV txn created
			// by TxnCoordSender.
			if opt.Clock != nil && !txn.Proto.IsInitialized() {
				// Control the KV timestamp, such that the value returned by
				// `cluster_logical_timestamp()` is consistent with the commit
				// (serializable) ordering.
				txn.Proto.OrigTimestamp = opt.Clock.Now()
			}
		}

		err = fn(txn, &opt)

		// TODO(andrei): Until 7881 is fixed.
		if err == nil && opt.AutoCommit && txn.Proto.Status == roachpb.ABORTED {
			log.Errorf(txn.Context, "#7881: no err but aborted txn proto. opt: %+v, txn: %+v",
				opt, txn)
		}

		if err == nil && opt.AutoCommit && txn.Proto.Status == roachpb.PENDING {
			// fn succeeded, but didn't commit.
			err = txn.Commit()
			log.Eventf(txn.Context, "client.Txn did AutoCommit. err: %v\ntxn: %+v", err, txn.Proto)
			if err != nil {
				if _, retryable := err.(*roachpb.RetryableTxnError); !retryable {
					// We can't retry, so let the caller know we tried to
					// autocommit.
					err = &AutoCommitError{cause: err}
				}
			}
		}

		if !opt.AutoRetry {
			break
		}

		if retErr, retryable := err.(*roachpb.RetryableTxnError); !retryable {
			break
		} else {
			// Make sure the txn record that err carries is for this txn.
			// If it's not, we terminate the "retryable" character of the error.
			if txn.Proto.ID != nil && (retErr.TxnID == nil || *retErr.TxnID != *txn.Proto.ID) {
				return errors.New(retErr.Error())
			}
			if !retErr.Backoff {
				r.Reset()
			}
		}

		txn.commitTriggers = nil

		log.VEventf(txn.Context, 2, "automatically retrying transaction: %s because of error: %s",
			txn.DebugName(), err)
	}

	return err
}
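// A hedged usage sketch for Exec above: run a closure with automatic retry and
// auto-commit. It assumes an already-created *client.Txn; runWithAutoRetrySketch
// and transferFunds are illustrative names, not part of the client package.
func runWithAutoRetrySketch(txn *client.Txn, transferFunds func(*client.Txn) error) error {
	opt := client.TxnExecOptions{
		AutoRetry:  true, // retry the closure on retryable transaction errors
		AutoCommit: true, // commit if the closure returns nil without committing itself
	}
	return txn.Exec(opt, func(txn *client.Txn, _ *client.TxnExecOptions) error {
		// The closure may run several times, so it must not have side effects
		// outside the transaction.
		return transferFunds(txn)
	})
}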
// Publish updates a table descriptor. It also maintains the invariant that
// there are at most two versions of the descriptor out in the wild at any time
// by first waiting for all nodes to be on the current (pre-update) version of
// the table desc.
// The update closure is called after the wait, and it provides the new version
// of the descriptor to be written. In a multi-step schema operation, this
// update should perform a single step.
// The closure may be called multiple times if retries occur; make sure it does
// not have side effects.
// Returns the updated version of the descriptor.
func (s LeaseStore) Publish(
	tableID sqlbase.ID,
	update func(*sqlbase.TableDescriptor) error,
	logEvent func(*client.Txn) error,
) (*sqlbase.Descriptor, error) {
	errLeaseVersionChanged := errors.New("lease version changed")
	// Retry while getting errLeaseVersionChanged.
	for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
		// Wait until there are no unexpired leases on the previous version
		// of the table.
		expectedVersion, err := s.waitForOneVersion(tableID, base.DefaultRetryOptions())
		if err != nil {
			return nil, err
		}

		desc := &sqlbase.Descriptor{}
		// There should be only one version of the descriptor, but it's
		// a race now to update to the next version.
		err = s.db.Txn(context.TODO(), func(txn *client.Txn) error {
			descKey := sqlbase.MakeDescMetadataKey(tableID)

			// Re-read the current version of the table descriptor, this time
			// transactionally.
			if err := txn.GetProto(descKey, desc); err != nil {
				return err
			}
			tableDesc := desc.GetTable()
			if tableDesc == nil {
				return errors.Errorf("ID %d is not a table", tableID)
			}
			if expectedVersion != tableDesc.Version {
				// The version changed out from under us. Someone else must be
				// performing a schema change operation.
				if log.V(3) {
					log.Infof(txn.Context, "publish (version changed): %d != %d",
						expectedVersion, tableDesc.Version)
				}
				return errLeaseVersionChanged
			}

			// Run the update closure.
			if err := update(tableDesc); err != nil {
				return err
			}

			// Bump the version and modification time.
			tableDesc.Version++
			now := s.clock.Now()
			tableDesc.ModificationTime = now
			if log.V(3) {
				log.Infof(txn.Context, "publish: descID=%d (%s) version=%d mtime=%s",
					tableDesc.ID, tableDesc.Name, tableDesc.Version, now.GoTime())
			}
			if err := tableDesc.ValidateTable(); err != nil {
				return err
			}

			// Write the updated descriptor.
			txn.SetSystemConfigTrigger()
			b := txn.NewBatch()
			b.Put(descKey, desc)
			if logEvent != nil {
				// If an event log is required for this update, ensure that the
				// descriptor change occurs first in the transaction. This is
				// necessary to ensure that the System configuration change is
				// gossiped. See the documentation for
				// transaction.SetSystemConfigTrigger() for more information.
				if err := txn.Run(b); err != nil {
					return err
				}
				if err := logEvent(txn); err != nil {
					return err
				}
				return txn.Commit()
			}
			// More efficient batching can be used if no event log message
			// is required.
			return txn.CommitInBatch(b)
		})

		switch err {
		case nil, errDidntUpdateDescriptor:
			return desc, nil
		case errLeaseVersionChanged:
			// will loop around to retry
		default:
			return nil, err
		}
	}

	panic("not reached")
}
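// A hedged sketch of a caller driving Publish above: the update closure applies
// exactly one schema-change step to the in-memory descriptor and must be
// idempotent, since Publish may call it again if retries occur.
// publishOneStepSketch and addColumnMutation are illustrative names, not part of
// the codebase; passing nil for logEvent skips the event-log entry.
func publishOneStepSketch(s LeaseStore, tableID sqlbase.ID) (*sqlbase.Descriptor, error) {
	return s.Publish(tableID, func(desc *sqlbase.TableDescriptor) error {
		// Perform a single step of the schema change, e.g. enqueue a mutation.
		return addColumnMutation(desc)
	}, nil /* logEvent */)
}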