// processLoop processes the entries in the queue until the provided
// stopper signals exit.
//
// TODO(spencer): current load should factor into replica processing timer.
func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := bq.AnnotateCtx(context.Background())
		defer func() {
			bq.mu.Lock()
			bq.mu.stopped = true
			bq.mu.Unlock()
			bq.AmbientContext.FinishEventLog()
		}()

		// nextTime is initially nil; we don't start any timers until the queue
		// becomes non-empty.
		var nextTime <-chan time.Time

		immediately := make(chan time.Time)
		close(immediately)

		for {
			select {
			// Exit on stopper.
			case <-stopper.ShouldStop():
				return

			// Incoming signal sets the next time to process if there were previously
			// no replicas in the queue.
			case <-bq.incoming:
				if nextTime == nil {
					// When a replica is added, wake up immediately. This is mainly
					// to facilitate testing without unnecessary sleeps.
					nextTime = immediately

					// In case we're in a test, still block on the impl.
					bq.impl.timer()
				}
			// Process replicas as the timer expires.
			case <-nextTime:
				repl := bq.pop()
				if repl != nil {
					if stopper.RunTask(func() {
						annotatedCtx := repl.AnnotateCtx(ctx)
						if err := bq.processReplica(annotatedCtx, repl, clock); err != nil {
							// Maybe add failing replica to purgatory if the queue supports it.
							bq.maybeAddToPurgatory(annotatedCtx, repl, err, clock, stopper)
						}
					}) != nil {
						return
					}
				}
				if bq.Length() == 0 {
					nextTime = nil
				} else {
					nextTime = time.After(bq.impl.timer())
				}
			}
		}
	})
}
// gossip loops, sending deltas of the infostore and receiving deltas
// in turn. If an alternate is proposed on response, the client addr
// is modified and method returns for forwarding by caller.
func (c *client) gossip(
	ctx context.Context, g *Gossip, stream Gossip_GossipClient, stopper *stop.Stopper, wg *sync.WaitGroup,
) error {
	sendGossipChan := make(chan struct{}, 1)

	// Register a callback for gossip updates.
	updateCallback := func(_ string, _ roachpb.Value) {
		select {
		case sendGossipChan <- struct{}{}:
		default:
		}
	}
	// Defer calling "undoer" callback returned from registration.
	defer g.RegisterCallback(".*", updateCallback)()

	errCh := make(chan error, 1)
	// This wait group is used to allow the caller to wait until gossip
	// processing is terminated.
	wg.Add(1)
	stopper.RunWorker(func() {
		defer wg.Done()

		errCh <- func() error {
			for {
				reply, err := stream.Recv()
				if err != nil {
					return err
				}
				if err := c.handleResponse(ctx, g, reply); err != nil {
					return err
				}
			}
		}()
	})

	for {
		select {
		case <-c.closer:
			return nil
		case <-stopper.ShouldStop():
			return nil
		case err := <-errCh:
			return err
		case <-sendGossipChan:
			if err := c.sendGossip(g, stream); err != nil {
				return err
			}
		}
	}
}
// Start launches a worker that, when the stopper signals shutdown, marks the
// scheduler as stopped and wakes all workers, and then starts numWorkers
// worker goroutines.
func (s *raftScheduler) Start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		<-stopper.ShouldStop()
		s.mu.Lock()
		s.mu.stopped = true
		s.mu.Unlock()
		s.mu.cond.Broadcast()
	})

	s.done.Add(s.numWorkers)
	for i := 0; i < s.numWorkers; i++ {
		stopper.RunWorker(func() {
			s.worker(stopper)
		})
	}
}
// scanLoop loops endlessly, scanning through replicas available via
// the replica set, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := rs.AnnotateCtx(context.Background())
		start := timeutil.Now()

		// waitTimer is reset in each call to waitAndProcess.
		defer rs.waitTimer.Stop()

		for {
			if rs.GetDisabled() {
				if done := rs.waitEnabled(stopper); done {
					return
				}
				continue
			}
			var shouldStop bool
			count := 0
			rs.replicas.Visit(func(repl *Replica) bool {
				count++
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, repl)
				return !shouldStop
			})
			if count == 0 {
				// No replicas processed, just wait.
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, nil)
			}

			shouldStop = shouldStop || nil != stopper.RunTask(func() {
				// Increment iteration count.
				rs.mu.Lock()
				defer rs.mu.Unlock()
				rs.mu.scanCount++
				rs.mu.total += timeutil.Since(start)
				if log.V(6) {
					log.Infof(ctx, "reset replica scan iteration")
				}

				// Reset iteration and start time.
				start = timeutil.Now()
			})
			if shouldStop {
				return
			}
		}
	})
}
// NewExecutor creates an Executor and registers a callback on the
// system config.
func NewExecutor(
	cfg ExecutorConfig, stopper *stop.Stopper, startupMemMetrics *MemoryMetrics,
) *Executor {
	exec := &Executor{
		cfg:     cfg,
		reCache: parser.NewRegexpCache(512),

		Latency:          metric.NewLatency(MetaLatency, cfg.MetricsSampleInterval),
		TxnBeginCount:    metric.NewCounter(MetaTxnBegin),
		TxnCommitCount:   metric.NewCounter(MetaTxnCommit),
		TxnAbortCount:    metric.NewCounter(MetaTxnAbort),
		TxnRollbackCount: metric.NewCounter(MetaTxnRollback),
		SelectCount:      metric.NewCounter(MetaSelect),
		UpdateCount:      metric.NewCounter(MetaUpdate),
		InsertCount:      metric.NewCounter(MetaInsert),
		DeleteCount:      metric.NewCounter(MetaDelete),
		DdlCount:         metric.NewCounter(MetaDdl),
		MiscCount:        metric.NewCounter(MetaMisc),
		QueryCount:       metric.NewCounter(MetaQuery),
	}

	exec.systemConfigCond = sync.NewCond(exec.systemConfigMu.RLocker())

	gossipUpdateC := cfg.Gossip.RegisterSystemConfigChannel()
	stopper.RunWorker(func() {
		for {
			select {
			case <-gossipUpdateC:
				sysCfg, _ := cfg.Gossip.GetSystemConfig()
				exec.updateSystemConfig(sysCfg)
			case <-stopper.ShouldStop():
				return
			}
		}
	})

	ctx := log.WithLogTag(context.Background(), "startup", nil)
	startupSession := NewSession(ctx, SessionArgs{}, exec, nil, startupMemMetrics)
	if err := exec.virtualSchemas.init(&startupSession.planner); err != nil {
		log.Fatal(ctx, err)
	}
	startupSession.Finish(exec)

	return exec
}
// startComputePeriodicMetrics starts a loop which periodically instructs each
// store to compute the value of metrics which cannot be incrementally
// maintained.
func (n *Node) startComputePeriodicMetrics(stopper *stop.Stopper, interval time.Duration) {
	stopper.RunWorker(func() {
		ctx := n.AnnotateCtx(context.Background())
		// Compute periodic stats at the same frequency as metrics are sampled.
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for tick := 0; ; tick++ {
			select {
			case <-ticker.C:
				if err := n.computePeriodicMetrics(tick); err != nil {
					log.Errorf(ctx, "failed computing periodic metrics: %s", err)
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// NewContext creates an rpc Context with the supplied values.
func NewContext(
	ambient log.AmbientContext, baseCtx *base.Config, hlcClock *hlc.Clock, stopper *stop.Stopper,
) *Context {
	ctx := &Context{
		Config: baseCtx,
	}
	if hlcClock != nil {
		ctx.localClock = hlcClock
	} else {
		ctx.localClock = hlc.NewClock(hlc.UnixNano)
	}
	ctx.breakerClock = breakerClock{
		clock: ctx.localClock,
	}
	var cancel context.CancelFunc
	ctx.masterCtx, cancel = context.WithCancel(ambient.AnnotateCtx(context.Background()))
	ctx.Stopper = stopper
	ctx.RemoteClocks = newRemoteClockMonitor(
		ctx.masterCtx, ctx.localClock, 10*defaultHeartbeatInterval)
	ctx.HeartbeatInterval = defaultHeartbeatInterval
	ctx.HeartbeatTimeout = 2 * defaultHeartbeatInterval
	ctx.conns.cache = make(map[string]*connMeta)

	stopper.RunWorker(func() {
		<-stopper.ShouldQuiesce()

		cancel()
		ctx.conns.Lock()
		for key, meta := range ctx.conns.cache {
			meta.Do(func() {
				// Make sure initialization is not in progress when we're removing the
				// conn. We need to set the error in case we win the race against the
				// real initialization code.
				if meta.err == nil {
					meta.err = &roachpb.NodeUnavailableError{}
				}
			})
			ctx.removeConnLocked(key, meta)
		}
		ctx.conns.Unlock()
	})
	return ctx
}
// ListenAndServeGRPC creates a listener and serves the specified grpc Server
// on it, closing the listener when signalled by the stopper.
func ListenAndServeGRPC(
	stopper *stop.Stopper, server *grpc.Server, addr net.Addr,
) (net.Listener, error) {
	ln, err := net.Listen(addr.Network(), addr.String())
	if err != nil {
		return ln, err
	}

	stopper.RunWorker(func() {
		<-stopper.ShouldQuiesce()
		FatalIfUnexpected(ln.Close())
		<-stopper.ShouldStop()
		server.Stop()
	})

	stopper.RunWorker(func() {
		FatalIfUnexpected(server.Serve(ln))
	})
	return ln, nil
}
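// Hedged usage sketch (not from the source): one way a caller might wire
// ListenAndServeGRPC to a Stopper. The function name, the service
// registration, and the stopper.Stop() call are assumptions about surrounding
// code; Stop's exact signature differs between versions of the stop package.
func exampleServeGRPC() {
	stopper := stop.NewStopper()
	grpcServer := grpc.NewServer()
	// Register gRPC services on grpcServer before serving.

	addr := &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
	ln, err := ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		log.Fatal(context.TODO(), err)
	}
	log.Infof(context.TODO(), "serving gRPC on %s", ln.Addr())

	// On shutdown, the stopper first quiesces (closing ln) and then stops
	// (calling grpcServer.Stop()), mirroring the two phases in the worker above.
	stopper.Stop()
}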
// Start launches a worker that, once per millisecond, processes one queued
// range unless the queue is disabled, and marks the queue done when the
// stopper signals shutdown.
func (tq *testQueue) Start(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		for {
			select {
			case <-time.After(1 * time.Millisecond):
				tq.Lock()
				if !tq.disabled && len(tq.ranges) > 0 {
					tq.ranges = tq.ranges[1:]
					tq.processed++
				}
				tq.Unlock()
			case <-stopper.ShouldStop():
				tq.Lock()
				tq.done = true
				tq.Unlock()
				return
			}
		}
	})
}
// start will run continuously and expire old reservations.
func (b *bookie) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		ctx := context.TODO()
		for {
			var timeout time.Duration
			b.mu.Lock()
			nextExpiration := b.mu.queue.peek()
			if nextExpiration == nil {
				// No reservations to expire.
				timeout = b.reservationTimeout
			} else {
				now := b.clock.Now()
				if now.GoTime().After(nextExpiration.expireAt.GoTime()) {
					// We have a reservation expiration, remove it.
					expiredReservation := b.mu.queue.dequeue()
					// Is it an active reservation?
					if b.mu.reservationsByRangeID[expiredReservation.RangeID] == expiredReservation {
						b.fillReservationLocked(ctx, expiredReservation)
					} else if log.V(2) {
						log.Infof(ctx, "[r%d] expired reservation has already been filled",
							expiredReservation.RangeID)
					}
					// Set the timeout to 0 to force another peek.
					timeout = 0
				} else {
					timeout = nextExpiration.expireAt.GoTime().Sub(now.GoTime())
				}
			}
			b.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
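// Hedged sketch (not from the source): the timeutil.Timer idiom used above and
// in the StorePool worker below, distilled. The exampleTimerLoop name and the
// nextTimeout()/work() helpers are hypothetical placeholders; the point is
// that a single Timer can be Reset repeatedly as long as Read is set to true
// whenever its channel fires, so the next Reset knows the channel was drained.
func exampleTimerLoop(stopper *stop.Stopper, nextTimeout func() time.Duration, work func()) {
	stopper.RunWorker(func() {
		var timer timeutil.Timer
		defer timer.Stop()
		for {
			timer.Reset(nextTimeout())
			select {
			case <-timer.C:
				// Mark the channel as read before the next Reset.
				timer.Read = true
				work()
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}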
// StartHeartbeat starts a periodic heartbeat to refresh this node's
// last heartbeat in the node liveness table.
func (nl *NodeLiveness) StartHeartbeat(ctx context.Context, stopper *stop.Stopper) {
	log.VEventf(ctx, 1, "starting liveness heartbeat")
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()

	stopper.RunWorker(func() {
		ambient := nl.ambientCtx
		ambient.AddLogTag("hb", nil)
		ticker := time.NewTicker(nl.heartbeatInterval)
		defer ticker.Stop()
		for {
			if !nl.pauseHeartbeat.Load().(bool) {
				ctx, sp := ambient.AnnotateCtxWithSpan(context.Background(), "heartbeat")
				ctx, cancel := context.WithTimeout(ctx, nl.heartbeatInterval)
				// Retry heartbeat in the event the conditional put fails.
				for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
					liveness, err := nl.Self()
					if err != nil && err != ErrNoLivenessRecord {
						log.Errorf(ctx, "unexpected error getting liveness: %v", err)
					}
					if err := nl.Heartbeat(ctx, liveness); err != nil {
						if err == errSkippedHeartbeat {
							continue
						}
						log.Errorf(ctx, "failed liveness heartbeat: %v", err)
					}
					break
				}
				cancel()
				sp.Finish()
			}
			select {
			case <-ticker.C:
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// MakeServer constructs a Server that tracks active connections, closing them
// when signalled by stopper.
func MakeServer(stopper *stop.Stopper, tlsConfig *tls.Config, handler http.Handler) Server {
	var mu syncutil.Mutex
	activeConns := make(map[net.Conn]struct{})
	server := Server{
		Server: &http.Server{
			Handler:   handler,
			TLSConfig: tlsConfig,
			ConnState: func(conn net.Conn, state http.ConnState) {
				mu.Lock()
				switch state {
				case http.StateNew:
					activeConns[conn] = struct{}{}
				case http.StateClosed:
					delete(activeConns, conn)
				}
				mu.Unlock()
			},
			ErrorLog: httpLogger,
		},
	}

	// net/http.(*Server).Serve/http2.ConfigureServer are not thread safe with
	// respect to net/http.(*Server).TLSConfig, so we call it synchronously here.
	if err := http2.ConfigureServer(server.Server, nil); err != nil {
		log.Fatal(context.TODO(), err)
	}

	stopper.RunWorker(func() {
		<-stopper.ShouldStop()

		mu.Lock()
		for conn := range activeConns {
			conn.Close()
		}
		mu.Unlock()
	})

	return server
}
// startGossip loops on a periodic ticker to gossip node-related
// information. Starts a goroutine to loop until the node is closed.
func (n *Node) startGossip(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := n.AnnotateCtx(context.Background())
		// This should always return immediately and acts as a sanity check that we
		// don't try to gossip before we're connected.
		select {
		case <-n.storeCfg.Gossip.Connected:
		default:
			panic(fmt.Sprintf("%s: not connected to gossip", n))
		}
		// Verify we've already gossiped our node descriptor.
		if _, err := n.storeCfg.Gossip.GetNodeDescriptor(n.Descriptor.NodeID); err != nil {
			panic(err)
		}

		gossipStoresInterval := envutil.EnvOrDefaultDuration("COCKROACH_GOSSIP_STORES_INTERVAL",
			gossip.DefaultGossipStoresInterval)
		statusTicker := time.NewTicker(gossipStatusInterval)
		storesTicker := time.NewTicker(gossipStoresInterval)
		nodeTicker := time.NewTicker(gossipNodeDescriptorInterval)
		defer statusTicker.Stop()
		defer storesTicker.Stop()
		defer nodeTicker.Stop()

		n.gossipStores(ctx) // one-off run before going to sleep
		for {
			select {
			case <-statusTicker.C:
				n.storeCfg.Gossip.LogStatus()
			case <-storesTicker.C:
				n.gossipStores(ctx)
			case <-nodeTicker.C:
				if err := n.storeCfg.Gossip.SetNodeDescriptor(&n.Descriptor); err != nil {
					log.Warningf(ctx, "couldn't gossip descriptor for node %d: %s",
						n.Descriptor.NodeID, err)
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// start will run continuously and mark stores as offline if they haven't been
// heard from in longer than timeUntilStoreDead.
func (sp *StorePool) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		var timeoutTimer timeutil.Timer
		defer timeoutTimer.Stop()
		for {
			var timeout time.Duration
			sp.mu.Lock()
			detail := sp.mu.queue.peek()
			if detail == nil {
				// No stores yet, wait the full timeout.
				timeout = sp.timeUntilStoreDead
			} else {
				// Check to see if the store should be marked as dead.
				deadAsOf := detail.lastUpdatedTime.GoTime().Add(sp.timeUntilStoreDead)
				now := sp.clock.Now()
				if now.GoTime().After(deadAsOf) {
					deadDetail := sp.mu.queue.dequeue()
					deadDetail.markDead(now)
					// The next store might be dead as well, set the timeout to
					// 0 to process it immediately.
					timeout = 0
				} else {
					// Store is still alive, schedule the next check for when
					// it should timeout.
					timeout = deadAsOf.Sub(now.GoTime())
				}
			}
			sp.mu.Unlock()
			timeoutTimer.Reset(timeout)
			select {
			case <-timeoutTimer.C:
				timeoutTimer.Read = true
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// StartHeartbeat starts a periodic heartbeat to refresh this node's
// last heartbeat in the node liveness table.
func (nl *NodeLiveness) StartHeartbeat(ctx context.Context, stopper *stop.Stopper) {
	log.VEventf(ctx, 1, "starting liveness heartbeat")
	stopper.RunWorker(func() {
		ambient := nl.ambientCtx
		ambient.AddLogTag("hb", nil)
		ticker := time.NewTicker(nl.heartbeatInterval)
		defer ticker.Stop()
		for {
			ctx, sp := ambient.AnnotateCtxWithSpan(context.Background(), "heartbeat")
			if err := nl.heartbeat(ctx); err != nil {
				log.Errorf(ctx, "failed liveness heartbeat: %s", err)
			}
			sp.Finish()
			select {
			case <-ticker.C:
			case <-nl.stopHeartbeat:
				return
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// maybeAddToPurgatory possibly adds the specified replica to the
// purgatory queue, which holds replicas which have failed
// processing. To be added, the failing error must implement
// purgatoryError and the queue implementation must have its own
// mechanism for signaling re-processing of replicas held in
// purgatory.
func (bq *baseQueue) maybeAddToPurgatory(
	ctx context.Context, repl *Replica, triggeringErr error, clock *hlc.Clock, stopper *stop.Stopper,
) {
	// Increment failures metric here to capture all error returns from
	// process().
	bq.failures.Inc(1)

	// Check whether the failure is a purgatory error and whether the queue supports it.
	if _, ok := triggeringErr.(purgatoryError); !ok || bq.impl.purgatoryChan() == nil {
		log.Error(ctx, triggeringErr)
		return
	}
	bq.mu.Lock()
	defer bq.mu.Unlock()

	// First, check whether the replica has already been re-added to queue.
	if _, ok := bq.mu.replicas[repl.RangeID]; ok {
		return
	}

	log.Error(ctx, errors.Wrap(triggeringErr, "purgatory"))

	item := &replicaItem{value: repl.RangeID}
	bq.mu.replicas[repl.RangeID] = item

	defer func() {
		bq.purgatory.Update(int64(len(bq.mu.purgatory)))
	}()

	// If purgatory already exists, just add to the map and we're done.
	if bq.mu.purgatory != nil {
		bq.mu.purgatory[repl.RangeID] = triggeringErr
		return
	}

	// Otherwise, create purgatory and start processing.
	bq.mu.purgatory = map[roachpb.RangeID]error{
		repl.RangeID: triggeringErr,
	}

	stopper.RunWorker(func() {
		ctx := bq.AnnotateCtx(context.Background())
		ticker := time.NewTicker(purgatoryReportInterval)
		defer ticker.Stop()
		for {
			select {
			case <-bq.impl.purgatoryChan():
				// Remove all items from purgatory into a copied slice.
				bq.mu.Lock()
				ranges := make([]roachpb.RangeID, 0, len(bq.mu.purgatory))
				for rangeID := range bq.mu.purgatory {
					item := bq.mu.replicas[rangeID]
					ranges = append(ranges, item.value)
					bq.remove(item)
				}
				bq.mu.Unlock()
				for _, id := range ranges {
					repl, err := bq.store.GetReplica(id)
					if err != nil {
						log.Errorf(ctx, "range %s no longer exists on store: %s", id, err)
						return
					}
					if stopper.RunTask(func() {
						annotatedCtx := repl.AnnotateCtx(ctx)
						if err := bq.processReplica(annotatedCtx, repl, clock); err != nil {
							bq.maybeAddToPurgatory(annotatedCtx, repl, err, clock, stopper)
						}
					}) != nil {
						return
					}
				}
				bq.mu.Lock()
				if len(bq.mu.purgatory) == 0 {
					log.Infof(ctx, "purgatory is now empty")
					bq.mu.purgatory = nil
					bq.mu.Unlock()
					return
				}
				bq.mu.Unlock()
			case <-ticker.C:
				// Report purgatory status.
				bq.mu.Lock()
				errMap := map[string]int{}
				for _, err := range bq.mu.purgatory {
					errMap[err.Error()]++
				}
				bq.mu.Unlock()
				for errStr, count := range errMap {
					log.Errorf(ctx, "%d replicas failing with %q", count, errStr)
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// Start starts a goroutine that runs outstanding schema changes
// for tables received in the latest system configuration via gossip.
func (s *SchemaChangeManager) Start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := s.gossip.RegisterSystemConfigChannel()
		timer := &time.Timer{}
		delay := 360 * time.Second
		if s.testingKnobs.AsyncExecQuickly {
			delay = 20 * time.Millisecond
		}

		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := s.gossip.GetSystemConfig()
				// Read all tables and their versions
				if log.V(2) {
					log.Info(context.TODO(), "received a new config")
				}
				schemaChanger := SchemaChanger{
					nodeID:       s.leaseMgr.nodeID.Get(),
					db:           s.db,
					leaseMgr:     s.leaseMgr,
					testingKnobs: s.testingKnobs,
				}
				// Keep track of existing schema changers.
				oldSchemaChangers := make(map[sqlbase.ID]struct{}, len(s.schemaChangers))
				for k := range s.schemaChangers {
					oldSchemaChangers[k] = struct{}{}
				}
				execAfter := timeutil.Now().Add(delay)
				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf(context.TODO(), "%s: unable to unmarshal descriptor %v",
							kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						table.MaybeUpgradeFormatVersion()
						if err := table.ValidateTable(); err != nil {
							log.Errorf(context.TODO(), "%s: received invalid table descriptor: %v",
								kv.Key, table)
							continue
						}

						// Keep track of outstanding schema changes.
						// If all schema change commands always set UpVersion, why
						// check for the presence of mutations?
						// A schema change execution might fail soon after
						// unsetting UpVersion, and we still want to process
						// outstanding mutations. Similar with a table marked for deletion.
						if table.UpVersion || table.Dropped() || table.Adding() ||
							table.Renamed() || len(table.Mutations) > 0 {
							if log.V(2) {
								log.Infof(context.TODO(),
									"%s: queue up pending schema change; table: %d, version: %d",
									kv.Key, table.ID, table.Version)
							}

							// Only track the first schema change. We depend on
							// gossip to renotify us when a schema change has been
							// completed.
							schemaChanger.tableID = table.ID
							if len(table.Mutations) == 0 {
								schemaChanger.mutationID = sqlbase.InvalidMutationID
							} else {
								schemaChanger.mutationID = table.Mutations[0].MutationID
							}
							schemaChanger.execAfter = execAfter

							// Keep track of this schema change.
							// Remove from oldSchemaChangers map.
							delete(oldSchemaChangers, table.ID)
							if sc, ok := s.schemaChangers[table.ID]; ok {
								if sc.mutationID == schemaChanger.mutationID {
									// Ignore duplicate.
									continue
								}
							}
							s.schemaChangers[table.ID] = schemaChanger
						}

					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				// Delete old schema changers.
				for k := range oldSchemaChangers {
					delete(s.schemaChangers, k)
				}
				timer = s.newTimer()

			case <-timer.C:
				if s.testingKnobs.AsyncExecNotification != nil &&
					s.testingKnobs.AsyncExecNotification() != nil {
					timer = s.newTimer()
					continue
				}
				for tableID, sc := range s.schemaChangers {
					if timeutil.Since(sc.execAfter) > 0 {
						err := sc.exec()
						if err != nil {
							if err == errExistingSchemaChangeLease {
								// A lease for this schema change is already held
								// elsewhere; retry after the next timer tick.
							} else if err == sqlbase.ErrDescriptorNotFound {
								// Someone deleted this table. Don't try to run the schema
								// changer again. Note that there's no gossip update for the
								// deletion which would remove this schemaChanger.
								delete(s.schemaChangers, tableID)
							} else {
								// We don't need to act on integrity
								// constraints violations because exec()
								// purges mutations that violate integrity
								// constraints.
								log.Warningf(context.TODO(), "Error executing schema change: %s", err)
							}
						}
						// Advance the execAfter time so that this schema
						// changer doesn't get called again for a while.
						sc.execAfter = timeutil.Now().Add(delay)
					}
					// Only attempt to run one schema changer.
					break
				}
				timer = s.newTimer()

			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// RefreshLeases starts a goroutine that refreshes the lease manager
// leases for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := gossip.RegisterSystemConfigChannel()
		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := gossip.GetSystemConfig()
				if m.testingKnobs.GossipUpdateEvent != nil {
					m.testingKnobs.GossipUpdateEvent(cfg)
				}
				// Read all tables and their versions
				if log.V(2) {
					log.Info(context.TODO(), "received a new config; will refresh leases")
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf(context.TODO(), "%s: unable to unmarshal descriptor %v",
							kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						table.MaybeUpgradeFormatVersion()
						if err := table.ValidateTable(); err != nil {
							log.Errorf(context.TODO(), "%s: received invalid table descriptor: %v",
								kv.Key, table)
							continue
						}
						if log.V(2) {
							log.Infof(context.TODO(),
								"%s: refreshing lease table: %d (%s), version: %d, deleted: %t",
								kv.Key, table.ID, table.Name, table.Version, table.Dropped())
						}
						// Try to refresh the table lease to one >= this version.
						if t := m.findTableState(table.ID, false /* create */); t != nil {
							if err := t.purgeOldLeases(
								db, table.Dropped(), table.Version, m.LeaseStore); err != nil {
								log.Warningf(context.TODO(), "error purging leases for table %d(%s): %s",
									table.ID, table.Name, err)
							}
						}
					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				if m.testingKnobs.TestingLeasesRefreshedEvent != nil {
					m.testingKnobs.TestingLeasesRefreshedEvent(cfg)
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
// start dials the remote addr and commences gossip once connected. Upon exit,
// the client is sent on the disconnected channel. This method starts client
// processing in a goroutine and returns immediately.
func (c *client) start(
	g *Gossip,
	disconnected chan *client,
	rpcCtx *rpc.Context,
	stopper *stop.Stopper,
	nodeID roachpb.NodeID,
	breaker *circuit.Breaker,
) {
	stopper.RunWorker(func() {
		ctx, cancel := context.WithCancel(c.AnnotateCtx(context.Background()))
		var wg sync.WaitGroup
		defer func() {
			// This closes the outgoing stream, causing any attempt to send or
			// receive to return an error.
			//
			// Note: it is still possible for incoming gossip to be processed after
			// this point.
			cancel()

			// The stream is closed, but there may still be some incoming gossip
			// being processed. Wait until that is complete to avoid racing the
			// client's removal against the discovery of its remote's node ID.
			wg.Wait()
			disconnected <- c
		}()

		consecFailures := breaker.ConsecFailures()
		var stream Gossip_GossipClient
		if err := breaker.Call(func() error {
			// Note: avoid using `grpc.WithBlock` here. This code is already
			// asynchronous from the caller's perspective, so the only effect of
			// `WithBlock` here is blocking shutdown - at the time of this writing,
			// that ends up making `kv` tests take twice as long.
			conn, err := rpcCtx.GRPCDial(c.addr.String())
			if err != nil {
				return err
			}
			if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil {
				return err
			}
			return c.requestGossip(g, stream)
		}, 0); err != nil {
			if consecFailures == 0 {
				log.Warningf(ctx, "node %d: failed to start gossip client: %s", nodeID, err)
			}
			return
		}

		// Start gossiping.
		log.Infof(ctx, "node %d: started gossip client to %s", nodeID, c.addr)
		if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil {
			if !grpcutil.IsClosedConnection(err) {
				g.mu.Lock()
				if c.peerID != 0 {
					log.Infof(ctx, "node %d: closing client to node %d (%s): %s",
						nodeID, c.peerID, c.addr, err)
				} else {
					log.Infof(ctx, "node %d: closing client to %s: %s", nodeID, c.addr, err)
				}
				g.mu.Unlock()
			}
		}
	})
}
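// Hedged summary sketch (not from the source): the lifecycle shared by the
// workers above, reduced to its core. A goroutine registered with RunWorker
// selects on its work channel and on ShouldStop (or ShouldQuiesce when it
// needs to react to draining), and runs individual units of work under
// RunTask so shutdown can wait for them. The exampleWorker name and workCh
// channel are placeholders.
func exampleWorker(stopper *stop.Stopper, workCh <-chan func()) {
	stopper.RunWorker(func() {
		for {
			select {
			case work := <-workCh:
				if stopper.RunTask(work) != nil {
					// The stopper is draining; stop accepting new work.
					return
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}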