// waitAndProcess waits for the pace interval and processes the replica // if repl is not nil. The method returns true when the scanner needs // to be stopped. The method also removes a replica from queues when it // is signaled via the removed channel. func (rs *replicaScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica) bool { waitInterval := rs.paceInterval(start, timeutil.Now()) rs.waitTimer.Reset(waitInterval) if log.V(6) { log.Infof("Wait time interval set to %s", waitInterval) } for { select { case <-rs.waitTimer.C: rs.waitTimer.Read = true if repl == nil { return false } return !stopper.RunTask(func() { // Try adding replica to all queues. for _, q := range rs.queues { q.MaybeAdd(repl, clock.Now()) } }) case repl := <-rs.removed: // Remove replica from all queues as applicable. for _, q := range rs.queues { q.MaybeRemove(repl) } if log.V(6) { log.Infof("removed replica %s", repl) } case <-stopper.ShouldStop(): return true } } }
// waitAndProcess waits for the pace interval and processes the replica // if repl is not nil. The method returns true when the scanner needs // to be stopped. The method also removes a replica from queues when it // is signaled via the removed channel. func (rs *replicaScanner) waitAndProcess( start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica, ) bool { waitInterval := rs.paceInterval(start, timeutil.Now()) rs.waitTimer.Reset(waitInterval) if log.V(6) { log.Infof(context.TODO(), "wait timer interval set to %s", waitInterval) } for { select { case <-rs.waitTimer.C: if log.V(6) { log.Infof(context.TODO(), "wait timer fired") } rs.waitTimer.Read = true if repl == nil { return false } return nil != stopper.RunTask(func() { // Try adding replica to all queues. for _, q := range rs.queues { q.MaybeAdd(repl, clock.Now()) } }) case repl := <-rs.removed: rs.removeReplica(repl) case <-stopper.ShouldStop(): return true } } }
// waitAndProcess waits for the pace interval and processes the range // if rng is not nil. The method returns true when the scanner needs // to be stopped. The method also removes a range from queues when it // is signaled via the removed channel. func (rs *rangeScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *stop.Stopper, rng *Replica) bool { waitInterval := rs.paceInterval(start, time.Now()) nextTime := time.After(waitInterval) if log.V(6) { log.Infof("Wait time interval set to %s", waitInterval) } for { select { case <-nextTime: if rng == nil { return false } return !stopper.RunTask(func() { // Try adding range to all queues. for _, q := range rs.queues { q.MaybeAdd(rng, clock.Now()) } }) case rng := <-rs.removed: // Remove range from all queues as applicable. for _, q := range rs.queues { q.MaybeRemove(rng) } if log.V(6) { log.Infof("removed range %s", rng) } case <-stopper.ShouldStop(): return true } } }
func (bq *baseQueue) processOne(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunTask(func() { start := time.Now() bq.Lock() rng := bq.pop() bq.Unlock() if rng != nil { now := clock.Now() if log.V(1) { log.Infof("processing range %s from %s queue...", rng, bq.name) } // If the queue requires the leader lease to process the // range, check whether this replica has leader lease and // renew or acquire if necessary. if bq.impl.needsLeaderLease() { // Create a "fake" get request in order to invoke redirectOnOrAcquireLease. args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}} if err := rng.redirectOnOrAcquireLeaderLease(nil /* Trace */, args.Header().Timestamp); err != nil { if log.V(1) { log.Infof("this replica of %s could not acquire leader lease; skipping...", rng) } return } } if err := bq.impl.process(now, rng); err != nil { log.Errorf("failure processing range %s from %s queue: %s", rng, bq.name, err) } if log.V(1) { log.Infof("processed range %s from %s queue in %s", rng, bq.name, time.Now().Sub(start)) } } }) }
// processLoop processes the entries in the queue until the provided // stopper signals exit. // // TODO(spencer): current load should factor into replica processing timer. func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunWorker(func() { defer func() { bq.mu.Lock() bq.mu.stopped = true bq.mu.Unlock() log.FinishEventLog(bq.ctx) }() // nextTime is initially nil; we don't start any timers until the queue // becomes non-empty. var nextTime <-chan time.Time immediately := make(chan time.Time) close(immediately) for { select { // Exit on stopper. case <-stopper.ShouldStop(): return // Incoming signal sets the next time to process if there were previously // no replicas in the queue. case <-bq.incoming: if nextTime == nil { // When a replica is added, wake up immediately. This is mainly // to facilitate testing without unnecessary sleeps. nextTime = immediately // In case we're in a test, still block on the impl. bq.impl.timer() } // Process replicas as the timer expires. case <-nextTime: repl := bq.pop() if repl != nil { if stopper.RunTask(func() { if err := bq.processReplica(repl, clock); err != nil { // Maybe add failing replica to purgatory if the queue supports it. bq.maybeAddToPurgatory(repl, err, clock, stopper) } }) != nil { return } } if bq.Length() == 0 { nextTime = nil } else { nextTime = time.After(bq.impl.timer()) } } } }) }
// processLoop processes the entries in the queue until the provided // stopper signals exit. // // TODO(spencer): current load should factor into replica processing timer. func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunWorker(func() { // nextTime is initially nil; we don't start any timers until the queue // becomes non-empty. var nextTime <-chan time.Time immediately := make(chan time.Time) close(immediately) for { select { // Incoming signal sets the next time to process if there were previously // no replicas in the queue. case <-bq.incoming: if nextTime == nil { // When a replica is added, wake up immediately. This is mainly // to facilitate testing without unnecessary sleeps. nextTime = immediately // In case we're in a test, still block on the impl. bq.impl.timer() } // Process replicas as the timer expires. case <-nextTime: stopper.RunTask(func() { bq.processOne(clock) }) if bq.Length() == 0 { nextTime = nil } else { nextTime = time.After(bq.impl.timer()) } // Exit on stopper. case <-stopper.ShouldStop(): bq.Lock() bq.replicas = map[proto.RangeID]*replicaItem{} bq.priorityQ = nil bq.Unlock() return } } }) }
// scanLoop loops endlessly, scanning through replicas available via // the replica set, or until the scanner is stopped. The iteration // is paced to complete a full scan in approximately the scan interval. func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunWorker(func() { start := timeutil.Now() // waitTimer is reset in each call to waitAndProcess. defer rs.waitTimer.Stop() for { if rs.GetDisabled() { if done := rs.waitEnabled(stopper); done { return } continue } var shouldStop bool count := 0 rs.replicas.Visit(func(repl *Replica) bool { count++ shouldStop = rs.waitAndProcess(start, clock, stopper, repl) return !shouldStop }) if count == 0 { // No replicas processed, just wait. shouldStop = rs.waitAndProcess(start, clock, stopper, nil) } shouldStop = shouldStop || nil != stopper.RunTask(func() { // Increment iteration count. rs.mu.Lock() defer rs.mu.Unlock() rs.mu.scanCount++ rs.mu.total += timeutil.Since(start) if log.V(6) { log.Infof(context.TODO(), "reset replica scan iteration") } // Reset iteration and start time. start = timeutil.Now() }) if shouldStop { return } } }) }
// scanLoop loops endlessly, scanning through ranges available via // the range set, or until the scanner is stopped. The iteration // is paced to complete a full scan in approximately the scan interval. func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunWorker(func() { start := time.Now() for { if rs.ranges.EstimatedCount() == 0 { // Just wait without processing any range. if rs.waitAndProcess(start, clock, stopper, nil) { break } } else { shouldStop := true rs.ranges.Visit(func(rng *Range) bool { shouldStop = rs.waitAndProcess(start, clock, stopper, rng) return !shouldStop }) if shouldStop { break } } if !stopper.RunTask(func() { // Increment iteration count. rs.completedScan.L.Lock() rs.count++ rs.total += time.Now().Sub(start) rs.completedScan.Broadcast() rs.completedScan.L.Unlock() if log.V(6) { log.Infof("reset range scan iteration") } // Reset iteration and start time. start = time.Now() }) { // Exit the loop break } } }) }
// scanLoop loops endlessly, scanning through replicas available via // the replica set, or until the scanner is stopped. The iteration // is paced to complete a full scan in approximately the scan interval. func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunWorker(func() { start := timeutil.Now() // waitTimer is reset in each call to waitAndProcess. defer rs.waitTimer.Stop() for { var shouldStop bool count := 0 rs.replicas.Visit(func(repl *Replica) bool { count++ shouldStop = rs.waitAndProcess(start, clock, stopper, repl) return !shouldStop }) if count == 0 { // No replicas processed, just wait. shouldStop = rs.waitAndProcess(start, clock, stopper, nil) } shouldStop = shouldStop || !stopper.RunTask(func() { // Increment iteration count. rs.completedScan.L.Lock() rs.count++ rs.total += timeutil.Now().Sub(start) rs.completedScan.Broadcast() rs.completedScan.L.Unlock() if log.V(6) { log.Infof("reset replica scan iteration") } // Reset iteration and start time. start = timeutil.Now() }) if shouldStop { return } } }) }
// scanLoop loops endlessly, scanning through replicas available via // the replica set, or until the scanner is stopped. The iteration // is paced to complete a full scan in approximately the scan interval. func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) { stopper.RunWorker(func() { start := time.Now() for { var shouldStop bool if rs.replicas.EstimatedCount() == 0 { // Just wait without processing any replica. shouldStop = rs.waitAndProcess(start, clock, stopper, nil) } else { shouldStop = true rs.replicas.Visit(func(repl *Replica) bool { shouldStop = rs.waitAndProcess(start, clock, stopper, repl) return !shouldStop }) } shouldStop = shouldStop || !stopper.RunTask(func() { // Increment iteration count. rs.completedScan.L.Lock() rs.count++ rs.total += time.Now().Sub(start) rs.completedScan.Broadcast() rs.completedScan.L.Unlock() if log.V(6) { log.Infof("reset replica scan iteration") } // Reset iteration and start time. start = time.Now() }) if shouldStop { return } } }) }
// maybeAddToPurgatory possibly adds the specified replica to the // purgatory queue, which holds replicas which have failed // processing. To be added, the failing error must implement // purgatoryError and the queue implementation must have its own // mechanism for signaling re-processing of replicas held in // purgatory. func (bq *baseQueue) maybeAddToPurgatory(repl *Replica, err error, clock *hlc.Clock, stopper *stop.Stopper) { // Check whether the failure is a purgatory error and whether the queue supports it. if _, ok := err.(purgatoryError); !ok || bq.impl.purgatoryChan() == nil { bq.eventLog.Errorf("%s: error: %v", repl, err) return } bq.mu.Lock() defer bq.mu.Unlock() // First, check whether the replica has already been re-added to queue. if _, ok := bq.mu.replicas[repl.RangeID]; ok { return } bq.eventLog.Infof(log.V(2), "%s (purgatory): error: %v", repl, err) item := &replicaItem{value: repl} bq.mu.replicas[repl.RangeID] = item // If purgatory already exists, just add to the map and we're done. if bq.mu.purgatory != nil { bq.mu.purgatory[repl.RangeID] = err return } // Otherwise, create purgatory and start processing. bq.mu.purgatory = map[roachpb.RangeID]error{ repl.RangeID: err, } stopper.RunWorker(func() { ticker := time.NewTicker(purgatoryReportInterval) for { select { case <-bq.impl.purgatoryChan(): // Remove all items from purgatory into a copied slice. bq.mu.Lock() repls := make([]*Replica, 0, len(bq.mu.purgatory)) for rangeID := range bq.mu.purgatory { item := bq.mu.replicas[rangeID] repls = append(repls, item.value) bq.remove(item) } bq.mu.Unlock() for _, repl := range repls { stopper.RunTask(func() { if err := bq.processReplica(repl, clock); err != nil { bq.maybeAddToPurgatory(repl, err, clock, stopper) } }) } bq.mu.Lock() if len(bq.mu.purgatory) == 0 { bq.eventLog.Infof(log.V(0), "purgatory is now empty") bq.mu.purgatory = nil bq.mu.Unlock() return } bq.mu.Unlock() case <-ticker.C: // Report purgatory status. bq.mu.Lock() errMap := map[string]int{} for _, err := range bq.mu.purgatory { errMap[err.Error()]++ } bq.mu.Unlock() for errStr, count := range errMap { bq.eventLog.Errorf("%d replicas failing with %q", count, errStr) } case <-stopper.ShouldStop(): return } } }) }