Exemplo n.º 1
0
// waitAndProcess arms the scanner's wait timer with the current pace
// interval and then blocks until one of three things happens:
//
//   - the timer fires: if repl is nil the method returns false;
//     otherwise repl is offered to every queue inside a stopper task
//     and the method returns true only if RunTask returned false;
//   - a replica arrives on the removed channel: it is removed from
//     every queue and the wait continues;
//   - the stopper signals a stop: the method returns true.
//
// A true return value tells the caller that the scanner must stop.
func (rs *replicaScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *stop.Stopper,
	repl *Replica) bool {
	pace := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(pace)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", pace)
	}

	// offerToQueues gives every queue the chance to accept repl.
	offerToQueues := func() {
		for _, q := range rs.queues {
			q.MaybeAdd(repl, clock.Now())
		}
	}

	for {
		select {
		case <-stopper.ShouldStop():
			return true

		case gone := <-rs.removed:
			// Drop the removed replica from every queue, then keep waiting.
			for _, q := range rs.queues {
				q.MaybeRemove(gone)
			}
			if log.V(6) {
				log.Infof("removed replica %s", gone)
			}

		case <-rs.waitTimer.C:
			// Record that the timer's channel has been read.
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}
			ran := stopper.RunTask(offerToQueues)
			return !ran
		}
	}
}
Exemplo n.º 2
0
// waitAndProcess arms the scanner's wait timer with the current pace
// interval and then blocks until one of three things happens:
//
//   - the timer fires: if repl is nil the method returns false;
//     otherwise repl is offered to every queue inside a stopper task
//     and the method returns true only if RunTask returned an error;
//   - a replica arrives on the removed channel: it is handed to
//     removeReplica and the wait continues;
//   - the stopper signals a stop: the method returns true.
//
// A true return value tells the caller that the scanner must stop.
func (rs *replicaScanner) waitAndProcess(
	start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica,
) bool {
	pace := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(pace)
	if log.V(6) {
		log.Infof(context.TODO(), "wait timer interval set to %s", pace)
	}

	// offerToQueues gives every queue the chance to accept repl.
	offerToQueues := func() {
		for _, q := range rs.queues {
			q.MaybeAdd(repl, clock.Now())
		}
	}

	for {
		select {
		case <-stopper.ShouldStop():
			return true

		case gone := <-rs.removed:
			rs.removeReplica(gone)

		case <-rs.waitTimer.C:
			if log.V(6) {
				log.Infof(context.TODO(), "wait timer fired")
			}
			// Record that the timer's channel has been read.
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}
			err := stopper.RunTask(offerToQueues)
			return err != nil
		}
	}
}
Exemplo n.º 3
0
// waitAndProcess starts a one-shot timer for the current pace interval
// and then blocks until one of three things happens:
//
//   - the timer fires: if rng is nil the method returns false;
//     otherwise rng is offered to every queue inside a stopper task
//     and the method returns true only if RunTask returned false;
//   - a range arrives on the removed channel: it is removed from
//     every queue and the wait continues;
//   - the stopper signals a stop: the method returns true.
//
// A true return value tells the caller that the scanner must stop.
func (rs *rangeScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *stop.Stopper,
	rng *Replica) bool {
	pace := rs.paceInterval(start, time.Now())
	fire := time.After(pace)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", pace)
	}

	// offerToQueues gives every queue the chance to accept rng.
	offerToQueues := func() {
		for _, q := range rs.queues {
			q.MaybeAdd(rng, clock.Now())
		}
	}

	for {
		select {
		case <-stopper.ShouldStop():
			return true

		case gone := <-rs.removed:
			// Drop the removed range from every queue, then keep waiting.
			for _, q := range rs.queues {
				q.MaybeRemove(gone)
			}
			if log.V(6) {
				log.Infof("removed range %s", gone)
			}

		case <-fire:
			if rng == nil {
				return false
			}
			ran := stopper.RunTask(offerToQueues)
			return !ran
		}
	}
}
Exemplo n.º 4
0
// processOne pops a single range off the queue and processes it, all
// inside a stopper task. If the queue yields no range the task does
// nothing. When the queue implementation requires the leader lease,
// the lease is checked/acquired first and the range is skipped if that
// fails. A processing error is logged; the elapsed time is logged at
// verbosity 1 regardless of whether processing returned an error.
func (bq *baseQueue) processOne(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunTask(func() {
		begin := time.Now()

		// Hold the queue lock only long enough to pop the next range.
		bq.Lock()
		rng := bq.pop()
		bq.Unlock()
		if rng == nil {
			return
		}

		now := clock.Now()
		if log.V(1) {
			log.Infof("processing range %s from %s queue...", rng, bq.name)
		}

		if bq.impl.needsLeaderLease() {
			// Build a "fake" get request so that its header timestamp can be
			// passed to redirectOnOrAcquireLeaderLease.
			args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}}
			leaseErr := rng.redirectOnOrAcquireLeaderLease(nil /* Trace */, args.Header().Timestamp)
			if leaseErr != nil {
				if log.V(1) {
					log.Infof("this replica of %s could not acquire leader lease; skipping...", rng)
				}
				return
			}
		}

		if procErr := bq.impl.process(now, rng); procErr != nil {
			log.Errorf("failure processing range %s from %s queue: %s", rng, bq.name, procErr)
		}
		if log.V(1) {
			log.Infof("processed range %s from %s queue in %s", rng, bq.name, time.Since(begin))
		}
	})
}
Exemplo n.º 5
0
// processLoop starts a stopper worker that drains the queue until the
// stopper signals exit or a processing task cannot be run. When the
// worker exits it marks the queue as stopped and finishes its event log.
//
// TODO(spencer): current load should factor into replica processing timer.
func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		defer func() {
			bq.mu.Lock()
			bq.mu.stopped = true
			bq.mu.Unlock()
			log.FinishEventLog(bq.ctx)
		}()

		// A closed channel is always ready to receive from, so assigning it
		// to wake makes the processing case fire on the next select.
		alreadyFired := make(chan time.Time)
		close(alreadyFired)

		// wake stays nil (and therefore never ready) while the queue is
		// empty; no timer is started until something is enqueued.
		var wake <-chan time.Time

		for {
			select {
			// The timer expired: process the next replica, if any.
			case <-wake:
				if repl := bq.pop(); repl != nil {
					taskErr := stopper.RunTask(func() {
						procErr := bq.processReplica(repl, clock)
						if procErr != nil {
							// Maybe add failing replica to purgatory if the queue supports it.
							bq.maybeAddToPurgatory(repl, procErr, clock, stopper)
						}
					})
					if taskErr != nil {
						return
					}
				}
				if bq.Length() != 0 {
					wake = time.After(bq.impl.timer())
				} else {
					wake = nil
				}

			// Something was enqueued: only act if no wake-up is pending.
			case <-bq.incoming:
				if wake != nil {
					continue
				}
				// Wake up immediately. This is mainly to facilitate testing
				// without unnecessary sleeps.
				wake = alreadyFired

				// In case we're in a test, still block on the impl.
				bq.impl.timer()

			// Exit on stopper.
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Exemplo n.º 6
0
// processLoop starts a stopper worker that drains the queue until the
// stopper signals exit. On exit the queue's replica map is replaced
// with an empty map and its priority queue is set to nil.
//
// TODO(spencer): current load should factor into replica processing timer.
func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		// A closed channel is always ready to receive from, so assigning it
		// to wake makes the processing case fire on the next select.
		alreadyFired := make(chan time.Time)
		close(alreadyFired)

		// wake stays nil (and therefore never ready) while the queue is
		// empty; no timer is started until something is enqueued.
		var wake <-chan time.Time

		// processNext handles a single queue entry.
		processNext := func() {
			bq.processOne(clock)
		}

		for {
			select {
			// Exit on stopper, discarding whatever is still queued.
			case <-stopper.ShouldStop():
				bq.Lock()
				bq.replicas = map[proto.RangeID]*replicaItem{}
				bq.priorityQ = nil
				bq.Unlock()
				return

			// The timer expired: process one entry and re-arm if needed.
			case <-wake:
				stopper.RunTask(processNext)
				if bq.Length() != 0 {
					wake = time.After(bq.impl.timer())
				} else {
					wake = nil
				}

			// Something was enqueued: only act if no wake-up is pending.
			case <-bq.incoming:
				if wake != nil {
					continue
				}
				// Wake up immediately. This is mainly to facilitate testing
				// without unnecessary sleeps.
				wake = alreadyFired

				// In case we're in a test, still block on the impl.
				bq.impl.timer()
			}
		}
	})
}
Exemplo n.º 7
0
// scanLoop starts a stopper worker that repeatedly walks the replica
// set, calling waitAndProcess once per replica so that the walk is
// paced. While the scanner is disabled the worker blocks in
// waitEnabled instead. After every pass the scan count and cumulative
// scan time are updated and the pass start time is reset. The worker
// exits when waitEnabled or waitAndProcess reports that the scanner
// should stop, or when the bookkeeping task cannot be run.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		start := timeutil.Now()

		// waitTimer is reset in each call to waitAndProcess.
		defer rs.waitTimer.Stop()

		// finishPass records the completed pass and begins timing the next one.
		finishPass := func() {
			rs.mu.Lock()
			defer rs.mu.Unlock()
			rs.mu.scanCount++
			rs.mu.total += timeutil.Since(start)
			if log.V(6) {
				log.Infof(context.TODO(), "reset replica scan iteration")
			}

			// Reset iteration and start time.
			start = timeutil.Now()
		}

		for {
			if rs.GetDisabled() {
				if rs.waitEnabled(stopper) {
					return
				}
				continue
			}

			visited := 0
			stopping := false
			rs.replicas.Visit(func(repl *Replica) bool {
				visited++
				stopping = rs.waitAndProcess(start, clock, stopper, repl)
				return !stopping
			})
			if visited == 0 {
				// No replicas processed, just wait.
				stopping = rs.waitAndProcess(start, clock, stopper, nil)
			}
			if stopping {
				return
			}

			if err := stopper.RunTask(finishPass); err != nil {
				return
			}
		}
	})
}
Exemplo n.º 8
0
// scanLoop loops endlessly, scanning through ranges available via
// the range set, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
//
// Each pass calls waitAndProcess once per visited range. If the visit
// callback is never invoked during a pass, the loop waits for one pace
// interval with a nil range instead. After every pass the iteration
// count and cumulative scan time are updated, waiters on completedScan
// are woken, and the pass start time is reset. The worker exits when
// waitAndProcess reports that the scanner should stop or when RunTask
// returns false.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		start := time.Now()

		for {
			// shouldStop must start out false: it is only meaningful once
			// waitAndProcess has actually been called. Starting it at true
			// would terminate the scanner whenever Visit found nothing to
			// visit, even though nobody asked it to stop.
			var shouldStop bool
			visited := 0
			rs.ranges.Visit(func(rng *Range) bool {
				visited++
				shouldStop = rs.waitAndProcess(start, clock, stopper, rng)
				return !shouldStop
			})
			if visited == 0 {
				// No ranges processed, just wait.
				shouldStop = rs.waitAndProcess(start, clock, stopper, nil)
			}
			if shouldStop {
				return
			}

			if !stopper.RunTask(func() {
				// Increment iteration count.
				rs.completedScan.L.Lock()
				rs.count++
				rs.total += time.Since(start)
				rs.completedScan.Broadcast()
				rs.completedScan.L.Unlock()
				if log.V(6) {
					log.Infof("reset range scan iteration")
				}

				// Reset iteration and start time.
				start = time.Now()
			}) {
				// The task could not be run; exit the loop.
				return
			}
		}
	})
}
Exemplo n.º 9
0
// scanLoop starts a stopper worker that repeatedly walks the replica
// set, calling waitAndProcess once per replica so that the walk is
// paced. If a pass visits no replica, it waits for one pace interval
// with a nil replica instead. After every pass the iteration count and
// cumulative scan time are updated, waiters on completedScan are
// woken, and the pass start time is reset. The worker exits when
// waitAndProcess reports that the scanner should stop or when RunTask
// returns false.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		start := timeutil.Now()

		// waitTimer is reset in each call to waitAndProcess.
		defer rs.waitTimer.Stop()

		// finishPass records the completed pass and begins timing the next one.
		finishPass := func() {
			rs.completedScan.L.Lock()
			rs.count++
			rs.total += timeutil.Now().Sub(start)
			rs.completedScan.Broadcast()
			rs.completedScan.L.Unlock()
			if log.V(6) {
				log.Infof("reset replica scan iteration")
			}

			// Reset iteration and start time.
			start = timeutil.Now()
		}

		for {
			visited := 0
			stopping := false
			rs.replicas.Visit(func(repl *Replica) bool {
				visited++
				stopping = rs.waitAndProcess(start, clock, stopper, repl)
				return !stopping
			})
			if visited == 0 {
				// No replicas processed, just wait.
				stopping = rs.waitAndProcess(start, clock, stopper, nil)
			}
			if stopping {
				return
			}

			if !stopper.RunTask(finishPass) {
				return
			}
		}
	})
}
Exemplo n.º 10
0
// scanLoop loops endlessly, scanning through replicas available via
// the replica set, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
//
// Each pass calls waitAndProcess once per visited replica. If the visit
// callback is never invoked during a pass, the loop waits for one pace
// interval with a nil replica instead. After every pass the iteration
// count and cumulative scan time are updated, waiters on completedScan
// are woken, and the pass start time is reset. The worker exits when
// waitAndProcess reports that the scanner should stop or when RunTask
// returns false.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		start := time.Now()

		for {
			// shouldStop must start out false: it is only meaningful once
			// waitAndProcess has actually been called. Starting it at true
			// would terminate the scanner whenever Visit found nothing to
			// visit, even though nobody asked it to stop.
			var shouldStop bool
			visited := 0
			rs.replicas.Visit(func(repl *Replica) bool {
				visited++
				shouldStop = rs.waitAndProcess(start, clock, stopper, repl)
				return !shouldStop
			})
			if visited == 0 {
				// No replicas processed, just wait.
				shouldStop = rs.waitAndProcess(start, clock, stopper, nil)
			}

			// Skip the bookkeeping task entirely once a stop was requested.
			shouldStop = shouldStop || !stopper.RunTask(func() {
				// Increment iteration count.
				rs.completedScan.L.Lock()
				rs.count++
				rs.total += time.Since(start)
				rs.completedScan.Broadcast()
				rs.completedScan.L.Unlock()
				if log.V(6) {
					log.Infof("reset replica scan iteration")
				}

				// Reset iteration and start time.
				start = time.Now()
			})
			if shouldStop {
				return
			}
		}
	})
}
Exemplo n.º 11
0
// maybeAddToPurgatory possibly adds the specified replica to the
// purgatory queue, which holds replicas which have failed
// processing. To be added, the failing error must implement
// purgatoryError and the queue implementation must have its own
// mechanism for signaling re-processing of replicas held in
// purgatory.
//
// If the error does not qualify, it is written to the event log and
// nothing else happens. If the replica is already tracked by the queue,
// the call is a no-op. The first replica to enter purgatory starts a
// stopper worker that re-processes all purgatory replicas whenever the
// implementation's purgatory channel fires, periodically reports the
// failure reasons, and exits once purgatory is empty or the stopper
// signals a stop.
func (bq *baseQueue) maybeAddToPurgatory(repl *Replica, err error, clock *hlc.Clock, stopper *stop.Stopper) {
	// Check whether the failure is a purgatory error and whether the queue supports it.
	if _, ok := err.(purgatoryError); !ok || bq.impl.purgatoryChan() == nil {
		bq.eventLog.Errorf("%s: error: %v", repl, err)
		return
	}
	bq.mu.Lock()
	defer bq.mu.Unlock()

	// First, check whether the replica has already been re-added to queue.
	if _, ok := bq.mu.replicas[repl.RangeID]; ok {
		return
	}

	bq.eventLog.Infof(log.V(2), "%s (purgatory): error: %v", repl, err)

	item := &replicaItem{value: repl}
	bq.mu.replicas[repl.RangeID] = item

	// If purgatory already exists, just add to the map and we're done.
	if bq.mu.purgatory != nil {
		bq.mu.purgatory[repl.RangeID] = err
		return
	}

	// Otherwise, create purgatory and start processing.
	bq.mu.purgatory = map[roachpb.RangeID]error{
		repl.RangeID: err,
	}

	stopper.RunWorker(func() {
		ticker := time.NewTicker(purgatoryReportInterval)
		// Release the ticker on every exit path of the worker (purgatory
		// drained or stopper shutdown); a ticker keeps running until Stop
		// is called.
		defer ticker.Stop()
		for {
			select {
			case <-bq.impl.purgatoryChan():
				// Remove all items from purgatory into a copied slice.
				bq.mu.Lock()
				repls := make([]*Replica, 0, len(bq.mu.purgatory))
				for rangeID := range bq.mu.purgatory {
					item := bq.mu.replicas[rangeID]
					repls = append(repls, item.value)
					bq.remove(item)
				}
				bq.mu.Unlock()
				// Re-process outside the lock; a replica that fails again is
				// handed back to maybeAddToPurgatory.
				for _, repl := range repls {
					stopper.RunTask(func() {
						if err := bq.processReplica(repl, clock); err != nil {
							bq.maybeAddToPurgatory(repl, err, clock, stopper)
						}
					})
				}
				bq.mu.Lock()
				if len(bq.mu.purgatory) == 0 {
					bq.eventLog.Infof(log.V(0), "purgatory is now empty")
					bq.mu.purgatory = nil
					bq.mu.Unlock()
					return
				}
				bq.mu.Unlock()
			case <-ticker.C:
				// Report purgatory status: count replicas per distinct error
				// string under the lock, then log outside of it.
				bq.mu.Lock()
				errMap := map[string]int{}
				for _, err := range bq.mu.purgatory {
					errMap[err.Error()]++
				}
				bq.mu.Unlock()
				for errStr, count := range errMap {
					bq.eventLog.Errorf("%d replicas failing with %q", count, errStr)
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}