Example 1
// waitAndProcess waits for the pace interval and processes the range
// if rng is not nil. It returns true when the scanner should stop. It
// also removes ranges from the queues as they are signaled via the
// removed channel.
func (rs *rangeScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *util.Stopper,
	rng *Range) bool {
	waitInterval := rs.paceInterval(start, time.Now())
	nextTime := time.After(waitInterval)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", waitInterval)
	}
	for {
		select {
		case <-nextTime:
			if rng == nil {
				return false
			}
			if !stopper.StartTask() {
				return true
			}
			// Try adding range to all queues.
			for _, q := range rs.queues {
				q.MaybeAdd(rng, clock.Now())
			}
			stopper.FinishTask()
			return false
		case rng := <-rs.removed:
			// Remove the range from all queues, as applicable. Note that
			// this rng shadows the method's rng parameter.
			for _, q := range rs.queues {
				q.MaybeRemove(rng)
			}
			if log.V(6) {
				log.Infof("removed range %s", rng)
			}
		case <-stopper.ShouldStop():
			return true
		}
	}
}
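
This and the later examples all bracket their work with stopper.StartTask() and stopper.FinishTask() so that shutdown can drain in-flight work, and they treat a false return from StartTask as the signal to bail out. For reference, here is a minimal, self-contained sketch of that guard pattern; taskStopper is a hypothetical stripped-down stand-in, not the real util.Stopper API.

package main

import (
	"fmt"
	"sync"
)

// taskStopper is a hypothetical, stripped-down analogue of util.Stopper:
// StartTask refuses new work once Stop has been called, and Stop blocks
// until every admitted task has called FinishTask.
type taskStopper struct {
	mu       sync.Mutex
	draining bool
	tasks    sync.WaitGroup
	stopped  chan struct{}
}

func newTaskStopper() *taskStopper {
	return &taskStopper{stopped: make(chan struct{})}
}

// StartTask registers one unit of in-flight work, returning false if
// the stopper is already draining.
func (s *taskStopper) StartTask() bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	if s.draining {
		return false
	}
	s.tasks.Add(1)
	return true
}

// FinishTask marks one admitted task as complete.
func (s *taskStopper) FinishTask() { s.tasks.Done() }

// ShouldStop returns a channel that is closed once Stop is called.
func (s *taskStopper) ShouldStop() <-chan struct{} { return s.stopped }

// Stop begins draining, signals ShouldStop, and waits for all
// in-flight tasks to finish.
func (s *taskStopper) Stop() {
	s.mu.Lock()
	s.draining = true
	s.mu.Unlock()
	close(s.stopped)
	s.tasks.Wait()
}

func main() {
	s := newTaskStopper()
	if s.StartTask() {
		go func() {
			defer s.FinishTask()
			fmt.Println("doing guarded work")
		}()
	}
	s.Stop() // returns only after the guarded goroutine has finished
}

Because Stop refuses new tasks before waiting, a false return from StartTask means the caller should exit rather than retry.
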
Example 2
// initStores initializes the Stores map from id to Store. Stores are
// added to the local sender if already bootstrapped. A bootstrapped
// Store has a valid ident with cluster, node and Store IDs set. If
// the Store doesn't yet have a valid ident, it's added to the
// bootstraps list for initialization once the cluster and node IDs
// have been determined.
func (n *Node) initStores(engines []engine.Engine, stopper *util.Stopper) error {
	bootstraps := list.New()

	if len(engines) == 0 {
		return util.Error("no engines")
	}
	for _, e := range engines {
		s := storage.NewStore(n.ctx, e, &n.Descriptor)
		// Initialize each store in turn, handling un-bootstrapped errors by
		// adding the store to the bootstraps list.
		if err := s.Start(stopper); err != nil {
			if _, ok := err.(*storage.NotBootstrappedError); ok {
				log.Infof("store %s not bootstrapped", s)
				bootstraps.PushBack(s)
				continue
			}
			return util.Errorf("failed to start store: %s", err)
		}
		if s.Ident.ClusterID == "" || s.Ident.NodeID == 0 {
			return util.Errorf("unidentified store: %s", s)
		}
		capacity, err := s.Capacity()
		if err != nil {
			return util.Errorf("could not query store capacity: %s", err)
		}
		log.Infof("initialized store %s: %+v", s, capacity)
		n.lSender.AddStore(s)
	}

	// Verify all initialized stores agree on cluster and node IDs.
	if err := n.validateStores(); err != nil {
		return err
	}

	// Connect gossip before starting bootstrap. For new nodes, connecting
	// to the gossip network is necessary to get the cluster ID.
	n.connectGossip()

	// If no NodeID has been assigned yet, allocate a new node ID by
	// supplying 0 to initNodeID.
	if n.Descriptor.NodeID == 0 {
		n.initNodeID(0)
	}

	// Bootstrap any uninitialized stores asynchronously.
	if bootstraps.Len() > 0 && stopper.StartTask() {
		go func() {
			n.bootstrapStores(bootstraps, stopper)
			stopper.FinishTask()
		}()
	}

	return nil
}
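
The classify-and-defer flow above (push stores that fail with NotBootstrappedError onto a container/list, then drain the list once cluster and node IDs are known) can be shown in isolation. A hypothetical sketch with invented store and errNotBootstrapped stand-ins, not the storage package's real types:

package main

import (
	"container/list"
	"errors"
	"fmt"
)

// errNotBootstrapped stands in for storage.NotBootstrappedError.
var errNotBootstrapped = errors.New("not bootstrapped")

// store is a hypothetical stand-in for storage.Store.
type store struct {
	name         string
	bootstrapped bool
}

func (s *store) start() error {
	if !s.bootstrapped {
		return errNotBootstrapped
	}
	return nil
}

func main() {
	stores := []*store{{name: "a", bootstrapped: true}, {name: "b"}}
	bootstraps := list.New()

	for _, s := range stores {
		if err := s.start(); err != nil {
			if errors.Is(err, errNotBootstrapped) {
				// Defer this store; it gets initialized once the
				// cluster and node IDs are known.
				bootstraps.PushBack(s)
				continue
			}
			fmt.Println("fatal:", err)
			return
		}
		fmt.Println("initialized store", s.name)
	}

	// Later, once the IDs have been determined, drain the list.
	for e := bootstraps.Front(); e != nil; e = e.Next() {
		s := e.Value.(*store)
		s.bootstrapped = true
		fmt.Println("bootstrapped store", s.name)
	}
}
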
Example 3
// scanLoop repeatedly scans through the ranges available via the
// range set until the scanner is stopped. The iteration is paced to
// complete a full scan in approximately the scan interval.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		start := time.Now()
		stats := &storeStats{}

		for {
			if rs.ranges.EstimatedCount() == 0 {
				// Just wait without processing any range.
				if rs.waitAndProcess(start, clock, stopper, stats, nil) {
					break
				}
			} else {
				shouldStop := true
				rs.ranges.Visit(func(rng *Range) bool {
					shouldStop = rs.waitAndProcess(start, clock, stopper, stats, rng)
					return !shouldStop
				})
				if shouldStop {
					break
				}
			}

			if !stopper.StartTask() {
				// Exit the loop.
				break
			}

			// We're done with the iteration.
			// Store the most recent scan results in the scanner's stats.
			atomic.StorePointer(&rs.stats, unsafe.Pointer(stats))
			stats = &storeStats{}
			if rs.scanFn != nil {
				rs.scanFn()
			}
			// Increment iteration count.
			rs.completedScan.L.Lock()
			rs.count++
			rs.total += time.Since(start)
			rs.completedScan.Broadcast()
			rs.completedScan.L.Unlock()
			if log.V(6) {
				log.Infof("reset range scan iteration")
			}

			// Reset iteration and start time.
			start = time.Now()
			stopper.FinishTask()
		}
	})
}
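
The atomic.StorePointer call above publishes the freshly built storeStats snapshot so that readers can pick it up without taking a lock. A minimal sketch of that publish/load pattern, assuming the stats field is an unsafe.Pointer holding a *storeStats:

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type storeStats struct {
	rangeCount int
}

type scanner struct {
	stats unsafe.Pointer // *storeStats, accessed atomically
}

// publish swaps in a newly built snapshot. The snapshot must not be
// mutated after publication.
func (s *scanner) publish(st *storeStats) {
	atomic.StorePointer(&s.stats, unsafe.Pointer(st))
}

// load returns the most recently published snapshot.
func (s *scanner) load() *storeStats {
	return (*storeStats)(atomic.LoadPointer(&s.stats))
}

func main() {
	s := &scanner{}
	s.publish(&storeStats{rangeCount: 42})
	fmt.Println(s.load().rangeCount) // readers never take a lock
}

On current Go, atomic.Pointer[storeStats] expresses the same thing without unsafe.
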
Example 4
// scanLoop repeatedly scans through the ranges available via the
// range set until the scanner is stopped. The iteration is paced to
// complete a full scan in approximately the scan interval.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		start := time.Now()

		for {
			if rs.ranges.EstimatedCount() == 0 {
				// Just wait without processing any range.
				if rs.waitAndProcess(start, clock, stopper, nil) {
					break
				}
			} else {
				shouldStop := true
				rs.ranges.Visit(func(rng *Range) bool {
					shouldStop = rs.waitAndProcess(start, clock, stopper, rng)
					return !shouldStop
				})
				if shouldStop {
					break
				}
			}

			if !stopper.StartTask() {
				// Exit the loop.
				break
			}

			// Increment iteration count.
			rs.completedScan.L.Lock()
			rs.count++
			rs.total += time.Since(start)
			rs.completedScan.Broadcast()
			rs.completedScan.L.Unlock()
			if log.V(6) {
				log.Infof("reset range scan iteration")
			}

			// Reset iteration and start time.
			start = time.Now()
			stopper.FinishTask()
		}
	})
}
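
Both versions of scanLoop end an iteration by incrementing a counter under completedScan's lock and broadcasting, which lets another goroutine (a test, typically) block until a scan has completed. A self-contained sketch of that condition-variable handshake; scanSignal is an invented name:

package main

import (
	"fmt"
	"sync"
)

// scanSignal mimics the completedScan pattern: a counter guarded by a
// sync.Cond that is broadcast each time an iteration completes.
type scanSignal struct {
	cond  *sync.Cond
	count int
}

func newScanSignal() *scanSignal {
	return &scanSignal{cond: sync.NewCond(&sync.Mutex{})}
}

// complete records one finished scan and wakes all waiters.
func (s *scanSignal) complete() {
	s.cond.L.Lock()
	s.count++
	s.cond.Broadcast()
	s.cond.L.Unlock()
}

// waitFor blocks until at least n scans have completed.
func (s *scanSignal) waitFor(n int) {
	s.cond.L.Lock()
	for s.count < n {
		s.cond.Wait()
	}
	s.cond.L.Unlock()
}

func main() {
	sig := newScanSignal()
	done := make(chan struct{})
	go func() {
		sig.waitFor(1)
		fmt.Println("first scan observed")
		close(done)
	}()
	sig.complete()
	<-done
}
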
Example 5
func (bq *baseQueue) processOne(clock *hlc.Clock, stopper *util.Stopper) {
	if !stopper.StartTask() {
		return
	}
	defer stopper.FinishTask()

	start := time.Now()
	bq.Lock()
	rng := bq.pop()
	bq.Unlock()
	if rng != nil {
		now := clock.Now()
		if log.V(1) {
			log.Infof("processing range %s from %s queue...", rng, bq.name)
		}
		// If the queue requires the leader lease to process the
		// range, check whether this replica has the leader lease,
		// renewing or acquiring it if necessary.
		if bq.impl.needsLeaderLease() {
			// Create a "fake" get request in order to invoke redirectOnOrAcquireLeaderLease.
			args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}}
			if err := rng.redirectOnOrAcquireLeaderLease(args.Header().Timestamp); err != nil {
				if log.V(1) {
					log.Infof("this replica of %s could not acquire leader lease; skipping...", rng)
				}
				return
			}
		}
		if err := bq.impl.process(now, rng); err != nil {
			log.Errorf("failure processing range %s from %s queue: %s", rng, bq.name, err)
		}
		if log.V(1) {
			log.Infof("processed range %s from %s queue in %s", rng, bq.name, time.Now().Sub(start))
		}
	}
}
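
Note how processOne holds bq's lock only for the pop and does all processing outside it, so other goroutines can add to the queue while a range is being processed. A stripped-down sketch of that lock-scope discipline, using a hypothetical string-valued queue:

package main

import (
	"fmt"
	"sync"
)

// queue is a hypothetical stand-in for baseQueue: a mutex-guarded slice.
type queue struct {
	mu    sync.Mutex
	items []string
}

// pop removes and returns the next item, or "" if the queue is empty.
// The lock is held only for the removal, never during processing.
func (q *queue) pop() string {
	q.mu.Lock()
	defer q.mu.Unlock()
	if len(q.items) == 0 {
		return ""
	}
	item := q.items[0]
	q.items = q.items[1:]
	return item
}

func (q *queue) processOne() {
	item := q.pop()
	if item == "" {
		return
	}
	// Processing happens outside the lock, mirroring the structure of
	// baseQueue.processOne above.
	fmt.Println("processing", item)
}

func main() {
	q := &queue{items: []string{"range-1"}}
	q.processOne()
}
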
Example 6
// close sends resolve intent commands for all key ranges this
// transaction has covered, clears the keys cache and closes the
// metadata heartbeat. Any keys listed in the resolved slice have
// already been resolved and do not receive resolve intent commands.
func (tm *txnMetadata) close(txn *proto.Transaction, resolved []proto.Key, sender client.Sender, stopper *util.Stopper) {
	close(tm.txnEnd) // stop heartbeat
	if tm.keys.Len() > 0 {
		if log.V(2) {
			log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn)
		}
	}
	for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) {
		// If the op was range based, end key != start key: resolve a range.
		var call proto.Call
		key := o.Key.Start().(proto.Key)
		endKey := o.Key.End().(proto.Key)
		if !key.Next().Equal(endKey) {
			call.Args = &proto.InternalResolveIntentRangeRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					EndKey:    endKey,
					User:      storage.UserRoot,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentRangeResponse{}
		} else {
			// Check if the key has already been resolved; skip if yes.
			found := false
			for _, k := range resolved {
				if key.Equal(k) {
					found = true
					break
				}
			}
			if found {
				continue
			}
			call.Args = &proto.InternalResolveIntentRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					User:      storage.UserRoot,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentResponse{}
		}
		// We don't care about the reply channel; these are best
		// effort. We simply fire and forget, each in its own goroutine.
		if stopper.StartTask() {
			go func() {
				if log.V(2) {
					log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn)
				}
				sender.Send(context.TODO(), call)
				if call.Reply.Header().Error != nil {
					log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError())
				}
				stopper.FinishTask()
			}()
		}
	}
	tm.keys.Clear()
}
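
The fire-and-forget goroutines above are safe to launch in a loop because call is declared inside the loop body, so each goroutine captures its own value rather than a shared variable. A small sketch of that per-iteration capture pattern (the WaitGroup is only there so the demo can observe the output before exiting):

package main

import (
	"fmt"
	"sync"
)

func main() {
	keys := []string{"a", "b", "c"}
	var wg sync.WaitGroup

	for _, key := range keys {
		// Like `var call proto.Call` above, call is declared anew in
		// each iteration, so every goroutine captures its own value.
		// (Before Go 1.22, capturing the range variable key directly
		// was the classic shared-variable pitfall.)
		call := fmt.Sprintf("resolve intent %q", key)
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println(call) // best effort: fire and forget
		}()
	}
	wg.Wait()
}
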
Example 7
// startStoresScanner walks through all the stores on the node every
// ctx.ScanInterval and stores their combined status in the DB.
func (n *Node) startStoresScanner(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Pick the smaller of the two intervals.
		var minScanInterval time.Duration
		if n.ctx.ScanInterval <= n.ctx.ScanMaxIdleTime || n.ctx.ScanMaxIdleTime == 0 {
			minScanInterval = n.ctx.ScanInterval
		} else {
			minScanInterval = n.ctx.ScanMaxIdleTime
		}

		// TODO(bram): The number of stores is small. The node status should be
		// updated whenever a store status is updated.
		for interval := time.Duration(0); true; interval = minScanInterval {
			select {
			case <-time.After(interval):
				if !stopper.StartTask() {
					continue
				}
				// Walk through all the stores on this node.
				var rangeCount, leaderRangeCount, replicatedRangeCount, availableRangeCount int32
				stats := &engine.MVCCStats{}
				accessedStoreIDs := []proto.StoreID{}
				// VisitStores never returns an error because the visitor
				// below always returns nil.
				_ = n.lSender.VisitStores(func(store *storage.Store) error {
					storeStatus, err := store.GetStatus()
					if err != nil {
						log.Error(err)
						return nil
					}
					if storeStatus == nil {
						// The store scanner hasn't run on this node yet.
						return nil
					}
					accessedStoreIDs = append(accessedStoreIDs, store.Ident.StoreID)
					rangeCount += storeStatus.RangeCount
					leaderRangeCount += storeStatus.LeaderRangeCount
					replicatedRangeCount += storeStatus.ReplicatedRangeCount
					availableRangeCount += storeStatus.AvailableRangeCount
					stats.Add(&storeStatus.Stats)
					return nil
				})

				// Store the combined stats in the db.
				now := n.ctx.Clock.Now().WallTime
				status := &NodeStatus{
					Desc:                 n.Descriptor,
					StoreIDs:             accessedStoreIDs,
					UpdatedAt:            now,
					StartedAt:            n.startedAt,
					RangeCount:           rangeCount,
					Stats:                *stats,
					LeaderRangeCount:     leaderRangeCount,
					ReplicatedRangeCount: replicatedRangeCount,
					AvailableRangeCount:  availableRangeCount,
				}
				key := keys.NodeStatusKey(int32(n.Descriptor.NodeID))
				if err := n.ctx.DB.Put(key, status); err != nil {
					log.Error(err)
				}
				// Increment iteration count.
				n.completedScan.L.Lock()
				n.scanCount++
				n.completedScan.Broadcast()
				n.completedScan.L.Unlock()
				if log.V(6) {
					log.Infof("store scan iteration completed")
				}
				stopper.FinishTask()
			case <-stopper.ShouldStop():
				// Exit the loop.
				return
			}
		}
	})
}
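
The loop header for interval := time.Duration(0); true; interval = minScanInterval is a small idiom: the first iteration fires immediately, and every later iteration waits the full interval. A self-contained sketch of the same select loop (the intervals and the stop timer are invented for the demo):

package main

import (
	"fmt"
	"time"
)

func main() {
	stop := make(chan struct{})
	go func() {
		time.Sleep(50 * time.Millisecond)
		close(stop)
	}()

	const scanInterval = 10 * time.Millisecond
	// A zero first interval makes the initial iteration run
	// immediately; subsequent iterations wait the full interval.
	for interval := time.Duration(0); ; interval = scanInterval {
		select {
		case <-time.After(interval):
			fmt.Println("scan at", time.Now().Format("15:04:05.000"))
		case <-stop:
			return
		}
	}
}
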