예제 #1
0
func (e *eventDemux) start(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			select {
			case event := <-e.events:
				switch event := event.(type) {
				case *EventLeaderElection:
					e.LeaderElection <- event

				case *EventCommandCommitted:
					e.CommandCommitted <- event

				case *EventMembershipChangeCommitted:
					e.MembershipChangeCommitted <- event

				default:
					panic(fmt.Sprintf("got unknown event type %T", event))
				}

			case <-stopper.ShouldStop():
				close(e.CommandCommitted)
				close(e.MembershipChangeCommitted)
				close(e.LeaderElection)
				return
			}
		}
	})
}
예제 #2
0
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// This method checks whether all gossip bootstrap hosts are
// connected, and whether the node itself is a bootstrap host, but
// there is still no sentinel gossip.
func (g *Gossip) maybeWarnAboutInit(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			Tag:         "check cluster initialization",
			Backoff:     5 * time.Second,  // first backoff at 5s
			MaxBackoff:  60 * time.Second, // max backoff is 60s
			Constant:    2,                // doubles
			MaxAttempts: 0,                // indefinite retries
			Stopper:     stopper,          // stop no matter what on stopper
		}
		// will never error because infinite retries
		_ = retry.WithBackoff(retryOptions, func() (retry.Status, error) {
			g.mu.Lock()
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				return retry.Break, nil
			}
			// Otherwise, if all bootstrap hosts are connected, warn.
			if g.triedAll {
				log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " +
					"Use \"cockroach init\" to initialize.")
			}
			return retry.Continue, nil
		})
	})
}
예제 #3
0
// bootstrap connects the node to the gossip network. Bootstrapping
// commences in the event there are no connected clients or the
// sentinel gossip info is not available. After a successful bootstrap
// connection, this method will block on the stalled condvar, which
// receives notifications that gossip network connectivity has been
// lost and requires re-bootstrapping.
func (g *Gossip) bootstrap(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			g.mu.Lock()
			if g.closed {
				g.mu.Unlock()
				return
			}
			// Check whether or not we need bootstrap.
			haveClients := g.outgoing.len() > 0
			haveSentinel := g.is.getInfo(KeySentinel) != nil
			if !haveClients || !haveSentinel {
				// Try to get another bootstrap address from the resolvers.
				if addr := g.getNextBootstrapAddress(); addr != nil {
					g.startClient(addr, g.bsRPCContext, stopper)
				}
			}
			g.mu.Unlock()

			// Block until we need bootstrapping again.
			select {
			case <-g.stalled:
				// continue
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
예제 #4
0
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// This method checks whether all gossip bootstrap hosts are
// connected, and whether the node itself is a bootstrap host, but
// there is still no sentinel gossip.
func (g *Gossip) maybeWarnAboutInit(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			InitialBackoff: 5 * time.Second,  // first backoff at 5s
			MaxBackoff:     60 * time.Second, // max backoff is 60s
			Multiplier:     2,                // doubles
			Stopper:        stopper,          // stop no matter what on stopper
		}
		// will never error because infinite retries
		for r := retry.Start(retryOptions); r.Next(); {
			g.mu.Lock()
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				break
			}
			// Otherwise, if all bootstrap hosts are connected, warn.
			if g.triedAll {
				log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " +
					"Use \"cockroach init\" to initialize.")
			}
		}
	})
}
예제 #5
0
// waitAndProcess waits for the pace interval and processes the range
// if rng is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a range from queues when it
// is signaled via the removed channel.
func (rs *rangeScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *util.Stopper,
	rng *Range) bool {
	waitInterval := rs.paceInterval(start, time.Now())
	nextTime := time.After(waitInterval)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", waitInterval)
	}
	for {
		select {
		case <-nextTime:
			if rng == nil {
				return false
			}
			if !stopper.StartTask() {
				return true
			}
			// Try adding range to all queues.
			for _, q := range rs.queues {
				q.MaybeAdd(rng, clock.Now())
			}
			stopper.FinishTask()
			return false
		case rng := <-rs.removed:
			// Remove range from all queues as applicable.
			for _, q := range rs.queues {
				q.MaybeRemove(rng)
			}
			if log.V(6) {
				log.Infof("removed range %s", rng)
			}
		case <-stopper.ShouldStop():
			return true
		}
	}
}
예제 #6
0
// scanLoop loops endlessly, scanning through ranges available via
// the range iterator, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *util.Stopper) {
	start := time.Now()
	stats := &storeStats{}

	for {
		elapsed := time.Now().Sub(start)
		remainingNanos := rs.interval.Nanoseconds() - elapsed.Nanoseconds()
		if remainingNanos < 0 {
			remainingNanos = 0
		}
		nextIteration := time.Duration(remainingNanos)
		if count := rs.iter.EstimatedCount(); count > 0 {
			nextIteration = time.Duration(remainingNanos / int64(count))
		}
		log.V(6).Infof("next range scan iteration in %s", nextIteration)

		select {
		case <-time.After(nextIteration):
			rng := rs.iter.Next()
			if rng != nil {
				// Try adding range to all queues.
				for _, q := range rs.queues {
					q.MaybeAdd(rng, clock.Now())
				}
				stats.RangeCount++
				stats.MVCC.Accumulate(rng.stats.GetMVCC())
			} else {
				// Otherwise, we're done with the iteration. Reset iteration and start time.
				rs.iter.Reset()
				start = time.Now()
				// Increment iteration counter.
				atomic.AddInt64(&rs.count, 1)
				// Store the most recent scan results in the scanner's stats.
				atomic.StorePointer(&rs.stats, unsafe.Pointer(stats))
				stats = &storeStats{}
				log.V(6).Infof("reset range scan iteration")
			}

		case rng := <-rs.removed:
			// Remove range from all queues as applicable.
			for _, q := range rs.queues {
				q.MaybeRemove(rng)
			}
			log.V(6).Infof("removed range %s", rng)

		case <-stopper.ShouldStop():
			// Exit the loop.
			stopper.SetStopped()
			return
		}
	}
}
예제 #7
0
// start runs the storage loop in a goroutine.
func (w *writeTask) start(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			var request *writeRequest
			select {
			case <-w.ready:
				continue
			case <-stopper.ShouldStop():
				return
			case request = <-w.in:
			}
			if log.V(6) {
				log.Infof("writeTask got request %#v", *request)
			}
			response := &writeResponse{make(map[proto.RaftID]*groupWriteResponse)}

			for groupID, groupReq := range request.groups {
				group := w.storage.GroupStorage(groupID)
				if group == nil {
					if log.V(4) {
						log.Infof("dropping write to group %v", groupID)
					}
					continue
				}
				groupResp := &groupWriteResponse{raftpb.HardState{}, -1, -1, groupReq.entries}
				response.groups[groupID] = groupResp
				if !raft.IsEmptyHardState(groupReq.state) {
					err := group.SetHardState(groupReq.state)
					if err != nil {
						panic(err) // TODO(bdarnell): mark this node dead on storage errors
					}
					groupResp.state = groupReq.state
				}
				if !raft.IsEmptySnap(groupReq.snapshot) {
					err := group.ApplySnapshot(groupReq.snapshot)
					if err != nil {
						panic(err) // TODO(bdarnell)
					}
				}
				if len(groupReq.entries) > 0 {
					err := group.Append(groupReq.entries)
					if err != nil {
						panic(err) // TODO(bdarnell)
					}
				}
			}
			w.out <- response
		}
	})
}
예제 #8
0
// manage manages outgoing clients. Periodically, the infostore is
// scanned for infos with hop count exceeding maxToleratedHops()
// threshold. If the number of outgoing clients doesn't exceed
// MaxPeers, a new gossip client is connected to a randomly selected
// peer beyond maxToleratedHops threshold. Otherwise, the least useful
// peer node is cut off to make room for a replacement. Disconnected
// clients are processed via the disconnected channel and taken out of
// the outgoing address set. If there are no longer any outgoing
// connections or the sentinel gossip is unavailable, the bootstrapper
// is notified via the stalled conditional variable.
func (g *Gossip) manage(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Loop until closed and there are no remaining outgoing connections.
		for {
			select {
			case <-stopper.ShouldStop():
				return
			case c := <-g.disconnected:
				g.doDisconnected(stopper, c)
			case <-time.After(g.jitteredGossipInterval()):
				g.doCheckTimeout(stopper)
			}
		}
	})
}
예제 #9
0
// startGossip loops on a periodic ticker to gossip node-related
// information. Starts a goroutine to loop until the node is closed.
func (n *Node) startGossip(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		ticker := time.NewTicker(gossipInterval)
		defer ticker.Stop()
		n.gossipCapacities() // one-off run before going to sleep
		for {
			select {
			case <-ticker.C:
				n.gossipCapacities()
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
예제 #10
0
// processEventsUntil reads and acknowledges messages from the given channel
// until either the given conditional returns true, the channel is closed or a
// read on the channel times out.
func processEventsUntil(ch <-chan *interceptMessage, stopper *util.Stopper, f func(*RaftMessageRequest) bool) {
	for {
		select {
		case e, ok := <-ch:
			if !ok {
				return
			}
			e.ack <- struct{}{}
			if f(e.args.(*RaftMessageRequest)) {
				return
			}
		case <-stopper.ShouldStop():
			return
		}
	}
}
예제 #11
0
// runHeartbeat sends periodic heartbeats to client. Closes the
// connection on error. Heartbeats are sent in an infinite loop until
// an error is encountered.
func (c *Client) runHeartbeat(stopper *util.Stopper) {
	if log.V(2) {
		log.Infof("client %s starting heartbeat", c.Addr())
	}

	for {
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(heartbeatInterval):
			if err := c.heartbeat(); err != nil {
				log.Infof("client %s heartbeat failed: %v; recycling...", c.Addr(), err)
				return
			}
		}
	}
}
예제 #12
0
// startPublishStatuses starts a loop which periodically instructs each store to
// publish its current status to the event feed.
func (n *Node) startPublishStatuses(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Publish status at the same frequency as metrics are collected.
		ticker := time.NewTicker(publishStatusInterval)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				err := n.publishStoreStatuses()
				if err != nil {
					log.Error(err)
				}
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
예제 #13
0
func (tq *testQueue) Start(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			select {
			case <-time.After(1 * time.Millisecond):
				tq.Lock()
				if !tq.disabled && len(tq.ranges) > 0 {
					tq.ranges = tq.ranges[1:]
					tq.processed++
				}
				tq.Unlock()
			case <-stopper.ShouldStop():
				tq.Lock()
				tq.done = true
				tq.Unlock()
				return
			}
		}
	})
}
예제 #14
0
// start initializes the infostore with the rpc server address and
// then begins processing connecting clients in an infinite select
// loop via goroutine. Periodically, clients connected and awaiting
// the next round of gossip are awoken via the conditional variable.
func (s *server) start(rpcServer *rpc.Server, stopper *util.Stopper) {
	s.is.NodeAddr = rpcServer.Addr()
	if err := rpcServer.RegisterName("Gossip", s); err != nil {
		log.Fatalf("unable to register gossip service with RPC server: %s", err)
	}
	rpcServer.AddCloseCallback(s.onClose)

	stopper.RunWorker(func() {
		// Periodically wakeup blocked client gossip requests.
		for {
			select {
			case <-time.After(s.jitteredGossipInterval()):
				// Wakeup all blocked gossip requests.
				s.ready.Broadcast()
			case <-stopper.ShouldStop():
				s.stop()
				return
			}
		}
	})
}
예제 #15
0
// MonitorRemoteOffsets periodically checks that the offset of this server's
// clock from the true cluster time is within MaxOffset. If the offset exceeds
// MaxOffset, then this method will trigger a fatal error, causing the node to
// suicide.
func (r *RemoteClockMonitor) MonitorRemoteOffsets(stopper *util.Stopper) {
	if log.V(1) {
		log.Infof("monitoring cluster offset")
	}
	for {
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(monitorInterval):
			offsetInterval, err := r.findOffsetInterval()
			// By the contract of the hlc, if the value is 0, then safety checking
			// of the max offset is disabled. However we may still want to
			// propagate the information to a status node.
			// TODO(embark): once there is a framework for collecting timeseries
			// data about the db, propagate the offset status to that.
			// Don't forget to protect r.offsets through the Mutex if those
			// Fatalf's below ever turn into something less destructive.
			if r.lClock.MaxOffset() != 0 {
				if err != nil {
					log.Fatalf("clock offset from the cluster time "+
						"for remote clocks %v could not be determined: %s",
						r.offsets, err)
				}

				if !isHealthyOffsetInterval(offsetInterval, r.lClock.MaxOffset()) {
					log.Fatalf("clock offset from the cluster time "+
						"for remote clocks: %v is in interval: %s, which "+
						"indicates that the true offset is greater than %s",
						r.offsets, offsetInterval, time.Duration(r.lClock.MaxOffset()))
				}
				if log.V(1) {
					log.Infof("healthy cluster offset: %s", offsetInterval)
				}
			}
			r.mu.Lock()
			r.lastMonitoredAt = r.lClock.PhysicalNow()
			r.mu.Unlock()
		}
	}
}
예제 #16
0
func (tq *testQueue) Start(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.Add(1)
	go func() {
		for {
			select {
			case <-time.After(1 * time.Millisecond):
				tq.Lock()
				if len(tq.ranges) > 0 {
					tq.ranges = tq.ranges[1:]
					tq.processed++
				}
				tq.Unlock()
			case <-stopper.ShouldStop():
				tq.Lock()
				tq.done = true
				tq.Unlock()
				stopper.SetStopped()
				return
			}
		}
	}()
}
예제 #17
0
// processLoop processes the entries in the queue until the provided
// stopper signals exit.
//
// TODO(spencer): current load should factor into range processing timer.
func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// nextTime is initially nil; we don't start any timers until the queue
		// becomes non-empty.
		var nextTime <-chan time.Time

		for {
			select {
			// Incoming signal sets the next time to process if there were previously
			// no ranges in the queue.
			case <-bq.incoming:
				if nextTime == nil {
					// When the first range is added, wake up immediately. This is
					// mainly to facilitate testing without unnecessary sleeps.
					nextTime = time.After(0 * time.Millisecond)
				}
			// Process ranges as the timer expires.
			case <-nextTime:
				bq.processOne(clock, stopper)
				if bq.Length() == 0 {
					nextTime = nil
				} else {
					nextTime = time.After(bq.impl.timer())
				}

			// Exit on stopper.
			case <-stopper.ShouldStop():
				bq.Lock()
				bq.ranges = map[proto.RaftID]*rangeItem{}
				bq.priorityQ = nil
				bq.Unlock()
				return
			}
		}
	})
}
예제 #18
0
// startStoresScanner will walk through all the stores in the node every
// ctx.ScanInterval and store the status in the db.
func (n *Node) startStoresScanner(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Pick the smaller of the two intervals.
		var minScanInterval time.Duration
		if n.ctx.ScanInterval <= n.ctx.ScanMaxIdleTime || n.ctx.ScanMaxIdleTime == 0 {
			minScanInterval = n.ctx.ScanInterval
		} else {
			minScanInterval = n.ctx.ScanMaxIdleTime
		}

		// TODO(bram): The number of stores is small. The node status should be
		// updated whenever a store status is updated.
		for interval := time.Duration(0); true; interval = minScanInterval {
			select {
			case <-time.After(interval):
				if !stopper.StartTask() {
					continue
				}
				// Walk through all the stores on this node.
				var rangeCount, leaderRangeCount, replicatedRangeCount, availableRangeCount int32
				stats := &engine.MVCCStats{}
				accessedStoreIDs := []proto.StoreID{}
				// will never error because `return nil` below
				_ = n.lSender.VisitStores(func(store *storage.Store) error {
					storeStatus, err := store.GetStatus()
					if err != nil {
						log.Error(err)
						return nil
					}
					if storeStatus == nil {
						// The store scanner hasn't run on this node yet.
						return nil
					}
					accessedStoreIDs = append(accessedStoreIDs, store.Ident.StoreID)
					rangeCount += storeStatus.RangeCount
					leaderRangeCount += storeStatus.LeaderRangeCount
					replicatedRangeCount += storeStatus.ReplicatedRangeCount
					availableRangeCount += storeStatus.AvailableRangeCount
					stats.Add(&storeStatus.Stats)
					return nil
				})

				// Store the combined stats in the db.
				now := n.ctx.Clock.Now().WallTime
				status := &NodeStatus{
					Desc:                 n.Descriptor,
					StoreIDs:             accessedStoreIDs,
					UpdatedAt:            now,
					StartedAt:            n.startedAt,
					RangeCount:           rangeCount,
					Stats:                *stats,
					LeaderRangeCount:     leaderRangeCount,
					ReplicatedRangeCount: replicatedRangeCount,
					AvailableRangeCount:  availableRangeCount,
				}
				key := keys.NodeStatusKey(int32(n.Descriptor.NodeID))
				if err := n.ctx.DB.Put(key, status); err != nil {
					log.Error(err)
				}
				// Increment iteration count.
				n.completedScan.L.Lock()
				n.scanCount++
				n.completedScan.Broadcast()
				n.completedScan.L.Unlock()
				if log.V(6) {
					log.Infof("store scan iteration completed")
				}
				stopper.FinishTask()
			case <-stopper.ShouldStop():
				// Exit the loop.
				return
			}
		}
	})
}
예제 #19
0
// gossip loops, sending deltas of the infostore and receiving deltas
// in turn. If an alternate is proposed on response, the client addr
// is modified and method returns for forwarding by caller.
func (c *client) gossip(g *Gossip, stopper *util.Stopper) error {
	localMaxSeq := int64(0)
	remoteMaxSeq := int64(-1)
	for {
		// Compute the delta of local node's infostore to send with request.
		g.mu.Lock()
		delta := g.is.delta(c.peerID, localMaxSeq)
		nodeID := g.is.NodeID // needs to be accessed with the lock held
		g.mu.Unlock()
		var deltaBytes []byte
		if delta != nil {
			localMaxSeq = delta.MaxSeq
			var buf bytes.Buffer
			if err := gob.NewEncoder(&buf).Encode(delta); err != nil {
				return util.Errorf("infostore could not be encoded: %s", err)
			}
			deltaBytes = buf.Bytes()
		}

		// Send gossip with timeout.
		args := &proto.GossipRequest{
			NodeID: nodeID,
			Addr:   *proto.FromNetAddr(g.is.NodeAddr),
			LAddr:  *proto.FromNetAddr(c.rpcClient.LocalAddr()),
			MaxSeq: remoteMaxSeq,
			Delta:  deltaBytes,
		}
		reply := &proto.GossipResponse{}
		gossipCall := c.rpcClient.Go("Gossip.Gossip", args, reply, nil)
		select {
		case <-gossipCall.Done:
			if gossipCall.Error != nil {
				return gossipCall.Error
			}
		case <-c.rpcClient.Closed:
			return util.Error("client closed")
		case <-c.closer:
			return nil
		case <-stopper.ShouldStop():
			return nil
		case <-time.After(g.interval * 10):
			return util.Errorf("timeout after: %s", g.interval*10)
		}

		// Handle remote forwarding.
		if reply.Alternate != nil {
			var err error
			if c.forwardAddr, err = reply.Alternate.NetAddr(); err != nil {
				return util.Errorf("unable to resolve alternate address: %s: %s", reply.Alternate, err)
			}
			return util.Errorf("received forward from %s to %s", c.addr, reply.Alternate)
		}

		// Combine remote node's infostore delta with ours.
		now := time.Now().UnixNano()
		if reply.Delta != nil {
			delta := &infoStore{}
			if err := gob.NewDecoder(bytes.NewBuffer(reply.Delta)).Decode(delta); err != nil {
				return util.Errorf("infostore could not be decoded: %s", err)
			}
			if delta.infoCount() > 0 {
				if log.V(1) {
					log.Infof("gossip: received %s", delta)
				} else {
					log.Infof("gossip: received %d info(s) from %s", delta.infoCount(), c.addr)
				}
			}
			g.mu.Lock()
			c.peerID = delta.NodeID
			g.outgoing.addNode(c.peerID)
			freshCount := g.is.combine(delta)
			if freshCount > 0 {
				c.lastFresh = now
			}
			remoteMaxSeq = delta.MaxSeq

			// If we have the sentinel gossip, we're considered connected.
			g.checkHasConnected()
			g.mu.Unlock()
		}

		// Check whether this outgoing client is duplicating work already
		// being done by an incoming client. To avoid mutual shutdown, we
		// only shutdown our client if our node ID is less than the peer's.
		if g.hasIncoming(c.peerID) && nodeID < c.peerID {
			return util.Errorf("stopping outgoing client %d @ %s; already have incoming", c.peerID, c.addr)
		}
		// Check whether peer node is too boring--disconnect if yes.
		if nodeID != c.peerID && (now-c.lastFresh) > int64(maxWaitForNewGossip) {
			return util.Errorf("peer is too boring")
		}
	}
}