Beispiel #1
0
// start launches a worker on the stopper that drains e.events and forwards
// each event to the output channel matching its concrete type. An event of
// any other type causes a panic. When the stopper signals shutdown, the three
// output channels are closed and the worker returns.
func (e *eventDemux) start(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			select {
			case <-stopper.ShouldStop():
				// Closing the outputs lets consumers observe the shutdown.
				close(e.CommandCommitted)
				close(e.MembershipChangeCommitted)
				close(e.LeaderElection)
				return

			case raw := <-e.events:
				switch ev := raw.(type) {
				case *EventCommandCommitted:
					e.CommandCommitted <- ev
				case *EventMembershipChangeCommitted:
					e.MembershipChangeCommitted <- ev
				case *EventLeaderElection:
					e.LeaderElection <- ev
				default:
					panic(fmt.Sprintf("got unknown event type %T", ev))
				}
			}
		}
	})
}
Beispiel #2
0
// bootstrap runs a worker that keeps the node attached to the gossip
// network. On every pass, while holding g.mu, it exits if the gossip
// instance is closed; otherwise, when there are no outgoing clients or the
// sentinel info is absent, it asks for the next bootstrap address and, if
// one is returned, starts a client for it. The worker then blocks until
// either a value arrives on g.stalled (triggering another pass) or the
// stopper signals shutdown.
func (g *Gossip) bootstrap(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			g.mu.Lock()
			if g.closed {
				g.mu.Unlock()
				return
			}
			// Decide whether another bootstrap attempt is required.
			connected := g.outgoing.len() > 0
			sentinelKnown := g.is.getInfo(KeySentinel) != nil
			if !(connected && sentinelKnown) {
				// Ask the resolvers for one more address to try.
				if addr := g.getNextBootstrapAddress(); addr != nil {
					g.startClient(addr, g.bsRPCContext, stopper)
				}
			}
			g.mu.Unlock()

			// Sleep until connectivity stalls again or we are told to stop.
			select {
			case <-stopper.ShouldStop():
				return
			case <-g.stalled:
				// Fall through to the next pass.
			}
		}
	})
}
Beispiel #3
0
// Start launches the scanner. It registers one unit of work with the
// stopper via stopper.Add(1), starts every queue in rs.queues with the same
// clock and stopper, and finally runs rs.scanLoop in a new goroutine.
// NOTE(review): the scan loop is presumably terminated by stopping the
// supplied stopper — confirm against scanLoop.
func (rs *rangeScanner) Start(clock *hlc.Clock, stopper *util.Stopper) {
	// Account for the scanLoop goroutine before it is spawned.
	stopper.Add(1)
	for _, queue := range rs.queues {
		queue.Start(clock, stopper)
	}
	go rs.scanLoop(clock, stopper)
}
Beispiel #4
0
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// The check runs in a worker on the stopper. After an initial 5s delay
// it repeatedly (with exponential backoff from 5s up to 60s) inspects the
// gossip state: once the sentinel info is present the worker exits; while
// it is missing and g.triedAll is set, a warning is logged. The worker
// also exits when the stopper signals shutdown.
func (g *Gossip) maybeWarnAboutInit(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			InitialBackoff: 5 * time.Second,  // first backoff at 5s
			MaxBackoff:     60 * time.Second, // max backoff is 60s
			Multiplier:     2,                // doubles
			Stopper:        stopper,          // stop no matter what on stopper
		}
		// The loop ends when the sentinel shows up or the retry helper
		// stops yielding attempts.
		for r := retry.Start(retryOptions); r.Next(); {
			// Snapshot both pieces of state inside one critical section.
			// Reading g.triedAll after releasing g.mu would be an
			// unsynchronized read of a field that sits alongside
			// mutex-guarded gossip state.
			// NOTE(review): triedAll is presumably written under g.mu by
			// the bootstrap path — confirm.
			g.mu.Lock()
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			triedAll := g.triedAll
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				break
			}
			// Otherwise, if all bootstrap hosts have been tried, warn.
			if triedAll {
				log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " +
					"Use \"cockroach init\" to initialize.")
			}
		}
	})
}
Beispiel #5
0
// start brings the node up. In order, it:
//   - initializes the node descriptor from the RPC server address and attrs;
//   - registers the node under the RPC service name "Node" (a registration
//     failure is fatal via log.Fatalf);
//   - starts the status monitor on the context's event feed and registers
//     that feed as a closer on the stopper;
//   - initializes the stores for the supplied engines, returning any error
//     from initStores to the caller;
//   - hands the node ID to the status monitor and creates the node event
//     feed;
//   - records the start time and launches the stores scanner, the status
//     publisher and the gossip loop on the stopper.
//
// Returns nil on success.
func (n *Node) start(rpcServer *rpc.Server, engines []engine.Engine,
	attrs proto.Attributes, stopper *util.Stopper) error {
	n.initDescriptor(rpcServer.Addr(), attrs)
	if err := rpcServer.RegisterName("Node", (*nodeServer)(n)); err != nil {
		log.Fatalf("unable to register node service with RPC server: %s", err)
	}

	// Start status monitor.
	n.status.StartMonitorFeed(n.ctx.EventFeed)
	stopper.AddCloser(n.ctx.EventFeed)

	// Initialize stores, including bootstrapping new ones.
	if err := n.initStores(engines, stopper); err != nil {
		return err
	}

	// Pass NodeID to status monitor - this value is initialized in initStores,
	// but the StatusMonitor must be active before initStores.
	n.status.SetNodeID(n.Descriptor.NodeID)

	// Initialize publisher for Node Events.
	n.feed = status.NewNodeEventFeed(n.Descriptor.NodeID, n.ctx.EventFeed)

	n.startedAt = n.ctx.Clock.Now().WallTime
	n.startStoresScanner(stopper)
	n.startPublishStatuses(stopper)
	n.startGossip(stopper)
	// NOTE(review): the first %v receives the engines slice itself rather
	// than its length — confirm that printing the engines is intended.
	log.Infoc(n.context(), "Started node with %v engine(s) and attributes %v", engines, attrs.Attrs)
	return nil
}
Beispiel #6
0
// start brings the node up. In order, it:
//   - initializes the node descriptor from the RPC server address and attrs;
//   - registers the node under the RPC service name "Node" (a registration
//     failure is fatal via log.Fatalf);
//   - starts the status monitor on the context's event feed and registers
//     that feed as a closer on the stopper;
//   - initializes the stores for the supplied engines, returning any error
//     from initStores to the caller;
//   - records the start time, creates the node event feed and publishes the
//     node-start event on it;
//   - launches the stores scanner, the status publisher and the gossip loop
//     on the stopper.
//
// Returns nil on success.
func (n *Node) start(rpcServer *rpc.Server, engines []engine.Engine,
	attrs proto.Attributes, stopper *util.Stopper) error {
	n.initDescriptor(rpcServer.Addr(), attrs)
	if err := rpcServer.RegisterName("Node", (*nodeServer)(n)); err != nil {
		log.Fatalf("unable to register node service with RPC server: %s", err)
	}

	// Start status monitor.
	n.status.StartMonitorFeed(n.ctx.EventFeed)
	stopper.AddCloser(n.ctx.EventFeed)

	// Initialize stores, including bootstrapping new ones.
	if err := n.initStores(engines, stopper); err != nil {
		return err
	}

	// Captured before the feed is created so StartNode can report it.
	n.startedAt = n.ctx.Clock.Now().WallTime

	// Initialize publisher for Node Events. This requires the NodeID, which is
	// initialized by initStores(); because of this, some Store initialization
	// events will precede the StartNodeEvent on the feed.
	n.feed = status.NewNodeEventFeed(n.Descriptor.NodeID, n.ctx.EventFeed)
	n.feed.StartNode(n.Descriptor, n.startedAt)

	n.startStoresScanner(stopper)
	n.startPublishStatuses(stopper)
	n.startGossip(stopper)
	// NOTE(review): the first %v receives the engines slice itself rather
	// than its length — confirm that printing the engines is intended.
	log.Infoc(n.context(), "Started node with %v engine(s) and attributes %v", engines, attrs.Attrs)
	return nil
}
Beispiel #7
0
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// The check runs in a worker on the stopper. After an initial 5s delay
// it repeatedly (with backoff from 5s up to 60s, unlimited attempts)
// inspects the gossip state: once the sentinel info is present the retry
// loop is broken; while it is missing and g.triedAll is set, a warning is
// logged. The stopper is passed to the retry options so the loop also ends
// on shutdown.
func (g *Gossip) maybeWarnAboutInit(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			Tag:         "check cluster initialization",
			Backoff:     5 * time.Second,  // first backoff at 5s
			MaxBackoff:  60 * time.Second, // max backoff is 60s
			Constant:    2,                // doubles
			MaxAttempts: 0,                // indefinite retries
			Stopper:     stopper,          // stop no matter what on stopper
		}
		// will never error because infinite retries
		_ = retry.WithBackoff(retryOptions, func() (retry.Status, error) {
			// Snapshot both pieces of state inside one critical section.
			// Reading g.triedAll after releasing g.mu would be an
			// unsynchronized read of a field that sits alongside
			// mutex-guarded gossip state.
			// NOTE(review): triedAll is presumably written under g.mu by
			// the bootstrap path — confirm.
			g.mu.Lock()
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			triedAll := g.triedAll
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				return retry.Break, nil
			}
			// Otherwise, if all bootstrap hosts have been tried, warn.
			if triedAll {
				log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " +
					"Use \"cockroach init\" to initialize.")
			}
			return retry.Continue, nil
		})
	})
}
Beispiel #8
0
// waitForStopper calls Stop on the given stopper and then waits for its
// IsStopped channel to fire. If that does not happen within five seconds
// the test is failed via t.Fatalf.
func waitForStopper(t testing.TB, stopper *util.Stopper) {
	const stopTimeout = 5 * time.Second

	stopper.Stop()
	select {
	case <-stopper.IsStopped():
		// Stopped in time; nothing else to do.
		return
	case <-time.After(stopTimeout):
		t.Fatalf("Stopper failed to stop after 5 seconds")
	}
}
Beispiel #9
0
// initStores creates and starts one Store per engine. A store that starts
// successfully must carry a cluster ID and node ID in its ident; it is then
// registered with the local sender. A store whose Start reports a
// NotBootstrappedError is set aside for later bootstrapping. Any other
// failure aborts initialization with an error, as does an empty engine list.
//
// After the stores are validated against each other, gossip is connected, a
// node ID is allocated if the descriptor does not have one yet, and any
// stores set aside are bootstrapped in a goroutine tracked as a stopper task.
func (n *Node) initStores(engines []engine.Engine, stopper *util.Stopper) error {
	if len(engines) == 0 {
		return util.Error("no engines")
	}

	pending := list.New()
	for _, eng := range engines {
		store := storage.NewStore(n.ctx, eng, &n.Descriptor)
		// Start the store; an un-bootstrapped store is not an error here,
		// it is simply queued for bootstrapping further below.
		err := store.Start(stopper)
		if _, notBootstrapped := err.(*storage.NotBootstrappedError); notBootstrapped {
			log.Infof("store %s not bootstrapped", store)
			pending.PushBack(store)
			continue
		}
		if err != nil {
			return util.Errorf("failed to start store: %s", err)
		}
		if store.Ident.ClusterID == "" || store.Ident.NodeID == 0 {
			return util.Errorf("unidentified store: %s", store)
		}
		capacity, err := store.Capacity()
		if err != nil {
			return util.Errorf("could not query store capacity: %s", err)
		}
		log.Infof("initialized store %s: %+v", store, capacity)
		n.lSender.AddStore(store)
	}

	// All started stores have to agree on cluster and node IDs.
	if err := n.validateStores(); err != nil {
		return err
	}

	// Gossip must be connected before bootstrapping: a new node learns the
	// cluster ID through the gossip network.
	n.connectGossip()

	// A zero NodeID means none was assigned yet; passing 0 to initNodeID
	// requests allocation of a fresh one.
	if n.Descriptor.NodeID == 0 {
		n.initNodeID(0)
	}

	// Bootstrap the stores set aside above without blocking the caller. The
	// work is skipped when the stopper refuses to start a new task.
	if pending.Len() > 0 && stopper.StartTask() {
		go func() {
			n.bootstrapStores(pending, stopper)
			stopper.FinishTask()
		}()
	}

	return nil
}
Beispiel #10
0
// scanLoop walks the ranges exposed by rs.iter until the stopper signals
// shutdown. Each step waits for the time remaining in the scan interval
// divided by the estimated number of ranges, so that a full pass takes
// roughly rs.interval. Ranges received on rs.removed are offered to every
// queue for removal. When the iterator is exhausted the pass counter is
// incremented, the accumulated stats are published and a new pass begins.
// On shutdown the stopper is notified via SetStopped.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *util.Stopper) {
	passStart := time.Now()
	passStats := &storeStats{}

	for {
		// Time left in this pass, clamped at zero.
		remaining := rs.interval.Nanoseconds() - time.Since(passStart).Nanoseconds()
		if remaining < 0 {
			remaining = 0
		}
		pace := time.Duration(remaining)
		if n := rs.iter.EstimatedCount(); n > 0 {
			pace = time.Duration(remaining / int64(n))
		}
		log.V(6).Infof("next range scan iteration in %s", pace)

		select {
		case <-stopper.ShouldStop():
			// Report completion to the stopper and leave.
			stopper.SetStopped()
			return

		case gone := <-rs.removed:
			// Give every queue the chance to drop the removed range.
			for _, q := range rs.queues {
				q.MaybeRemove(gone)
			}
			log.V(6).Infof("removed range %s", gone)

		case <-time.After(pace):
			rng := rs.iter.Next()
			if rng == nil {
				// Pass finished: rewind the iterator and restart the clock.
				rs.iter.Reset()
				passStart = time.Now()
				// Count the completed pass.
				atomic.AddInt64(&rs.count, 1)
				// Publish this pass's stats and begin a fresh accumulator.
				atomic.StorePointer(&rs.stats, unsafe.Pointer(passStats))
				passStats = &storeStats{}
				log.V(6).Infof("reset range scan iteration")
				continue
			}
			// Offer the range to every queue and fold it into the stats.
			for _, q := range rs.queues {
				q.MaybeAdd(rng, clock.Now())
			}
			passStats.RangeCount++
			passStats.MVCC.Accumulate(rng.stats.GetMVCC())
		}
	}
}
Beispiel #11
0
// start launches a worker on the stopper that services write requests.
// Each request received on w.in is applied group by group: groups without
// storage are skipped, and for the others the hard state, snapshot and log
// entries are written when non-empty. Any storage error panics. The
// per-group results are collected into a writeResponse which is sent on
// w.out. A receive on w.ready merely restarts the wait; a stop signal ends
// the worker.
func (w *writeTask) start(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			var req *writeRequest
			select {
			case req = <-w.in:
				// Handled below.
			case <-stopper.ShouldStop():
				return
			case <-w.ready:
				continue
			}
			if log.V(6) {
				log.Infof("writeTask got request %#v", *req)
			}

			resp := &writeResponse{make(map[proto.RaftID]*groupWriteResponse)}
			for id, gReq := range req.groups {
				gs := w.storage.GroupStorage(id)
				if gs == nil {
					if log.V(4) {
						log.Infof("dropping write to group %v", id)
					}
					continue
				}

				gResp := &groupWriteResponse{raftpb.HardState{}, -1, -1, gReq.entries}
				resp.groups[id] = gResp

				if !raft.IsEmptyHardState(gReq.state) {
					if err := gs.SetHardState(gReq.state); err != nil {
						panic(err) // TODO(bdarnell): mark this node dead on storage errors
					}
					// Echo the persisted state back to the requester.
					gResp.state = gReq.state
				}
				if !raft.IsEmptySnap(gReq.snapshot) {
					if err := gs.ApplySnapshot(gReq.snapshot); err != nil {
						panic(err) // TODO(bdarnell)
					}
				}
				if len(gReq.entries) > 0 {
					if err := gs.Append(gReq.entries); err != nil {
						panic(err) // TODO(bdarnell)
					}
				}
			}
			w.out <- resp
		}
	})
}
Beispiel #12
0
// NewMultiRaft builds a MultiRaft for the given node ID. It rejects a zero
// node ID and any configuration that fails config.validate.
//
// The caller's config is modified in place before being copied into the
// result: a missing Ticker is replaced by a new one (registered as a closer
// on the stopper), and a non-nil EntryFormatter is wrapped so that it
// receives only the command payload and its output is prefixed with the
// command id.
//
// Finally the instance is registered with the transport; a Listen failure
// is returned to the caller.
// NOTE(review): the multi-node created by raft.StartMultiNode is not torn
// down in this function when Listen fails — confirm whether that leaks.
func NewMultiRaft(nodeID proto.RaftNodeID, config *Config, stopper *util.Stopper) (*MultiRaft, error) {
	if nodeID == 0 {
		return nil, util.Error("Invalid RaftNodeID")
	}
	if err := config.validate(); err != nil {
		return nil, err
	}

	if config.Ticker == nil {
		config.Ticker = newTicker(config.TickInterval)
		stopper.AddCloser(config.Ticker)
	}

	if inner := config.EntryFormatter; inner != nil {
		// Strip the command id before delegating to the user's formatter.
		config.EntryFormatter = func(data []byte) string {
			if len(data) == 0 {
				return "[empty]"
			}
			id, cmd := decodeCommand(data)
			return fmt.Sprintf("%x: %s", id, inner(cmd))
		}
	}

	mr := &MultiRaft{
		Config:    *config,
		stopper:   stopper,
		multiNode: raft.StartMultiNode(uint64(nodeID)),
		nodeID:    nodeID,

		// Outbound events; buffered per configuration.
		Events: make(chan interface{}, config.EventBufferSize),

		// Inbound operations; all unbuffered.
		reqChan:         make(chan *RaftMessageRequest),
		createGroupChan: make(chan *createGroupOp),
		removeGroupChan: make(chan *removeGroupOp),
		proposalChan:    make(chan *proposal),
		callbackChan:    make(chan func()),
	}

	err := mr.Transport.Listen(nodeID, (*multiraftServer)(mr))
	if err != nil {
		return nil, err
	}
	return mr, nil
}
Beispiel #13
0
// manage runs a worker that supervises outgoing gossip clients until the
// stopper signals shutdown. It reacts to two events: a client arriving on
// g.disconnected is handed to doDisconnected, and the expiry of a jittered
// gossip interval triggers doCheckTimeout.
// NOTE(review): peer culling, replacement and stall signalling presumably
// live in those two helpers — confirm there.
func (g *Gossip) manage(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			select {
			case <-time.After(g.jitteredGossipInterval()):
				g.doCheckTimeout(stopper)
			case lost := <-g.disconnected:
				g.doDisconnected(stopper, lost)
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Beispiel #14
0
// startGossip runs a worker that gossips the node's capacities once
// immediately and then again on every tick of a gossipInterval ticker,
// until the stopper signals shutdown. The ticker is stopped when the worker
// returns.
func (n *Node) startGossip(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		tick := time.NewTicker(gossipInterval)
		defer tick.Stop()

		// First round happens right away rather than after one interval.
		n.gossipCapacities()
		for {
			select {
			case <-stopper.ShouldStop():
				return
			case <-tick.C:
				n.gossipCapacities()
			}
		}
	})
}
Beispiel #15
0
// processEventsUntil receives messages from ch, acknowledging each one on
// its ack channel before passing its arguments to f. It returns as soon as
// f reports true, ch is closed, or the stopper signals shutdown. There is
// no timeout of its own. A message whose args are not a *RaftMessageRequest
// causes a panic from the type assertion.
func processEventsUntil(ch <-chan *interceptMessage, stopper *util.Stopper, f func(*RaftMessageRequest) bool) {
	for {
		select {
		case <-stopper.ShouldStop():
			return
		case msg, open := <-ch:
			if !open {
				return
			}
			// Acknowledge first so the sender is released before f runs.
			msg.ack <- struct{}{}
			if done := f(msg.args.(*RaftMessageRequest)); done {
				return
			}
		}
	}
}
Beispiel #16
0
// runHeartbeat issues a heartbeat every heartbeatInterval. It returns when
// the stopper signals shutdown or when a heartbeat fails; the failure is
// logged. This function does not close the connection itself.
func (c *Client) runHeartbeat(stopper *util.Stopper) {
	if log.V(2) {
		log.Infof("client %s starting heartbeat", c.Addr())
	}

	for {
		select {
		case <-time.After(heartbeatInterval):
			err := c.heartbeat()
			if err == nil {
				continue
			}
			log.Infof("client %s heartbeat failed: %v; recycling...", c.Addr(), err)
			return
		case <-stopper.ShouldStop():
			return
		}
	}
}
Beispiel #17
0
// startPublishStatuses runs a worker that calls publishStoreStatuses on
// every tick of a publishStatusInterval ticker until the stopper signals
// shutdown. Errors from publishing are logged and do not end the loop.
func (n *Node) startPublishStatuses(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// The publishing cadence is governed by publishStatusInterval.
		tick := time.NewTicker(publishStatusInterval)
		defer tick.Stop()

		for {
			select {
			case <-stopper.ShouldStop():
				return
			case <-tick.C:
				if err := n.publishStoreStatuses(); err != nil {
					log.Error(err)
				}
			}
		}
	})
}
Beispiel #18
0
// Start runs a worker that, once per millisecond, pops the first queued
// range and bumps the processed counter, provided the queue is not disabled
// and is non-empty. When the stopper signals shutdown the queue is marked
// done and the worker returns. All state is accessed under the queue's lock.
func (tq *testQueue) Start(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		for {
			select {
			case <-stopper.ShouldStop():
				tq.Lock()
				tq.done = true
				tq.Unlock()
				return
			case <-time.After(1 * time.Millisecond):
				tq.Lock()
				if canProcess := !tq.disabled && len(tq.ranges) > 0; canProcess {
					tq.ranges = tq.ranges[1:]
					tq.processed++
				}
				tq.Unlock()
			}
		}
	})
}
Beispiel #19
0
// start runs the gossip client in a worker on the stopper and returns
// immediately. The worker creates an RPC client for c.addr and waits for it
// to become ready or closed. If it closes first, c.err is set and the client
// is pushed onto done. Otherwise gossiping runs until it returns; its result
// is stored in c.err, the RPC client is closed when that result is an error,
// and the client is pushed onto done.
func (c *client) start(g *Gossip, done chan *client, context *rpc.Context, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		c.rpcClient = rpc.NewClient(c.addr, nil, context)

		select {
		case <-c.rpcClient.Closed:
			c.err = util.Errorf("gossip client failed to connect")
			done <- c
			return
		case <-c.rpcClient.Ready:
			// Connected.
		}

		// Gossip until disconnect or error.
		c.lastFresh = time.Now().UnixNano()
		if c.err = c.gossip(g, stopper); c.err != nil {
			c.rpcClient.Close()
		}
		done <- c
	})
}
Beispiel #20
0
// start records the RPC server's address in the infostore, registers the
// gossip server under the RPC service name "Gossip" (fatal on failure) and
// installs s.onClose as a close callback on the RPC server. It then runs a
// worker that broadcasts on s.ready after every jittered gossip interval,
// and calls s.stop before returning when the stopper signals shutdown.
func (s *server) start(rpcServer *rpc.Server, stopper *util.Stopper) {
	s.is.NodeAddr = rpcServer.Addr()
	if err := rpcServer.RegisterName("Gossip", s); err != nil {
		log.Fatalf("unable to register gossip service with RPC server: %s", err)
	}
	rpcServer.AddCloseCallback(s.onClose)

	stopper.RunWorker(func() {
		for {
			select {
			case <-stopper.ShouldStop():
				s.stop()
				return
			case <-time.After(s.jitteredGossipInterval()):
				// Release every gossip request currently waiting on ready.
				s.ready.Broadcast()
			}
		}
	})
}
Beispiel #21
0
// waitAndProcess waits for the pace interval computed from start and then,
// if rng is non-nil, offers it to every queue inside a stopper task. While
// waiting, ranges arriving on rs.removed are offered to every queue for
// removal. It returns true when the scanner should stop (the stopper
// signalled shutdown or refused a new task) and false otherwise.
func (rs *rangeScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *util.Stopper,
	rng *Range) bool {
	pace := rs.paceInterval(start, time.Now())
	paceElapsed := time.After(pace)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", pace)
	}
	for {
		select {
		case <-stopper.ShouldStop():
			return true

		case gone := <-rs.removed:
			// Let each queue drop the removed range if it holds it.
			for _, q := range rs.queues {
				q.MaybeRemove(gone)
			}
			if log.V(6) {
				log.Infof("removed range %s", gone)
			}

		case <-paceElapsed:
			if rng == nil {
				return false
			}
			if !stopper.StartTask() {
				return true
			}
			// Offer the range to every queue.
			for _, q := range rs.queues {
				q.MaybeAdd(rng, clock.Now())
			}
			stopper.FinishTask()
			return false
		}
	}
}
Beispiel #22
0
// MonitorRemoteOffsets loops until the stopper signals shutdown, waking up
// every monitorInterval to compute the offset interval via
// findOffsetInterval. When the local clock's MaxOffset is non-zero, a failure
// to determine the interval, or an interval that isHealthyOffsetInterval
// rejects, terminates the process through log.Fatalf. After every check the
// time of the check is recorded in r.lastMonitoredAt under r.mu.
//
// This function blocks; it does not start a goroutine of its own.
func (r *RemoteClockMonitor) MonitorRemoteOffsets(stopper *util.Stopper) {
	if log.V(1) {
		log.Infof("monitoring cluster offset")
	}
	for {
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(monitorInterval):
			offsetInterval, err := r.findOffsetInterval()
			// By the contract of the hlc, if the value is 0, then safety checking
			// of the max offset is disabled. However we may still want to
			// propagate the information to a status node.
			// TODO(embark): once there is a framework for collecting timeseries
			// data about the db, propagate the offset status to that.
			// Don't forget to protect r.offsets through the Mutex if those
			// Fatalf's below ever turn into something less destructive.
			if r.lClock.MaxOffset() != 0 {
				// The error is only acted upon when offset checking is enabled.
				if err != nil {
					log.Fatalf("clock offset from the cluster time "+
						"for remote clocks %v could not be determined: %s",
						r.offsets, err)
				}

				if !isHealthyOffsetInterval(offsetInterval, r.lClock.MaxOffset()) {
					log.Fatalf("clock offset from the cluster time "+
						"for remote clocks: %v is in interval: %s, which "+
						"indicates that the true offset is greater than %s",
						r.offsets, offsetInterval, time.Duration(r.lClock.MaxOffset()))
				}
				if log.V(1) {
					log.Infof("healthy cluster offset: %s", offsetInterval)
				}
			}
			// Record when this check completed; guarded by r.mu.
			r.mu.Lock()
			r.lastMonitoredAt = r.lClock.PhysicalNow()
			r.mu.Unlock()
		}
	}
}
// newTestCluster builds a test cluster of `size` MultiRaft nodes sharing one
// transport. A nil transport is replaced by a new local RPC transport; the
// transport is registered as a closer on the stopper. Each node gets its own
// manual ticker, blockable in-memory storage and event demultiplexer, and is
// assigned node IDs 1..size. Construction failures fail the test. The
// cluster is started before it is returned.
func newTestCluster(transport Transport, size int, stopper *util.Stopper, t *testing.T) *testCluster {
	if transport == nil {
		transport = NewLocalRPCTransport()
	}
	stopper.AddCloser(transport)

	tc := &testCluster{
		t:         t,
		transport: transport,
		groups:    map[proto.RaftID][]int{},
	}

	for id := 1; id <= size; id++ {
		manual := newManualTicker()
		store := &BlockableStorage{storage: NewMemoryStorage()}
		cfg := &Config{
			Transport:              transport,
			Storage:                store,
			Ticker:                 manual,
			ElectionTimeoutTicks:   2,
			HeartbeatIntervalTicks: 1,
			TickInterval:           time.Hour, // not in use
		}
		mr, err := NewMultiRaft(proto.RaftNodeID(id), cfg, stopper)
		if err != nil {
			t.Fatal(err)
		}

		node := newState(mr)
		demux := newEventDemux(node.Events)
		demux.start(stopper)

		tc.nodes = append(tc.nodes, node)
		tc.tickers = append(tc.tickers, manual)
		tc.events = append(tc.events, demux)
		tc.storages = append(tc.storages, store)
	}

	tc.start()
	return tc
}
Beispiel #24
0
// processOne pops one range from the queue and processes it inside a
// stopper task; it does nothing if the stopper refuses the task or the
// queue yields no range. When the queue implementation requires the leader
// lease, the lease is checked or acquired first and the range is skipped if
// that fails. A processing error is logged, not returned.
func (bq *baseQueue) processOne(clock *hlc.Clock, stopper *util.Stopper) {
	if !stopper.StartTask() {
		return
	}
	defer stopper.FinishTask()

	began := time.Now()
	bq.Lock()
	rng := bq.pop()
	bq.Unlock()
	if rng == nil {
		return
	}

	now := clock.Now()
	if log.V(1) {
		log.Infof("processing range %s from %s queue...", rng, bq.name)
	}

	if bq.impl.needsLeaderLease() {
		// A synthetic get request supplies the timestamp used for the
		// lease check.
		args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}}
		if err := rng.redirectOnOrAcquireLeaderLease(args.Header().Timestamp); err != nil {
			if log.V(1) {
				log.Infof("this replica of %s could not acquire leader lease; skipping...", rng)
			}
			return
		}
	}

	if err := bq.impl.process(now, rng); err != nil {
		log.Errorf("failure processing range %s from %s queue: %s", rng, bq.name, err)
	}
	if log.V(1) {
		log.Infof("processed range %s from %s queue in %s", rng, bq.name, time.Since(began))
	}
}
Beispiel #25
0
// processLoop runs a worker that processes queued ranges until the stopper
// signals shutdown. No timer is armed while the queue is idle; a signal on
// bq.incoming arms an immediate one. Each timer expiry processes one range
// and re-arms the timer with bq.impl.timer() while the queue is non-empty.
// On shutdown the queue's contents are discarded under its lock.
//
// TODO(spencer): current load should factor into range processing timer.
func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// A nil channel never fires, which keeps the timer case dormant
		// until something is queued.
		var wakeup <-chan time.Time

		for {
			select {
			case <-stopper.ShouldStop():
				bq.Lock()
				bq.ranges = map[proto.RaftID]*rangeItem{}
				bq.priorityQ = nil
				bq.Unlock()
				return

			case <-bq.incoming:
				// Only arm the timer when none is pending. Firing at once
				// avoids needless sleeps, notably in tests.
				if wakeup == nil {
					wakeup = time.After(0)
				}

			case <-wakeup:
				bq.processOne(clock, stopper)
				if bq.Length() != 0 {
					wakeup = time.After(bq.impl.timer())
				} else {
					wakeup = nil
				}
			}
		}
	})
}
Beispiel #26
0
// scanLoop runs a worker that repeatedly scans the range set until the
// scanner is told to stop. With no ranges it just waits one pace interval;
// otherwise it visits each range through waitAndProcess. After every
// completed pass, inside a stopper task, it publishes the pass's stats,
// invokes rs.scanFn if set, updates the pass count and cumulative duration
// under the completedScan lock and wakes any waiters.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		passStart := time.Now()
		passStats := &storeStats{}

		for {
			var halt bool
			if rs.ranges.EstimatedCount() == 0 {
				// Nothing to scan: only wait out the pace interval.
				halt = rs.waitAndProcess(passStart, clock, stopper, passStats, nil)
			} else {
				// Stays true if Visit never invokes the callback.
				halt = true
				rs.ranges.Visit(func(rng *Range) bool {
					halt = rs.waitAndProcess(passStart, clock, stopper, passStats, rng)
					return !halt
				})
			}
			if halt || !stopper.StartTask() {
				return
			}

			// Pass complete: publish its stats and start a new accumulator.
			atomic.StorePointer(&rs.stats, unsafe.Pointer(passStats))
			passStats = &storeStats{}
			if rs.scanFn != nil {
				rs.scanFn()
			}

			// Record the pass and notify anyone waiting for it.
			rs.completedScan.L.Lock()
			rs.count++
			rs.total += time.Since(passStart)
			rs.completedScan.Broadcast()
			rs.completedScan.L.Unlock()
			if log.V(6) {
				log.Infof("reset range scan iteration")
			}

			// The next pass is timed from here.
			passStart = time.Now()
			stopper.FinishTask()
		}
	})
}
Beispiel #27
0
// scanLoop runs a worker that repeatedly scans the range set until the
// scanner is told to stop. With no ranges it just waits one pace interval;
// otherwise it visits each range through waitAndProcess. After every
// completed pass, inside a stopper task, it updates the pass count and
// cumulative duration under the completedScan lock and wakes any waiters.
func (rs *rangeScanner) scanLoop(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.RunWorker(func() {
		passStart := time.Now()

		for {
			var halt bool
			if rs.ranges.EstimatedCount() == 0 {
				// Nothing to scan: only wait out the pace interval.
				halt = rs.waitAndProcess(passStart, clock, stopper, nil)
			} else {
				// Stays true if Visit never invokes the callback.
				halt = true
				rs.ranges.Visit(func(rng *Range) bool {
					halt = rs.waitAndProcess(passStart, clock, stopper, rng)
					return !halt
				})
			}
			if halt || !stopper.StartTask() {
				return
			}

			// Record the pass and notify anyone waiting for it.
			rs.completedScan.L.Lock()
			rs.count++
			rs.total += time.Since(passStart)
			rs.completedScan.Broadcast()
			rs.completedScan.L.Unlock()
			if log.V(6) {
				log.Infof("reset range scan iteration")
			}

			// The next pass is timed from here.
			passStart = time.Now()
			stopper.FinishTask()
		}
	})
}
Beispiel #28
0
// Start registers one unit of work with the stopper and spawns a goroutine
// that, once per millisecond, pops the first queued range and bumps the
// processed counter when the queue is non-empty. When the stopper signals
// shutdown the queue is marked done, the stopper is notified via SetStopped
// and the goroutine returns. All state is accessed under the queue's lock.
func (tq *testQueue) Start(clock *hlc.Clock, stopper *util.Stopper) {
	stopper.Add(1)
	go func() {
		for {
			select {
			case <-stopper.ShouldStop():
				tq.Lock()
				tq.done = true
				tq.Unlock()
				stopper.SetStopped()
				return
			case <-time.After(1 * time.Millisecond):
				tq.Lock()
				if pending := len(tq.ranges); pending > 0 {
					tq.ranges = tq.ranges[1:]
					tq.processed++
				}
				tq.Unlock()
			}
		}
	}()
}
// close ends the transaction's heartbeat by closing tm.txnEnd and then
// dispatches a resolve-intent command for every key span recorded in
// tm.keys. A span whose end key is not the immediate successor of its start
// key is resolved as a range; otherwise it is resolved as a single key,
// unless that key appears in the resolved slice, in which case it is
// skipped. Each command is sent from its own goroutine, tracked as a stopper
// task, and failures are only logged. The recorded keys are cleared before
// returning.
func (tm *txnMetadata) close(txn *proto.Transaction, resolved []proto.Key, sender client.Sender, stopper *util.Stopper) {
	close(tm.txnEnd) // stop heartbeat
	if tm.keys.Len() > 0 {
		if log.V(2) {
			log.Infof("cleaning up %d intent(s) for transaction %s", tm.keys.Len(), txn)
		}
	}
	for _, o := range tm.keys.GetOverlaps(proto.KeyMin, proto.KeyMax) {
		var call proto.Call
		key := o.Key.Start().(proto.Key)
		endKey := o.Key.End().(proto.Key)
		if key.Next().Equal(endKey) {
			// Single key: nothing to do if the caller already resolved it.
			alreadyResolved := false
			for _, k := range resolved {
				if key.Equal(k) {
					alreadyResolved = true
					break
				}
			}
			if alreadyResolved {
				continue
			}
			call.Args = &proto.InternalResolveIntentRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					User:      storage.UserRoot,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentResponse{}
		} else {
			// The operation covered a key range: resolve the whole range.
			call.Args = &proto.InternalResolveIntentRangeRequest{
				RequestHeader: proto.RequestHeader{
					Timestamp: txn.Timestamp,
					Key:       key,
					EndKey:    endKey,
					User:      storage.UserRoot,
					Txn:       txn,
				},
			}
			call.Reply = &proto.InternalResolveIntentRangeResponse{}
		}
		// Best effort: fire and forget, one goroutine per command. The
		// command is dropped when the stopper refuses a new task.
		if !stopper.StartTask() {
			continue
		}
		go func() {
			if log.V(2) {
				log.Infof("cleaning up intent %q for txn %s", call.Args.Header().Key, txn)
			}
			sender.Send(context.TODO(), call)
			if call.Reply.Header().Error != nil {
				log.Warningf("failed to cleanup %q intent: %s", call.Args.Header().Key, call.Reply.Header().GoError())
			}
			stopper.FinishTask()
		}()
	}
	tm.keys.Clear()
}
Beispiel #30
0
// startStoresScanner runs a worker that periodically aggregates the status
// of every store on the node and writes the combined NodeStatus to the db
// under the node's status key. The first pass runs immediately; later passes
// are spaced by the smaller of ctx.ScanInterval and ctx.ScanMaxIdleTime (a
// zero ScanMaxIdleTime is ignored). Each pass runs inside a stopper task and
// ends by bumping the scan counter and waking waiters on completedScan. The
// worker exits when the stopper signals shutdown.
func (n *Node) startStoresScanner(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Default to the idle time and fall back to the scan interval when
		// that one is smaller or the idle time is unset.
		minScanInterval := n.ctx.ScanMaxIdleTime
		if n.ctx.ScanInterval <= n.ctx.ScanMaxIdleTime || n.ctx.ScanMaxIdleTime == 0 {
			minScanInterval = n.ctx.ScanInterval
		}

		// TODO(bram): The number of stores is small. The node status should be
		// updated whenever a store status is updated.
		for interval := time.Duration(0); true; interval = minScanInterval {
			select {
			case <-stopper.ShouldStop():
				// Exit the loop.
				return

			case <-time.After(interval):
				if !stopper.StartTask() {
					continue
				}

				// Totals accumulated over all stores of this node.
				var (
					ranges, leaders, replicated, available int32
					visited                                = []proto.StoreID{}
					mvcc                                   = &engine.MVCCStats{}
				)
				// will never error because `return nil` below
				_ = n.lSender.VisitStores(func(store *storage.Store) error {
					st, err := store.GetStatus()
					if err != nil {
						log.Error(err)
						return nil
					}
					if st == nil {
						// The store scanner hasn't run on this node yet.
						return nil
					}
					visited = append(visited, store.Ident.StoreID)
					ranges += st.RangeCount
					leaders += st.LeaderRangeCount
					replicated += st.ReplicatedRangeCount
					available += st.AvailableRangeCount
					mvcc.Add(&st.Stats)
					return nil
				})

				// Persist the combined status.
				nodeStatus := &NodeStatus{
					Desc:                 n.Descriptor,
					StoreIDs:             visited,
					UpdatedAt:            n.ctx.Clock.Now().WallTime,
					StartedAt:            n.startedAt,
					RangeCount:           ranges,
					Stats:                *mvcc,
					LeaderRangeCount:     leaders,
					ReplicatedRangeCount: replicated,
					AvailableRangeCount:  available,
				}
				statusKey := keys.NodeStatusKey(int32(n.Descriptor.NodeID))
				if err := n.ctx.DB.Put(statusKey, nodeStatus); err != nil {
					log.Error(err)
				}

				// Count the pass and notify anyone waiting for it.
				n.completedScan.L.Lock()
				n.scanCount++
				n.completedScan.Broadcast()
				n.completedScan.L.Unlock()
				if log.V(6) {
					log.Infof("store scan iteration completed")
				}
				stopper.FinishTask()
			}
		}
	})
}