// NewStorePool creates a StorePool and registers the store updating callback // with gossip. func NewStorePool( ambient log.AmbientContext, g *gossip.Gossip, clock *hlc.Clock, rpcContext *rpc.Context, timeUntilStoreDead time.Duration, stopper *stop.Stopper, deterministic bool, ) *StorePool { sp := &StorePool{ AmbientContext: ambient, clock: clock, timeUntilStoreDead: timeUntilStoreDead, rpcContext: rpcContext, failedReservationsTimeout: envutil.EnvOrDefaultDuration("COCKROACH_FAILED_RESERVATION_TIMEOUT", defaultFailedReservationsTimeout), declinedReservationsTimeout: envutil.EnvOrDefaultDuration("COCKROACH_DECLINED_RESERVATION_TIMEOUT", defaultDeclinedReservationsTimeout), resolver: GossipAddressResolver(g), deterministic: deterministic, } sp.mu.storeDetails = make(map[roachpb.StoreID]*storeDetail) heap.Init(&sp.mu.queue) sp.mu.nodeLocalities = make(map[roachpb.NodeID]roachpb.Locality) storeRegex := gossip.MakePrefixPattern(gossip.KeyStorePrefix) g.RegisterCallback(storeRegex, sp.storeGossipUpdate) deadReplicasRegex := gossip.MakePrefixPattern(gossip.KeyDeadReplicasPrefix) g.RegisterCallback(deadReplicasRegex, sp.deadReplicasGossipUpdate) sp.start(stopper) return sp }
// WaitForStores waits for all of the store descriptors to be gossiped. Servers // other than the first "bootstrap" their stores asynchronously, but we'd like // to wait for all of the stores to be initialized before returning the // TestCluster. func (tc *TestCluster) WaitForStores(t testing.TB, g *gossip.Gossip) { // Register a gossip callback for the store descriptors. var storesMu syncutil.Mutex stores := map[roachpb.StoreID]struct{}{} storesDone := make(chan error) storesDoneOnce := storesDone unregister := g.RegisterCallback(gossip.MakePrefixPattern(gossip.KeyStorePrefix), func(_ string, content roachpb.Value) { storesMu.Lock() defer storesMu.Unlock() if storesDoneOnce == nil { return } var desc roachpb.StoreDescriptor if err := content.GetProto(&desc); err != nil { storesDoneOnce <- err return } stores[desc.StoreID] = struct{}{} if len(stores) == len(tc.Servers) { close(storesDoneOnce) storesDoneOnce = nil } }) defer unregister() // Wait for the store descriptors to be gossiped. for err := range storesDone { if err != nil { t.Fatal(err) } } }
// NewNodeLiveness returns a new instance of NodeLiveness configured // with the specified gossip instance. func NewNodeLiveness( ambient log.AmbientContext, clock *hlc.Clock, db *client.DB, g *gossip.Gossip, livenessThreshold time.Duration, heartbeatInterval time.Duration, ) *NodeLiveness { nl := &NodeLiveness{ ambientCtx: ambient, clock: clock, db: db, gossip: g, livenessThreshold: livenessThreshold, heartbeatInterval: heartbeatInterval, stopHeartbeat: make(chan struct{}), metrics: LivenessMetrics{ HeartbeatSuccesses: metric.NewCounter(metaHeartbeatSuccesses), HeartbeatFailures: metric.NewCounter(metaHeartbeatFailures), EpochIncrements: metric.NewCounter(metaEpochIncrements), }, } nl.mu.nodes = map[roachpb.NodeID]Liveness{} livenessRegex := gossip.MakePrefixPattern(gossip.KeyNodeLivenessPrefix) nl.gossip.RegisterCallback(livenessRegex, nl.livenessGossipUpdate) return nl }
// NewNodeLiveness returns a new instance of NodeLiveness configured // with the specified gossip instance. func NewNodeLiveness( ambient log.AmbientContext, clock *hlc.Clock, db *client.DB, g *gossip.Gossip, livenessThreshold time.Duration, renewalDuration time.Duration, ) *NodeLiveness { nl := &NodeLiveness{ ambientCtx: ambient, clock: clock, db: db, gossip: g, livenessThreshold: livenessThreshold, heartbeatInterval: livenessThreshold - renewalDuration, } nl.metrics = LivenessMetrics{ LiveNodes: metric.NewFunctionalGauge(metaLiveNodes, nl.numLiveNodes), HeartbeatSuccesses: metric.NewCounter(metaHeartbeatSuccesses), HeartbeatFailures: metric.NewCounter(metaHeartbeatFailures), EpochIncrements: metric.NewCounter(metaEpochIncrements), } nl.pauseHeartbeat.Store(false) nl.mu.nodes = map[roachpb.NodeID]Liveness{} livenessRegex := gossip.MakePrefixPattern(gossip.KeyNodeLivenessPrefix) nl.gossip.RegisterCallback(livenessRegex, nl.livenessGossipUpdate) return nl }
// newReplicateQueue returns a new instance of replicateQueue. func newReplicateQueue( store *Store, g *gossip.Gossip, allocator Allocator, clock *hlc.Clock, options AllocatorOptions, ) *replicateQueue { rq := &replicateQueue{ allocator: allocator, clock: clock, updateChan: make(chan struct{}, 1), } rq.baseQueue = newBaseQueue( "replicate", rq, store, g, queueConfig{ maxSize: replicateQueueMaxSize, needsLease: true, acceptsUnsplitRanges: store.TestingKnobs().ReplicateQueueAcceptsUnsplit, successes: store.metrics.ReplicateQueueSuccesses, failures: store.metrics.ReplicateQueueFailures, pending: store.metrics.ReplicateQueuePending, processingNanos: store.metrics.ReplicateQueueProcessingNanos, purgatory: store.metrics.ReplicateQueuePurgatory, }, ) if g != nil { // gossip is nil for some unittests // Register a gossip callback to signal queue that replicas in // purgatory might be retried due to new store gossip. g.RegisterCallback(gossip.MakePrefixPattern(gossip.KeyStorePrefix), func(_ string, _ roachpb.Value) { select { case rq.updateChan <- struct{}{}: default: } }) } return rq }
// TestNodeLivenessRestart verifies that if nodes are shutdown and // restarted, the node liveness records are re-gossiped immediately. func TestNodeLivenessRestart(t *testing.T) { defer leaktest.AfterTest(t)() mtc := &multiTestContext{} defer mtc.Stop() mtc.Start(t, 2) // After verifying node is in liveness table, stop store. verifyLiveness(t, mtc) mtc.stopStore(0) // Clear the liveness records in store 1's gossip to make sure we're // seeing the liveness record properly gossiped at store startup. var expKeys []string for _, g := range mtc.gossips { key := gossip.MakeNodeLivenessKey(g.NodeID.Get()) expKeys = append(expKeys, key) if err := g.AddInfoProto(key, &storage.Liveness{}, 0); err != nil { t.Fatal(err) } } sort.Strings(expKeys) // Register a callback to gossip in order to verify liveness records // are re-gossiped. var keysMu struct { syncutil.Mutex keys []string } livenessRegex := gossip.MakePrefixPattern(gossip.KeyNodeLivenessPrefix) mtc.gossips[0].RegisterCallback(livenessRegex, func(key string, _ roachpb.Value) { keysMu.Lock() defer keysMu.Unlock() for _, k := range keysMu.keys { if k == key { return } } keysMu.keys = append(keysMu.keys, key) }) // Restart store and verify gossip contains liveness record for nodes 1&2. mtc.restartStore(0) testutils.SucceedsSoon(t, func() error { keysMu.Lock() defer keysMu.Unlock() sort.Strings(keysMu.keys) if !reflect.DeepEqual(keysMu.keys, expKeys) { return errors.Errorf("expected keys %+v != keys %+v", expKeys, keysMu.keys) } return nil }) }
// NewStoreGossiper creates a store gossiper for use by tests. It adds the // callback to gossip. func NewStoreGossiper(g *gossip.Gossip) *StoreGossiper { sg := &StoreGossiper{ g: g, storeKeyMap: make(map[string]struct{}), } sg.cond = sync.NewCond(&sg.mu) g.RegisterCallback(gossip.MakePrefixPattern(gossip.KeyStorePrefix), func(key string, _ roachpb.Value) { sg.mu.Lock() defer sg.mu.Unlock() delete(sg.storeKeyMap, key) sg.cond.Broadcast() }) return sg }
// newReplicateQueue returns a new instance of replicateQueue. func newReplicateQueue( store *Store, g *gossip.Gossip, allocator Allocator, clock *hlc.Clock, options AllocatorOptions, ) *replicateQueue { rq := &replicateQueue{ metrics: makeReplicateQueueMetrics(), allocator: allocator, clock: clock, updateChan: make(chan struct{}, 1), } store.metrics.registry.AddMetricStruct(&rq.metrics) rq.baseQueue = newBaseQueue( "replicate", rq, store, g, queueConfig{ maxSize: defaultQueueMaxSize, needsLease: true, acceptsUnsplitRanges: store.TestingKnobs().ReplicateQueueAcceptsUnsplit, successes: store.metrics.ReplicateQueueSuccesses, failures: store.metrics.ReplicateQueueFailures, pending: store.metrics.ReplicateQueuePending, processingNanos: store.metrics.ReplicateQueueProcessingNanos, purgatory: store.metrics.ReplicateQueuePurgatory, }, ) updateFn := func() { select { case rq.updateChan <- struct{}{}: default: } } // Register a gossip and node liveness callbacks to signal queue // that replicas in purgatory might be retried. if g != nil { // gossip is nil for some unittests g.RegisterCallback(gossip.MakePrefixPattern(gossip.KeyStorePrefix), func(_ string, _ roachpb.Value) { updateFn() }) } if nl := store.cfg.NodeLiveness; nl != nil { // node liveness is nil for some unittests nl.RegisterCallback(func(_ roachpb.NodeID) { updateFn() }) } return rq }