func cutNetwork(t *testing.T, c cluster.Cluster, closer <-chan struct{}, partitions ...[]int) {
	defer func() {
		if errs := restoreNetwork(t, c); len(errs) > 0 {
			t.Fatalf("errors restoring the network: %+v", errs)
		}
	}()
	addrs, addrsToNode := mustGetHosts(t, c)
	ipPartitions := make([][]iptables.IP, 0, len(partitions))
	for _, partition := range partitions {
		ipPartition := make([]iptables.IP, 0, len(partition))
		for _, nodeIndex := range partition {
			ipPartition = append(ipPartition, addrs[nodeIndex])
		}
		ipPartitions = append(ipPartitions, ipPartition)
	}
	log.Warningf(context.TODO(), "partitioning: %v (%v)", partitions, ipPartitions)
	for host, cmds := range iptables.Rules(iptables.Bidirectional(ipPartitions...)) {
		for _, cmd := range cmds {
			if err := c.ExecRoot(addrsToNode[host], cmd); err != nil {
				t.Fatal(err)
			}
		}
	}
	<-closer
	log.Warningf(context.TODO(), "resolved all partitions")
}
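// Usage sketch (illustration only, not taken from the source): each variadic
// argument to cutNetwork groups node indexes into one side of the partition,
// and nodes in different groups lose bidirectional connectivity until the
// closer channel is closed. For example, splitting a four-node cluster into
// two halves:
//
//	closer := make(chan struct{})
//	go cutNetwork(t, c, closer, []int{0, 1}, []int{2, 3})
//	// ... exercise the cluster while partitioned ...
//	close(closer) // heals the partition; restoreNetwork runs via the deferred call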
// maybeSignalStatusChangeLocked checks whether gossip should transition its
// internal state from connected to stalled or vice versa.
func (g *Gossip) maybeSignalStatusChangeLocked() {
	ctx := g.AnnotateCtx(context.TODO())
	orphaned := g.outgoing.len()+g.mu.incoming.len() == 0
	stalled := orphaned || g.mu.is.getInfo(KeySentinel) == nil
	if stalled {
		// We employ the stalled boolean to avoid filling logs with warnings.
		if !g.stalled {
			log.Eventf(ctx, "now stalled")
			if orphaned {
				if len(g.resolvers) == 0 {
					log.Warningf(ctx, "no resolvers found; use --join to specify a connected node")
				} else {
					log.Warningf(ctx, "no incoming or outgoing connections")
				}
			} else if len(g.resolversTried) == len(g.resolvers) {
				log.Warningf(ctx, "first range unavailable; resolvers exhausted")
			} else {
				log.Warningf(ctx, "first range unavailable; trying remaining resolvers")
			}
		}
		if len(g.resolvers) > 0 {
			g.signalStalledLocked()
		}
	} else {
		if g.stalled {
			log.Eventf(ctx, "connected")
			log.Infof(ctx, "node has connected to cluster via gossip")
			g.signalConnectedLocked()
		}
		g.maybeCleanupBootstrapAddressesLocked()
	}
	g.stalled = stalled
}
// MIGRATION(tschottdorf): As of #7310, we make sure that a Replica always has
// a complete Raft state on disk. Prior versions may not have that, which
// causes issues due to the fact that we used to synthesize a TruncatedState
// and do so no more. To make up for that, write a missing TruncatedState here.
// That key is in the replicated state, but since during a cluster upgrade, all
// nodes do it, it's fine (and we never CPut on that key, so anything in the
// Raft pipeline will simply overwrite it).
//
// MIGRATION(tschottdorf): See #6991. It's possible that the HardState is
// missing after a snapshot was applied (so there is a TruncatedState). In this
// case, synthesize a HardState (simply setting everything that was in the
// snapshot to committed). Having lost the original HardState can theoretically
// mean that the replica was further ahead or had voted, and so there's no
// guarantee that this will be correct. But it will be correct in the majority
// of cases, and some state *has* to be recovered.
func migrate7310And6991(
	ctx context.Context, batch engine.ReadWriter, desc roachpb.RangeDescriptor,
) error {
	state, err := loadState(ctx, batch, &desc)
	if err != nil {
		return errors.Wrap(err, "could not migrate TruncatedState")
	}

	if (*state.TruncatedState == roachpb.RaftTruncatedState{}) {
		state.TruncatedState.Term = raftInitialLogTerm
		state.TruncatedState.Index = raftInitialLogIndex
		state.RaftAppliedIndex = raftInitialLogIndex
		if _, err := saveState(ctx, batch, state); err != nil {
			return errors.Wrapf(err, "could not migrate TruncatedState to %+v",
				&state.TruncatedState)
		}
		log.Warningf(ctx, "migration: synthesized TruncatedState for %+v", desc)
	}

	hs, err := loadHardState(ctx, batch, desc.RangeID)
	if err != nil {
		return errors.Wrap(err, "unable to load HardState")
	}
	// Only update the HardState when there is a nontrivial Commit field. We
	// don't have a snapshot here, so we could wind up lowering the commit
	// index (which would error out and fatal us).
	if hs.Commit == 0 {
		log.Warningf(ctx, "migration: synthesized HardState for %+v", desc)
		if err := synthesizeHardState(ctx, batch, state, hs); err != nil {
			return errors.Wrap(err, "could not migrate HardState")
		}
	}
	return nil
}
// t.mu needs to be locked.
func (t *tableState) removeLease(lease *LeaseState, store LeaseStore) {
	t.active.remove(lease)
	t.tableNameCache.remove(lease)
	// Release to the store asynchronously, without the tableState lock.
	err := t.stopper.RunAsyncTask(context.TODO(), func(ctx context.Context) {
		if err := store.Release(lease); err != nil {
			log.Warningf(ctx, "error releasing lease %q: %s", lease, err)
		}
	})
	if log.V(1) && err != nil {
		log.Warningf(context.TODO(), "error removing lease from store: %s", err)
	}
}
// Heartbeat is called to update a node's expiration timestamp. This
// method does a conditional put on the node liveness record, and if
// successful, stores the updated liveness record in the nodes map.
func (nl *NodeLiveness) Heartbeat(ctx context.Context, liveness *Liveness) error {
	defer func(start time.Time) {
		if dur := timeutil.Now().Sub(start); dur > time.Second {
			log.Warningf(ctx, "slow heartbeat took %0.1fs", dur.Seconds())
		}
	}(timeutil.Now())

	// Allow only one heartbeat at a time.
	select {
	case nl.heartbeatSem <- struct{}{}:
	case <-ctx.Done():
		return ctx.Err()
	}
	defer func() {
		<-nl.heartbeatSem
	}()

	nodeID := nl.gossip.NodeID.Get()
	var newLiveness Liveness
	if liveness == nil {
		newLiveness = Liveness{
			NodeID: nodeID,
			Epoch:  1,
		}
	} else {
		newLiveness = *liveness
	}
	// We need to add the maximum clock offset to the expiration because it's
	// used when determining liveness for a node.
	newLiveness.Expiration = nl.clock.Now().Add(
		(nl.livenessThreshold + nl.clock.MaxOffset()).Nanoseconds(), 0)
	if err := nl.updateLiveness(ctx, &newLiveness, liveness, func(actual Liveness) error {
		// Update liveness to actual value on mismatch.
		nl.mu.Lock()
		nl.mu.self = actual
		nl.mu.Unlock()
		// If the actual liveness is different than expected, but is
		// considered live, treat the heartbeat as a success. This can
		// happen when the periodic heartbeater races with a concurrent
		// lease acquisition.
		if actual.isLive(nl.clock.Now(), nl.clock.MaxOffset()) {
			return errNodeAlreadyLive
		}
		// Otherwise, return error.
		return errSkippedHeartbeat
	}); err != nil {
		if err == errNodeAlreadyLive {
			return nil
		}
		nl.metrics.HeartbeatFailures.Inc(1)
		return err
	}

	log.VEventf(ctx, 1, "heartbeat %+v", newLiveness.Expiration)
	nl.mu.Lock()
	nl.mu.self = newLiveness
	nl.mu.Unlock()
	nl.metrics.HeartbeatSuccesses.Inc(1)
	return nil
}
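// The heartbeatSem used above is assumed (its declaration is not part of this
// snippet) to be a buffered channel serving as a binary semaphore, e.g.:
//
//	heartbeatSem: make(chan struct{}, 1)
//
// Sending on the channel acquires the semaphore, blocking while another
// heartbeat is in flight; the deferred receive releases it.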
// recordJoinEvent begins an asynchronous task which attempts to log a "node
// join" or "node restart" event. This query will retry until it succeeds or
// the server stops.
func (n *Node) recordJoinEvent() {
	if !n.storeCfg.LogRangeEvents {
		return
	}

	logEventType := sql.EventLogNodeRestart
	if n.initialBoot {
		logEventType = sql.EventLogNodeJoin
	}

	n.stopper.RunWorker(func() {
		ctx, span := n.AnnotateCtxWithSpan(context.Background(), "record-join-event")
		defer span.Finish()
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = n.stopper.ShouldStop()
		for r := retry.Start(retryOpts); r.Next(); {
			if err := n.storeCfg.DB.Txn(ctx, func(txn *client.Txn) error {
				return n.eventLogger.InsertEventRecord(txn,
					logEventType,
					int32(n.Descriptor.NodeID),
					int32(n.Descriptor.NodeID),
					struct {
						Descriptor roachpb.NodeDescriptor
						ClusterID  uuid.UUID
						StartedAt  int64
					}{n.Descriptor, n.ClusterID, n.startedAt},
				)
			}); err != nil {
				log.Warningf(ctx, "%s: unable to log %s event: %s", n, logEventType, err)
			} else {
				return
			}
		}
	})
}
// updateRangeInfo is called whenever a range is updated by ApplySnapshot
// or is created by range splitting to set up the fields which are
// uninitialized or need updating.
func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error {
	// RangeMaxBytes should be updated by looking up the zone config in two cases:
	// 1. After applying a snapshot, if the zone config was not updated for
	// this key range, then maxBytes of this range will not be updated either.
	// 2. After a new range is created by a split, only copying maxBytes from
	// the original range won't work as the original and new ranges might
	// belong to different zones.
	// Load the system config.
	cfg, ok := r.store.Gossip().GetSystemConfig()
	if !ok {
		// This could be before the system config was ever gossiped,
		// or it expired. Let the gossip callback set the info.
		ctx := r.AnnotateCtx(context.TODO())
		log.Warningf(ctx, "no system config available, cannot determine range MaxBytes")
		return nil
	}

	// Find the zone config for this range.
	zone, err := cfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return errors.Errorf("%s: failed to lookup zone config: %s", r, err)
	}

	r.SetMaxBytes(zone.RangeMaxBytes)
	return nil
}
// handleResponse handles errors, remote forwarding, and combines delta
// gossip infos from the remote server with this node's infostore.
func (c *client) handleResponse(ctx context.Context, g *Gossip, reply *Response) error {
	g.mu.Lock()
	defer g.mu.Unlock()

	bytesReceived := int64(reply.Size())
	infosReceived := int64(len(reply.Delta))
	c.clientMetrics.BytesReceived.Inc(bytesReceived)
	c.clientMetrics.InfosReceived.Inc(infosReceived)
	c.nodeMetrics.BytesReceived.Inc(bytesReceived)
	c.nodeMetrics.InfosReceived.Inc(infosReceived)

	// Combine remote node's infostore delta with ours.
	if reply.Delta != nil {
		freshCount, err := g.mu.is.combine(reply.Delta, reply.NodeID)
		if err != nil {
			log.Warningf(ctx, "failed to fully combine delta from node %d: %s", reply.NodeID, err)
		}
		if infoCount := len(reply.Delta); infoCount > 0 {
			if log.V(1) {
				log.Infof(ctx, "received %s from node %d (%d fresh)",
					extractKeys(reply.Delta), reply.NodeID, freshCount)
			}
		}
		g.maybeTightenLocked()
	}
	c.peerID = reply.NodeID
	g.outgoing.addNode(c.peerID)
	c.remoteHighWaterStamps = reply.HighWaterStamps

	// Handle remote forwarding.
	if reply.AlternateAddr != nil {
		if g.hasIncomingLocked(reply.AlternateNodeID) || g.hasOutgoingLocked(reply.AlternateNodeID) {
			return errors.Errorf("received forward from node %d to %d (%s); already have active connection, skipping",
				reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr)
		}
		// We try to resolve the address, but don't actually use the result.
		// The certificates (if any) may only be valid for the unresolved
		// address.
		if _, err := reply.AlternateAddr.Resolve(); err != nil {
			return errors.Errorf("unable to resolve alternate address %s for node %d: %s",
				reply.AlternateAddr, reply.AlternateNodeID, err)
		}
		c.forwardAddr = reply.AlternateAddr
		return errors.Errorf("received forward from node %d to %d (%s)",
			reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr)
	}

	// Check whether we're connected at this point.
	g.signalConnectedLocked()

	// Check whether this outgoing client is duplicating work already
	// being done by an incoming client, either because an outgoing
	// matches an incoming or the client is connecting to itself.
	if nodeID := g.NodeID.Get(); nodeID == c.peerID {
		return errors.Errorf("stopping outgoing client to node %d (%s); loopback connection", c.peerID, c.addr)
	} else if g.hasIncomingLocked(c.peerID) && nodeID > c.peerID {
		// To avoid mutual shutdown, we only shut down our client if our
		// node ID is higher than the peer's.
		return errors.Errorf("stopping outgoing client to node %d (%s); already have incoming", c.peerID, c.addr)
	}

	return nil
}
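// Worked example of the tie-break above (illustrative node IDs): if node 2 and
// node 5 each hold an outgoing client to the other, node 5 drops its outgoing
// client (it has an incoming connection from node 2 and the higher node ID),
// while node 2 keeps its outgoing client to node 5. Exactly one of the two
// redundant connections survives, which avoids a mutual shutdown.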
func parseOptions(data []byte) (sql.SessionArgs, error) {
	args := sql.SessionArgs{}
	buf := readBuffer{msg: data}
	for {
		key, err := buf.getString()
		if err != nil {
			return sql.SessionArgs{}, errors.Errorf("error reading option key: %s", err)
		}
		if len(key) == 0 {
			break
		}
		value, err := buf.getString()
		if err != nil {
			return sql.SessionArgs{}, errors.Errorf("error reading option value: %s", err)
		}
		switch key {
		case "database":
			args.Database = value
		case "user":
			args.User = value
		default:
			if log.V(1) {
				log.Warningf(context.TODO(), "unrecognized configuration parameter %q", key)
			}
		}
	}
	return args, nil
}
// pushTxn attempts to abort the txn via push. On a successful push, the
// supplied txn is updated with the pushee transaction returned in the
// response.
func pushTxn(
	ctx context.Context,
	db *client.DB,
	now hlc.Timestamp,
	txn *roachpb.Transaction,
	typ roachpb.PushTxnType,
) {
	// Attempt to push the transaction which created the intent.
	pushArgs := &roachpb.PushTxnRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Now:       now,
		PusherTxn: roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Priority: math.MaxInt32}},
		PusheeTxn: txn.TxnMeta,
		PushType:  typ,
	}
	b := &client.Batch{}
	b.AddRawRequest(pushArgs)
	if err := db.Run(ctx, b); err != nil {
		log.Warningf(ctx, "push of txn %s failed: %s", txn, err)
		return
	}
	br := b.RawResponse()
	// Update the supplied txn on successful push.
	*txn = br.Responses[0].GetInner().(*roachpb.PushTxnResponse).PusheeTxn
}
// deleteIndexMutationsWithReversedColumns deletes index mutations with a
// different mutationID than the schema changer and a reference to one of the
// reversed columns.
func (sc *SchemaChanger) deleteIndexMutationsWithReversedColumns(
	desc *sqlbase.TableDescriptor, columns map[string]struct{},
) {
	newMutations := make([]sqlbase.DescriptorMutation, 0, len(desc.Mutations))
	for _, mutation := range desc.Mutations {
		if mutation.MutationID != sc.mutationID {
			if idx := mutation.GetIndex(); idx != nil {
				deleteMutation := false
				for _, name := range idx.ColumnNames {
					if _, ok := columns[name]; ok {
						// Such an index mutation has to be with direction ADD and
						// in the DELETE_ONLY state. Live indexes referencing live
						// columns cannot be deleted and thus never have direction
						// DROP. All mutations with the ADD direction start off in
						// the DELETE_ONLY state.
						if mutation.Direction != sqlbase.DescriptorMutation_ADD ||
							mutation.State != sqlbase.DescriptorMutation_DELETE_ONLY {
							panic(fmt.Sprintf("mutation in bad state: %+v", mutation))
						}
						log.Warningf(context.TODO(), "delete schema change mutation: %+v", mutation)
						deleteMutation = true
						break
					}
				}
				if deleteMutation {
					continue
				}
			}
		}
		newMutations = append(newMutations, mutation)
	}
	// Reset mutations.
	desc.Mutations = newMutations
}
// removeLeaseIfExpiring removes a lease and returns true if it is about to
// expire. The method also resets the transaction deadline.
func (p *planner) removeLeaseIfExpiring(lease *LeaseState) bool {
	if lease == nil || lease.hasSomeLifeLeft(p.leaseMgr.clock) {
		return false
	}

	// Remove the lease from p.leases.
	idx := -1
	for i, l := range p.leases {
		if l == lease {
			idx = i
			break
		}
	}
	if idx == -1 {
		log.Warningf(p.ctx(), "lease (%s) not found", lease)
		return false
	}
	p.leases[idx] = p.leases[len(p.leases)-1]
	p.leases[len(p.leases)-1] = nil
	p.leases = p.leases[:len(p.leases)-1]

	if err := p.leaseMgr.Release(lease); err != nil {
		log.Warning(p.ctx(), err)
	}

	// Reset the deadline so that a new deadline will be set after the lease
	// is acquired.
	p.txn.ResetDeadline()
	for _, l := range p.leases {
		p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: l.Expiration().UnixNano()})
	}
	return true
}
func (l *LocalCluster) createNetwork(ctx context.Context) {
	l.panicOnStop()

	l.networkName = fmt.Sprintf("%s-%s", networkPrefix, l.clusterID)
	log.Infof(ctx, "creating docker network with name: %s", l.networkName)
	net, err := l.client.NetworkInspect(ctx, l.networkName)
	if err == nil {
		// We need to destroy the network and any running containers inside of it.
		for containerID := range net.Containers {
			// This call could fail if the container terminated on its own after we call
			// NetworkInspect, but the likelihood of this seems low. If this line creates
			// a lot of panics we should do more careful error checking.
			maybePanic(l.client.ContainerKill(ctx, containerID, "9"))
		}
		maybePanic(l.client.NetworkRemove(ctx, l.networkName))
	} else if !client.IsErrNotFound(err) {
		panic(err)
	}

	resp, err := l.client.NetworkCreate(ctx, l.networkName, types.NetworkCreate{
		Driver: "bridge",
		// Docker gets very confused if two networks have the same name.
		CheckDuplicate: true,
	})
	maybePanic(err)
	if resp.Warning != "" {
		log.Warningf(ctx, "creating network: %s", resp.Warning)
	}
	l.networkID = resp.ID
}
// RaftSnapshot handles incoming streaming snapshot requests.
func (t *RaftTransport) RaftSnapshot(stream MultiRaft_RaftSnapshotServer) error {
	errCh := make(chan error, 1)
	if err := t.rpcContext.Stopper.RunAsyncTask(stream.Context(), func(ctx context.Context) {
		errCh <- func() error {
			req, err := stream.Recv()
			if err != nil {
				return err
			}
			if req.Header == nil {
				return stream.Send(&SnapshotResponse{
					Status:  SnapshotResponse_ERROR,
					Message: "client error: no header in first snapshot request message"})
			}
			rmr := req.Header.RaftMessageRequest
			t.recvMu.Lock()
			handler, ok := t.recvMu.handlers[rmr.ToReplica.StoreID]
			t.recvMu.Unlock()
			if !ok {
				log.Warningf(ctx, "unable to accept Raft message from %+v: no handler registered for %+v",
					rmr.FromReplica, rmr.ToReplica)
				return roachpb.NewStoreNotFoundError(rmr.ToReplica.StoreID)
			}
			return handler.HandleSnapshot(req.Header, stream)
		}()
	}); err != nil {
		return err
	}
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return nil
	case err := <-errCh:
		return err
	}
}
func (s *Server) reportUsage(ctx context.Context) {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(ctx, err)
		return
	}

	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil {
		if log.V(2) {
			// This is probably going to be relatively common in production
			// environments where network access is usually curtailed.
			log.Warning(ctx, "failed to report node usage metrics: ", err)
		}
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
// processIntentsAsync asynchronously processes intents which were
// encountered during another command but did not interfere with the
// execution of that command. This occurs in two cases: inconsistent
// reads and EndTransaction (which queues its own external intents for
// processing via this method). The two cases are handled somewhat
// differently and would be better served by different entry points,
// but combining them simplifies the plumbing necessary in Replica.
func (ir *intentResolver) processIntentsAsync(r *Replica, intents []intentsWithArg) {
	if r.store.TestingKnobs().DisableAsyncIntentResolution {
		return
	}
	now := r.store.Clock().Now()
	ctx := context.TODO()
	stopper := r.store.Stopper()

	for _, item := range intents {
		err := stopper.RunLimitedAsyncTask(
			ctx, ir.sem, false /* wait */, func(ctx context.Context) {
				ir.processIntents(ctx, r, item, now)
			})
		if err != nil {
			if err == stop.ErrThrottled {
				// A limited task was not available. Rather than waiting for
				// one, we reuse the current goroutine.
				ir.processIntents(ctx, r, item, now)
			} else {
				log.Warningf(ctx, "failed to resolve intents: %s", err)
				return
			}
		}
	}
}
// computePeriodicMetrics instructs each store to compute the value of
// complicated metrics.
func (n *Node) computePeriodicMetrics(ctx context.Context, tick int) error {
	return n.stores.VisitStores(func(store *storage.Store) error {
		if err := store.ComputeMetrics(ctx, tick); err != nil {
			log.Warningf(ctx, "%s: unable to compute metrics: %s", store, err)
		}
		return nil
	})
}
// computePeriodicMetrics instructs each store to compute the value of
// complicated metrics.
func (n *Node) computePeriodicMetrics(tick int) error {
	return n.stores.VisitStores(func(store *storage.Store) error {
		if err := store.ComputeMetrics(tick); err != nil {
			ctx := n.AnnotateCtx(context.TODO())
			log.Warningf(ctx, "%s: unable to compute metrics: %s", store, err)
		}
		return nil
	})
}
// SetStorage provides an instance of the Storage interface
// for reading and writing gossip bootstrap data from persistent
// storage. This should be invoked as early in the lifecycle of a
// gossip instance as possible, but can be called at any time.
func (g *Gossip) SetStorage(storage Storage) error {
	ctx := g.AnnotateCtx(context.TODO())
	// Maintain lock ordering.
	var storedBI BootstrapInfo
	if err := storage.ReadBootstrapInfo(&storedBI); err != nil {
		log.Warningf(ctx, "failed to read gossip bootstrap info: %s", err)
	}

	g.mu.Lock()
	defer g.mu.Unlock()
	g.storage = storage

	// Merge the stored bootstrap info addresses with any we've become
	// aware of through gossip.
	existing := map[string]struct{}{}
	makeKey := func(a util.UnresolvedAddr) string { return fmt.Sprintf("%s,%s", a.Network(), a.String()) }
	for _, addr := range g.bootstrapInfo.Addresses {
		existing[makeKey(addr)] = struct{}{}
	}
	for _, addr := range storedBI.Addresses {
		// If the address is new, and isn't our own address, add it.
		if _, ok := existing[makeKey(addr)]; !ok && addr != g.mu.is.NodeAddr {
			g.maybeAddBootstrapAddress(addr)
		}
	}
	// Persist merged addresses.
	if numAddrs := len(g.bootstrapInfo.Addresses); numAddrs > len(storedBI.Addresses) {
		if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
			log.Error(ctx, err)
		}
	}

	// Cycle through all persisted bootstrap hosts and add resolvers for
	// any which haven't already been added.
	newResolverFound := false
	for _, addr := range g.bootstrapInfo.Addresses {
		if !g.maybeAddResolver(addr) {
			continue
		}
		// If we find a new resolver, reset the resolver index so that the
		// next resolver we try is the first of the new resolvers.
		if !newResolverFound {
			newResolverFound = true
			g.resolverIdx = len(g.resolvers) - 1
		}
	}

	// If a new resolver was found, immediately signal bootstrap.
	if newResolverFound {
		if log.V(1) {
			log.Infof(ctx, "found new resolvers from storage; signalling bootstrap")
		}
		g.signalStalledLocked()
	}
	return nil
}
// Close finishes the reader.
func (fr *RocksDBSstFileReader) Close() {
	if fr.rocksDB == nil {
		return
	}
	fr.rocksDB.Close()
	fr.rocksDB = nil
	if err := os.RemoveAll(fr.dir); err != nil {
		log.Warningf(context.TODO(), "error removing temp rocksdb directory %q: %s", fr.dir, err)
	}
}
// bootstrapStores bootstraps uninitialized stores once the cluster
// and node IDs have been established for this node. Store IDs are
// allocated via a sequence id generator stored at a system key per
// node.
func (n *Node) bootstrapStores(
	ctx context.Context, bootstraps []*storage.Store, stopper *stop.Stopper,
) {
	if n.ClusterID == *uuid.EmptyUUID {
		panic("ClusterID missing during store bootstrap of auxiliary store")
	}

	// Bootstrap all waiting stores by allocating a new store id for
	// each and invoking store.Bootstrap() to persist.
	inc := int64(len(bootstraps))
	firstID, err := allocateStoreIDs(ctx, n.Descriptor.NodeID, inc, n.storeCfg.DB)
	if err != nil {
		log.Fatal(ctx, err)
	}
	sIdent := roachpb.StoreIdent{
		ClusterID: n.ClusterID,
		NodeID:    n.Descriptor.NodeID,
		StoreID:   firstID,
	}
	for _, s := range bootstraps {
		if err := s.Bootstrap(sIdent); err != nil {
			log.Fatal(ctx, err)
		}
		if err := s.Start(ctx, stopper); err != nil {
			log.Fatal(ctx, err)
		}
		n.addStore(s)
		sIdent.StoreID++
		log.Infof(ctx, "bootstrapped store %s", s)
		// Done regularly in Node.startGossip, but this cuts down the time
		// until this store is used for range allocations.
		if err := s.GossipStore(ctx); err != nil {
			log.Warningf(ctx, "error doing initial gossiping: %s", err)
		}
	}
	// Write a new status summary after all stores have been bootstrapped; this
	// helps the UI remain responsive when new nodes are added.
	if err := n.writeSummaries(ctx); err != nil {
		log.Warningf(ctx, "error writing node summary after store bootstrap: %s", err)
	}
}
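// Worked example (illustrative numbers): with three waiting stores and an
// allocated firstID of 7, the loop above assigns StoreIDs 7, 8, and 9,
// incrementing sIdent.StoreID after each bootstrap.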
// markDead sets the storeDetail to dead (inactive).
func (sd *storeDetail) markDead(foundDeadOn hlc.Timestamp) {
	sd.dead = true
	sd.foundDeadOn = foundDeadOn
	sd.timesDied++
	if sd.desc != nil {
		// sd.desc can still be nil if it was markedAlive and enqueued in
		// getStoreDetailLocked and never markedAlive again.
		log.Warningf(
			sd.ctx,
			"store %s on node %s is now considered offline",
			sd.desc.StoreID,
			sd.desc.Node.NodeID,
		)
	}
}
// If the time is greater than the timestamp stored at `key`, run `f`.
// Before running `f`, the timestamp is updated forward by a small amount via
// a compare-and-swap to ensure at-most-one concurrent execution. After `f`
// executes the timestamp is set to the next execution time.
// Returns how long until `f` should be run next (i.e. when this method should
// be called again).
func (s *Server) maybeRunPeriodicCheck(
	op string, key roachpb.Key, f func(context.Context),
) time.Duration {
	ctx, span := s.AnnotateCtxWithSpan(context.Background(), "op")
	defer span.Finish()

	// Add the op name to the log context.
	ctx = log.WithLogTag(ctx, op, nil)

	resp, err := s.db.Get(ctx, key)
	if err != nil {
		log.Infof(ctx, "error reading time: %s", err)
		return updateCheckRetryFrequency
	}

	// We should return early below if either the next check time is in the
	// future or if the atomic compare-and-set of that time failed (which
	// would happen if two nodes tried at the same time).
	if resp.Exists() {
		whenToCheck, pErr := resp.Value.GetTime()
		if pErr != nil {
			log.Warningf(ctx, "error decoding time: %s", pErr)
			return updateCheckRetryFrequency
		} else if delay := whenToCheck.Sub(timeutil.Now()); delay > 0 {
			return delay
		}

		nextRetry := whenToCheck.Add(updateCheckRetryFrequency)
		if err := s.db.CPut(ctx, key, nextRetry, whenToCheck); err != nil {
			if log.V(2) {
				log.Infof(ctx, "could not set next version check time (maybe another node checked?): %s", err)
			}
			return updateCheckRetryFrequency
		}
	} else {
		log.Infof(ctx, "no previous %s time", op)
		nextRetry := timeutil.Now().Add(updateCheckRetryFrequency)
		// CPut with `nil` prev value to assert that no other node has checked.
		if err := s.db.CPut(ctx, key, nextRetry, nil); err != nil {
			if log.V(2) {
				log.Infof(ctx, "could not set %s time (maybe another node checked?): %v", op, err)
			}
			return updateCheckRetryFrequency
		}
	}

	f(ctx)

	if err := s.db.Put(ctx, key, timeutil.Now().Add(updateCheckFrequency)); err != nil {
		log.Infof(ctx, "error updating %s time: %v", op, err)
	}
	return updateCheckFrequency
}
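// Caller sketch (hypothetical key and callback names, not from the source):
// the returned duration is meant to drive a reschedule loop, for example:
//
//	for {
//		wait := s.maybeRunPeriodicCheck("version check", versionCheckKey, s.checkForUpdates)
//		select {
//		case <-time.After(wait):
//		case <-s.stopper.ShouldStop():
//			return
//		}
//	}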
// TestConvergence verifies a 10 node gossip network converges within
// a fixed number of simulation cycles. It's really difficult to
// determine the right number for cycles because different things can
// happen during a single cycle, depending on how much CPU time is
// available. Eliminating this variability by getting more
// synchronization primitives in place for the simulation is possible,
// though two attempts so far have introduced more complexity into the
// actual production gossip code than seems worthwhile for a unittest.
func TestConvergence(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()

	network := simulation.NewNetwork(stopper, 10, true)

	const maxCycles = 100
	if connectedCycle := network.RunUntilFullyConnected(); connectedCycle > maxCycles {
		log.Warningf(context.TODO(), "expected a fully-connected network within %d cycles; took %d",
			maxCycles, connectedCycle)
	}
}
// reverseMutations reverses the direction of all the mutations with the
// mutationID. This is called after hitting an irrecoverable error while
// applying a schema change. If a column being added is reversed and dropped,
// all new indexes referencing the column will also be dropped.
func (sc *SchemaChanger) reverseMutations(causingError error) error {
	// Reverse the flow of the state machine.
	_, err := sc.leaseMgr.Publish(sc.tableID, func(desc *sqlbase.TableDescriptor) error {
		// Keep track of the column mutations being reversed so that indexes
		// referencing them can be dropped.
		columns := make(map[string]struct{})

		for i, mutation := range desc.Mutations {
			if mutation.MutationID != sc.mutationID {
				// Only reverse the first set of mutations if they have the
				// mutation ID we're looking for.
				break
			}
			desc.Mutations[i].ResumeSpan = roachpb.Span{}
			log.Warningf(context.TODO(), "reverse schema change mutation: %+v", mutation)
			switch mutation.Direction {
			case sqlbase.DescriptorMutation_ADD:
				desc.Mutations[i].Direction = sqlbase.DescriptorMutation_DROP
				// A column ADD being reversed gets placed in the map.
				if col := mutation.GetColumn(); col != nil {
					columns[col.Name] = struct{}{}
				}

			case sqlbase.DescriptorMutation_DROP:
				desc.Mutations[i].Direction = sqlbase.DescriptorMutation_ADD
			}
		}

		// Delete index mutations that reference any of the reversed columns.
		if len(columns) > 0 {
			sc.deleteIndexMutationsWithReversedColumns(desc, columns)
		}

		// Publish() will increment the version.
		return nil
	}, func(txn *client.Txn) error {
		// Log "Reverse Schema Change" event. Only the causing error and the
		// mutation ID are logged; this can be correlated with the DDL statement
		// that initiated the change using the mutation id.
		return MakeEventLogger(sc.leaseMgr).InsertEventRecord(txn,
			EventLogReverseSchemaChange,
			int32(sc.tableID),
			int32(sc.evalCtx.NodeID),
			struct {
				Error      string
				MutationID uint32
			}{fmt.Sprintf("%+v", causingError), uint32(sc.mutationID)},
		)
	})
	return err
}
// LookupReplica looks up replica by key [range]. Lookups are done
// by consulting each store in turn via Store.LookupReplica(key).
// Returns RangeID and replica on success; RangeKeyMismatch error
// if not found.
// If end is nil, a replica containing start is looked up.
// This is only for testing usage; performance doesn't matter.
func (ls *Stores) LookupReplica(
	start, end roachpb.RKey,
) (roachpb.RangeID, roachpb.ReplicaDescriptor, error) {
	ls.mu.RLock()
	defer ls.mu.RUnlock()
	var rangeID roachpb.RangeID
	var repDesc roachpb.ReplicaDescriptor
	var repDescFound bool
	for _, store := range ls.storeMap {
		replica := store.LookupReplica(start, nil)
		if replica == nil {
			continue
		}

		// Verify that the descriptor contains the entire range.
		if desc := replica.Desc(); !desc.ContainsKeyRange(start, end) {
			ctx := ls.AnnotateCtx(context.TODO())
			log.Warningf(ctx, "range not contained in one range: [%s,%s), but have [%s,%s)",
				start, end, desc.StartKey, desc.EndKey)
			err := roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), desc)
			return 0, roachpb.ReplicaDescriptor{}, err
		}

		rangeID = replica.RangeID

		var err error
		repDesc, err = replica.GetReplicaDescriptor()
		if err != nil {
			if _, ok := err.(*roachpb.RangeNotFoundError); ok {
				// We are not holding a lock across this block; the replica could have
				// been removed from the range (via down-replication) between the
				// LookupReplica and the GetReplicaDescriptor calls. In this case just
				// ignore this replica.
				continue
			}
			return 0, roachpb.ReplicaDescriptor{}, err
		}

		if repDescFound {
			// We already found the range; this should never happen outside
			// of tests.
			err := errors.Errorf("range %+v exists on additional store: %+v", replica, store)
			return 0, roachpb.ReplicaDescriptor{}, err
		}

		repDescFound = true
	}
	if !repDescFound {
		return 0, roachpb.ReplicaDescriptor{}, roachpb.NewRangeNotFoundError(0)
	}
	return rangeID, repDesc, nil
}
// getPhysicalClockLocked returns the current physical clock and checks for
// time jumps.
func (c *Clock) getPhysicalClockLocked() int64 {
	newTime := c.physicalClock()

	if c.mu.lastPhysicalTime != 0 {
		interval := c.mu.lastPhysicalTime - newTime
		if interval > int64(c.maxOffset/10) {
			c.mu.monotonicityErrorsCount++
			log.Warningf(context.TODO(), "backward time jump detected (%f seconds)",
				float64(newTime-c.mu.lastPhysicalTime)/1e9)
		}
	}

	c.mu.lastPhysicalTime = newTime
	return newTime
}
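// Worked example of the threshold above (illustrative numbers): with a
// maxOffset of 250ms, maxOffset/10 is 25ms, so only a backward jump larger
// than 25ms (25e6 nanoseconds) bumps monotonicityErrorsCount and logs a
// warning; smaller regressions pass silently.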
// startWriteSummaries begins periodically persisting status summaries for the
// node and its stores.
func (n *Node) startWriteSummaries(frequency time.Duration) {
	ctx := log.WithLogTag(n.AnnotateCtx(context.Background()), "summaries", nil)
	// Immediately record summaries once on server startup.
	n.stopper.RunWorker(func() {
		// Write a status summary immediately; this helps the UI remain
		// responsive when new nodes are added.
		if err := n.writeSummaries(ctx); err != nil {
			log.Warningf(ctx, "error recording initial status summaries: %s", err)
		}
		ticker := time.NewTicker(frequency)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if err := n.writeSummaries(ctx); err != nil {
					log.Warningf(ctx, "error recording status summaries: %s", err)
				}
			case <-n.stopper.ShouldStop():
				return
			}
		}
	})
}
func testClusterRecoveryInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	num := c.NumNodes()

	// One client for each node.
	initBank(t, c.PGUrl(ctx, 0))

	start := timeutil.Now()
	state := testState{
		t:        t,
		errChan:  make(chan error, num),
		teardown: make(chan struct{}),
		deadline: start.Add(cfg.Duration),
		clients:  make([]testClient, num),
	}

	for i := 0; i < num; i++ {
		state.clients[i].Lock()
		state.initClient(ctx, t, c, i)
		state.clients[i].Unlock()
		go transferMoneyLoop(ctx, i, &state, *numAccounts, *maxTransfer)
	}

	defer func() {
		<-state.teardown
	}()

	// Chaos monkey.
	rnd, seed := randutil.NewPseudoRand()
	log.Warningf(ctx, "monkey starts (seed %d)", seed)
	pickNodes := func() []int {
		return rnd.Perm(num)[:rnd.Intn(num)+1]
	}
	go chaosMonkey(ctx, &state, c, true, pickNodes, 0)

	waitClientsStop(ctx, num, &state, stall)

	// Verify accounts.
	verifyAccounts(t, &state.clients[0])

	elapsed := timeutil.Since(start)
	var count uint64
	counts := state.counts()
	for _, c := range counts {
		count += c
	}
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
// maybeAddResolver creates and adds a resolver for the specified
// address if one does not already exist. Returns whether a new
// resolver was added. The caller must hold the gossip mutex.
func (g *Gossip) maybeAddResolver(addr util.UnresolvedAddr) bool {
	if _, ok := g.resolverAddrs[addr]; ok {
		return false
	}
	ctx := g.AnnotateCtx(context.TODO())
	r, err := resolver.NewResolverFromUnresolvedAddr(addr)
	if err != nil {
		log.Warningf(ctx, "bad address %s: %s", addr, err)
		return false
	}
	g.resolvers = append(g.resolvers, r)
	g.resolverAddrs[addr] = r
	log.Eventf(ctx, "add resolver %s", r)
	return true
}