// maybeWarnAboutInit looks for signs indicating a cluster which // hasn't been initialized and warns. There's no absolutely sure way // to determine whether the current node is simply waiting to be // bootstrapped to an existing cluster vs. the operator having failed // to initialize the cluster via the "cockroach init" command, so // we can only warn. // // This method checks whether all gossip bootstrap hosts are // connected, and whether the node itself is a bootstrap host, but // there is still no sentinel gossip. func (g *Gossip) maybeWarnAboutInit(stopper *stop.Stopper) { stopper.RunWorker(func() { // Wait 5s before first check. select { case <-stopper.ShouldStop(): return case <-time.After(5 * time.Second): } retryOptions := retry.Options{ InitialBackoff: 5 * time.Second, // first backoff at 5s MaxBackoff: 60 * time.Second, // max backoff is 60s Multiplier: 2, // doubles Closer: stopper.ShouldStop(), // stop no matter what on stopper } // This will never error because of infinite retries. for r := retry.Start(retryOptions); r.Next(); { g.mu.Lock() hasConnections := g.outgoing.len()+g.incoming.len() > 0 hasSentinel := g.is.getInfo(KeySentinel) != nil triedAll := g.triedAll g.mu.Unlock() // If we have the sentinel, exit the retry loop. if hasSentinel { break } if !hasConnections { log.Warningf("not connected to gossip; check that gossip flag is set appropriately") } else if triedAll { log.Warningf("missing gossip sentinel; first range unavailable or cluster not initialized") } } }) }
// RefreshLeases starts a goroutine that refreshes the lease manager // leases for tables received in the latest system configuration via gossip. func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) { s.RunWorker(func() { descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID)) gossipUpdateC := gossip.RegisterSystemConfigChannel() for { select { case <-gossipUpdateC: cfg, _ := gossip.GetSystemConfig() if m.testingKnobs.GossipUpdateEvent != nil { m.testingKnobs.GossipUpdateEvent(cfg) } // Read all tables and their versions if log.V(2) { log.Info("received a new config; will refresh leases") } // Loop through the configuration to find all the tables. for _, kv := range cfg.Values { if !bytes.HasPrefix(kv.Key, descKeyPrefix) { continue } // Attempt to unmarshal config into a table/database descriptor. var descriptor sqlbase.Descriptor if err := kv.Value.GetProto(&descriptor); err != nil { log.Warningf("%s: unable to unmarshal descriptor %v", kv.Key, kv.Value) continue } switch union := descriptor.Union.(type) { case *sqlbase.Descriptor_Table: table := union.Table if err := table.Validate(); err != nil { log.Errorf("%s: received invalid table descriptor: %v", kv.Key, table) continue } if log.V(2) { log.Infof("%s: refreshing lease table: %d (%s), version: %d", kv.Key, table.ID, table.Name, table.Version) } // Try to refresh the table lease to one >= this version. if t := m.findTableState(table.ID, false /* create */, nil); t != nil { if err := t.purgeOldLeases( db, table.Deleted(), table.Version, m.LeaseStore); err != nil { log.Warningf("error purging leases for table %d(%s): %s", table.ID, table.Name, err) } } case *sqlbase.Descriptor_Database: // Ignore. } } if m.testingKnobs.TestingLeasesRefreshedEvent != nil { m.testingKnobs.TestingLeasesRefreshedEvent(cfg) } case <-s.ShouldStop(): return } } }) }
// runStart starts the cockroach node using --stores as the list of // storage devices ("stores") on this machine and --gossip as the list // of "well-known" hosts used to join this node to the cockroach // cluster via the gossip network. func runStart(cmd *cobra.Command, args []string) { info := util.GetBuildInfo() log.Infof("build Vers: %s", info.Vers) log.Infof("build Tag: %s", info.Tag) log.Infof("build Time: %s", info.Time) log.Infof("build Deps: %s", info.Deps) // Default user for servers. Context.User = security.NodeUser // First initialize the Context as it is used in other places. err := Context.Init("start") if err != nil { log.Errorf("failed to initialize context: %s", err) return } log.Info("starting cockroach cluster") stopper := util.NewStopper() stopper.AddWorker() s, err := server.NewServer(Context, stopper) if err != nil { log.Errorf("failed to start Cockroach server: %s", err) return } err = s.Start(false) if err != nil { log.Errorf("cockroach server exited with error: %s", err) return } signalCh := make(chan os.Signal, 1) signal.Notify(signalCh, os.Interrupt, os.Kill) // TODO(spencer): move this behind a build tag. signal.Notify(signalCh, syscall.SIGTERM) // Block until one of the signals above is received or the stopper // is stopped externally (for example, via the quit endpoint). select { case <-stopper.ShouldStop(): stopper.SetStopped() case <-signalCh: log.Infof("initiating graceful shutdown of server") stopper.SetStopped() go func() { s.Stop() }() } select { case <-signalCh: log.Warningf("second signal received, initiating hard shutdown") case <-time.After(time.Minute): log.Warningf("time limit reached, initiating hard shutdown") return case <-stopper.IsStopped(): log.Infof("server drained and shutdown completed") } log.Flush() }
func cutNetwork(t *testing.T, c cluster.Cluster, closer <-chan struct{}, partitions ...[]int) { addrs, addrsToNode := mustGetHosts(t, c) ipPartitions := make([][]iptables.IP, 0, len(partitions)) for _, partition := range partitions { ipPartition := make([]iptables.IP, 0, len(partition)) for _, nodeIndex := range partition { ipPartition = append(ipPartition, addrs[nodeIndex]) } ipPartitions = append(ipPartitions, ipPartition) } log.Warningf("partitioning: %v (%v)", partitions, ipPartitions) for host, cmds := range iptables.Rules(iptables.Bidirectional(ipPartitions...)) { for _, cmd := range cmds { if err := c.ExecRoot(addrsToNode[host], cmd); err != nil { t.Fatal(err) } } } <-closer for i := 0; i < c.NumNodes(); i++ { for _, cmd := range iptables.Reset() { if err := c.ExecRoot(i, cmd); err != nil { t.Fatal(err) } } } log.Warningf("resolved all partitions") }
// MIGRATION(tschottdorf): As of #7310, we make sure that a Replica always has // a complete Raft state on disk. Prior versions may not have that, which // causes issues due to the fact that we used to synthesize a TruncatedState // and do so no more. To make up for that, write a missing TruncatedState here. // That key is in the replicated state, but since during a cluster upgrade, all // nodes do it, it's fine (and we never CPut on that key, so anything in the // Raft pipeline will simply overwrite it). // // Migration(tschottdorf): See #6991. It's possible that the HardState is // missing after a snapshot was applied (so there is a TruncatedState). In this // case, synthesize a HardState (simply setting everything that was in the // snapshot to committed). Having lost the original HardState can theoretically // mean that the replica was further ahead or had voted, and so there's no // guarantee that this will be correct. But it will be correct in the majority // of cases, and some state *has* to be recovered. func migrate7310And6991(ctx context.Context, batch engine.ReadWriter, desc roachpb.RangeDescriptor) error { state, err := loadState(ctx, batch, &desc) if err != nil { return errors.Wrap(err, "could not migrate TruncatedState: %s") } if (*state.TruncatedState == roachpb.RaftTruncatedState{}) { state.TruncatedState.Term = raftInitialLogTerm state.TruncatedState.Index = raftInitialLogIndex state.RaftAppliedIndex = raftInitialLogIndex if _, err := saveState(ctx, batch, state); err != nil { return errors.Wrapf(err, "could not migrate TruncatedState to %+v", &state.TruncatedState) } log.Warningf(ctx, "migration: synthesized TruncatedState for %+v", desc) } hs, err := loadHardState(ctx, batch, desc.RangeID) if err != nil { return errors.Wrap(err, "unable to load HardState") } // Only update the HardState when there is a nontrivial Commit field. We // don't have a snapshot here, so we could wind up lowering the commit // index (which would error out and fatal us). 
if hs.Commit == 0 { log.Warningf(ctx, "migration: synthesized HardState for %+v", desc) if err := synthesizeHardState(ctx, batch, state, hs); err != nil { return errors.Wrap(err, "could not migrate HardState") } } return nil }
func cutNetwork(t *testing.T, c cluster.Cluster, closer <-chan struct{}, partitions ...[]int) { defer func() { if errs := restoreNetwork(t, c); len(errs) > 0 { t.Fatalf("errors restoring the network: %+v", errs) } }() addrs, addrsToNode := mustGetHosts(t, c) ipPartitions := make([][]iptables.IP, 0, len(partitions)) for _, partition := range partitions { ipPartition := make([]iptables.IP, 0, len(partition)) for _, nodeIndex := range partition { ipPartition = append(ipPartition, addrs[nodeIndex]) } ipPartitions = append(ipPartitions, ipPartition) } log.Warningf(context.TODO(), "partitioning: %v (%v)", partitions, ipPartitions) for host, cmds := range iptables.Rules(iptables.Bidirectional(ipPartitions...)) { for _, cmd := range cmds { if err := c.ExecRoot(addrsToNode[host], cmd); err != nil { t.Fatal(err) } } } <-closer log.Warningf(context.TODO(), "resolved all partitions") }
// maybeSignalStatusChangeLocked checks whether gossip should transition its // internal state from connected to stalled or vice versa. func (g *Gossip) maybeSignalStatusChangeLocked() { orphaned := g.outgoing.len()+g.mu.incoming.len() == 0 stalled := orphaned || g.mu.is.getInfo(KeySentinel) == nil if stalled { // We employ the stalled boolean to avoid filling logs with warnings. if !g.stalled { log.Eventf(g.ctx, "now stalled") if orphaned { if len(g.resolvers) == 0 { log.Warningf(g.ctx, "no resolvers found; use --join to specify a connected node") } else { log.Warningf(g.ctx, "no incoming or outgoing connections") } } else if len(g.resolversTried) == len(g.resolvers) { log.Warningf(g.ctx, "first range unavailable; resolvers exhausted") } else { log.Warningf(g.ctx, "first range unavailable; trying remaining resolvers") } } if len(g.resolvers) > 0 { g.signalStalledLocked() } } else { if g.stalled { log.Eventf(g.ctx, "connected") log.Infof(g.ctx, "node has connected to cluster via gossip") g.signalConnectedLocked() } g.maybeCleanupBootstrapAddressesLocked() } g.stalled = stalled }
// Send sends call to Cockroach via an RPC request. Errors which are retryable // are retried with backoff in a loop using the default retry options. Other // errors sending the request are retried indefinitely using the same client // command ID to avoid reporting failure when in fact the command may have gone // through and been executed successfully. We retry here to eventually get // through with the same client command ID and be given the cached response. func (s *Sender) Send(_ context.Context, call proto.Call) { var err error for r := retry.Start(s.retryOpts); r.Next(); { if !s.client.IsHealthy() { log.Warningf("client %s is unhealthy; retrying", s.client) continue } method := call.Args.Method().String() c := s.client.Go("Server."+method, call.Args, call.Reply, nil) <-c.Done err = c.Error if err != nil { // Assume all errors sending request are retryable. The actual // number of things that could go wrong is vast, but we don't // want to miss any which should in theory be retried with the // same client command ID. We log the error here as a warning so // there's visiblity that this is happening. Some of the errors // we'll sweep up in this net shouldn't be retried, but we can't // really know for sure which. log.Warningf("failed to send RPC request %s: %v", method, err) continue } // On successful post, we're done with retry loop. break } if err != nil { call.Reply.Header().SetGoError(err) } }
// warnAboutStall attempts to diagnose the cause of a gossip network // not being connected to the sentinel. This could happen in a network // partition, or because of misconfiguration. It's impossible to tell, // but we can warn appropriately. If there are no incoming or outgoing // connections, we warn about the --join flag being set. If we've // connected, and all resolvers have been tried, we warn about either // the first range not being available or else possible the cluster // never having been initialized. func (g *Gossip) warnAboutStall() { if g.outgoing.len()+g.incoming.len() == 0 { log.Warningf("not connected to cluster; use --join to specify a connected node") } else if len(g.resolversTried) == len(g.resolvers) { log.Warningf("first range unavailable or cluster not initialized") } else { log.Warningf("partition in gossip network; attempting new connection") } }
// warnAboutStall attempts to diagnose the cause of a gossip network // not being connected to the sentinel. This could happen in a network // partition, or because of misconfiguration. It's impossible to tell, // but we can warn appropriately. If there are no incoming or outgoing // connections, we warn about the --gossip flag being set. If we've // connected, and all resolvers have been tried, we warn about either // the first range not being available or else possible the cluster // never having been initialized. func (g *Gossip) warnAboutStall() { if g.outgoing.len()+g.incoming.len() == 0 { log.Warningf("not connected to gossip; check that gossip flag is set appropriately") } else if len(g.resolversTried) == len(g.resolvers) { log.Warningf("first range unavailable or cluster not initialized") } else { log.Warningf("partition in gossip network; attempting new connection") } }
func logLinuxStats() { if !log.V(1) { return } // We don't know which fields in struct mallinfo are most relevant to us yet, // so log it all for now. // // A major caveat is that mallinfo() returns stats only for the main arena. // glibc uses multiple allocation arenas to increase malloc performance for // multithreaded processes, so mallinfo may not report on significant parts // of the heap. mi := C.mallinfo() log.Infof("mallinfo stats: ordblks=%s, smblks=%s, hblks=%s, hblkhd=%s, usmblks=%s, fsmblks=%s, "+ "uordblks=%s, fordblks=%s, keepcost=%s", humanize.IBytes(uint64(mi.ordblks)), humanize.IBytes(uint64(mi.smblks)), humanize.IBytes(uint64(mi.hblks)), humanize.IBytes(uint64(mi.hblkhd)), humanize.IBytes(uint64(mi.usmblks)), humanize.IBytes(uint64(mi.fsmblks)), humanize.IBytes(uint64(mi.uordblks)), humanize.IBytes(uint64(mi.fordblks)), humanize.IBytes(uint64(mi.fsmblks))) // malloc_info() emits a *lot* of XML, partly because it generates stats for // all arenas, unlike mallinfo(). // // TODO(cdo): extract the useful bits and record to time-series DB. if !log.V(2) { return } // Create a memstream and make malloc_info() output to it. var buf *C.char var bufSize C.size_t memstream := C.open_memstream(&buf, &bufSize) if memstream == nil { log.Warning("couldn't open memstream") return } defer func() { C.fclose(memstream) C.free(unsafe.Pointer(buf)) }() if rc := C.malloc_info(0, memstream); rc != 0 { log.Warningf("malloc_info returned %d", rc) return } if rc := C.fflush(memstream); rc != 0 { log.Warningf("fflush returned %d", rc) return } log.Infof("malloc_info: %s", C.GoString(buf)) }
func (s *state) handleMessage(req *RaftMessageRequest) { // We only want to lazily create the group if it's not heartbeat-related; // our heartbeats are coalesced and contain a dummy GroupID. switch req.Message.Type { case raftpb.MsgHeartbeat: s.fanoutHeartbeat(req) return case raftpb.MsgHeartbeatResp: s.fanoutHeartbeatResponse(req) return } s.CacheReplicaDescriptor(req.GroupID, req.FromReplica) s.CacheReplicaDescriptor(req.GroupID, req.ToReplica) if g, ok := s.groups[req.GroupID]; ok { if g.replicaID > req.ToReplica.ReplicaID { log.Warningf("node %v: got message for group %s with stale replica ID %s (expected %s)", s.nodeID, req.GroupID, req.ToReplica.ReplicaID, g.replicaID) return } else if g.replicaID < req.ToReplica.ReplicaID { // The message has a newer ReplicaID than we know about. This // means that this node has been removed from a group and // re-added to it, before our GC process was able to remove the // remnants of the old group. log.Infof("node %v: got message for group %s with newer replica ID (%s vs %s), recreating group", s.nodeID, req.GroupID, req.ToReplica.ReplicaID, g.replicaID) if err := s.removeGroup(req.GroupID); err != nil { log.Warningf("Error removing group %d (in response to incoming message): %s", req.GroupID, err) return } if err := s.createGroup(req.GroupID, req.ToReplica.ReplicaID); err != nil { log.Warningf("Error recreating group %d (in response to incoming message): %s", req.GroupID, err) return } } } else { if log.V(1) { log.Infof("node %v: got message for unknown group %d; creating it", s.nodeID, req.GroupID) } if err := s.createGroup(req.GroupID, req.ToReplica.ReplicaID); err != nil { log.Warningf("Error creating group %d (in response to incoming message): %s", req.GroupID, err) return } } if err := s.multiNode.Step(context.Background(), uint64(req.GroupID), req.Message); err != nil { if log.V(4) { log.Infof("node %v: multinode step to group %v failed for message %.200s", s.nodeID, req.GroupID, raft.DescribeMessage(req.Message, 
s.EntryFormatter)) } } }
// RefreshLeases starts a goroutine that refreshes the lease manager // leases for tables received in the latest system configuration via gossip. func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) { s.RunWorker(func() { descKeyPrefix := keys.MakeTablePrefix(uint32(DescriptorTable.ID)) gossipUpdateC := gossip.RegisterSystemConfigChannel() for { select { case <-gossipUpdateC: cfg := *gossip.GetSystemConfig() m.updateSystemConfig(cfg) // Read all tables and their versions if log.V(2) { log.Info("received a new config %v", cfg) } // Loop through the configuration to find all the tables. for _, kv := range cfg.Values { if !bytes.HasPrefix(kv.Key, descKeyPrefix) { continue } // Attempt to unmarshal config into a table/database descriptor. var descriptor Descriptor if err := kv.Value.GetProto(&descriptor); err != nil { log.Warningf("%s: unable to unmarshal descriptor %v", kv.Key, kv.Value) continue } switch union := descriptor.Union.(type) { case *Descriptor_Table: table := union.Table if err := table.Validate(); err != nil { log.Errorf("%s: received invalid table descriptor: %v", kv.Key, table) continue } if log.V(2) { log.Infof("%s: refreshing lease table: %d, version: %d", kv.Key, table.ID, table.Version) } // Try to refresh the table lease to one >= this version. if err := m.refreshLease(db, table.ID, table.Version); err != nil { log.Warningf("%s: %v", kv.Key, err) } case *Descriptor_Database: // Ignore. } } case <-s.ShouldStop(): return } } }) }
// t.mu needs to be locked. func (t *tableState) removeLease(lease *LeaseState, store LeaseStore) { t.active.remove(lease) t.tableNameCache.remove(lease) // Release to the store asynchronously, without the tableState lock. err := t.stopper.RunAsyncTask(func() { if err := store.Release(lease); err != nil { log.Warningf(context.TODO(), "error releasing lease %q: %s", lease, err) } }) if log.V(1) && err != nil { log.Warningf(context.TODO(), "error removing lease from store: %s", err) } }
// ShouldRebalance returns whether the specified store should attempt to // rebalance a replica to another store. func (a Allocator) ShouldRebalance(storeID roachpb.StoreID) bool { if !a.options.AllowRebalance { return false } // In production, add some random jitter to shouldRebalance. if !a.options.Deterministic && a.randGen.Float32() > rebalanceShouldRebalanceChance { return false } if log.V(2) { log.Infof("ShouldRebalance from store %d", storeID) } storeDesc := a.storePool.getStoreDescriptor(storeID) if storeDesc == nil { if log.V(2) { log.Warningf( "ShouldRebalance couldn't find store with id %d in StorePool", storeID) } return false } sl := a.storePool.getStoreList(*storeDesc.CombinedAttrs(), []roachpb.NodeID{storeDesc.Node.NodeID}, a.options.Deterministic) // ShouldRebalance is true if a suitable replacement can be found. return a.balancer.improve(storeDesc, sl) != nil }
func parseOptions(data []byte) (sql.SessionArgs, error) { args := sql.SessionArgs{} buf := readBuffer{msg: data} for { key, err := buf.getString() if err != nil { return sql.SessionArgs{}, util.Errorf("error reading option key: %s", err) } if len(key) == 0 { break } value, err := buf.getString() if err != nil { return sql.SessionArgs{}, util.Errorf("error reading option value: %s", err) } switch key { case "database": args.Database = value case "user": args.User = value default: if log.V(1) { log.Warningf("unrecognized configuration parameter %q", key) } } } return args, nil }
// Batch sends a request to Cockroach via RPC. Errors which are retryable are // retried with backoff in a loop using the default retry options. Other errors // sending the request are retried indefinitely using the same client command // ID to avoid reporting failure when in fact the command may have gone through // and been executed successfully. We retry here to eventually get through with // the same client command ID and be given the cached response. func (s *rpcSender) Send(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) { var err error var br proto.BatchResponse for r := retry.Start(s.retryOpts); r.Next(); { select { case <-s.client.Healthy(): default: err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method) log.Warning(err) continue } if err = s.client.Call(method, &ba, &br); err != nil { br.Reset() // don't trust anyone. // Assume all errors sending request are retryable. The actual // number of things that could go wrong is vast, but we don't // want to miss any which should in theory be retried with the // same client command ID. We log the error here as a warning so // there's visiblity that this is happening. Some of the errors // we'll sweep up in this net shouldn't be retried, but we can't // really know for sure which. log.Warningf("failed to send RPC request %s: %s", method, err) continue } // On successful post, we're done with retry loop. break } if err != nil { return nil, proto.NewError(err) } pErr := br.Error br.Error = nil return &br, pErr }
// pushTxn attempts to abort the txn via push. If the transaction // cannot be aborted, the oldestIntentNanos value is atomically // updated to the min of oldestIntentNanos and the intent's // timestamp. The wait group is signaled on completion. func (gcq *gcQueue) pushTxn(repl *Replica, now roachpb.Timestamp, txn *roachpb.Transaction, updateOldestIntent func(int64), wg *sync.WaitGroup) { defer wg.Done() // signal wait group always on completion if log.V(1) { log.Infof("pushing txn %s ts=%s", txn, txn.OrigTimestamp) } // Attempt to push the transaction which created the intent. pushArgs := &roachpb.PushTxnRequest{ RequestHeader: roachpb.RequestHeader{ Key: txn.Key, }, Now: now, PusherTxn: roachpb.Transaction{Priority: roachpb.MaxPriority}, PusheeTxn: *txn, PushType: roachpb.ABORT_TXN, } b := &client.Batch{} b.InternalAddRequest(pushArgs) br, err := repl.rm.DB().RunWithResponse(b) if err != nil { log.Warningf("push of txn %s failed: %s", txn, err) updateOldestIntent(txn.OrigTimestamp.WallTime) return } // Update the supplied txn on successful push. *txn = *br.Responses[0].GetInner().(*roachpb.PushTxnResponse).PusheeTxn }
// maybeWarnAboutInit looks for signs indicating a cluster which // hasn't been initialized and warns. There's no absolutely sure way // to determine whether the current node is simply waiting to be // bootstrapped to an existing cluster vs. the operator having failed // to initialize the cluster via the "cockroach init" command, so // we can only warn. // // This method checks whether all gossip bootstrap hosts are // connected, and whether the node itself is a bootstrap host, but // there is still no sentinel gossip. func (g *Gossip) maybeWarnAboutInit(stopper *stop.Stopper) { stopper.RunWorker(func() { // Wait 5s before first check. select { case <-stopper.ShouldStop(): return case <-time.After(5 * time.Second): } retryOptions := retry.Options{ InitialBackoff: 5 * time.Second, // first backoff at 5s MaxBackoff: 60 * time.Second, // max backoff is 60s Multiplier: 2, // doubles Stopper: stopper, // stop no matter what on stopper } // This will never error because of infinite retries. for r := retry.Start(retryOptions); r.Next(); { g.mu.Lock() hasSentinel := g.is.getInfo(KeySentinel) != nil triedAll := g.triedAll g.mu.Unlock() // If we have the sentinel, exit the retry loop. if hasSentinel { break } // Otherwise, if all bootstrap hosts are connected, warn. if triedAll { log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " + "Use \"cockroach init\" to initialize.") } } }) }
// lookupReplica looks up replica by key [range]. Lookups are done
// by consulting each store in turn via Store.LookupReplica(start, end).
// Returns RangeID and replica on success; RangeKeyMismatch error
// if not found. An error is also returned when the range is found on more
// than one store, or when fetching the replica descriptor fails with anything
// other than *errReplicaNotInRange.
// This is only for testing usage; performance doesn't matter.
func (ls *Stores) lookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, replica *roachpb.ReplicaDescriptor, err error) {
	// Hold the read lock for the whole scan of the store map.
	ls.mu.RLock()
	defer ls.mu.RUnlock()
	var rng *Replica
	// partialDesc is set when a store holds a range containing start but not
	// the whole [start, end) span; it is passed to the mismatch error below.
	var partialDesc *roachpb.RangeDescriptor
	for _, store := range ls.storeMap {
		rng = store.LookupReplica(start, end)
		if rng == nil {
			// No single range on this store covers the span; check whether
			// one at least contains the start key.
			if tmpRng := store.LookupReplica(start, nil); tmpRng != nil {
				log.Warningf("range not contained in one range: [%s,%s), but have [%s,%s)",
					start, end, tmpRng.Desc().StartKey, tmpRng.Desc().EndKey)
				partialDesc = tmpRng.Desc()
				// Stop scanning the remaining stores.
				break
			}
			continue
		}
		if replica == nil {
			rangeID = rng.RangeID
			replica, err = rng.GetReplica()
			if err != nil {
				// *errReplicaNotInRange is tolerated here and the scan moves
				// on to the next store; any other error aborts the lookup.
				if _, ok := err.(*errReplicaNotInRange); !ok {
					return 0, nil, err
				}
			}
			continue
		}
		// A replica was already recorded from an earlier store.
		// Should never happen outside of tests.
		return 0, nil, util.Errorf(
			"range %+v exists on additional store: %+v", rng, store)
	}
	// With no replica found, report a key mismatch (replacing any tolerated
	// error from above).
	if replica == nil {
		err = roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), partialDesc)
	}
	return rangeID, replica, err
}
// Send sends call to Cockroach via an RPC request. Errors which are retryable // are retried with backoff in a loop using the default retry options. Other // errors sending the request are retried indefinitely using the same client // command ID to avoid reporting failure when in fact the command may have gone // through and been executed successfully. We retry here to eventually get // through with the same client command ID and be given the cached response. func (s *rpcSender) Send(_ context.Context, call proto.Call) { method := fmt.Sprintf("Server.%s", call.Args.Method()) var err error for r := retry.Start(s.retryOpts); r.Next(); { select { case <-s.client.Healthy(): default: err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method) log.Warning(err) continue } if err = s.client.Call(method, call.Args, call.Reply); err != nil { // Assume all errors sending request are retryable. The actual // number of things that could go wrong is vast, but we don't // want to miss any which should in theory be retried with the // same client command ID. We log the error here as a warning so // there's visiblity that this is happening. Some of the errors // we'll sweep up in this net shouldn't be retried, but we can't // really know for sure which. log.Warningf("failed to send RPC request %s: %s", method, err) continue } // On successful post, we're done with retry loop. break } if err != nil { call.Reply.Header().SetGoError(err) } }
// EvictCachedRangeDescriptor will evict any cached range descriptors // for the given key. It is intended that this method be called from a // consumer of rangeDescriptorCache if the returned range descriptor is // discovered to be stale. // seenDesc should always be passed in and is used as the basis of a // compare-and-evict (as pointers); if it is nil, eviction is unconditional // but a warning will be logged. func (rdc *rangeDescriptorCache) EvictCachedRangeDescriptor(descKey proto.Key, seenDesc *proto.RangeDescriptor) { if seenDesc == nil { log.Warningf("compare-and-evict for key %s with nil descriptor; clearing unconditionally", descKey) } rdc.rangeCacheMu.Lock() defer rdc.rangeCacheMu.Unlock() rngKey, cachedDesc := rdc.getCachedRangeDescriptorLocked(descKey) // Note that we're doing a "compare-and-erase": If seenDesc is not nil, // we want to clean the cache only if it equals the cached range // descriptor as a pointer. If not, then likely some other caller // already evicted previously, and we can save work by not doing it // again (which would prompt another expensive lookup). if seenDesc != nil && seenDesc != cachedDesc { return } for !bytes.Equal(descKey, proto.KeyMin) { if log.V(2) { log.Infof("evict cached descriptor: key=%s desc=%s\n%s", descKey, cachedDesc, rdc.stringLocked()) } else if log.V(1) { log.Infof("evict cached descriptor: key=%s desc=%s", descKey, cachedDesc) } rdc.rangeCache.Del(rngKey) // Retrieve the metadata range key for the next level of metadata, and // evict that key as well. This loop ends after the meta1 range, which // returns KeyMin as its metadata key. descKey = keys.RangeMetaKey(descKey) rngKey, cachedDesc = rdc.getCachedRangeDescriptorLocked(descKey) } }
func (l *LocalCluster) createNetwork() { l.panicOnStop() net, err := l.client.NetworkInspect(context.Background(), networkName) if err == nil { // We need to destroy the network and any running containers inside of it. for containerID := range net.Containers { // This call could fail if the container terminated on its own after we call // NetworkInspect, but the likelihood of this seems low. If this line creates // a lot of panics we should do more careful error checking. maybePanic(l.client.ContainerKill(context.Background(), containerID, "9")) } maybePanic(l.client.NetworkRemove(context.Background(), networkName)) } else if !client.IsErrNotFound(err) { panic(err) } resp, err := l.client.NetworkCreate(context.Background(), networkName, types.NetworkCreate{ Driver: "bridge", // Docker gets very confused if two networks have the same name. CheckDuplicate: true, }) maybePanic(err) if resp.Warning != "" { log.Warningf("creating network: %s", resp.Warning) } l.networkID = resp.ID }
// pushTxn attempts to abort the txn via push. The wait group is signaled on // completion. func (gcq *gcQueue) pushTxn(repl *Replica, now roachpb.Timestamp, txn *roachpb.Transaction, typ roachpb.PushTxnType, wg *sync.WaitGroup) { defer wg.Done() // signal wait group always on completion if log.V(1) { gcq.eventLog.Infof(true, "pushing txn %s ts=%s", txn, txn.OrigTimestamp) } // Attempt to push the transaction which created the intent. pushArgs := &roachpb.PushTxnRequest{ Span: roachpb.Span{ Key: txn.Key, }, Now: now, PusherTxn: roachpb.Transaction{Priority: roachpb.MaxUserPriority}, PusheeTxn: txn.TxnMeta, PushType: typ, } b := &client.Batch{} b.InternalAddRequest(pushArgs) br, err := repl.store.DB().RunWithResponse(b) if err != nil { log.Warningf("push of txn %s failed: %s", txn, err) return } // Update the supplied txn on successful push. *txn = br.Responses[0].GetInner().(*roachpb.PushTxnResponse).PusheeTxn }
// pushTxn attempts to abort the txn via push. If the transaction // cannot be aborted, the oldestIntentNanos value is atomically // updated to the min of oldestIntentNanos and the intent's // timestamp. The wait group is signaled on completion. func (gcq *gcQueue) pushTxn(repl *Replica, now proto.Timestamp, txn *proto.Transaction, updateOldestIntent func(int64), wg *sync.WaitGroup) { defer wg.Done() // signal wait group always on completion if log.V(1) { log.Infof("pushing txn %s ts=%s", txn, txn.OrigTimestamp) } // Attempt to push the transaction which created the intent. pushArgs := &proto.PushTxnRequest{ RequestHeader: proto.RequestHeader{ Timestamp: now, Key: txn.Key, User: security.RootUser, UserPriority: gogoproto.Int32(proto.MaxPriority), Txn: nil, }, Now: now, PusheeTxn: *txn, PushType: proto.ABORT_TXN, } pushReply := &proto.PushTxnResponse{} b := &client.Batch{} b.InternalAddCall(proto.Call{Args: pushArgs, Reply: pushReply}) if err := repl.rm.DB().Run(b); err != nil { log.Warningf("push of txn %s failed: %s", txn, err) updateOldestIntent(txn.OrigTimestamp.WallTime) return } // Update the supplied txn on successful push. *txn = *pushReply.PusheeTxn }
// deleteIndexMutationsWithReversedColumns deletes index mutations with a // different mutationID than the schema changer and a reference to one of the // reversed columns. func (sc *SchemaChanger) deleteIndexMutationsWithReversedColumns( desc *sqlbase.TableDescriptor, columns map[string]struct{}, ) { newMutations := make([]sqlbase.DescriptorMutation, 0, len(desc.Mutations)) for _, mutation := range desc.Mutations { if mutation.MutationID != sc.mutationID { if idx := mutation.GetIndex(); idx != nil { deleteMutation := false for _, name := range idx.ColumnNames { if _, ok := columns[name]; ok { // Such an index mutation has to be with direction ADD and // in the DELETE_ONLY state. Live indexes referencing live // columns cannot be deleted and thus never have direction // DROP. All mutations with the ADD direction start off in // the DELETE_ONLY state. if mutation.Direction != sqlbase.DescriptorMutation_ADD || mutation.State != sqlbase.DescriptorMutation_DELETE_ONLY { panic(fmt.Sprintf("mutation in bad state: %+v", mutation)) } log.Warningf(context.TODO(), "delete schema change mutation: %+v", mutation) deleteMutation = true break } } if deleteMutation { continue } } } newMutations = append(newMutations, mutation) } // Reset mutations. desc.Mutations = newMutations }
// lookupReplica looks up replica by key [range]. Lookups are done // by consulting each store in turn via Store.LookupRange(key). // Returns RangeID and replica on success; RangeKeyMismatch error // if not found. // This is only for testing usage; performance doesn't matter. func (ls *Stores) lookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, replica *roachpb.ReplicaDescriptor, pErr *roachpb.Error) { ls.mu.RLock() defer ls.mu.RUnlock() var rng *Replica for _, store := range ls.storeMap { rng = store.LookupReplica(start, end) if rng == nil { if tmpRng := store.LookupReplica(start, nil); tmpRng != nil { log.Warningf(fmt.Sprintf("range not contained in one range: [%s,%s), but have [%s,%s)", start, end, tmpRng.Desc().StartKey, tmpRng.Desc().EndKey)) } continue } if replica == nil { rangeID = rng.RangeID replica = rng.GetReplica() continue } // Should never happen outside of tests. return 0, nil, roachpb.NewErrorf( "range %+v exists on additional store: %+v", rng, store) } if replica == nil { pErr = roachpb.NewError(roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), nil)) } return rangeID, replica, pErr }
func (c *v3Conn) parseOptions(data []byte) error { buf := readBuffer{msg: data} for { key, err := buf.getString() if err != nil { return util.Errorf("error reading option key: %s", err) } if len(key) == 0 { return nil } value, err := buf.getString() if err != nil { return util.Errorf("error reading option value: %s", err) } switch key { case "database": c.opts.database = value case "user": c.opts.user = value default: if log.V(1) { log.Warningf("unrecognized configuration parameter %q", key) } } } }
// updateRangeInfo is called whenever a range is updated by ApplySnapshot // or is created by range splitting to setup the fields which are // uninitialized or need updating. func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error { // RangeMaxBytes should be updated by looking up Zone Config in two cases: // 1. After snapshot applying, if no updating of zone config // for this key range, then maxBytes of this range will not // be updated. // 2. After a new range is created by range splition, just // copying maxBytes from the original range does not work // since the original range and the new range might belong // to different zones. // Load the system config. cfg, ok := r.store.Gossip().GetSystemConfig() if !ok { // This could be before the system config was ever gossiped, // or it expired. Let the gossip callback set the info. log.Warningf("no system config available, cannot determine range MaxBytes") return nil } // Find zone config for this range. zone, err := cfg.GetZoneConfigForKey(desc.StartKey) if err != nil { return util.Errorf("failed to lookup zone config for Range %s: %s", r, err) } r.SetMaxBytes(zone.RangeMaxBytes) return nil }
// updateRangeInfo is called whenever a range is updated by ApplySnapshot // or is created by range splitting to setup the fields which are // uninitialized or need updating. func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error { // RangeMaxBytes should be updated by looking up Zone Config in two cases: // 1. After applying a snapshot, if the zone config was not updated for // this key range, then maxBytes of this range will not be updated either. // 2. After a new range is created by a split, only copying maxBytes from // the original range wont work as the original and new ranges might belong // to different zones. // Load the system config. cfg, ok := r.store.Gossip().GetSystemConfig() if !ok { // This could be before the system config was ever gossiped, // or it expired. Let the gossip callback set the info. log.Warningf(context.TODO(), "%s: no system config available, cannot determine range MaxBytes", r) return nil } // Find zone config for this range. zone, err := cfg.GetZoneConfigForKey(desc.StartKey) if err != nil { return errors.Errorf("%s: failed to lookup zone config: %s", r, err) } r.SetMaxBytes(zone.RangeMaxBytes) return nil }