// initNodeID updates the internal NodeDescriptor with the given ID. If zero is
// supplied, a new NodeID is allocated with the first invocation. For all other
// values, the supplied ID is stored into the descriptor (unless one has been
// set previously, in which case a fatal error occurs).
//
// Upon setting a new NodeID, the descriptor is gossiped and the NodeID is
// stored into the gossip instance.
func (n *Node) initNodeID(id roachpb.NodeID) {
	ctx := n.AnnotateCtx(context.TODO())
	if id < 0 {
		log.Fatalf(ctx, "NodeID must not be negative")
	}

	if o := n.Descriptor.NodeID; o > 0 {
		if id == 0 {
			return
		}
		log.Fatalf(ctx, "cannot initialize NodeID to %d, already have %d", id, o)
	}
	var err error
	if id == 0 {
		ctxWithSpan, span := n.AnnotateCtxWithSpan(ctx, "alloc-node-id")
		id, err = allocateNodeID(ctxWithSpan, n.storeCfg.DB)
		if err != nil {
			log.Fatal(ctxWithSpan, err)
		}
		log.Infof(ctxWithSpan, "new node allocated ID %d", id)
		if id == 0 {
			log.Fatal(ctxWithSpan, "new node allocated illegal ID 0")
		}
		span.Finish()
		n.storeCfg.Gossip.NodeID.Set(ctx, id)
	} else {
		log.Infof(ctx, "node ID %d initialized", id)
	}
	// Gossip the node descriptor to make this node addressable by node ID.
	n.Descriptor.NodeID = id
	if err = n.storeCfg.Gossip.SetNodeDescriptor(&n.Descriptor); err != nil {
		log.Fatalf(ctx, "couldn't gossip descriptor for node %d: %s", n.Descriptor.NodeID, err)
	}
}
func (z *zeroSum) verify(d time.Duration) {
	for {
		time.Sleep(d)

		// Grab the count of accounts from committed transactions first. The number
		// of accounts found by the SELECT should be at least this number.
		committedAccounts := uint64(z.accountsLen())

		q := `SELECT count(*), sum(balance) FROM accounts`
		var accounts uint64
		var total int64
		db := z.DB[z.RandNode(rand.Intn)]
		if err := db.QueryRow(q).Scan(&accounts, &total); err != nil {
			z.maybeLogError(err)
			continue
		}
		if total != 0 {
			log.Fatalf(context.Background(), "unexpected total balance %d", total)
		}
		if accounts < committedAccounts {
			log.Fatalf(context.Background(), "expected at least %d accounts, but found %d",
				committedAccounts, accounts)
		}
	}
}
func (c *Cluster) isReplicated() (bool, string) {
	db := c.Clients[0]
	rows, err := db.Scan(context.Background(), keys.Meta2Prefix, keys.Meta2Prefix.PrefixEnd(), 100000)
	if err != nil {
		if IsUnavailableError(err) {
			return false, ""
		}
		log.Fatalf(context.Background(), "scan failed: %s\n", err)
	}

	var buf bytes.Buffer
	tw := tabwriter.NewWriter(&buf, 2, 1, 2, ' ', 0)
	done := true
	for _, row := range rows {
		desc := &roachpb.RangeDescriptor{}
		if err := row.ValueProto(desc); err != nil {
			log.Fatalf(context.Background(), "%s: unable to unmarshal range descriptor\n", row.Key)
			continue
		}
		var storeIDs []roachpb.StoreID
		for _, replica := range desc.Replicas {
			storeIDs = append(storeIDs, replica.StoreID)
		}
		fmt.Fprintf(tw, "\t%s\t%s\t[%d]\t%d\n",
			desc.StartKey, desc.EndKey, desc.RangeID, storeIDs)
		if len(desc.Replicas) != 3 {
			done = false
		}
	}
	_ = tw.Flush()
	return done, buf.String()
}
// Start starts a node.
func (n *Node) Start() {
	n.Lock()
	defer n.Unlock()

	if n.cmd != nil {
		return
	}

	n.cmd = exec.Command(n.args[0], n.args[1:]...)
	n.cmd.Env = os.Environ()
	n.cmd.Env = append(n.cmd.Env, n.env...)

	stdoutPath := filepath.Join(n.logDir, "stdout")
	stdout, err := os.OpenFile(stdoutPath,
		os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		log.Fatalf(context.Background(), "unable to open file %s: %s", stdoutPath, err)
	}
	n.cmd.Stdout = stdout

	stderrPath := filepath.Join(n.logDir, "stderr")
	stderr, err := os.OpenFile(stderrPath,
		os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		log.Fatalf(context.Background(), "unable to open file %s: %s", stderrPath, err)
	}
	n.cmd.Stderr = stderr

	err = n.cmd.Start()
	if n.cmd.Process != nil {
		log.Infof(context.Background(), "process %d started: %s",
			n.cmd.Process.Pid, strings.Join(n.args, " "))
	}
	if err != nil {
		log.Infof(context.Background(), "%v", err)
		_ = stdout.Close()
		_ = stderr.Close()
		return
	}

	go func(cmd *exec.Cmd) {
		if err := cmd.Wait(); err != nil {
			log.Errorf(context.Background(), "waiting for command: %v", err)
		}
		_ = stdout.Close()
		_ = stderr.Close()

		ps := cmd.ProcessState
		sy := ps.Sys().(syscall.WaitStatus)

		log.Infof(context.Background(), "Process %d exited with status %d",
			ps.Pid(), sy.ExitStatus())
		log.Infof(context.Background(), ps.String())

		n.Lock()
		n.cmd = nil
		n.Unlock()
	}(n.cmd)
}
func main() {
	// Seed the random number generator for non-determinism across
	// multiple runs.
	randutil.SeedForTests()

	if f := flag.Lookup("alsologtostderr"); f != nil {
		fmt.Println("Starting simulation. Add -alsologtostderr to see progress.")
	}
	flag.Parse()

	dirName, err := ioutil.TempDir("", "gossip-simulation-")
	if err != nil {
		log.Fatalf(context.TODO(), "could not create temporary directory for gossip simulation output: %s", err)
	}

	// Simulation callbacks to run the simulation for cycleCount
	// cycles. At each cycle % outputEvery, a dot file showing the
	// state of the network graph is output.
	nodeCount := 3
	switch *size {
	case "tiny":
		// Use default parameters.
	case "small":
		nodeCount = 10
	case "medium":
		nodeCount = 25
	case "large":
		nodeCount = 50
	case "huge":
		nodeCount = 100
	case "ginormous":
		nodeCount = 250
	default:
		log.Fatalf(context.TODO(), "unknown simulation size: %s", *size)
	}

	edgeSet := make(map[string]edge)

	stopper := stop.NewStopper()
	defer stopper.Stop()
	n := simulation.NewNetwork(stopper, nodeCount, true)
	n.SimulateNetwork(
		func(cycle int, network *simulation.Network) bool {
			// Output dot graph.
			dotFN := fmt.Sprintf("%s/sim-cycle-%03d.dot", dirName, cycle)
			_, quiescent := outputDotFile(dotFN, cycle, network, edgeSet)
			// Run until network has quiesced.
			return !quiescent
		},
	)

	// Output instructions for viewing graphs.
	fmt.Printf("To view simulation graph output run (you must install graphviz):\n\nfor f in %s/*.dot ; do circo $f -Tpng -o $f.png ; echo $f.png ; done\n", dirName)
}
// Set sets the current node ID. If it is already set, the value must match.
func (n *NodeIDContainer) Set(ctx context.Context, val roachpb.NodeID) {
	if val <= 0 {
		log.Fatalf(ctx, "trying to set invalid NodeID: %d", val)
	}
	oldVal := atomic.SwapInt32(&n.nodeID, int32(val))
	if oldVal == 0 {
		log.Infof(ctx, "NodeID set to %d", val)
	} else if oldVal != int32(val) {
		log.Fatalf(ctx, "different NodeIDs set: %d, then %d", oldVal, val)
	}
}
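// Illustrative sketch (not part of the original source): exercising the Set
// contract documented above. The helper name initNodeIDTwice and the example
// ID value are hypothetical; only NodeIDContainer.Set is taken from the code
// above.
func initNodeIDTwice(ctx context.Context, n *NodeIDContainer) {
	// First call: stores the ID and logs "NodeID set to 7".
	n.Set(ctx, 7)
	// Second call with the same value: accepted, nothing changes.
	n.Set(ctx, 7)
	// A call with a different value (e.g. n.Set(ctx, 8)) would hit the
	// "different NodeIDs set" fatal error.
}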
func (as *nodeSet) updateGauge() {
	if as.placeholders < 0 {
		log.Fatalf(context.TODO(),
			"nodeSet.placeholders should never be less than 0; gossip logic is broken %+v", as)
	}
	newTotal := as.len()
	if newTotal > as.maxSize {
		log.Fatalf(context.TODO(),
			"too many nodes (%d) in nodeSet (maxSize=%d); gossip logic is broken: %+v",
			newTotal, as.maxSize, as)
	}
	as.gauge.Update(int64(newTotal))
}
func (c *Cluster) makeClient(nodeIdx int) *client.DB {
	sender, err := client.NewSender(c.rpcCtx, c.RPCAddr(nodeIdx))
	if err != nil {
		log.Fatalf(context.Background(), "failed to initialize KV client: %s", err)
	}
	return client.NewDB(sender)
}
// Batch implements the roachpb.InternalServer interface.
func (n *Node) Batch(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	growStack()

	ctx = n.AnnotateCtx(ctx)

	br, err := n.batchInternal(ctx, args)

	// We always return errors via BatchResponse.Error so structure is
	// preserved; plain errors are presumed to be from the RPC
	// framework and not from cockroach.
	if err != nil {
		if br == nil {
			br = &roachpb.BatchResponse{}
		}
		if br.Error != nil {
			log.Fatalf(
				ctx, "attempting to return both a plain error (%s) and roachpb.Error (%s)", err, br.Error,
			)
		}
		br.Error = roachpb.NewError(err)
	}
	return br, nil
}
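// Illustrative sketch (not part of the original source): how a caller might
// consume Batch under the convention documented above, where cockroach-level
// errors travel in BatchResponse.Error and the plain error return is presumed
// to come from the RPC framework. The helper name callBatch is hypothetical.
func callBatch(
	ctx context.Context, n *Node, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	br, err := n.Batch(ctx, args)
	if err != nil {
		// Transport-level failure; wrap it so the caller sees a single error type.
		return nil, roachpb.NewError(err)
	}
	// Structured errors, if any, are carried in the response itself.
	return br, br.Error
}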
func (c *Cluster) makeStatus(nodeIdx int) serverpb.StatusClient {
	conn, err := c.rpcCtx.GRPCDial(c.RPCAddr(nodeIdx))
	if err != nil {
		log.Fatalf(context.Background(), "failed to initialize status client: %s", err)
	}
	return serverpb.NewStatusClient(conn)
}
func Example_user_insecure() {
	s, err := serverutils.StartServerRaw(
		base.TestServerArgs{Insecure: true})
	if err != nil {
		log.Fatalf(context.Background(), "Could not start server: %v", err)
	}
	defer s.Stopper().Stop()
	c := cliTest{TestServer: s.(*server.TestServer), cleanupFunc: func() {}}

	// Since util.IsolatedTestAddr is used on Linux in insecure test clusters,
	// we have to reset the advertiseHost so that the value from previous
	// tests is not used to construct an incorrect postgres URL by the client.
	advertiseHost = ""

	// No prompting for password in insecure mode.
	c.Run("user set foo")
	c.Run("user ls --pretty")

	// Output:
	// user set foo
	// INSERT 1
	// user ls --pretty
	// +----------+
	// | username |
	// +----------+
	// | foo      |
	// +----------+
	// (1 row)
}
// NewNetwork creates nodeCount gossip nodes.
func NewNetwork(stopper *stop.Stopper, nodeCount int, createResolvers bool) *Network {
	log.Infof(context.TODO(), "simulating gossip network with %d nodes", nodeCount)

	n := &Network{
		Nodes:   []*Node{},
		Stopper: stopper,
	}
	n.rpcContext = rpc.NewContext(
		log.AmbientContext{},
		&base.Config{Insecure: true},
		hlc.NewClock(hlc.UnixNano, time.Nanosecond),
		n.Stopper,
	)
	var err error
	n.tlsConfig, err = n.rpcContext.GetServerTLSConfig()
	if err != nil {
		log.Fatal(context.TODO(), err)
	}

	for i := 0; i < nodeCount; i++ {
		node, err := n.CreateNode()
		if err != nil {
			log.Fatal(context.TODO(), err)
		}
		// Build a resolver for each instance or we'll get data races.
		if createResolvers {
			r, err := resolver.NewResolverFromAddress(n.Nodes[0].Addr())
			if err != nil {
				log.Fatalf(context.TODO(), "bad gossip address %s: %s", n.Nodes[0].Addr(), err)
			}
			node.Gossip.SetResolvers([]resolver.Resolver{r})
		}
	}
	return n
}
func Example_node() {
	c, err := newCLITest(nil, false)
	if err != nil {
		panic(err)
	}
	defer c.stop(true)

	// Refresh time series data, which is required to retrieve stats.
	if err := c.WriteSummaries(); err != nil {
		log.Fatalf(context.Background(), "Couldn't write stats summaries: %s", err)
	}

	c.Run("node ls")
	c.Run("node ls --pretty")
	c.Run("node status 10000")

	// Output:
	// node ls
	// 1 row
	// id
	// 1
	// node ls --pretty
	// +----+
	// | id |
	// +----+
	// |  1 |
	// +----+
	// (1 row)
	// node status 10000
	// Error: node 10000 doesn't exist
}
// Freeze freezes (or thaws) the cluster. The freeze request is sent to the
// specified node.
func (c *Cluster) Freeze(nodeIdx int, freeze bool) {
	addr := c.RPCAddr(nodeIdx)
	conn, err := c.rpcCtx.GRPCDial(addr)
	if err != nil {
		log.Fatalf(context.Background(), "unable to dial: %s: %v", addr, err)
	}

	adminClient := serverpb.NewAdminClient(conn)
	stream, err := adminClient.ClusterFreeze(
		context.Background(), &serverpb.ClusterFreezeRequest{Freeze: freeze})
	if err != nil {
		log.Fatal(context.Background(), err)
	}
	for {
		resp, err := stream.Recv()
		if err != nil {
			if err == io.EOF {
				break
			}
			log.Fatal(context.Background(), err)
		}
		fmt.Println(resp.Message)
	}
	fmt.Println("ok")
}
// Create the key/value pairs for the default zone config entry.
func createDefaultZoneConfig() []roachpb.KeyValue {
	var ret []roachpb.KeyValue
	value := roachpb.Value{}
	desc := config.DefaultZoneConfig()
	if err := value.SetProto(&desc); err != nil {
		log.Fatalf(context.TODO(), "could not marshal %v", desc)
	}
	ret = append(ret, roachpb.KeyValue{
		Key:   MakeZoneKey(keys.RootNamespaceID),
		Value: value,
	})
	return ret
}
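// Illustrative sketch (not part of the original source): the roachpb.Value
// SetProto/GetProto round trip that createDefaultZoneConfig relies on. A proto
// marshaled into a Value can be unmarshaled back out, which is how the gossip
// callback in updateSystemConfig further below recovers its config. The helper
// name roundTripZoneConfig is hypothetical, and the config.ZoneConfig type is
// an assumption based on the use of config.DefaultZoneConfig above.
func roundTripZoneConfig() error {
	var v roachpb.Value
	in := config.DefaultZoneConfig()
	if err := v.SetProto(&in); err != nil {
		return err
	}
	var out config.ZoneConfig
	if err := v.GetProto(&out); err != nil {
		return err
	}
	// in and out now describe the same zone configuration.
	return nil
}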
func (s channelServer) HandleRaftResponse(
	ctx context.Context, resp *storage.RaftMessageResponse,
) error {
	// Mimic the logic in (*Store).HandleRaftResponse without requiring an
	// entire Store object to be pulled into these tests.
	if val, ok := resp.Union.GetValue().(*roachpb.Error); ok {
		if err, ok := val.GetDetail().(*roachpb.StoreNotFoundError); ok {
			return err
		}
	}
	log.Fatalf(ctx, "unexpected raft response: %s", resp)
	return nil
}
// GetInitialValues returns the set of initial K/V values which should be added to
// a bootstrapping CockroachDB cluster in order to create the tables contained
// in the schema.
func (ms MetadataSchema) GetInitialValues() []roachpb.KeyValue {
	var ret []roachpb.KeyValue

	// Save the ID generator value, which will generate descriptor IDs for user
	// objects.
	value := roachpb.Value{}
	value.SetInt(int64(keys.MaxReservedDescID + 1))
	ret = append(ret, roachpb.KeyValue{
		Key:   keys.DescIDGenerator,
		Value: value,
	})

	// addDescriptor generates the needed KeyValue objects to install a
	// descriptor on a new cluster.
	addDescriptor := func(parentID ID, desc DescriptorProto) {
		// Create name metadata key.
		value := roachpb.Value{}
		value.SetInt(int64(desc.GetID()))
		ret = append(ret, roachpb.KeyValue{
			Key:   MakeNameMetadataKey(parentID, desc.GetName()),
			Value: value,
		})

		// Create descriptor metadata key.
		value = roachpb.Value{}
		wrappedDesc := WrapDescriptor(desc)
		if err := value.SetProto(wrappedDesc); err != nil {
			log.Fatalf(context.TODO(), "could not marshal %v", desc)
		}
		ret = append(ret, roachpb.KeyValue{
			Key:   MakeDescMetadataKey(desc.GetID()),
			Value: value,
		})
	}

	// Generate initial values for system databases and tables, which have
	// static descriptors that were generated elsewhere.
	for _, sysObj := range ms.descs {
		addDescriptor(sysObj.parentID, sysObj.desc)
	}

	// Other key/value generation that doesn't fit into databases and
	// tables. This can be used to add initial entries to a table.
	ret = append(ret, ms.otherKV...)

	// Sort returned key values; this is valuable because it matches the way the
	// objects would be sorted if read from the engine.
	sort.Sort(roachpb.KeyValueByKey(ret))
	return ret
}
func (z *zeroSum) chaos() {
	switch z.chaosType {
	case "none":
		// nothing to do
	case "simple":
		go z.chaosSimple()
	case "flappy":
		go z.chaosFlappy()
	case "freeze":
		go z.chaosFreeze()
	default:
		log.Fatalf(context.Background(), "unknown chaos type: %s", z.chaosType)
	}
}
// connectGossip connects to the gossip network and reads the cluster ID. If
// this node is already part of a cluster, the cluster ID is verified
// for a match. If not part of a cluster, the cluster ID is set. The
// node's address is gossiped with the node ID as the gossip key.
func (n *Node) connectGossip(ctx context.Context) {
	log.Infof(ctx, "connecting to gossip network to verify cluster ID...")
	// No timeout or stop condition is needed here. Log statements should be
	// sufficient for diagnosing this type of condition.
	<-n.storeCfg.Gossip.Connected

	uuidBytes, err := n.storeCfg.Gossip.GetInfo(gossip.KeyClusterID)
	if err != nil {
		log.Fatalf(ctx, "unable to ascertain cluster ID from gossip network: %s", err)
	}
	gossipClusterID, err := uuid.FromBytes(uuidBytes)
	if err != nil {
		log.Fatalf(ctx, "unable to ascertain cluster ID from gossip network: %s", err)
	}

	if n.ClusterID == (uuid.UUID{}) {
		n.ClusterID = gossipClusterID
	} else if n.ClusterID != gossipClusterID {
		log.Fatalf(ctx, "node %d belongs to cluster %q but is attempting to connect to a gossip network for cluster %q",
			n.Descriptor.NodeID, n.ClusterID, gossipClusterID)
	}
	log.Infof(ctx, "node connected via gossip and verified as part of cluster %q", gossipClusterID)
}
// ExampleNewClock shows how to create a new
// hybrid logical clock based on the local machine's
// physical clock. The sanity checks in this example
// will, of course, not fail and the output will be
// the age of the Unix epoch in nanoseconds.
func ExampleNewClock() {
	// Initialize a new clock, using the local
	// physical clock.
	c := NewClock(UnixNano)
	// Update the state of the hybrid clock.
	s := c.Now()
	time.Sleep(50 * time.Nanosecond)
	t := Timestamp{WallTime: UnixNano()}
	// The sanity checks below will usually never be triggered.
	if s.Less(t) || !t.Less(s) {
		log.Fatalf(context.Background(), "The later timestamp is smaller than the earlier one")
	}
	if t.WallTime-s.WallTime > 0 {
		log.Fatalf(context.Background(), "HLC timestamp %d deviates from physical clock %d", s, t)
	}
	if s.Logical > 0 {
		log.Fatalf(context.Background(), "Trivial timestamp has logical component")
	}
	fmt.Printf("The Unix Epoch is now approximately %dns old.\n", t.WallTime)
}
func newCLITest() cliTest {
	// Reset the client context for each test. We don't reset the
	// pointer (because they are tied into the flags), but instead
	// overwrite the existing struct's values.
	baseCfg.InitDefaults()
	cliCtx.InitCLIDefaults()

	osStderr = os.Stdout

	s, err := serverutils.StartServerRaw(base.TestServerArgs{})
	if err != nil {
		log.Fatalf(context.Background(), "Could not start server: %s", err)
	}

	tempDir, err := ioutil.TempDir("", "cli-test")
	if err != nil {
		log.Fatal(context.Background(), err)
	}

	// Copy these assets to disk from embedded strings, so this test can
	// run from a standalone binary.
	// Disable embedded certs, or the security library will try to load
	// our real files as embedded assets.
	security.ResetReadFileFn()

	assets := []string{
		filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCACert),
		filepath.Join(security.EmbeddedCertsDir, security.EmbeddedCAKey),
		filepath.Join(security.EmbeddedCertsDir, security.EmbeddedNodeCert),
		filepath.Join(security.EmbeddedCertsDir, security.EmbeddedNodeKey),
		filepath.Join(security.EmbeddedCertsDir, security.EmbeddedRootCert),
		filepath.Join(security.EmbeddedCertsDir, security.EmbeddedRootKey),
	}

	for _, a := range assets {
		securitytest.RestrictedCopy(nil, a, tempDir, filepath.Base(a))
	}

	return cliTest{
		TestServer: s.(*server.TestServer),
		certsDir:   tempDir,
		cleanupFunc: func() {
			if err := os.RemoveAll(tempDir); err != nil {
				log.Fatal(context.Background(), err)
			}
		},
	}
}
// writeInitialState bootstraps a new Raft group (i.e. it is called when we
// bootstrap a Range, or when setting up the right-hand side of a split).
// Its main task is to persist a consistent Raft (and associated Replica) state
// which does not start from zero but presupposes a few entries already having
// been applied.
// The supplied MVCCStats are used for the Stats field after adjusting for
// persisting the state itself, and the updated stats are returned.
func writeInitialState(
	ctx context.Context,
	eng engine.ReadWriter,
	ms enginepb.MVCCStats,
	desc roachpb.RangeDescriptor,
	oldHS raftpb.HardState,
	lease *roachpb.Lease,
) (enginepb.MVCCStats, error) {
	var s storagebase.ReplicaState

	s.TruncatedState = &roachpb.RaftTruncatedState{
		Term:  raftInitialLogTerm,
		Index: raftInitialLogIndex,
	}
	s.RaftAppliedIndex = s.TruncatedState.Index
	s.Desc = &roachpb.RangeDescriptor{
		RangeID: desc.RangeID,
	}
	s.Frozen = storagebase.ReplicaState_UNFROZEN
	s.Stats = ms
	s.Lease = lease

	if existingLease, err := loadLease(ctx, eng, desc.RangeID); err != nil {
		return enginepb.MVCCStats{}, errors.Wrap(err, "error reading lease")
	} else if (existingLease != roachpb.Lease{}) {
		log.Fatalf(ctx, "expected trivial lease, but found %+v", existingLease)
	}

	newMS, err := saveState(ctx, eng, s)
	if err != nil {
		return enginepb.MVCCStats{}, err
	}

	if err := synthesizeHardState(ctx, eng, s, oldHS); err != nil {
		return enginepb.MVCCStats{}, err
	}

	if err := setLastIndex(ctx, eng, desc.RangeID, s.TruncatedState.Index); err != nil {
		return enginepb.MVCCStats{}, err
	}

	return newMS, nil
}
func Example_insecure() {
	s, err := serverutils.StartServerRaw(
		base.TestServerArgs{Insecure: true})
	if err != nil {
		log.Fatalf(context.Background(), "Could not start server: %v", err)
	}
	defer s.Stopper().Stop()
	c := cliTest{TestServer: s.(*server.TestServer), cleanupFunc: func() {}}

	c.Run("debug kv put a 1 b 2")
	c.Run("debug kv scan")

	// Output:
	// debug kv put a 1 b 2
	// debug kv scan
	// "a" "1"
	// "b" "2"
	// 2 result(s)
}
// CreateLocal creates a new local cockroach cluster. The stopper is used to
// gracefully shut down the channel (e.g. when a signal arrives). The cluster
// must be started before being used and keeps logs in the specified logDir, if
// supplied.
func CreateLocal(
	ctx context.Context, cfg TestConfig, logDir string, privileged bool, stopper *stop.Stopper,
) *LocalCluster {
	select {
	case <-stopper.ShouldStop():
		// The stopper was already closed, exit early.
		os.Exit(1)
	default:
	}

	if *cockroachImage == builderImageFull && !exists(*cockroachBinary) {
		log.Fatalf(ctx, "\"%s\": does not exist", *cockroachBinary)
	}

	cli, err := client.NewEnvClient()
	maybePanic(err)

	retryingClient := retryingDockerClient{
		resilientDockerClient: resilientDockerClient{APIClient: cli},
		attempts:              10,
		timeout:               10 * time.Second,
	}

	clusterID := uuid.MakeV4()
	clusterIDS := clusterID.Short()
	// Only pass a nonzero logDir down to LocalCluster when instructed to keep
	// logs.
	var uniqueLogDir string
	if logDir != "" {
		uniqueLogDir = fmt.Sprintf("%s-%s", logDir, clusterIDS)
	}
	return &LocalCluster{
		clusterID: clusterIDS,
		client:    retryingClient,
		config:    cfg,
		stopper:   stopper,
		// TODO(tschottdorf): deadlocks will occur if these channels fill up.
		events:         make(chan Event, 1000),
		expectedEvents: make(chan Event, 1000),
		logDir:         uniqueLogDir,
		privileged:     privileged,
	}
}
// updateSystemConfig is the raw gossip info callback.
// Unmarshal the system config, and if successful, update our
// copy and run the callbacks.
func (g *Gossip) updateSystemConfig(key string, content roachpb.Value) {
	ctx := g.AnnotateCtx(context.TODO())
	if key != KeySystemConfig {
		log.Fatalf(ctx, "wrong key received on SystemConfig callback: %s", key)
	}
	cfg := config.SystemConfig{}
	if err := content.GetProto(&cfg); err != nil {
		log.Errorf(ctx, "could not unmarshal system config on callback: %s", err)
		return
	}

	g.systemConfigMu.Lock()
	defer g.systemConfigMu.Unlock()
	g.systemConfig = cfg
	g.systemConfigSet = true
	for _, c := range g.systemConfigChannels {
		select {
		case c <- struct{}{}:
		default:
		}
	}
}
// partitionSpans finds out which nodes are owners for ranges touching the given
// spans, and splits the spans according to owning nodes. The result is a set of
// spanPartitions (one for each relevant node), which form a partitioning of the
// spans (i.e. they are non-overlapping and their union is exactly the original
// set of spans).
func (dsp *distSQLPlanner) partitionSpans(
	planCtx *planningCtx, spans roachpb.Spans,
) ([]spanPartition, error) {
	if len(spans) == 0 {
		panic("no spans")
	}
	ctx := planCtx.ctx
	splits := make([]spanPartition, 0, 1)
	// nodeMap maps a nodeID to an index inside the splits array.
	nodeMap := make(map[roachpb.NodeID]int)
	it := planCtx.spanIter
	for _, span := range spans {
		var rspan roachpb.RSpan
		var err error
		if rspan.Key, err = keys.Addr(span.Key); err != nil {
			return nil, err
		}
		if rspan.EndKey, err = keys.Addr(span.EndKey); err != nil {
			return nil, err
		}

		var lastNodeID roachpb.NodeID
		for it.Seek(ctx, span, kv.Ascending); ; it.Next(ctx) {
			if !it.Valid() {
				return nil, it.Error()
			}
			replInfo, err := it.ReplicaInfo(ctx)
			if err != nil {
				return nil, err
			}
			desc := it.Desc()

			var trimmedSpan roachpb.Span
			if rspan.Key.Less(desc.StartKey) {
				trimmedSpan.Key = desc.StartKey.AsRawKey()
			} else {
				trimmedSpan.Key = span.Key
			}
			if desc.EndKey.Less(rspan.EndKey) {
				trimmedSpan.EndKey = desc.EndKey.AsRawKey()
			} else {
				trimmedSpan.EndKey = span.EndKey
			}

			nodeID := replInfo.NodeDesc.NodeID
			idx, ok := nodeMap[nodeID]
			if !ok {
				idx = len(splits)
				splits = append(splits, spanPartition{node: nodeID})
				nodeMap[nodeID] = idx
				if _, ok := planCtx.nodeAddresses[nodeID]; !ok {
					planCtx.nodeAddresses[nodeID] = replInfo.NodeDesc.Address.String()
				}
			}
			split := &splits[idx]

			if lastNodeID == nodeID {
				// Two consecutive ranges on the same node, merge the spans.
				if !split.spans[len(split.spans)-1].EndKey.Equal(trimmedSpan.Key) {
					log.Fatalf(ctx, "expected consecutive span pieces %v %v", split.spans, trimmedSpan)
				}
				split.spans[len(split.spans)-1].EndKey = trimmedSpan.EndKey
			} else {
				split.spans = append(split.spans, trimmedSpan)
			}

			lastNodeID = nodeID
			if !it.NeedAnother() {
				break
			}
		}
	}
	return splits, nil
}
// execRequest executes the request using the provided planner.
// It parses the sql into statements, iterates through the statements, creates
// KV transactions and automatically retries them when possible, and executes
// the (synchronous attempt of) schema changes.
// It will accumulate a result in Response for each statement.
// It will resume a SQL transaction, if one was previously open for this client.
//
// execRequest handles the mismatch between the SQL interface that the Executor
// provides, based on statements being streamed from the client in the context
// of a session, and the KV client.Txn interface, based on (possibly-retriable)
// callbacks passed to be executed in the context of a transaction. Actual
// execution of statements in the context of a KV txn is delegated to
// runTxnAttempt().
//
// Args:
// txnState: State about the ongoing transaction (if any). The state will be
//   updated.
func (e *Executor) execRequest(session *Session, sql string, copymsg copyMsg) StatementResults {
	var res StatementResults
	txnState := &session.TxnState
	planMaker := &session.planner
	var stmts parser.StatementList
	var err error

	log.VEventf(session.Ctx(), 2, "execRequest: %s", sql)

	if session.planner.copyFrom != nil {
		stmts, err = session.planner.ProcessCopyData(sql, copymsg)
	} else if copymsg != copyMsgNone {
		err = fmt.Errorf("unexpected copy command")
	} else {
		stmts, err = planMaker.parser.Parse(sql, parser.Syntax(session.Syntax))
	}
	if err != nil {
		// A parse error occurred: we can't determine if there were multiple
		// statements or only one, so just pretend there was one.
		if txnState.txn != nil {
			// Rollback the txn.
			txnState.updateStateAndCleanupOnErr(err, e)
		}
		res.ResultList = append(res.ResultList, Result{Err: err})
		return res
	}
	if len(stmts) == 0 {
		res.Empty = true
		return res
	}

	// If the planMaker wants config updates to be blocked, then block them.
	defer planMaker.blockConfigUpdatesMaybe(e)()

	for len(stmts) > 0 {
		// Each iteration consumes a transaction's worth of statements.

		inTxn := txnState.State != NoTxn
		execOpt := client.TxnExecOptions{
			Clock: e.cfg.Clock,
		}
		// Figure out the statements out of which we're going to try to consume
		// this iteration. If we need to create an implicit txn, only one statement
		// can be consumed.
		stmtsToExec := stmts
		// If protoTS is set, the transaction proto sets its Orig and Max timestamps
		// to it each retry.
		var protoTS *hlc.Timestamp
		// We can AutoRetry the next batch of statements if we're in a clean state
		// (i.e. the next statements we're going to see are the first statements in
		// a transaction).
		if !inTxn {
			// Detect implicit transactions.
			if _, isBegin := stmts[0].(*parser.BeginTransaction); !isBegin {
				execOpt.AutoCommit = true
				stmtsToExec = stmtsToExec[:1]
				// Check for AS OF SYSTEM TIME. If it is present but not detected here,
				// it will raise an error later on.
				protoTS, err = isAsOf(planMaker, stmtsToExec[0], e.cfg.Clock.Now())
				if err != nil {
					res.ResultList = append(res.ResultList, Result{Err: err})
					return res
				}
				if protoTS != nil {
					planMaker.avoidCachedDescriptors = true
					defer func() {
						planMaker.avoidCachedDescriptors = false
					}()
				}
			}
			txnState.resetForNewSQLTxn(e, session)
			txnState.autoRetry = true
			txnState.sqlTimestamp = e.cfg.Clock.PhysicalTime()
			if execOpt.AutoCommit {
				txnState.txn.SetDebugName(sqlImplicitTxnName, 0)
			} else {
				txnState.txn.SetDebugName(sqlTxnName, 0)
			}
		} else {
			txnState.autoRetry = false
		}
		execOpt.AutoRetry = txnState.autoRetry
		if txnState.State == NoTxn {
			panic("we failed to initialize a txn")
		}
		// Now actually run some statements.
		var remainingStmts parser.StatementList
		var results []Result
		origState := txnState.State

		txnClosure := func(txn *client.Txn, opt *client.TxnExecOptions) error {
			if txnState.State == Open && txnState.txn != txn {
				panic(fmt.Sprintf("closure wasn't called in the txn we set up for it."+
					"\ntxnState.txn:%+v\ntxn:%+v\ntxnState:%+v", txnState.txn, txn, txnState))
			}
			txnState.txn = txn

			if protoTS != nil {
				setTxnTimestamps(txnState.txn, *protoTS)
			}

			var err error
			if results != nil {
				// Some results were produced by a previous attempt. Discard them.
				ResultList(results).Close()
			}
			results, remainingStmts, err = runTxnAttempt(e, planMaker, origState, txnState, opt, stmtsToExec)

			// TODO(andrei): Until #7881 fixed.
			if err == nil && txnState.State == Aborted {
				log.Errorf(session.Ctx(),
					"7881: txnState is Aborted without an error propagating. stmtsToExec: %s, "+
						"results: %+v, remainingStmts: %s, txnState: %+v",
					stmtsToExec, results, remainingStmts, txnState)
			}
			return err
		}
		// This is where the magic happens - we ask db to run a KV txn and possibly retry it.
		txn := txnState.txn // this might be nil if the txn was already aborted.
		err := txn.Exec(execOpt, txnClosure)

		// Update the Err field of the last result if the error was coming from
		// auto commit. The error was generated outside of the txn closure, so it was not
		// set in any result.
		if err != nil {
			lastResult := &results[len(results)-1]
			if aErr, ok := err.(*client.AutoCommitError); ok {
				// TODO(andrei): Until #7881 fixed.
				{
					log.Eventf(session.Ctx(), "executor got AutoCommitError: %s\n"+
						"txn: %+v\nexecOpt.AutoRetry %t, execOpt.AutoCommit:%t, stmts %+v, remaining %+v",
						aErr, txnState.txn.Proto, execOpt.AutoRetry, execOpt.AutoCommit, stmts,
						remainingStmts)
					if txnState.txn == nil {
						log.Errorf(session.Ctx(), "7881: AutoCommitError on nil txn: %s, "+
							"txnState %+v, execOpt %+v, stmts %+v, remaining %+v",
							aErr, txnState, execOpt, stmts, remainingStmts)
						txnState.sp.SetBaggageItem(keyFor7881Sample, "sample me please")
					}
				}
				lastResult.Err = aErr
				e.TxnAbortCount.Inc(1)
				txn.CleanupOnError(err)
			}
			if lastResult.Err == nil {
				log.Fatalf(session.Ctx(),
					"error (%s) was returned, but it was not set in the last result (%v)",
					err, lastResult)
			}
		}

		res.ResultList = append(res.ResultList, results...)
		// Now make sense of the state we got into and update txnState.
		if (txnState.State == RestartWait || txnState.State == Aborted) &&
			txnState.commitSeen {
			// A COMMIT got an error (retryable or not). Too bad, this txn is toast.
			// After we return a result for COMMIT (with the COMMIT pgwire tag), the
			// user can't send any more commands.
			e.TxnAbortCount.Inc(1)
			txn.CleanupOnError(err)
			txnState.resetStateAndTxn(NoTxn)
		}

		if execOpt.AutoCommit {
			// If execOpt.AutoCommit was set, then the txn no longer exists at this point.
			txnState.resetStateAndTxn(NoTxn)
		}
		// If we're no longer in a transaction, finish the trace.
		if txnState.State == NoTxn {
			txnState.finishSQLTxn(session.context)
		}

		// If the txn is in any state but Open, exec the schema changes. They'll
		// short-circuit themselves if the mutation that queued them has been
		// rolled back from the table descriptor.
		stmtsExecuted := stmts[:len(stmtsToExec)-len(remainingStmts)]
		if txnState.State != Open {
			planMaker.checkTestingVerifyMetadataInitialOrDie(e, stmts)
			planMaker.checkTestingVerifyMetadataOrDie(e, stmtsExecuted)
			// Exec the schema changers (if the txn rolled back, the schema changers
			// will short-circuit because the corresponding descriptor mutation is not
			// found).
			planMaker.releaseLeases()
			txnState.schemaChangers.execSchemaChanges(e, planMaker, res.ResultList)
		} else {
			// We're still in a txn, so we only check that the verifyMetadata callback
			// fails the first time it's run. The gossip update that will make the
			// callback succeed only happens when the txn is done.
			planMaker.checkTestingVerifyMetadataInitialOrDie(e, stmtsExecuted)
		}

		// Figure out what statements to run on the next iteration.
		if err != nil {
			// Don't execute anything further.
			stmts = nil
		} else if execOpt.AutoCommit {
			stmts = stmts[1:]
		} else {
			stmts = remainingStmts
		}
	}

	return res
}
func (s channelServer) HandleRaftResponse(
	ctx context.Context, resp *storage.RaftMessageResponse,
) error {
	log.Fatalf(ctx, "unexpected raft response: %s", resp)
	return nil
}
func (r *Replica) handleLocalProposalData(
	ctx context.Context, originReplica roachpb.ReplicaDescriptor, lpd LocalProposalData,
) (shouldAssert bool) {
	// Fields for which no action is taken in this method are zeroed so that
	// they don't trigger an assertion at the end of the method (which checks
	// that all fields were handled).
	{
		lpd.idKey = storagebase.CmdIDKey("")
		lpd.Batch = nil
		lpd.done = nil
		lpd.ctx = nil
		lpd.Err = nil
		lpd.proposedAtTicks = 0
		lpd.Reply = nil
	}

	// ======================
	// Non-state updates and actions.
	// ======================

	if originReplica.StoreID == r.store.StoreID() {
		// On the replica on which this command originated, resolve skipped
		// intents asynchronously - even on failure.
		//
		// TODO(tschottdorf): EndTransaction will use this pathway to return
		// intents which should immediately be resolved. However, there's
		// a slight chance that an error between the origin of that intents
		// slice and here still results in that intent slice arriving here
		// without the EndTransaction having committed. We should clearly
		// separate the part of the ProposalData which also applies on errors.
		if lpd.intents != nil {
			r.store.intentResolver.processIntentsAsync(r, *lpd.intents)
		}
	}
	lpd.intents = nil

	// The above are present too often, so we assert only if there are
	// "nontrivial" actions below.
	shouldAssert = (lpd != LocalProposalData{})

	if lpd.raftLogSize != nil {
		r.mu.Lock()
		r.mu.raftLogSize = *lpd.raftLogSize
		r.mu.Unlock()
		lpd.raftLogSize = nil
	}

	if lpd.gossipFirstRange {
		// We need to run the gossip in an async task because gossiping requires
		// the range lease and we'll deadlock if we try to acquire it while
		// holding processRaftMu. Specifically, Replica.redirectOnOrAcquireLease
		// blocks waiting for the lease acquisition to finish but it can't finish
		// because we're not processing raft messages due to holding
		// processRaftMu (and running on the processRaft goroutine).
		if err := r.store.Stopper().RunAsyncTask(ctx, func(ctx context.Context) {
			hasLease, pErr := r.getLeaseForGossip(ctx)

			if pErr != nil {
				log.Infof(ctx, "unable to gossip first range; hasLease=%t, err=%s", hasLease, pErr)
			} else if !hasLease {
				return
			}
			r.gossipFirstRange(ctx)
		}); err != nil {
			log.Infof(ctx, "unable to gossip first range: %s", err)
		}
		lpd.gossipFirstRange = false
	}

	if lpd.maybeAddToSplitQueue {
		r.store.splitQueue.MaybeAdd(r, r.store.Clock().Now())
		lpd.maybeAddToSplitQueue = false
	}

	if lpd.maybeGossipSystemConfig {
		r.maybeGossipSystemConfig()
		lpd.maybeGossipSystemConfig = false
	}

	if originReplica.StoreID == r.store.StoreID() {
		if lpd.leaseMetricsResult != nil {
			r.store.metrics.leaseRequestComplete(*lpd.leaseMetricsResult)
		}
		if lpd.maybeGossipNodeLiveness != nil {
			r.maybeGossipNodeLiveness(*lpd.maybeGossipNodeLiveness)
		}
	}
	// Satisfy the assertions for all of the items processed only on the
	// proposer (the block just above).
	lpd.leaseMetricsResult = nil
	lpd.maybeGossipNodeLiveness = nil

	if (lpd != LocalProposalData{}) {
		log.Fatalf(ctx, "unhandled field in LocalProposalData: %s", pretty.Diff(lpd, LocalProposalData{}))
	}

	return shouldAssert
}
// add adds commands to the queue which affect the specified key ranges. Ranges
// without an end key affect only the start key. The returned interface is the
// key for the command queue and must be re-supplied on subsequent invocation
// of remove().
//
// Either all supplied spans must be range-global or range-local. Failure to
// obey this restriction results in a fatal error.
//
// Returns a nil `cmd` when no spans are given.
//
// add should be invoked after waiting on already-executing, overlapping
// commands via the WaitGroup initialized through getWait().
func (cq *CommandQueue) add(readOnly bool, spans ...roachpb.Span) *cmd {
	if len(spans) == 0 {
		return nil
	}
	prepareSpans(spans...)

	// Compute the min and max key that covers all of the spans.
	minKey, maxKey := spans[0].Key, spans[0].EndKey
	for i := 1; i < len(spans); i++ {
		start, end := spans[i].Key, spans[i].EndKey
		if minKey.Compare(start) > 0 {
			minKey = start
		}
		if maxKey.Compare(end) < 0 {
			maxKey = end
		}
	}

	if keys.IsLocal(minKey) != keys.IsLocal(maxKey) {
		log.Fatalf(
			context.TODO(),
			"mixed range-global and range-local keys: %s and %s",
			minKey, maxKey,
		)
	}

	numCmds := 1
	if len(spans) > 1 {
		numCmds += len(spans)
	}
	cmds := make([]cmd, numCmds)

	// Create the covering entry.
	cmd := &cmds[0]
	cmd.id = cq.nextID()
	cmd.key = interval.Range{
		Start: interval.Comparable(minKey),
		End:   interval.Comparable(maxKey),
	}
	cmd.readOnly = readOnly
	cmd.expanded = false

	if len(spans) > 1 {
		// Populate the covering entry's children.
		cmd.children = cmds[1:]
		for i, span := range spans {
			child := &cmd.children[i]
			child.id = cq.nextID()
			child.key = interval.Range{
				Start: interval.Comparable(span.Key),
				End:   interval.Comparable(span.EndKey),
			}
			child.readOnly = readOnly
			child.expanded = true
		}
	}

	if cmd.readOnly {
		cq.localMetrics.readCommands += int64(cmd.cmdCount())
	} else {
		cq.localMetrics.writeCommands += int64(cmd.cmdCount())
	}

	if cq.coveringOptimization || len(spans) == 1 {
		if err := cq.tree.Insert(cmd, false /* !fast */); err != nil {
			panic(err)
		}
	} else {
		cq.expand(cmd, false /* !isInserted */)
	}
	return cmd
}