func (s *Server) reportUsage(ctx context.Context) {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(ctx, err)
		return
	}

	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		if log.V(2) {
			log.Warning(ctx, "Failed to report node usage metrics: ", err)
		}
		// Note: return unconditionally here; the previous combined
		// `err != nil && log.V(2)` condition fell through to res.StatusCode
		// with a nil response when V(2) logging was disabled.
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
// eachRecordableValue visits each metric in the registry, calling the supplied
// function once for each recordable value represented by that metric. This is
// useful to expand certain metric types (such as histograms) into multiple
// recordable values.
func eachRecordableValue(reg *metric.Registry, fn func(string, float64)) {
	reg.Each(func(name string, mtr interface{}) {
		if histogram, ok := mtr.(*metric.Histogram); ok {
			// TODO(mrtracy): Where should this comment go for better
			// visibility?
			//
			// Proper support of Histograms for time series is difficult and
			// likely not worth the trouble. Instead, we aggregate a windowed
			// histogram at fixed quantiles. If the scraping window and the
			// histogram's eviction duration are similar, this should give
			// good results; if the two durations are very different, we either
			// report stale results or report only the more recent data.
			//
			// Additionally, we can only aggregate max/min of the quantiles;
			// roll-ups don't know that and so they will return mathematically
			// nonsensical values, but that seems acceptable for the time
			// being.
			curr, _ := histogram.Windowed()
			for _, pt := range recordHistogramQuantiles {
				fn(name+pt.suffix, float64(curr.ValueAtQuantile(pt.quantile)))
			}
		} else {
			val, err := extractValue(mtr)
			if err != nil {
				log.Warning(context.TODO(), err)
				return
			}
			fn(name, val)
		}
	})
}
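// The declaration below is a minimal sketch, not the package's actual
// definition: it illustrates the shape eachRecordableValue assumes for
// recordHistogramQuantiles, pairing a metric-name suffix with the quantile to
// read from the windowed histogram. The suffixes and quantile values here are
// assumptions for illustration.
type quantile struct {
	suffix   string
	quantile float64
}

var recordHistogramQuantiles = []quantile{
	{suffix: "-max", quantile: 100},
	{suffix: "-p99", quantile: 99},
	{suffix: "-p50", quantile: 50},
}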
// removeLeaseIfExpiring removes the lease from p.leases and returns true if
// the lease is about to expire; otherwise it is a no-op returning false. The
// method also resets the transaction deadline based on the remaining leases.
func (p *planner) removeLeaseIfExpiring(lease *LeaseState) bool {
	if lease == nil || lease.hasSomeLifeLeft(p.leaseMgr.clock) {
		return false
	}

	// Remove the lease from p.leases.
	idx := -1
	for i, l := range p.leases {
		if l == lease {
			idx = i
			break
		}
	}
	if idx == -1 {
		log.Warningf(p.ctx(), "lease (%s) not found", lease)
		return false
	}
	p.leases[idx] = p.leases[len(p.leases)-1]
	p.leases[len(p.leases)-1] = nil
	p.leases = p.leases[:len(p.leases)-1]

	if err := p.leaseMgr.Release(lease); err != nil {
		log.Warning(p.ctx(), err)
	}

	// Reset the deadline so that a new deadline will be set after the lease
	// is acquired.
	p.txn.ResetDeadline()
	for _, l := range p.leases {
		p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: l.Expiration().UnixNano()})
	}
	return true
}
func (is *infoStore) runCallbacks(key string, content roachpb.Value, callbacks ...Callback) {
	// Add the callbacks to the callback work list.
	f := func() {
		for _, method := range callbacks {
			method(key, content)
		}
	}
	is.callbackWorkMu.Lock()
	is.callbackWork = append(is.callbackWork, f)
	is.callbackWorkMu.Unlock()

	// Run callbacks in a goroutine to avoid mutex reentry. We also guarantee
	// callbacks are run in order such that if a key is updated twice in
	// succession, the second callback will never be run before the first.
	if err := is.stopper.RunAsyncTask(context.Background(), func(_ context.Context) {
		// Grab the callback mutex to serialize execution of the callbacks.
		is.callbackMu.Lock()
		defer is.callbackMu.Unlock()

		// Grab and execute the list of work.
		is.callbackWorkMu.Lock()
		work := is.callbackWork
		is.callbackWork = nil
		is.callbackWorkMu.Unlock()

		for _, w := range work {
			w()
		}
	}); err != nil {
		ctx := is.AnnotateCtx(context.TODO())
		log.Warning(ctx, err)
	}
}
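// A minimal usage sketch (the key and callback are hypothetical) of the
// ordering guarantee documented above: work items are appended under
// callbackWorkMu and drained in order under callbackMu, so the callback for
// the second update can never run before the callback for the first.
func exampleCallbackOrdering(is *infoStore, cb Callback) {
	is.runCallbacks("config", roachpb.Value{}, cb) // enqueued first, runs first
	is.runCallbacks("config", roachpb.Value{}, cb) // enqueued second, runs second
}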
func (s *Server) checkForUpdates(ctx context.Context) {
	q := updatesURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	updatesURL.RawQuery = q.Encode()

	res, err := http.Get(updatesURL.String())
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		if log.V(2) {
			log.Warning(ctx, "Failed to check for updates: ", err)
		}
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to check for updates: status: %s, body: %s, error: %v",
			res.Status, b, err)
		return
	}

	decoder := json.NewDecoder(res.Body)
	r := struct {
		Details []versionInfo `json:"details"`
	}{}

	err = decoder.Decode(&r)
	if err != nil && err != io.EOF {
		log.Warning(ctx, "Error decoding updates info: ", err)
		return
	}

	// Ideally the updates server only returns the most relevant updates for us,
	// but if it replied with an excessive number of updates, limit log spam by
	// only printing the last few.
	if len(r.Details) > updateMaxVersionsToReport {
		r.Details = r.Details[len(r.Details)-updateMaxVersionsToReport:]
	}
	for _, v := range r.Details {
		log.Infof(ctx, "A new version is available: %s, details: %s", v.Version, v.Details)
	}
}
// shouldQueue determines whether a range should be queued for truncating. This
// is true only if the replica is the raft leader and if the total number of
// the range's raft log's stale entries meets or exceeds
// RaftLogQueueStaleThreshold.
func (rlq *raftLogQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, r *Replica, _ config.SystemConfig,
) (shouldQ bool, priority float64) {
	truncatableIndexes, _, err := getTruncatableIndexes(ctx, r)
	if err != nil {
		log.Warning(ctx, err)
		return false, 0
	}

	return truncatableIndexes >= RaftLogQueueStaleThreshold, float64(truncatableIndexes)
}
// GetStatusSummary returns a status summary message for the node. The summary
// includes the recent values of metrics for both the node and all of its
// component stores.
func (mr *MetricsRecorder) GetStatusSummary() *NodeStatus {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "attempt to generate status summary before NodeID allocation.")
		}
		return nil
	}

	now := mr.mu.clock.PhysicalNow()

	// Generate a node status with no store data.
	nodeStat := &NodeStatus{
		Desc:          mr.mu.desc,
		BuildInfo:     build.GetInfo(),
		UpdatedAt:     now,
		StartedAt:     mr.mu.startedAt,
		StoreStatuses: make([]StoreStatus, 0, mr.mu.lastSummaryCount),
		Metrics:       make(map[string]float64, mr.mu.lastNodeMetricCount),
	}

	eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) {
		nodeStat.Metrics[name] = val
	})

	// Generate status summaries for stores.
	for storeID, r := range mr.mu.storeRegistries {
		storeMetrics := make(map[string]float64, mr.mu.lastStoreMetricCount)
		eachRecordableValue(r, func(name string, val float64) {
			storeMetrics[name] = val
		})

		// Gather descriptor from store.
		descriptor, err := mr.mu.stores[storeID].Descriptor()
		if err != nil {
			log.Errorf(context.TODO(), "Could not record status summaries: Store %d could not return descriptor, error: %s", storeID, err)
			continue
		}

		nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, StoreStatus{
			Desc:    *descriptor,
			Metrics: storeMetrics,
		})
	}

	mr.mu.lastSummaryCount = len(nodeStat.StoreStatuses)
	mr.mu.lastNodeMetricCount = len(nodeStat.Metrics)
	if len(nodeStat.StoreStatuses) > 0 {
		mr.mu.lastStoreMetricCount = len(nodeStat.StoreStatuses[0].Metrics)
	}

	return nodeStat
}
// Wait waits for a running container to exit.
func (c *Container) Wait() error {
	exitCode, err := c.cluster.client.ContainerWait(context.Background(), c.id)
	if err == nil && exitCode != 0 {
		err = errors.Errorf("non-zero exit code: %d", exitCode)
	}
	if err != nil {
		if err := c.Logs(os.Stderr); err != nil {
			log.Warning(context.TODO(), err)
		}
	}
	return err
}
// gossipStores broadcasts each store and dead replica to the gossip network.
func (n *Node) gossipStores(ctx context.Context) {
	if err := n.stores.VisitStores(func(s *storage.Store) error {
		if err := s.GossipStore(ctx); err != nil {
			return err
		}
		if err := s.GossipDeadReplicas(ctx); err != nil {
			return err
		}
		return nil
	}); err != nil {
		log.Warning(ctx, err)
	}
}
// releaseLeases implements the SchemaAccessor interface.
func (p *planner) releaseLeases() {
	if p.leases != nil {
		if log.V(2) {
			log.Infof(p.ctx(), "planner releasing %d leases", len(p.leases))
		}
		for _, lease := range p.leases {
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(p.ctx(), err)
			}
		}
		p.leases = nil
	}
}
// scrapePrometheus updates the prometheusExporter's metrics snapshot.
func (mr *MetricsRecorder) scrapePrometheus() {
	mr.mu.Lock()
	defer mr.mu.Unlock()
	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; output nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder asked to scrape metrics before NodeID allocation")
		}
		// Return early rather than scraping a nil registry.
		return
	}
	mr.prometheusExporter.ScrapeRegistry(mr.mu.nodeRegistry)
	for _, reg := range mr.mu.storeRegistries {
		mr.prometheusExporter.ScrapeRegistry(reg)
	}
}
func (ia *idAllocator) start() {
	ia.stopper.RunWorker(func() {
		ctx := ia.AnnotateCtx(context.Background())
		defer close(ia.ids)

		for {
			var newValue int64
			for newValue <= int64(ia.minID) {
				var err error
				var res client.KeyValue
				for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
					idKey := ia.idKey.Load().(roachpb.Key)
					if err := ia.stopper.RunTask(func() {
						res, err = ia.db.Inc(ctx, idKey, int64(ia.blockSize))
					}); err != nil {
						log.Warning(ctx, err)
						return
					}
					if err == nil {
						newValue = res.ValueInt()
						break
					}

					log.Warningf(ctx, "unable to allocate %d ids from %s: %s", ia.blockSize, idKey, err)
				}
				if err != nil {
					panic(fmt.Sprintf("unexpectedly exited id allocation retry loop: %s", err))
				}
			}

			end := newValue + 1
			start := end - int64(ia.blockSize)

			if start < int64(ia.minID) {
				start = int64(ia.minID)
			}

			// Add all new ids to the channel for consumption.
			for i := start; i < end; i++ {
				select {
				case ia.ids <- uint32(i):
				case <-ia.stopper.ShouldStop():
					return
				}
			}
		}
	})
}
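// A hypothetical consumer sketch (this helper is not part of the original
// code): once start() is running, callers simply receive from ia.ids. IDs
// arrive pre-allocated in blocks of blockSize, and a closed channel signals
// that the allocator has shut down.
func exampleNextID(ia *idAllocator) (uint32, bool) {
	id, ok := <-ia.ids // ok is false once the allocator stops
	return id, ok
}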
// execSchemaChanges releases schema leases and runs the queued
// schema changers. This needs to be run after the transaction
// scheduling the schema change has finished.
//
// The list of closures is cleared after (attempting) execution.
//
// Args:
//  results: The results from all statements in the group that scheduled the
//    schema changes we're about to execute. Results corresponding to the
//    schema change statements will be changed in case an error occurs.
func (scc *schemaChangerCollection) execSchemaChanges(
	e *Executor, planMaker *planner, results ResultList,
) {
	if planMaker.txn != nil {
		panic("trying to execute schema changes while still in a transaction")
	}
	ctx := e.AnnotateCtx(context.TODO())
	// Release the leases once a transaction is complete.
	planMaker.releaseLeases()
	if e.cfg.SchemaChangerTestingKnobs.SyncFilter != nil {
		e.cfg.SchemaChangerTestingKnobs.SyncFilter(TestingSchemaChangerCollection{scc})
	}
	// Execute any schema changes that were scheduled, in the order of the
	// statements that scheduled them.
	for _, scEntry := range scc.schemaChangers {
		sc := &scEntry.sc
		sc.db = *e.cfg.DB
		sc.testingKnobs = e.cfg.SchemaChangerTestingKnobs
		for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
			if done, err := sc.IsDone(); err != nil {
				log.Warning(ctx, err)
				break
			} else if done {
				break
			}
			if err := sc.exec(); err != nil {
				if isSchemaChangeRetryError(err) {
					// Try again.
					continue
				}
				// All other errors can be reported; we report it as the result
				// corresponding to the statement that enqueued this changer.
				// There's some sketchiness here: we assume there's a single result
				// per statement and we clobber the result/error of the corresponding
				// statement.
				// There's also another subtlety: we can only report results for
				// statements in the current batch; we can't modify the results of older
				// statements.
				if scEntry.epoch == scc.curGroupNum {
					results[scEntry.idx] = Result{Err: err}
				}
				log.Warningf(ctx, "error executing schema change: %s", err)
			}
			break
		}
	}
	scc.schemaChangers = scc.schemaChangers[:0]
}
// poll retrieves data from the underlying DataSource a single time, storing any
// returned time series data on the server.
func (p *poller) poll() {
	if err := p.stopper.RunTask(func() {
		data := p.source.GetTimeSeriesData()
		if len(data) == 0 {
			return
		}

		ctx, span := p.AnnotateCtxWithSpan(context.Background(), "ts-poll")
		defer span.Finish()

		if err := p.db.StoreData(ctx, p.r, data); err != nil {
			log.Warningf(ctx, "error writing time series data: %s", err)
		}
	}); err != nil {
		log.Warning(p.AnnotateCtx(context.TODO()), err)
	}
}
// tryAsyncAbort (synchronously) grabs a copy of the txn proto and the intents
// (which it then clears from txnMeta), and asynchronously tries to abort the
// transaction.
func (tc *TxnCoordSender) tryAsyncAbort(txnID uuid.UUID) {
	tc.Lock()
	txnMeta := tc.txns[txnID]
	// Clone the intents and the txn to avoid data races.
	intentSpans, _ := roachpb.MergeSpans(append([]roachpb.Span(nil), txnMeta.keys...))
	txnMeta.keys = nil
	txn := txnMeta.txn.Clone()
	tc.Unlock()

	// Since we don't hold the lock continuously, it's possible that two aborts
	// raced here. That's fine (and probably better than the alternative, which
	// is missing new intents sometimes).
	if txn.Status != roachpb.PENDING {
		return
	}

	ba := roachpb.BatchRequest{}
	ba.Txn = &txn

	et := &roachpb.EndTransactionRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Commit:      false,
		IntentSpans: intentSpans,
	}
	ba.Add(et)
	ctx := tc.AnnotateCtx(context.TODO())
	if err := tc.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
		// Use the wrapped sender since the normal Sender does not allow
		// clients to specify intents.
		if _, pErr := tc.wrapped.Send(ctx, ba); pErr != nil {
			if log.V(1) {
				log.Warningf(ctx, "abort due to inactivity failed for %s: %s", txn, pErr)
			}
		}
	}); err != nil {
		log.Warning(ctx, err)
	}
}
// MarshalJSON returns an appropriate JSON representation of the current values
// of the metrics being tracked by this recorder.
func (mr *MetricsRecorder) MarshalJSON() ([]byte, error) {
	mr.mu.Lock()
	defer mr.mu.Unlock()
	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; return an empty
		// JSON object.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder.MarshalJSON() called before NodeID allocation")
		}
		return []byte("{}"), nil
	}
	topLevel := map[string]interface{}{
		fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry,
	}
	// Add collection of stores to top level. JSON requires that keys be strings,
	// so we must convert the store ID to a string.
	storeLevel := make(map[string]interface{})
	for id, reg := range mr.mu.storeRegistries {
		storeLevel[strconv.Itoa(int(id))] = reg
	}
	topLevel["stores"] = storeLevel
	return json.Marshal(topLevel)
}
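// For illustration only (the metric names and values below are assumptions,
// not output captured from a real node), the JSON produced by MarshalJSON has
// roughly this shape: one "node.<id>" key for the node registry and a "stores"
// map keyed by stringified store IDs.
//
//   {
//     "node.1": { "sql.conns": 3, ... },
//     "stores": {
//       "1": { "replicas": 7, ... },
//       "2": { "replicas": 9, ... }
//     }
//   }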
// GetTimeSeriesData serializes registered metrics for consumption by
// CockroachDB's time series system.
func (mr *MetricsRecorder) GetTimeSeriesData() []tspb.TimeSeriesData {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder.GetTimeSeriesData() called before NodeID allocation")
		}
		return nil
	}

	data := make([]tspb.TimeSeriesData, 0, mr.mu.lastDataCount)

	// Record time series from node-level registries.
	now := mr.mu.clock.PhysicalNow()
	recorder := registryRecorder{
		registry:       mr.mu.nodeRegistry,
		format:         nodeTimeSeriesPrefix,
		source:         strconv.FormatInt(int64(mr.mu.desc.NodeID), 10),
		timestampNanos: now,
	}
	recorder.record(&data)

	// Record time series from store-level registries.
	for storeID, r := range mr.mu.storeRegistries {
		storeRecorder := registryRecorder{
			registry:       r,
			format:         storeTimeSeriesPrefix,
			source:         strconv.FormatInt(int64(storeID), 10),
			timestampNanos: now,
		}
		storeRecorder.record(&data)
	}
	mr.mu.lastDataCount = len(data)
	return data
}
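// A sketch of the prefix format strings referenced above (the values are
// assumptions for illustration, not the package's actual constants): each
// metric name is wrapped in a per-source prefix so node and store series are
// kept distinct.
const (
	nodeTimeSeriesPrefix  = "cr.node.%s"  // e.g. "cr.node.sql.conns" (assumed)
	storeTimeSeriesPrefix = "cr.store.%s" // e.g. "cr.store.replicas" (assumed)
)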
func (s *Server) usageReportingEnabled() bool {
	ctx, span := s.AnnotateCtxWithSpan(context.Background(), "usage-reporting")
	defer span.Finish()

	// Grab the optin value from the database.
	req := &serverpb.GetUIDataRequest{Keys: []string{optinKey}}
	resp, err := s.admin.GetUIData(ctx, req)
	if err != nil {
		log.Warning(ctx, err)
		return false
	}

	val, ok := resp.KeyValues[optinKey]
	if !ok {
		// Key wasn't found, so we opt out by default.
		return false
	}
	optin, err := strconv.ParseBool(string(val.Value))
	if err != nil {
		log.Warningf(ctx, "could not parse optin value (%q): %v", val.Value, err)
		return false
	}
	return optin
}
// NewServer creates a Server from a server.Context.
func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", cfg.AdvertiseAddr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", cfg.AdvertiseAddr, err)
	}

	if cfg.AmbientCtx.Tracer == nil {
		cfg.AmbientCtx.Tracer = tracing.NewTracer()
	}

	// Try loading the TLS configs before anything else.
	if _, err := cfg.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := cfg.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano, cfg.MaxOffset),
		stopper: stopper,
		cfg:     cfg,
	}

	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass an ambient context to the various server components, but we
	// won't know the node ID until we Start(). At that point it's too late to
	// change the ambient contexts in the components (various background processes
	// will have already started using them).
	//
	// NodeIDContainer allows us to add the log tag to the context now and update
	// the value asynchronously. It's not significantly more expensive than a
	// regular tag since it's just doing an (atomic) load when a log/trace message
	// is constructed. The node ID is set by the Store if this host was
	// bootstrapped; otherwise a new one is allocated in Node.
	s.cfg.AmbientCtx.AddLogTag("n", &s.nodeIDContainer)

	ctx := s.AnnotateCtx(context.Background())
	if s.cfg.Insecure {
		log.Warning(ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}

	s.rpcContext = rpc.NewContext(s.cfg.AmbientCtx, s.cfg.Config, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(ctx, err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.cfg.AmbientCtx, &s.nodeIDContainer, s.rpcContext, s.grpc,
		s.cfg.GossipBootstrapResolvers, s.stopper, s.registry,
	)
	s.storePool = storage.NewStorePool(
		s.cfg.AmbientCtx,
		s.gossip,
		s.clock,
		s.rpcContext,
		s.cfg.TimeUntilStoreDead,
		s.stopper,
		/* deterministic */ false,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown.
	//
	// Such a loop occurs when the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		AmbientCtx:      s.cfg.AmbientCtx,
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics(s.cfg.MetricsSampleInterval)
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(
		s.cfg.AmbientCtx,
		s.distSender,
		s.clock,
		s.cfg.Linearizable,
		s.stopper,
		txnMetrics,
	)
	s.db = client.NewDB(s.txnCoordSender)

	// Use the range lease expiration and renewal durations as the node
	// liveness expiration and heartbeat interval.
	active, renewal := storage.RangeLeaseDurations(
		storage.RaftElectionTimeout(s.cfg.RaftTickInterval, s.cfg.RaftElectionTimeoutTicks))
	s.nodeLiveness = storage.NewNodeLiveness(
		s.cfg.AmbientCtx, s.clock, s.db, s.gossip, active, renewal,
	)
	s.registry.AddMetricStruct(s.nodeLiveness.Metrics())

	s.raftTransport = storage.NewRaftTransport(
		s.cfg.AmbientCtx, storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext,
	)

	s.kvDB = kv.NewDBServer(s.cfg.Config, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up internal memory metrics for use by internal SQL executors.
	s.internalMemMetrics = sql.MakeMemMetrics("internal")
	s.registry.AddMetricStruct(s.internalMemMetrics)

	// Set up the lease manager.
	var lmKnobs sql.LeaseManagerTestingKnobs
	if cfg.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *s.cfg.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(&s.nodeIDContainer, *s.db, s.clock, lmKnobs,
		s.stopper, &s.internalMemMetrics)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server.
	distSQLCfg := distsql.ServerConfig{
		AmbientContext: s.cfg.AmbientCtx,
		DB:             s.db,
		RPCContext:     s.rpcContext,
		Stopper:        s.stopper,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up admin memory metrics for use by admin SQL executors.
	s.adminMemMetrics = sql.MakeMemMetrics("admin")
	s.registry.AddMetricStruct(s.adminMemMetrics)

	// Set up the Executor.
	execCfg := sql.ExecutorConfig{
		AmbientCtx:            s.cfg.AmbientCtx,
		NodeID:                &s.nodeIDContainer,
		DB:                    s.db,
		Gossip:                s.gossip,
		LeaseManager:          s.leaseMgr,
		Clock:                 s.clock,
		DistSQLSrv:            s.distSQLServer,
		MetricsSampleInterval: s.cfg.MetricsSampleInterval,
	}
	if s.cfg.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = s.cfg.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		execCfg.SchemaChangerTestingKnobs =
			s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	} else {
		execCfg.SchemaChangerTestingKnobs = &sql.SchemaChangerTestingKnobs{}
	}

	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper, &s.adminMemMetrics)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(
		s.cfg.AmbientCtx, s.cfg.Config, s.sqlExecutor, &s.internalMemMetrics, s.cfg.SQLMemoryPoolSize,
	)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.cfg.AmbientCtx, s.tsDB, s.cfg.TimeSeriesServerConfig, s.stopper)

	// TODO(bdarnell): make StoreConfig configurable.
	storeCfg := storage.StoreConfig{
		AmbientCtx:                     s.cfg.AmbientCtx,
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		NodeLiveness:                   s.nodeLiveness,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.cfg.RaftTickInterval,
		ScanInterval:                   s.cfg.ScanInterval,
		ScanMaxIdleTime:                s.cfg.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.cfg.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.cfg.ConsistencyCheckPanicOnFailure,
		MetricsSampleInterval:          s.cfg.MetricsSampleInterval,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: s.cfg.EventLogEnabled,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
		RangeLeaseActiveDuration:  active,
		RangeLeaseRenewalDuration: renewal,
		TimeSeriesDataStore:       s.tsDB,
	}
	if s.cfg.TestingKnobs.Store != nil {
		storeCfg.TestingKnobs = *s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(storeCfg, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterConsistencyServer(s.grpc, s.node.storesServer)
	storage.RegisterFreezeServer(s.grpc, s.node.storesServer)

	s.admin = newAdminServer(s)
	s.status = newStatusServer(
		s.cfg.AmbientCtx, s.db, s.gossip, s.recorder, s.rpcContext, s.node.stores,
	)
	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
func (n *Node) batchInternal(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	// TODO(marc): grpc's authentication model (which gives credential access in
	// the request handler) doesn't really fit with the current design of the
	// security package (which assumes that TLS state is only given at connection
	// time) - that should be fixed.
	if peer, ok := peer.FromContext(ctx); ok {
		if tlsInfo, ok := peer.AuthInfo.(credentials.TLSInfo); ok {
			certUser, err := security.GetCertificateUser(&tlsInfo.State)
			if err != nil {
				return nil, err
			}
			if certUser != security.NodeUser {
				return nil, errors.Errorf("user %s is not allowed", certUser)
			}
		}
	}

	var br *roachpb.BatchResponse
	type snowballInfo struct {
		syncutil.Mutex
		collectedSpans [][]byte
		done           bool
	}
	var snowball *snowballInfo

	if err := n.stopper.RunTaskWithErr(func() error {
		const opName = "node.Batch"
		sp, err := tracing.JoinOrNew(n.storeCfg.AmbientCtx.Tracer, args.TraceContext, opName)
		if err != nil {
			return err
		}
		// If this is a snowball span, it gets special treatment: It skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			sp.LogEvent("delegating to snowball tracing")
			sp.Finish()

			snowball = new(snowballInfo)
			recorder := func(rawSpan basictracer.RawSpan) {
				snowball.Lock()
				defer snowball.Unlock()
				if snowball.done {
					// This is a late span that we must discard because the request was
					// already completed.
					return
				}
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(ctx, err)
				}
				snowball.collectedSpans = append(snowball.collectedSpans, encSp)
			}

			if sp, err = tracing.JoinOrNewSnowball(opName, args.TraceContext, recorder); err != nil {
				return err
			}
		}
		defer sp.Finish()
		traceCtx := opentracing.ContextWithSpan(ctx, sp)
		log.Event(traceCtx, args.Summary())

		tStart := timeutil.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(traceCtx, *args)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			log.ErrEventf(traceCtx, "%T", pErr.GetDetail())
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(timeutil.Since(tStart), pErr)
		br.Error = pErr
		return nil
	}); err != nil {
		return nil, err
	}

	if snowball != nil {
		snowball.Lock()
		br.CollectedSpans = snowball.collectedSpans
		snowball.done = true
		snowball.Unlock()
	}

	return br, nil
}
// Execute the entire schema change in steps.
func (sc SchemaChanger) exec() error {
	// Acquire lease.
	lease, err := sc.AcquireLease()
	if err != nil {
		return err
	}
	needRelease := true
	// Always try to release lease.
	defer func(l *sqlbase.TableDescriptor_SchemaChangeLease) {
		// If the schema changer deleted the descriptor, there's no longer a lease to be
		// released.
		if !needRelease {
			return
		}
		if err := sc.ReleaseLease(*l); err != nil {
			log.Warning(context.TODO(), err)
		}
	}(&lease)

	// Increment the version and unset tableDescriptor.UpVersion.
	desc, err := sc.MaybeIncrementVersion()
	if err != nil {
		return err
	}
	table := desc.GetTable()

	if table.Dropped() {
		lease, err = sc.ExtendLease(lease)
		if err != nil {
			return err
		}
		// Wait for everybody to see the version with the deleted bit set. When
		// this returns, nobody has any leases on the table, nor can get new leases,
		// so the table will no longer be modified.
		if err := sc.waitToUpdateLeases(sc.tableID); err != nil {
			return err
		}

		// Truncate the table and delete the descriptor.
		if err := sc.truncateAndDropTable(context.TODO(), &lease, table); err != nil {
			return err
		}
		needRelease = false
		return nil
	}

	if table.Adding() {
		for _, idx := range table.AllNonDropIndexes() {
			if idx.ForeignKey.IsSet() {
				if err := sc.waitToUpdateLeases(idx.ForeignKey.Table); err != nil {
					return err
				}
			}
		}

		if _, err := sc.leaseMgr.Publish(
			table.ID,
			func(tbl *sqlbase.TableDescriptor) error {
				tbl.State = sqlbase.TableDescriptor_PUBLIC
				return nil
			},
			func(txn *client.Txn) error { return nil },
		); err != nil {
			return err
		}
	}

	if table.Renamed() {
		lease, err = sc.ExtendLease(lease)
		if err != nil {
			return err
		}
		// Wait for everyone to see the version with the new name. When this
		// returns, no new transactions will be using the old name for the table, so
		// the old name can now be re-used (by CREATE).
		if err := sc.waitToUpdateLeases(sc.tableID); err != nil {
			return err
		}

		if sc.testingKnobs.RenameOldNameNotInUseNotification != nil {
			sc.testingKnobs.RenameOldNameNotInUseNotification()
		}
		// Free up the old name(s).
		err := sc.db.Txn(context.TODO(), func(txn *client.Txn) error {
			b := txn.NewBatch()
			for _, renameDetails := range table.Renames {
				tbKey := tableKey{renameDetails.OldParentID, renameDetails.OldName}.Key()
				b.Del(tbKey)
			}
			return txn.Run(b)
		})
		if err != nil {
			return err
		}

		// Clean up - clear the descriptor's state.
		_, err = sc.leaseMgr.Publish(sc.tableID, func(desc *sqlbase.TableDescriptor) error {
			desc.Renames = nil
			return nil
		}, nil)
		if err != nil {
			return err
		}
	}

	// Wait for the schema change to propagate to all nodes after this function
	// returns, so that the new schema is live everywhere. This is not needed for
	// correctness but is done to make the UI experience/tests predictable.
	defer func() {
		if err := sc.waitToUpdateLeases(sc.tableID); err != nil {
			log.Warning(context.TODO(), err)
		}
	}()

	if sc.mutationID == sqlbase.InvalidMutationID {
		// Nothing more to do.
		return nil
	}

	// Another transaction might set the up_version bit again,
	// but we're no longer responsible for taking care of that.

	// Run through mutation state machine and backfill.
	err = sc.runStateMachineAndBackfill(&lease)

	// Purge the mutations if the application of the mutations failed due to
	// an integrity constraint violation. All other errors are transient
	// errors that are resolved by retrying the backfill.
	if sqlbase.IsIntegrityConstraintError(err) {
		log.Warningf(context.TODO(), "reversing schema change due to irrecoverable error: %s", err)
		if errReverse := sc.reverseMutations(err); errReverse != nil {
			// Although the backfill did hit an integrity constraint violation
			// and made a decision to reverse the mutations,
			// reverseMutations() failed. If exec() is called again the entire
			// schema change will be retried.
			return errReverse
		}

		// After this point the schema change has been reversed and any retry
		// of the schema change will act upon the reversed schema change.
		if errPurge := sc.runStateMachineAndBackfill(&lease); errPurge != nil {
			// Don't return this error because we do want the caller to know
			// that an integrity constraint was violated with the original
			// schema change. The reversed schema change will be
			// retried via the async schema change manager.
			log.Warningf(context.TODO(), "error purging mutation: %s, after error: %s", errPurge, err)
		}
	}

	return err
}