Exemple #1
0
func (s *Server) reportUsage(ctx context.Context) {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(ctx, err)
		return
	}

	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil && log.V(2) {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		log.Warning(ctx, "Failed to report node usage metrics: ", err)
		return
	}

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
Exemple #2
0
// eachRecordableValue visits each metric in the registry, calling the supplied
// function once for each recordable value represented by that metric. This is
// useful to expand certain metric types (such as histograms) into multiple
// recordable values.
func eachRecordableValue(reg *metric.Registry, fn func(string, float64)) {
	reg.Each(func(name string, mtr interface{}) {
		if histogram, ok := mtr.(*metric.Histogram); ok {
			// TODO(mrtracy): Where should this comment go for better
			// visibility?
			//
			// Proper support of Histograms for time series is difficult and
			// likely not worth the trouble. Instead, we aggregate a windowed
			// histogram at fixed quantiles. If the scraping window and the
			// histogram's eviction duration are similar, this should give
			// good results; if the two durations are very different, we either
			// report stale results or report only the more recent data.
			//
			// Additionally, we can only aggregate max/min of the quantiles;
			// roll-ups don't know that and so they will return mathematically
			// nonsensical values, but that seems acceptable for the time
			// being.
			curr, _ := histogram.Windowed()
			for _, pt := range recordHistogramQuantiles {
				fn(name+pt.suffix, float64(curr.ValueAtQuantile(pt.quantile)))
			}
		} else {
			val, err := extractValue(mtr)
			if err != nil {
				log.Warning(context.TODO(), err)
				return
			}
			fn(name, val)
		}
	})
}
Exemple #3
0
// removeLeaseIfExpiring removes a lease and returns true if it is about to expire.
// The method also resets the transaction deadline.
func (p *planner) removeLeaseIfExpiring(lease *LeaseState) bool {
	if lease == nil || lease.hasSomeLifeLeft(p.leaseMgr.clock) {
		return false
	}

	// Remove the lease from p.leases.
	idx := -1
	for i, l := range p.leases {
		if l == lease {
			idx = i
			break
		}
	}
	if idx == -1 {
		log.Warningf(p.ctx(), "lease (%s) not found", lease)
		return false
	}
	p.leases[idx] = p.leases[len(p.leases)-1]
	p.leases[len(p.leases)-1] = nil
	p.leases = p.leases[:len(p.leases)-1]

	if err := p.leaseMgr.Release(lease); err != nil {
		log.Warning(p.ctx(), err)
	}

	// Reset the deadline so that a new deadline will be set after the lease is acquired.
	p.txn.ResetDeadline()
	for _, l := range p.leases {
		p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: l.Expiration().UnixNano()})
	}
	return true
}
Exemple #4
0
func (is *infoStore) runCallbacks(key string, content roachpb.Value, callbacks ...Callback) {
	// Add the callbacks to the callback work list.
	f := func() {
		for _, method := range callbacks {
			method(key, content)
		}
	}
	is.callbackWorkMu.Lock()
	is.callbackWork = append(is.callbackWork, f)
	is.callbackWorkMu.Unlock()

	// Run callbacks in a goroutine to avoid mutex reentry. We also guarantee
	// callbacks are run in order such that if a key is updated twice in
	// succession, the second callback will never be run before the first.
	if err := is.stopper.RunAsyncTask(context.Background(), func(_ context.Context) {
		// Grab the callback mutex to serialize execution of the callbacks.
		is.callbackMu.Lock()
		defer is.callbackMu.Unlock()

		// Grab and execute the list of work.
		is.callbackWorkMu.Lock()
		work := is.callbackWork
		is.callbackWork = nil
		is.callbackWorkMu.Unlock()

		for _, w := range work {
			w()
		}
	}); err != nil {
		ctx := is.AnnotateCtx(context.TODO())
		log.Warning(ctx, err)
	}
}
Exemple #5
0
func (s *Server) checkForUpdates(ctx context.Context) {
	q := updatesURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	updatesURL.RawQuery = q.Encode()

	res, err := http.Get(updatesURL.String())
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		if log.V(2) {
			log.Warning(ctx, "Failed to check for updates: ", err)
		}
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to check for updates: status: %s, body: %s, error: %v",
			res.Status, b, err)
		return
	}

	decoder := json.NewDecoder(res.Body)
	r := struct {
		Details []versionInfo `json:"details"`
	}{}

	err = decoder.Decode(&r)
	if err != nil && err != io.EOF {
		log.Warning(ctx, "Error decoding updates info: ", err)
		return
	}

	// Ideally the updates server only returns the most relevant updates for us,
	// but if it replied with an excessive number of updates, limit log spam by
	// only printing the last few.
	if len(r.Details) > updateMaxVersionsToReport {
		r.Details = r.Details[len(r.Details)-updateMaxVersionsToReport:]
	}
	for _, v := range r.Details {
		log.Infof(ctx, "A new version is available: %s, details: %s", v.Version, v.Details)
	}
}
Exemple #6
0
// shouldQueue determines whether a range should be queued for truncating. This
// is true only if the replica is the raft leader and if the total number of
// the range's raft log's stale entries exceeds RaftLogQueueStaleThreshold.
func (rlq *raftLogQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, r *Replica, _ config.SystemConfig,
) (shouldQ bool, priority float64) {
	truncatableIndexes, _, err := getTruncatableIndexes(ctx, r)
	if err != nil {
		log.Warning(ctx, err)
		return false, 0
	}

	return truncatableIndexes >= RaftLogQueueStaleThreshold, float64(truncatableIndexes)
}
Exemple #7
0
// GetStatusSummary returns a status summary messages for the node. The summary
// includes the recent values of metrics for both the node and all of its
// component stores.
func (mr *MetricsRecorder) GetStatusSummary() *NodeStatus {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "attempt to generate status summary before NodeID allocation.")
		}
		return nil
	}

	now := mr.mu.clock.PhysicalNow()

	// Generate an node status with no store data.
	nodeStat := &NodeStatus{
		Desc:          mr.mu.desc,
		BuildInfo:     build.GetInfo(),
		UpdatedAt:     now,
		StartedAt:     mr.mu.startedAt,
		StoreStatuses: make([]StoreStatus, 0, mr.mu.lastSummaryCount),
		Metrics:       make(map[string]float64, mr.mu.lastNodeMetricCount),
	}

	eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) {
		nodeStat.Metrics[name] = val
	})

	// Generate status summaries for stores.
	for storeID, r := range mr.mu.storeRegistries {
		storeMetrics := make(map[string]float64, mr.mu.lastStoreMetricCount)
		eachRecordableValue(r, func(name string, val float64) {
			storeMetrics[name] = val
		})

		// Gather descriptor from store.
		descriptor, err := mr.mu.stores[storeID].Descriptor()
		if err != nil {
			log.Errorf(context.TODO(), "Could not record status summaries: Store %d could not return descriptor, error: %s", storeID, err)
			continue
		}

		nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, StoreStatus{
			Desc:    *descriptor,
			Metrics: storeMetrics,
		})
	}
	mr.mu.lastSummaryCount = len(nodeStat.StoreStatuses)
	mr.mu.lastNodeMetricCount = len(nodeStat.Metrics)
	if len(nodeStat.StoreStatuses) > 0 {
		mr.mu.lastStoreMetricCount = len(nodeStat.StoreStatuses[0].Metrics)
	}
	return nodeStat
}
Exemple #8
0
// Wait waits for a running container to exit.
func (c *Container) Wait() error {
	exitCode, err := c.cluster.client.ContainerWait(context.Background(), c.id)
	if err == nil && exitCode != 0 {
		err = errors.Errorf("non-zero exit code: %d", exitCode)
	}
	if err != nil {
		if err := c.Logs(os.Stderr); err != nil {
			log.Warning(context.TODO(), err)
		}
	}
	return err
}
Exemple #9
0
// gossipStores broadcasts each store and dead replica to the gossip network.
func (n *Node) gossipStores(ctx context.Context) {
	if err := n.stores.VisitStores(func(s *storage.Store) error {
		if err := s.GossipStore(ctx); err != nil {
			return err
		}
		if err := s.GossipDeadReplicas(ctx); err != nil {
			return err
		}
		return nil
	}); err != nil {
		log.Warning(ctx, err)
	}
}
Exemple #10
0
// releaseLeases implements the SchemaAccessor interface.
func (p *planner) releaseLeases() {
	if p.leases != nil {
		if log.V(2) {
			log.Infof(p.ctx(), "planner releasing %d leases", len(p.leases))
		}
		for _, lease := range p.leases {
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(p.ctx(), err)
			}
		}
		p.leases = nil
	}
}
Exemple #11
0
func (s *Server) checkForUpdates(ctx context.Context) {
	q := updatesURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	updatesURL.RawQuery = q.Encode()

	res, err := http.Get(updatesURL.String())
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		if log.V(2) {
			log.Warning(ctx, "Failed to check for updates: ", err)
		}
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to check for updates: status: %s, body: %s, error: %v",
			res.Status, b, err)
		return
	}

	decoder := json.NewDecoder(res.Body)
	r := struct {
		Details []versionInfo `json:"details"`
	}{}

	err = decoder.Decode(&r)
	if err != nil && err != io.EOF {
		log.Warning(ctx, "Error decoding updates info: ", err)
		return
	}

	for _, v := range r.Details {
		log.Infof(ctx, "A new version is available: %s, details: %s", v.Version, v.Details)
	}
}
Exemple #12
0
// scrapePrometheus updates the prometheusExporter's metrics snapshot.
func (mr *MetricsRecorder) scrapePrometheus() {
	mr.mu.Lock()
	defer mr.mu.Unlock()
	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; output nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder asked to scrape metrics before NodeID allocation")
		}
	}

	mr.prometheusExporter.ScrapeRegistry(mr.mu.nodeRegistry)
	for _, reg := range mr.mu.storeRegistries {
		mr.prometheusExporter.ScrapeRegistry(reg)
	}
}
Exemple #13
0
func (ia *idAllocator) start() {
	ia.stopper.RunWorker(func() {
		ctx := ia.AnnotateCtx(context.Background())
		defer close(ia.ids)

		for {
			var newValue int64
			for newValue <= int64(ia.minID) {
				var err error
				var res client.KeyValue
				for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
					idKey := ia.idKey.Load().(roachpb.Key)
					if err := ia.stopper.RunTask(func() {
						res, err = ia.db.Inc(ctx, idKey, int64(ia.blockSize))
					}); err != nil {
						log.Warning(ctx, err)
						return
					}
					if err == nil {
						newValue = res.ValueInt()
						break
					}

					log.Warningf(ctx, "unable to allocate %d ids from %s: %s", ia.blockSize, idKey, err)
				}
				if err != nil {
					panic(fmt.Sprintf("unexpectedly exited id allocation retry loop: %s", err))
				}
			}

			end := newValue + 1
			start := end - int64(ia.blockSize)

			if start < int64(ia.minID) {
				start = int64(ia.minID)
			}

			// Add all new ids to the channel for consumption.
			for i := start; i < end; i++ {
				select {
				case ia.ids <- uint32(i):
				case <-ia.stopper.ShouldStop():
					return
				}
			}
		}
	})
}
Exemple #14
0
// execSchemaChanges releases schema leases and runs the queued
// schema changers. This needs to be run after the transaction
// scheduling the schema change has finished.
//
// The list of closures is cleared after (attempting) execution.
//
// Args:
//  results: The results from all statements in the group that scheduled the
//    schema changes we're about to execute. Results corresponding to the
//    schema change statements will be changed in case an error occurs.
func (scc *schemaChangerCollection) execSchemaChanges(
	e *Executor, planMaker *planner, results ResultList,
) {
	if planMaker.txn != nil {
		panic("trying to execute schema changes while still in a transaction")
	}
	ctx := e.AnnotateCtx(context.TODO())
	// Release the leases once a transaction is complete.
	planMaker.releaseLeases()
	if e.cfg.SchemaChangerTestingKnobs.SyncFilter != nil {
		e.cfg.SchemaChangerTestingKnobs.SyncFilter(TestingSchemaChangerCollection{scc})
	}
	// Execute any schema changes that were scheduled, in the order of the
	// statements that scheduled them.
	for _, scEntry := range scc.schemaChangers {
		sc := &scEntry.sc
		sc.db = *e.cfg.DB
		sc.testingKnobs = e.cfg.SchemaChangerTestingKnobs
		for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
			if done, err := sc.IsDone(); err != nil {
				log.Warning(ctx, err)
				break
			} else if done {
				break
			}
			if err := sc.exec(); err != nil {
				if isSchemaChangeRetryError(err) {
					// Try again
					continue
				}
				// All other errors can be reported; we report it as the result
				// corresponding to the statement that enqueued this changer.
				// There's some sketchiness here: we assume there's a single result
				// per statement and we clobber the result/error of the corresponding
				// statement.
				// There's also another subtlety: we can only report results for
				// statements in the current batch; we can't modify the results of older
				// statements.
				if scEntry.epoch == scc.curGroupNum {
					results[scEntry.idx] = Result{Err: err}
				}
				log.Warningf(ctx, "error executing schema change: %s", err)
			}
			break
		}
	}
	scc.schemaChangers = scc.schemaChangers[:0]
}
Exemple #15
0
// poll retrieves data from the underlying DataSource a single time, storing any
// returned time series data on the server.
func (p *poller) poll() {
	if err := p.stopper.RunTask(func() {
		data := p.source.GetTimeSeriesData()
		if len(data) == 0 {
			return
		}

		ctx, span := p.AnnotateCtxWithSpan(context.Background(), "ts-poll")
		defer span.Finish()

		if err := p.db.StoreData(ctx, p.r, data); err != nil {
			log.Warningf(ctx, "error writing time series data: %s", err)
		}
	}); err != nil {
		log.Warning(p.AnnotateCtx(context.TODO()), err)
	}
}
Exemple #16
0
// tryAsyncAbort (synchronously) grabs a copy of the txn proto and the intents
// (which it then clears from txnMeta), and asynchronously tries to abort the
// transaction.
func (tc *TxnCoordSender) tryAsyncAbort(txnID uuid.UUID) {
	tc.Lock()
	txnMeta := tc.txns[txnID]
	// Clone the intents and the txn to avoid data races.
	intentSpans, _ := roachpb.MergeSpans(append([]roachpb.Span(nil), txnMeta.keys...))
	txnMeta.keys = nil
	txn := txnMeta.txn.Clone()
	tc.Unlock()

	// Since we don't hold the lock continuously, it's possible that two aborts
	// raced here. That's fine (and probably better than the alternative, which
	// is missing new intents sometimes).
	if txn.Status != roachpb.PENDING {
		return
	}

	ba := roachpb.BatchRequest{}
	ba.Txn = &txn

	et := &roachpb.EndTransactionRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Commit:      false,
		IntentSpans: intentSpans,
	}
	ba.Add(et)
	ctx := tc.AnnotateCtx(context.TODO())
	if err := tc.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
		// Use the wrapped sender since the normal Sender does not allow
		// clients to specify intents.
		if _, pErr := tc.wrapped.Send(ctx, ba); pErr != nil {
			if log.V(1) {
				log.Warningf(ctx, "abort due to inactivity failed for %s: %s ", txn, pErr)
			}
		}
	}); err != nil {
		log.Warning(ctx, err)
	}
}
Exemple #17
0
// MarshalJSON returns an appropriate JSON representation of the current values
// of the metrics being tracked by this recorder.
func (mr *MetricsRecorder) MarshalJSON() ([]byte, error) {
	mr.mu.Lock()
	defer mr.mu.Unlock()
	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; return an empty
		// JSON object.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder.MarshalJSON() called before NodeID allocation")
		}
		return []byte("{}"), nil
	}
	topLevel := map[string]interface{}{
		fmt.Sprintf("node.%d", mr.mu.desc.NodeID): mr.mu.nodeRegistry,
	}
	// Add collection of stores to top level. JSON requires that keys be strings,
	// so we must convert the store ID to a string.
	storeLevel := make(map[string]interface{})
	for id, reg := range mr.mu.storeRegistries {
		storeLevel[strconv.Itoa(int(id))] = reg
	}
	topLevel["stores"] = storeLevel
	return json.Marshal(topLevel)
}
Exemple #18
0
// GetTimeSeriesData serializes registered metrics for consumption by
// CockroachDB's time series system.
func (mr *MetricsRecorder) GetTimeSeriesData() []tspb.TimeSeriesData {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder.GetTimeSeriesData() called before NodeID allocation")
		}
		return nil
	}

	data := make([]tspb.TimeSeriesData, 0, mr.mu.lastDataCount)

	// Record time series from node-level registries.
	now := mr.mu.clock.PhysicalNow()
	recorder := registryRecorder{
		registry:       mr.mu.nodeRegistry,
		format:         nodeTimeSeriesPrefix,
		source:         strconv.FormatInt(int64(mr.mu.desc.NodeID), 10),
		timestampNanos: now,
	}
	recorder.record(&data)

	// Record time series from store-level registries.
	for storeID, r := range mr.mu.storeRegistries {
		storeRecorder := registryRecorder{
			registry:       r,
			format:         storeTimeSeriesPrefix,
			source:         strconv.FormatInt(int64(storeID), 10),
			timestampNanos: now,
		}
		storeRecorder.record(&data)
	}
	mr.mu.lastDataCount = len(data)
	return data
}
Exemple #19
0
func (s *Server) usageReportingEnabled() bool {
	ctx, span := s.AnnotateCtxWithSpan(context.Background(), "usage-reporting")
	defer span.Finish()

	// Grab the optin value from the database.
	req := &serverpb.GetUIDataRequest{Keys: []string{optinKey}}
	resp, err := s.admin.GetUIData(ctx, req)
	if err != nil {
		log.Warning(ctx, err)
		return false
	}

	val, ok := resp.KeyValues[optinKey]
	if !ok {
		// Key wasn't found, so we opt out by default.
		return false
	}
	optin, err := strconv.ParseBool(string(val.Value))
	if err != nil {
		log.Warningf(ctx, "could not parse optin value (%q): %v", val.Value, err)
		return false
	}
	return optin
}
Exemple #20
0
// NewServer creates a Server from a server.Context.
func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", cfg.AdvertiseAddr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", cfg.AdvertiseAddr, err)
	}

	if cfg.AmbientCtx.Tracer == nil {
		cfg.AmbientCtx.Tracer = tracing.NewTracer()
	}

	// Try loading the TLS configs before anything else.
	if _, err := cfg.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := cfg.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano, cfg.MaxOffset),
		stopper: stopper,
		cfg:     cfg,
	}
	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass an ambient context to the various server components, but we
	// won't know the node ID until we Start(). At that point it's too late to
	// change the ambient contexts in the components (various background processes
	// will have already started using them).
	//
	// NodeIDContainer allows us to add the log tag to the context now and update
	// the value asynchronously. It's not significantly more expensive than a
	// regular tag since it's just doing an (atomic) load when a log/trace message
	// is constructed. The node ID is set by the Store if this host was
	// bootstrapped; otherwise a new one is allocated in Node.
	s.cfg.AmbientCtx.AddLogTag("n", &s.nodeIDContainer)

	ctx := s.AnnotateCtx(context.Background())
	if s.cfg.Insecure {
		log.Warning(ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}

	s.rpcContext = rpc.NewContext(s.cfg.AmbientCtx, s.cfg.Config, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(ctx, err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.cfg.AmbientCtx,
		&s.nodeIDContainer,
		s.rpcContext,
		s.grpc,
		s.cfg.GossipBootstrapResolvers,
		s.stopper,
		s.registry,
	)
	s.storePool = storage.NewStorePool(
		s.cfg.AmbientCtx,
		s.gossip,
		s.clock,
		s.rpcContext,
		s.cfg.TimeUntilStoreDead,
		s.stopper,
		/* deterministic */ false,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		AmbientCtx:      s.cfg.AmbientCtx,
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics(s.cfg.MetricsSampleInterval)
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(
		s.cfg.AmbientCtx,
		s.distSender,
		s.clock,
		s.cfg.Linearizable,
		s.stopper,
		txnMetrics,
	)
	s.db = client.NewDB(s.txnCoordSender)

	// Use the range lease expiration and renewal durations as the node
	// liveness expiration and heartbeat interval.
	active, renewal := storage.RangeLeaseDurations(
		storage.RaftElectionTimeout(s.cfg.RaftTickInterval, s.cfg.RaftElectionTimeoutTicks))
	s.nodeLiveness = storage.NewNodeLiveness(
		s.cfg.AmbientCtx, s.clock, s.db, s.gossip, active, renewal,
	)
	s.registry.AddMetricStruct(s.nodeLiveness.Metrics())

	s.raftTransport = storage.NewRaftTransport(
		s.cfg.AmbientCtx, storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext,
	)

	s.kvDB = kv.NewDBServer(s.cfg.Config, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up internal memory metrics for use by internal SQL executors.
	s.internalMemMetrics = sql.MakeMemMetrics("internal")
	s.registry.AddMetricStruct(s.internalMemMetrics)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if cfg.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *s.cfg.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(&s.nodeIDContainer, *s.db, s.clock, lmKnobs,
		s.stopper, &s.internalMemMetrics)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		AmbientContext: s.cfg.AmbientCtx,
		DB:             s.db,
		RPCContext:     s.rpcContext,
		Stopper:        s.stopper,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up admin memory metrics for use by admin SQL executors.
	s.adminMemMetrics = sql.MakeMemMetrics("admin")
	s.registry.AddMetricStruct(s.adminMemMetrics)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		AmbientCtx:            s.cfg.AmbientCtx,
		NodeID:                &s.nodeIDContainer,
		DB:                    s.db,
		Gossip:                s.gossip,
		LeaseManager:          s.leaseMgr,
		Clock:                 s.clock,
		DistSQLSrv:            s.distSQLServer,
		MetricsSampleInterval: s.cfg.MetricsSampleInterval,
	}
	if s.cfg.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = s.cfg.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		execCfg.SchemaChangerTestingKnobs =
			s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	} else {
		execCfg.SchemaChangerTestingKnobs = &sql.SchemaChangerTestingKnobs{}
	}
	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper, &s.adminMemMetrics)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(
		s.cfg.AmbientCtx, s.cfg.Config, s.sqlExecutor, &s.internalMemMetrics, s.cfg.SQLMemoryPoolSize,
	)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.cfg.AmbientCtx, s.tsDB, s.cfg.TimeSeriesServerConfig, s.stopper)

	// TODO(bdarnell): make StoreConfig configurable.
	storeCfg := storage.StoreConfig{
		AmbientCtx:                     s.cfg.AmbientCtx,
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		NodeLiveness:                   s.nodeLiveness,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.cfg.RaftTickInterval,
		ScanInterval:                   s.cfg.ScanInterval,
		ScanMaxIdleTime:                s.cfg.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.cfg.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.cfg.ConsistencyCheckPanicOnFailure,
		MetricsSampleInterval:          s.cfg.MetricsSampleInterval,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: s.cfg.EventLogEnabled,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
		RangeLeaseActiveDuration:  active,
		RangeLeaseRenewalDuration: renewal,
		TimeSeriesDataStore:       s.tsDB,
	}
	if s.cfg.TestingKnobs.Store != nil {
		storeCfg.TestingKnobs = *s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(storeCfg, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterConsistencyServer(s.grpc, s.node.storesServer)
	storage.RegisterFreezeServer(s.grpc, s.node.storesServer)

	s.admin = newAdminServer(s)
	s.status = newStatusServer(
		s.cfg.AmbientCtx, s.db, s.gossip, s.recorder, s.rpcContext, s.node.stores,
	)
	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
Exemple #21
0
func (n *Node) batchInternal(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	// TODO(marc): grpc's authentication model (which gives credential access in
	// the request handler) doesn't really fit with the current design of the
	// security package (which assumes that TLS state is only given at connection
	// time) - that should be fixed.
	if peer, ok := peer.FromContext(ctx); ok {
		if tlsInfo, ok := peer.AuthInfo.(credentials.TLSInfo); ok {
			certUser, err := security.GetCertificateUser(&tlsInfo.State)
			if err != nil {
				return nil, err
			}
			if certUser != security.NodeUser {
				return nil, errors.Errorf("user %s is not allowed", certUser)
			}
		}
	}

	var br *roachpb.BatchResponse

	type snowballInfo struct {
		syncutil.Mutex
		collectedSpans [][]byte
		done           bool
	}
	var snowball *snowballInfo

	if err := n.stopper.RunTaskWithErr(func() error {
		const opName = "node.Batch"
		sp, err := tracing.JoinOrNew(n.storeCfg.AmbientCtx.Tracer, args.TraceContext, opName)
		if err != nil {
			return err
		}
		// If this is a snowball span, it gets special treatment: It skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			sp.LogEvent("delegating to snowball tracing")
			sp.Finish()

			snowball = new(snowballInfo)
			recorder := func(rawSpan basictracer.RawSpan) {
				snowball.Lock()
				defer snowball.Unlock()
				if snowball.done {
					// This is a late span that we must discard because the request was
					// already completed.
					return
				}
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(ctx, err)
				}
				snowball.collectedSpans = append(snowball.collectedSpans, encSp)
			}

			if sp, err = tracing.JoinOrNewSnowball(opName, args.TraceContext, recorder); err != nil {
				return err
			}
		}
		defer sp.Finish()
		traceCtx := opentracing.ContextWithSpan(ctx, sp)
		log.Event(traceCtx, args.Summary())

		tStart := timeutil.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(traceCtx, *args)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			log.ErrEventf(traceCtx, "%T", pErr.GetDetail())
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(timeutil.Since(tStart), pErr)
		br.Error = pErr
		return nil
	}); err != nil {
		return nil, err
	}

	if snowball != nil {
		snowball.Lock()
		br.CollectedSpans = snowball.collectedSpans
		snowball.done = true
		snowball.Unlock()
	}

	return br, nil
}
Exemple #22
0
// Execute the entire schema change in steps.
func (sc SchemaChanger) exec() error {
	// Acquire lease.
	lease, err := sc.AcquireLease()
	if err != nil {
		return err
	}
	needRelease := true
	// Always try to release lease.
	defer func(l *sqlbase.TableDescriptor_SchemaChangeLease) {
		// If the schema changer deleted the descriptor, there's no longer a lease to be
		// released.
		if !needRelease {
			return
		}
		if err := sc.ReleaseLease(*l); err != nil {
			log.Warning(context.TODO(), err)
		}
	}(&lease)

	// Increment the version and unset tableDescriptor.UpVersion.
	desc, err := sc.MaybeIncrementVersion()
	if err != nil {
		return err
	}
	table := desc.GetTable()

	if table.Dropped() {
		lease, err = sc.ExtendLease(lease)
		if err != nil {
			return err
		}
		// Wait for everybody to see the version with the deleted bit set. When
		// this returns, nobody has any leases on the table, nor can get new leases,
		// so the table will no longer be modified.
		if err := sc.waitToUpdateLeases(sc.tableID); err != nil {
			return err
		}

		// Truncate the table and delete the descriptor.
		if err := sc.truncateAndDropTable(context.TODO(), &lease, table); err != nil {
			return err
		}
		needRelease = false
		return nil
	}

	if table.Adding() {
		for _, idx := range table.AllNonDropIndexes() {
			if idx.ForeignKey.IsSet() {
				if err := sc.waitToUpdateLeases(idx.ForeignKey.Table); err != nil {
					return err
				}
			}
		}

		if _, err := sc.leaseMgr.Publish(
			table.ID,
			func(tbl *sqlbase.TableDescriptor) error {
				tbl.State = sqlbase.TableDescriptor_PUBLIC
				return nil
			},
			func(txn *client.Txn) error { return nil },
		); err != nil {
			return err
		}
	}

	if table.Renamed() {
		lease, err = sc.ExtendLease(lease)
		if err != nil {
			return err
		}
		// Wait for everyone to see the version with the new name. When this
		// returns, no new transactions will be using the old name for the table, so
		// the old name can now be re-used (by CREATE).
		if err := sc.waitToUpdateLeases(sc.tableID); err != nil {
			return err
		}

		if sc.testingKnobs.RenameOldNameNotInUseNotification != nil {
			sc.testingKnobs.RenameOldNameNotInUseNotification()
		}
		// Free up the old name(s).
		err := sc.db.Txn(context.TODO(), func(txn *client.Txn) error {
			b := txn.NewBatch()
			for _, renameDetails := range table.Renames {
				tbKey := tableKey{renameDetails.OldParentID, renameDetails.OldName}.Key()
				b.Del(tbKey)
			}
			return txn.Run(b)
		})
		if err != nil {
			return err
		}

		// Clean up - clear the descriptor's state.
		_, err = sc.leaseMgr.Publish(sc.tableID, func(desc *sqlbase.TableDescriptor) error {
			desc.Renames = nil
			return nil
		}, nil)
		if err != nil {
			return err
		}
	}

	// Wait for the schema change to propagate to all nodes after this function
	// returns, so that the new schema is live everywhere. This is not needed for
	// correctness but is done to make the UI experience/tests predictable.
	defer func() {
		if err := sc.waitToUpdateLeases(sc.tableID); err != nil {
			log.Warning(context.TODO(), err)
		}
	}()

	if sc.mutationID == sqlbase.InvalidMutationID {
		// Nothing more to do.
		return nil
	}

	// Another transaction might set the up_version bit again,
	// but we're no longer responsible for taking care of that.

	// Run through mutation state machine and backfill.
	err = sc.runStateMachineAndBackfill(&lease)

	// Purge the mutations if the application of the mutations failed due to
	// an integrity constraint violation. All other errors are transient
	// errors that are resolved by retrying the backfill.
	if sqlbase.IsIntegrityConstraintError(err) {
		log.Warningf(context.TODO(), "reversing schema change due to irrecoverable error: %s", err)
		if errReverse := sc.reverseMutations(err); errReverse != nil {
			// Although the backfill did hit an integrity constraint violation
			// and made a decision to reverse the mutations,
			// reverseMutations() failed. If exec() is called again the entire
			// schema change will be retried.
			return errReverse
		}

		// After this point the schema change has been reversed and any retry
		// of the schema change will act upon the reversed schema change.
		if errPurge := sc.runStateMachineAndBackfill(&lease); errPurge != nil {
			// Don't return this error because we do want the caller to know
			// that an integrity constraint was violated with the original
			// schema change. The reversed schema change will be
			// retried via the async schema change manager.
			log.Warningf(context.TODO(), "error purging mutation: %s, after error: %s", errPurge, err)
		}
	}

	return err
}