Exemple #1
0
// recordJoinEvent begins an asynchronous task which attempts to log a "node
// join" or "node restart" event. This query will retry until it succeeds or the
// server stops.
func (n *Node) recordJoinEvent() {
	if !n.storeCfg.LogRangeEvents {
		return
	}

	logEventType := sql.EventLogNodeRestart
	if n.initialBoot {
		logEventType = sql.EventLogNodeJoin
	}

	n.stopper.RunWorker(func() {
		ctx, span := n.AnnotateCtxWithSpan(context.Background(), "record-join-event")
		defer span.Finish()
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = n.stopper.ShouldStop()
		for r := retry.Start(retryOpts); r.Next(); {
			if err := n.storeCfg.DB.Txn(ctx, func(txn *client.Txn) error {
				return n.eventLogger.InsertEventRecord(txn,
					logEventType,
					int32(n.Descriptor.NodeID),
					int32(n.Descriptor.NodeID),
					struct {
						Descriptor roachpb.NodeDescriptor
						ClusterID  uuid.UUID
						StartedAt  int64
					}{n.Descriptor, n.ClusterID, n.startedAt},
				)
			}); err != nil {
				log.Warningf(ctx, "%s: unable to log %s event: %s", n, logEventType, err)
			} else {
				return
			}
		}
	})
}
Exemple #2
0
// createTestNode creates an rpc server using the specified address,
// gossip instance, KV database and a node using the specified slice
// of engines. The server, clock and node are returned. If gossipBS is
// not nil, the gossip bootstrap address is set to gossipBS.
func createTestNode(
	addr net.Addr, engines []engine.Engine, gossipBS net.Addr, t *testing.T,
) (*grpc.Server, net.Addr, *hlc.Clock, *Node, *stop.Stopper) {
	cfg := storage.StoreConfig{}

	stopper := stop.NewStopper()
	cfg.Clock = hlc.NewClock(hlc.UnixNano)
	nodeRPCContext := rpc.NewContext(log.AmbientContext{}, nodeTestBaseContext, cfg.Clock, stopper)
	cfg.ScanInterval = 10 * time.Hour
	cfg.ConsistencyCheckInterval = 10 * time.Hour
	grpcServer := rpc.NewServer(nodeRPCContext)
	serverCfg := makeTestConfig()
	cfg.Gossip = gossip.NewTest(
		0,
		nodeRPCContext,
		grpcServer,
		serverCfg.GossipBootstrapResolvers,
		stopper,
		metric.NewRegistry(),
	)
	ln, err := netutil.ListenAndServeGRPC(stopper, grpcServer, addr)
	if err != nil {
		t.Fatal(err)
	}
	if gossipBS != nil {
		// Handle possibility of a :0 port specification.
		if gossipBS.Network() == addr.Network() && gossipBS.String() == addr.String() {
			gossipBS = ln.Addr()
		}
		r, err := resolver.NewResolverFromAddress(gossipBS)
		if err != nil {
			t.Fatalf("bad gossip address %s: %s", gossipBS, err)
		}
		cfg.Gossip.SetResolvers([]resolver.Resolver{r})
		cfg.Gossip.Start(ln.Addr())
	}
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	distSender := kv.NewDistSender(kv.DistSenderConfig{
		Clock:           cfg.Clock,
		RPCContext:      nodeRPCContext,
		RPCRetryOptions: &retryOpts,
	}, cfg.Gossip)
	cfg.AmbientCtx.Tracer = tracing.NewTracer()
	sender := kv.NewTxnCoordSender(
		cfg.AmbientCtx,
		distSender,
		cfg.Clock,
		false,
		stopper,
		kv.MakeTxnMetrics(metric.TestSampleInterval),
	)
	cfg.DB = client.NewDB(sender)
	cfg.Transport = storage.NewDummyRaftTransport()
	cfg.MetricsSampleInterval = metric.TestSampleInterval
	node := NewNode(cfg, status.NewMetricsRecorder(cfg.Clock), metric.NewRegistry(), stopper,
		kv.MakeTxnMetrics(metric.TestSampleInterval), sql.MakeEventLogger(nil))
	roachpb.RegisterInternalServer(grpcServer, node)
	return grpcServer, ln.Addr(), cfg.Clock, node, stopper
}
Exemple #3
0
// execSchemaChanges releases schema leases and runs the queued
// schema changers. This needs to be run after the transaction
// scheduling the schema change has finished.
//
// The list of closures is cleared after (attempting) execution.
//
// Args:
//  results: The results from all statements in the group that scheduled the
//    schema changes we're about to execute. Results corresponding to the
//    schema change statements will be changed in case an error occurs.
func (scc *schemaChangerCollection) execSchemaChanges(
	e *Executor, planMaker *planner, results ResultList,
) {
	if planMaker.txn != nil {
		panic("trying to execute schema changes while still in a transaction")
	}
	ctx := e.AnnotateCtx(context.TODO())
	// Release the leases once a transaction is complete.
	planMaker.releaseLeases()
	if e.cfg.SchemaChangerTestingKnobs.SyncFilter != nil {
		e.cfg.SchemaChangerTestingKnobs.SyncFilter(TestingSchemaChangerCollection{scc})
	}
	// Execute any schema changes that were scheduled, in the order of the
	// statements that scheduled them.
	for _, scEntry := range scc.schemaChangers {
		sc := &scEntry.sc
		sc.db = *e.cfg.DB
		sc.testingKnobs = e.cfg.SchemaChangerTestingKnobs
		for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
			if done, err := sc.IsDone(); err != nil {
				log.Warning(ctx, err)
				break
			} else if done {
				break
			}
			if err := sc.exec(); err != nil {
				if isSchemaChangeRetryError(err) {
					// Try again
					continue
				}
				// All other errors can be reported; we report it as the result
				// corresponding to the statement that enqueued this changer.
				// There's some sketchiness here: we assume there's a single result
				// per statement and we clobber the result/error of the corresponding
				// statement.
				// There's also another subtlety: we can only report results for
				// statements in the current batch; we can't modify the results of older
				// statements.
				if scEntry.epoch == scc.curGroupNum {
					results[scEntry.idx] = Result{Err: err}
				}
				log.Warningf(ctx, "error executing schema change: %s", err)
			}
			break
		}
	}
	scc.schemaChangers = scc.schemaChangers[:0]
}
Exemple #4
0
func (ia *idAllocator) start() {
	ia.stopper.RunWorker(func() {
		ctx := ia.AnnotateCtx(context.Background())
		defer close(ia.ids)

		for {
			var newValue int64
			for newValue <= int64(ia.minID) {
				var err error
				var res client.KeyValue
				for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
					idKey := ia.idKey.Load().(roachpb.Key)
					if err := ia.stopper.RunTask(func() {
						res, err = ia.db.Inc(ctx, idKey, int64(ia.blockSize))
					}); err != nil {
						log.Warning(ctx, err)
						return
					}
					if err == nil {
						newValue = res.ValueInt()
						break
					}

					log.Warningf(ctx, "unable to allocate %d ids from %s: %s", ia.blockSize, idKey, err)
				}
				if err != nil {
					panic(fmt.Sprintf("unexpectedly exited id allocation retry loop: %s", err))
				}
			}

			end := newValue + 1
			start := end - int64(ia.blockSize)

			if start < int64(ia.minID) {
				start = int64(ia.minID)
			}

			// Add all new ids to the channel for consumption.
			for i := start; i < end; i++ {
				select {
				case ia.ids <- uint32(i):
				case <-ia.stopper.ShouldStop():
					return
				}
			}
		}
	})
}
Exemple #5
0
// heartbeat is called to update a node's expiration timestamp. This
// method does a conditional put on the node liveness record, and if
// successful, stores the updated liveness record in the nodes map.
func (nl *NodeLiveness) heartbeat(ctx context.Context) error {
	nodeID := nl.gossip.NodeID.Get()

	var newLiveness Liveness
	var oldLiveness *Liveness
	liveness, err := nl.GetLiveness(nodeID)
	if err == nil {
		oldLiveness = &liveness
		newLiveness = liveness
	} else {
		newLiveness = Liveness{
			NodeID: nodeID,
			Epoch:  1,
		}
	}

	// Retry heartbeat in the event the conditional put fails.
	for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
		newLiveness.Expiration = nl.clock.Now().Add(nl.livenessThreshold.Nanoseconds(), 0)
		tryAgain := false
		if err := nl.updateLiveness(ctx, nodeID, &newLiveness, oldLiveness, func(actual Liveness) {
			oldLiveness = &actual
			newLiveness = actual
			tryAgain = true
		}); err != nil {
			nl.metrics.HeartbeatFailures.Inc(1)
			return err
		}
		if !tryAgain {
			break
		}
	}

	log.VEventf(ctx, 1, "heartbeat node %d liveness with expiration %s", nodeID, newLiveness.Expiration)
	nl.mu.Lock()
	defer nl.mu.Unlock()
	nl.mu.self = newLiveness
	nl.metrics.HeartbeatSuccesses.Inc(1)
	return nil
}
// InitSenderForLocalTestCluster initializes a TxnCoordSender that can be used
// with LocalTestCluster.
func InitSenderForLocalTestCluster(
	nodeDesc *roachpb.NodeDescriptor,
	tracer opentracing.Tracer,
	clock *hlc.Clock,
	latency time.Duration,
	stores client.Sender,
	stopper *stop.Stopper,
	gossip *gossip.Gossip,
) client.Sender {
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()
	senderTransportFactory := SenderTransportFactory(tracer, stores)
	distSender := NewDistSender(DistSenderConfig{
		Clock:           clock,
		RPCRetryOptions: &retryOpts,
		nodeDescriptor:  nodeDesc,
		TransportFactory: func(
			opts SendOptions,
			rpcContext *rpc.Context,
			replicas ReplicaSlice,
			args roachpb.BatchRequest,
		) (Transport, error) {
			transport, err := senderTransportFactory(opts, rpcContext, replicas, args)
			if err != nil {
				return nil, err
			}
			return &localTestClusterTransport{transport, latency}, nil
		},
	}, gossip)

	ambient := log.AmbientContext{Tracer: tracer}
	return NewTxnCoordSender(
		ambient,
		distSender,
		clock,
		false, /* !linearizable */
		stopper,
		MakeTxnMetrics(metric.TestSampleInterval),
	)
}
// StartHeartbeat starts a periodic heartbeat to refresh this node's
// last heartbeat in the node liveness table.
func (nl *NodeLiveness) StartHeartbeat(ctx context.Context, stopper *stop.Stopper) {
	log.VEventf(ctx, 1, "starting liveness heartbeat")
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = stopper.ShouldQuiesce()

	stopper.RunWorker(func() {
		ambient := nl.ambientCtx
		ambient.AddLogTag("hb", nil)
		ticker := time.NewTicker(nl.heartbeatInterval)
		defer ticker.Stop()
		for {
			if !nl.pauseHeartbeat.Load().(bool) {
				ctx, sp := ambient.AnnotateCtxWithSpan(context.Background(), "heartbeat")
				ctx, cancel := context.WithTimeout(ctx, nl.heartbeatInterval)
				// Retry heartbeat in the event the conditional put fails.
				for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
					liveness, err := nl.Self()
					if err != nil && err != ErrNoLivenessRecord {
						log.Errorf(ctx, "unexpected error getting liveness: %v", err)
					}
					if err := nl.Heartbeat(ctx, liveness); err != nil {
						if err == errSkippedHeartbeat {
							continue
						}
						log.Errorf(ctx, "failed liveness heartbeat: %v", err)
					}
					break
				}
				cancel()
				sp.Finish()
			}
			select {
			case <-ticker.C:
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Exemple #8
0
// waitForStoreFrozen polls the given stores until they all report having no
// unfrozen Replicas (or an error or timeout occurs).
func (s *adminServer) waitForStoreFrozen(
	stream serverpb.Admin_ClusterFreezeServer,
	stores map[roachpb.StoreID]roachpb.NodeID,
	wantFrozen bool,
) error {
	mu := struct {
		syncutil.Mutex
		oks map[roachpb.StoreID]bool
	}{
		oks: make(map[roachpb.StoreID]bool),
	}

	opts := base.DefaultRetryOptions()
	opts.Closer = s.server.stopper.ShouldQuiesce()
	opts.MaxRetries = 20
	sem := make(chan struct{}, 256)
	errChan := make(chan error, 1)
	sendErr := func(err error) {
		select {
		case errChan <- err:
		default:
		}
	}

	numWaiting := len(stores) // loop until this drops to zero
	var err error
	for r := retry.Start(opts); r.Next(); {
		mu.Lock()
		for storeID, nodeID := range stores {
			storeID, nodeID := storeID, nodeID // loop-local copies for goroutine
			var nodeDesc roachpb.NodeDescriptor
			if err := s.server.gossip.GetInfoProto(gossip.MakeNodeIDKey(nodeID), &nodeDesc); err != nil {
				sendErr(err)
				break
			}
			addr := nodeDesc.Address.String()

			if _, inflightOrSucceeded := mu.oks[storeID]; inflightOrSucceeded {
				continue
			}
			mu.oks[storeID] = false // mark as inflight
			action := func() (err error) {
				var resp *storage.PollFrozenResponse
				defer func() {
					message := fmt.Sprintf("node %d, store %d: ", nodeID, storeID)

					if err != nil {
						message += err.Error()
					} else {
						numMismatching := len(resp.Results)
						mu.Lock()
						if numMismatching == 0 {
							// If the Store is in the right state, mark it as such.
							// This means we won't try it again.
							message += "ready"
							mu.oks[storeID] = true
						} else {
							// Otherwise, forget that we tried the Store so that
							// the retry loop picks it up again.
							message += fmt.Sprintf("%d replicas report wrong status", numMismatching)
							if limit := 10; numMismatching > limit {
								message += " [truncated]: "
								resp.Results = resp.Results[:limit]
							} else {
								message += ": "
							}
							message += fmt.Sprintf("%+v", resp.Results)
							delete(mu.oks, storeID)
						}
						mu.Unlock()
					}
					err = stream.Send(&serverpb.ClusterFreezeResponse{
						Message: message,
					})
				}()
				conn, err := s.server.rpcContext.GRPCDial(addr)
				if err != nil {
					return err
				}
				client := storage.NewFreezeClient(conn)
				resp, err = client.PollFrozen(context.TODO(),
					&storage.PollFrozenRequest{
						StoreRequestHeader: storage.StoreRequestHeader{
							NodeID:  nodeID,
							StoreID: storeID,
						},
						// If we are looking to freeze everything, we want to
						// collect thawed Replicas, and vice versa.
						CollectFrozen: !wantFrozen,
					})
				return err
			}
			// Run a limited, non-blocking task. That means the task simply
			// won't run if the semaphore is full (or the node is draining).
			// Both are handled by the surrounding retry loop.
			if err := s.server.stopper.RunLimitedAsyncTask(
				context.TODO(), sem, true /* wait */, func(_ context.Context) {
					if err := action(); err != nil {
						sendErr(err)
					}
				}); err != nil {
				// Node draining.
				sendErr(err)
				break
			}
		}

		numWaiting = len(stores)
		for _, ok := range mu.oks {
			if ok {
				// Store has reported that it is frozen.
				numWaiting--
				continue
			}
		}
		mu.Unlock()

		select {
		case err = <-errChan:
		default:
		}

		// Keep going unless there's been an error or everyone's frozen.
		if err != nil || numWaiting == 0 {
			break
		}
		if err := stream.Send(&serverpb.ClusterFreezeResponse{
			Message: fmt.Sprintf("waiting for %d store%s to apply operation",
				numWaiting, util.Pluralize(int64(numWaiting))),
		}); err != nil {
			return err
		}
	}
	if err != nil {
		return err
	}
	if numWaiting > 0 {
		err = fmt.Errorf("timed out waiting for %d store%s to report freeze",
			numWaiting, util.Pluralize(int64(numWaiting)))
	}
	return err
}
Exemple #9
0
// DefaultDBContext returns (a copy of) the default options for
// NewDBWithContext.
func DefaultDBContext() DBContext {
	return DBContext{
		UserPriority:    roachpb.NormalUserPriority,
		TxnRetryOptions: base.DefaultRetryOptions(),
	}
}
Exemple #10
0
// NewDistSender returns a batch.Sender instance which connects to the
// Cockroach cluster via the supplied gossip instance. Supplying a
// DistSenderContext or the fields within is optional. For omitted values, sane
// defaults will be used.
func NewDistSender(cfg DistSenderConfig, g *gossip.Gossip) *DistSender {
	ds := &DistSender{gossip: g}

	ds.AmbientContext = cfg.AmbientCtx
	if ds.AmbientContext.Tracer == nil {
		ds.AmbientContext.Tracer = tracing.NewTracer()
	}

	ds.clock = cfg.Clock
	if ds.clock == nil {
		ds.clock = hlc.NewClock(hlc.UnixNano)
	}

	if cfg.nodeDescriptor != nil {
		atomic.StorePointer(&ds.nodeDescriptor, unsafe.Pointer(cfg.nodeDescriptor))
	}
	rcSize := cfg.RangeDescriptorCacheSize
	if rcSize <= 0 {
		rcSize = defaultRangeDescriptorCacheSize
	}
	rdb := cfg.RangeDescriptorDB
	if rdb == nil {
		rdb = ds
	}
	ds.rangeCache = newRangeDescriptorCache(ds.AnnotateCtx(context.TODO()), rdb, int(rcSize))
	lcSize := cfg.LeaseHolderCacheSize
	if lcSize <= 0 {
		lcSize = defaultLeaseHolderCacheSize
	}
	ds.leaseHolderCache = newLeaseHolderCache(int(lcSize))
	if cfg.RangeLookupMaxRanges <= 0 {
		ds.rangeLookupMaxRanges = defaultRangeLookupMaxRanges
	}
	if cfg.TransportFactory != nil {
		ds.transportFactory = cfg.TransportFactory
	}
	ds.rpcRetryOptions = base.DefaultRetryOptions()
	if cfg.RPCRetryOptions != nil {
		ds.rpcRetryOptions = *cfg.RPCRetryOptions
	}
	if cfg.RPCContext != nil {
		ds.rpcContext = cfg.RPCContext
		if ds.rpcRetryOptions.Closer == nil {
			ds.rpcRetryOptions.Closer = ds.rpcContext.Stopper.ShouldQuiesce()
		}
	}
	if cfg.SendNextTimeout != 0 {
		ds.sendNextTimeout = cfg.SendNextTimeout
	} else {
		ds.sendNextTimeout = defaultSendNextTimeout
	}
	if cfg.SenderConcurrency != 0 {
		ds.asyncSenderSem = make(chan struct{}, cfg.SenderConcurrency)
	} else {
		ds.asyncSenderSem = make(chan struct{}, defaultSenderConcurrency)
	}

	if g != nil {
		ctx := ds.AnnotateCtx(context.Background())
		g.RegisterCallback(gossip.KeyFirstRangeDescriptor,
			func(_ string, value roachpb.Value) {
				if log.V(1) {
					var desc roachpb.RangeDescriptor
					if err := value.GetProto(&desc); err != nil {
						log.Errorf(ctx, "unable to parse gossiped first range descriptor: %s", err)
					} else {
						log.Infof(ctx, "gossiped first range descriptor: %+v", desc.Replicas)
					}
				}
				err := ds.rangeCache.EvictCachedRangeDescriptor(roachpb.RKeyMin, nil, false)
				if err != nil {
					log.Warningf(ctx, "failed to evict first range descriptor: %s", err)
				}
			})
	}
	return ds
}
Exemple #11
0
// TestMultiRangeScanWithMaxResults tests that commands which access multiple
// ranges with MaxResults parameter are carried out properly.
func TestMultiRangeScanWithMaxResults(t *testing.T) {
	defer leaktest.AfterTest(t)()
	testCases := []struct {
		splitKeys []roachpb.Key
		keys      []roachpb.Key
	}{
		{[]roachpb.Key{roachpb.Key("m")},
			[]roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}},
		{[]roachpb.Key{roachpb.Key("h"), roachpb.Key("q")},
			[]roachpb.Key{roachpb.Key("b"), roachpb.Key("f"), roachpb.Key("k"),
				roachpb.Key("r"), roachpb.Key("w"), roachpb.Key("y")}},
	}

	for i, tc := range testCases {
		s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
		defer s.Stopper().Stop()
		ts := s.(*TestServer)
		retryOpts := base.DefaultRetryOptions()
		retryOpts.Closer = ts.stopper.ShouldQuiesce()
		ds := kv.NewDistSender(kv.DistSenderConfig{
			Clock:           s.Clock(),
			RPCContext:      s.RPCContext(),
			RPCRetryOptions: &retryOpts,
		}, ts.Gossip())
		ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
		tds := kv.NewTxnCoordSender(
			ambient,
			ds,
			ts.Clock(),
			ts.Cfg.Linearizable,
			ts.stopper,
			kv.MakeTxnMetrics(metric.TestSampleInterval),
		)

		for _, sk := range tc.splitKeys {
			if err := ts.node.storeCfg.DB.AdminSplit(context.TODO(), sk); err != nil {
				t.Fatal(err)
			}
		}

		for _, k := range tc.keys {
			put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
			if _, err := client.SendWrapped(context.Background(), tds, put); err != nil {
				t.Fatal(err)
			}
		}

		// Try every possible ScanRequest startKey.
		for start := 0; start < len(tc.keys); start++ {
			// Try every possible maxResults, from 1 to beyond the size of key array.
			for maxResults := 1; maxResults <= len(tc.keys)-start+1; maxResults++ {
				scan := roachpb.NewScan(tc.keys[start], tc.keys[len(tc.keys)-1].Next())
				reply, err := client.SendWrappedWith(
					context.Background(), tds, roachpb.Header{MaxSpanRequestKeys: int64(maxResults)}, scan,
				)
				if err != nil {
					t.Fatal(err)
				}
				rows := reply.(*roachpb.ScanResponse).Rows
				if start+maxResults <= len(tc.keys) && len(rows) != maxResults {
					t.Errorf("%d: start=%s: expected %d rows, but got %d", i, tc.keys[start], maxResults, len(rows))
				} else if start+maxResults == len(tc.keys)+1 && len(rows) != maxResults-1 {
					t.Errorf("%d: expected %d rows, but got %d", i, maxResults-1, len(rows))
				}
			}
		}
	}
}
Exemple #12
0
// TestMultiRangeScanDeleteRange tests that commands which access multiple
// ranges are carried out properly.
func TestMultiRangeScanDeleteRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()
	ts := s.(*TestServer)
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = ts.stopper.ShouldQuiesce()
	ds := kv.NewDistSender(kv.DistSenderConfig{
		Clock:           s.Clock(),
		RPCContext:      s.RPCContext(),
		RPCRetryOptions: &retryOpts,
	}, ts.Gossip())
	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
	tds := kv.NewTxnCoordSender(
		ambient,
		ds,
		s.Clock(),
		ts.Cfg.Linearizable,
		ts.stopper,
		kv.MakeTxnMetrics(metric.TestSampleInterval),
	)

	if err := ts.node.storeCfg.DB.AdminSplit(context.TODO(), "m"); err != nil {
		t.Fatal(err)
	}
	writes := []roachpb.Key{roachpb.Key("a"), roachpb.Key("z")}
	get := &roachpb.GetRequest{
		Span: roachpb.Span{Key: writes[0]},
	}
	get.EndKey = writes[len(writes)-1]
	if _, err := client.SendWrapped(context.Background(), tds, get); err == nil {
		t.Errorf("able to call Get with a key range: %v", get)
	}
	var delTS hlc.Timestamp
	for i, k := range writes {
		put := roachpb.NewPut(k, roachpb.MakeValueFromBytes(k))
		if _, err := client.SendWrapped(context.Background(), tds, put); err != nil {
			t.Fatal(err)
		}
		scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next())
		reply, err := client.SendWrapped(context.Background(), tds, scan)
		if err != nil {
			t.Fatal(err)
		}
		sr := reply.(*roachpb.ScanResponse)
		if sr.Txn != nil {
			// This was the other way around at some point in the past.
			// Same below for Delete, etc.
			t.Errorf("expected no transaction in response header")
		}
		if rows := sr.Rows; len(rows) != i+1 {
			t.Fatalf("expected %d rows, but got %d", i+1, len(rows))
		}
	}

	del := &roachpb.DeleteRangeRequest{
		Span: roachpb.Span{
			Key:    writes[0],
			EndKey: roachpb.Key(writes[len(writes)-1]).Next(),
		},
		ReturnKeys: true,
	}
	reply, err := client.SendWrappedWith(context.Background(), tds, roachpb.Header{Timestamp: delTS}, del)
	if err != nil {
		t.Fatal(err)
	}
	dr := reply.(*roachpb.DeleteRangeResponse)
	if dr.Txn != nil {
		t.Errorf("expected no transaction in response header")
	}
	if !reflect.DeepEqual(dr.Keys, writes) {
		t.Errorf("expected %d keys to be deleted, but got %d instead", writes, dr.Keys)
	}

	scan := roachpb.NewScan(writes[0], writes[len(writes)-1].Next())
	txn := &roachpb.Transaction{Name: "MyTxn"}
	reply, err = client.SendWrappedWith(context.Background(), tds, roachpb.Header{Txn: txn}, scan)
	if err != nil {
		t.Fatal(err)
	}
	sr := reply.(*roachpb.ScanResponse)
	if txn := sr.Txn; txn == nil || txn.Name != "MyTxn" {
		t.Errorf("wanted Txn to persist, but it changed to %v", txn)
	}
	if rows := sr.Rows; len(rows) > 0 {
		t.Fatalf("scan after delete returned rows: %v", rows)
	}
}
Exemple #13
0
// restoreTable inserts the given DatabaseDescriptor. If the name conflicts with
// an existing table, the one being restored is rekeyed with a new ID and the
// old data is deleted.
func restoreTable(
	ctx context.Context,
	db client.DB,
	database sqlbase.DatabaseDescriptor,
	table *sqlbase.TableDescriptor,
	ranges []sqlbase.BackupRangeDescriptor,
) error {
	if log.V(1) {
		log.Infof(ctx, "Restoring Table %q", table.Name)
	}

	var newTableID sqlbase.ID
	if err := db.Txn(ctx, func(txn *client.Txn) error {
		// Make sure there's a database with a name that matches the original.
		if _, err := getDescriptorID(txn, tableKey{name: database.Name}); err != nil {
			return errors.Wrapf(err, "a database named %q needs to exist to restore table %q",
				database.Name, table.Name)
		}

		// Assign a new ID for the table. TODO(dan): For now, we're always
		// generating a new ID, but varints get longer as they get bigger and so
		// our keys will, too. We should someday figure out how to overwrite an
		// existing table and steal its ID.
		var err error
		newTableID, err = GenerateUniqueDescID(txn)
		return err
	}); err != nil {
		return err
	}

	// Create the iteration keys before we give the table its new ID.
	tableStartKeyOld := roachpb.Key(sqlbase.MakeIndexKeyPrefix(table, table.PrimaryIndex.ID))
	tableEndKeyOld := tableStartKeyOld.PrefixEnd()

	// This loop makes restoring multiple tables O(N*M), where N is the number
	// of tables and M is the number of ranges. We could reduce this using an
	// interval tree if necessary.
	var wg sync.WaitGroup
	result := struct {
		syncutil.Mutex
		firstErr error
		numErrs  int
	}{}
	for _, rangeDesc := range ranges {
		if len(rangeDesc.Path) == 0 {
			// Empty path means empty range.
			continue
		}

		intersectBegin, intersectEnd := IntersectHalfOpen(
			rangeDesc.StartKey, rangeDesc.EndKey, tableStartKeyOld, tableEndKeyOld)
		if intersectBegin != nil && intersectEnd != nil {
			// Write the data under the new ID.
			// TODO(dan): There's no SQL descriptors that point at this yet, so it
			// should be possible to remove it from the one txn this is all currently
			// run under. If we do that, make sure this data gets cleaned up on errors.
			wg.Add(1)
			go func(desc sqlbase.BackupRangeDescriptor) {
				for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
					err := db.Txn(ctx, func(txn *client.Txn) error {
						return Ingest(ctx, txn, desc.Path, desc.CRC, intersectBegin, intersectEnd, newTableID)
					})
					if _, ok := err.(*client.AutoCommitError); ok {
						log.Errorf(ctx, "auto commit error during ingest: %s", err)
						// TODO(dan): Ingest currently does not rely on the
						// range being empty, but the plan is that it will. When
						// that change happens, this will have to delete any
						// partially ingested data or something.
						continue
					}

					if err != nil {
						log.Errorf(ctx, "%T %s", err, err)
						result.Lock()
						defer result.Unlock()
						if result.firstErr != nil {
							result.firstErr = err
						}
						result.numErrs++
					}
					break
				}
				wg.Done()
			}(rangeDesc)
		}
	}
	wg.Wait()
	// All concurrent accesses have finished, we don't need the lock anymore.
	if result.firstErr != nil {
		// This leaves the data that did get imported in case the user wants to
		// retry.
		// TODO(dan): Build tooling to allow a user to restart a failed restore.
		return errors.Wrapf(result.firstErr, "ingest encountered %d errors", result.numErrs)
	}

	table.ID = newTableID
	return db.Txn(ctx, func(txn *client.Txn) error {
		// Pass the descriptors by value to keep this idempotent.
		return restoreTableDesc(ctx, txn, database, *table)
	})
}
Exemple #14
0
// NewServer creates a Server from a server.Context.
func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
	if _, err := net.ResolveTCPAddr("tcp", cfg.AdvertiseAddr); err != nil {
		return nil, errors.Errorf("unable to resolve RPC address %q: %v", cfg.AdvertiseAddr, err)
	}

	if cfg.AmbientCtx.Tracer == nil {
		cfg.AmbientCtx.Tracer = tracing.NewTracer()
	}

	// Try loading the TLS configs before anything else.
	if _, err := cfg.GetServerTLSConfig(); err != nil {
		return nil, err
	}
	if _, err := cfg.GetClientTLSConfig(); err != nil {
		return nil, err
	}

	s := &Server{
		mux:     http.NewServeMux(),
		clock:   hlc.NewClock(hlc.UnixNano, cfg.MaxOffset),
		stopper: stopper,
		cfg:     cfg,
	}
	// Add a dynamic log tag value for the node ID.
	//
	// We need to pass an ambient context to the various server components, but we
	// won't know the node ID until we Start(). At that point it's too late to
	// change the ambient contexts in the components (various background processes
	// will have already started using them).
	//
	// NodeIDContainer allows us to add the log tag to the context now and update
	// the value asynchronously. It's not significantly more expensive than a
	// regular tag since it's just doing an (atomic) load when a log/trace message
	// is constructed. The node ID is set by the Store if this host was
	// bootstrapped; otherwise a new one is allocated in Node.
	s.cfg.AmbientCtx.AddLogTag("n", &s.nodeIDContainer)

	ctx := s.AnnotateCtx(context.Background())
	if s.cfg.Insecure {
		log.Warning(ctx, "running in insecure mode, this is strongly discouraged. See --insecure.")
	}

	s.rpcContext = rpc.NewContext(s.cfg.AmbientCtx, s.cfg.Config, s.clock, s.stopper)
	s.rpcContext.HeartbeatCB = func() {
		if err := s.rpcContext.RemoteClocks.VerifyClockOffset(); err != nil {
			log.Fatal(ctx, err)
		}
	}
	s.grpc = rpc.NewServer(s.rpcContext)

	s.registry = metric.NewRegistry()
	s.gossip = gossip.New(
		s.cfg.AmbientCtx,
		&s.nodeIDContainer,
		s.rpcContext,
		s.grpc,
		s.cfg.GossipBootstrapResolvers,
		s.stopper,
		s.registry,
	)
	s.storePool = storage.NewStorePool(
		s.cfg.AmbientCtx,
		s.gossip,
		s.clock,
		s.rpcContext,
		s.cfg.TimeUntilStoreDead,
		s.stopper,
		/* deterministic */ false,
	)

	// A custom RetryOptions is created which uses stopper.ShouldQuiesce() as
	// the Closer. This prevents infinite retry loops from occurring during
	// graceful server shutdown
	//
	// Such a loop loop occurs with the DistSender attempts a connection to the
	// local server during shutdown, and receives an internal server error (HTTP
	// Code 5xx). This is the correct error for a server to return when it is
	// shutting down, and is normally retryable in a cluster environment.
	// However, on a single-node setup (such as a test), retries will never
	// succeed because the only server has been shut down; thus, thus the
	// DistSender needs to know that it should not retry in this situation.
	retryOpts := base.DefaultRetryOptions()
	retryOpts.Closer = s.stopper.ShouldQuiesce()
	distSenderCfg := kv.DistSenderConfig{
		AmbientCtx:      s.cfg.AmbientCtx,
		Clock:           s.clock,
		RPCContext:      s.rpcContext,
		RPCRetryOptions: &retryOpts,
	}
	s.distSender = kv.NewDistSender(distSenderCfg, s.gossip)

	txnMetrics := kv.MakeTxnMetrics(s.cfg.MetricsSampleInterval)
	s.registry.AddMetricStruct(txnMetrics)
	s.txnCoordSender = kv.NewTxnCoordSender(
		s.cfg.AmbientCtx,
		s.distSender,
		s.clock,
		s.cfg.Linearizable,
		s.stopper,
		txnMetrics,
	)
	s.db = client.NewDB(s.txnCoordSender)

	// Use the range lease expiration and renewal durations as the node
	// liveness expiration and heartbeat interval.
	active, renewal := storage.RangeLeaseDurations(
		storage.RaftElectionTimeout(s.cfg.RaftTickInterval, s.cfg.RaftElectionTimeoutTicks))
	s.nodeLiveness = storage.NewNodeLiveness(
		s.cfg.AmbientCtx, s.clock, s.db, s.gossip, active, renewal,
	)
	s.registry.AddMetricStruct(s.nodeLiveness.Metrics())

	s.raftTransport = storage.NewRaftTransport(
		s.cfg.AmbientCtx, storage.GossipAddressResolver(s.gossip), s.grpc, s.rpcContext,
	)

	s.kvDB = kv.NewDBServer(s.cfg.Config, s.txnCoordSender, s.stopper)
	roachpb.RegisterExternalServer(s.grpc, s.kvDB)

	// Set up internal memory metrics for use by internal SQL executors.
	s.internalMemMetrics = sql.MakeMemMetrics("internal")
	s.registry.AddMetricStruct(s.internalMemMetrics)

	// Set up Lease Manager
	var lmKnobs sql.LeaseManagerTestingKnobs
	if cfg.TestingKnobs.SQLLeaseManager != nil {
		lmKnobs = *s.cfg.TestingKnobs.SQLLeaseManager.(*sql.LeaseManagerTestingKnobs)
	}
	s.leaseMgr = sql.NewLeaseManager(&s.nodeIDContainer, *s.db, s.clock, lmKnobs,
		s.stopper, &s.internalMemMetrics)
	s.leaseMgr.RefreshLeases(s.stopper, s.db, s.gossip)

	// Set up the DistSQL server
	distSQLCfg := distsql.ServerConfig{
		AmbientContext: s.cfg.AmbientCtx,
		DB:             s.db,
		RPCContext:     s.rpcContext,
		Stopper:        s.stopper,
	}
	s.distSQLServer = distsql.NewServer(distSQLCfg)
	distsql.RegisterDistSQLServer(s.grpc, s.distSQLServer)

	// Set up admin memory metrics for use by admin SQL executors.
	s.adminMemMetrics = sql.MakeMemMetrics("admin")
	s.registry.AddMetricStruct(s.adminMemMetrics)

	// Set up Executor
	execCfg := sql.ExecutorConfig{
		AmbientCtx:            s.cfg.AmbientCtx,
		NodeID:                &s.nodeIDContainer,
		DB:                    s.db,
		Gossip:                s.gossip,
		LeaseManager:          s.leaseMgr,
		Clock:                 s.clock,
		DistSQLSrv:            s.distSQLServer,
		MetricsSampleInterval: s.cfg.MetricsSampleInterval,
	}
	if s.cfg.TestingKnobs.SQLExecutor != nil {
		execCfg.TestingKnobs = s.cfg.TestingKnobs.SQLExecutor.(*sql.ExecutorTestingKnobs)
	} else {
		execCfg.TestingKnobs = &sql.ExecutorTestingKnobs{}
	}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		execCfg.SchemaChangerTestingKnobs =
			s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	} else {
		execCfg.SchemaChangerTestingKnobs = &sql.SchemaChangerTestingKnobs{}
	}
	s.sqlExecutor = sql.NewExecutor(execCfg, s.stopper, &s.adminMemMetrics)
	s.registry.AddMetricStruct(s.sqlExecutor)

	s.pgServer = pgwire.MakeServer(
		s.cfg.AmbientCtx, s.cfg.Config, s.sqlExecutor, &s.internalMemMetrics, s.cfg.SQLMemoryPoolSize,
	)
	s.registry.AddMetricStruct(s.pgServer.Metrics())

	s.tsDB = ts.NewDB(s.db)
	s.tsServer = ts.MakeServer(s.cfg.AmbientCtx, s.tsDB, s.cfg.TimeSeriesServerConfig, s.stopper)

	// TODO(bdarnell): make StoreConfig configurable.
	storeCfg := storage.StoreConfig{
		AmbientCtx:                     s.cfg.AmbientCtx,
		Clock:                          s.clock,
		DB:                             s.db,
		Gossip:                         s.gossip,
		NodeLiveness:                   s.nodeLiveness,
		Transport:                      s.raftTransport,
		RaftTickInterval:               s.cfg.RaftTickInterval,
		ScanInterval:                   s.cfg.ScanInterval,
		ScanMaxIdleTime:                s.cfg.ScanMaxIdleTime,
		ConsistencyCheckInterval:       s.cfg.ConsistencyCheckInterval,
		ConsistencyCheckPanicOnFailure: s.cfg.ConsistencyCheckPanicOnFailure,
		MetricsSampleInterval:          s.cfg.MetricsSampleInterval,
		StorePool:                      s.storePool,
		SQLExecutor: sql.InternalExecutor{
			LeaseManager: s.leaseMgr,
		},
		LogRangeEvents: s.cfg.EventLogEnabled,
		AllocatorOptions: storage.AllocatorOptions{
			AllowRebalance: true,
		},
		RangeLeaseActiveDuration:  active,
		RangeLeaseRenewalDuration: renewal,
		TimeSeriesDataStore:       s.tsDB,
	}
	if s.cfg.TestingKnobs.Store != nil {
		storeCfg.TestingKnobs = *s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
	}

	s.recorder = status.NewMetricsRecorder(s.clock)
	s.registry.AddMetricStruct(s.rpcContext.RemoteClocks.Metrics())

	s.runtime = status.MakeRuntimeStatSampler(s.clock)
	s.registry.AddMetricStruct(s.runtime)

	s.node = NewNode(storeCfg, s.recorder, s.registry, s.stopper, txnMetrics, sql.MakeEventLogger(s.leaseMgr))
	roachpb.RegisterInternalServer(s.grpc, s.node)
	storage.RegisterConsistencyServer(s.grpc, s.node.storesServer)
	storage.RegisterFreezeServer(s.grpc, s.node.storesServer)

	s.admin = newAdminServer(s)
	s.status = newStatusServer(
		s.cfg.AmbientCtx, s.db, s.gossip, s.recorder, s.rpcContext, s.node.stores,
	)
	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		gw.RegisterService(s.grpc)
	}

	return s, nil
}
Exemple #15
0
// EnsureMigrations should be run during node startup to ensure that all
// required migrations have been run (and running all those that are definitely
// safe to run).
func (m *Manager) EnsureMigrations(ctx context.Context) error {
	// First, check whether there are any migrations that need to be run.
	completedMigrations, err := m.getCompletedMigrations(ctx)
	if err != nil {
		return err
	}
	allMigrationsCompleted := true
	for _, migration := range backwardCompatibleMigrations {
		key := migrationKey(migration)
		if _, ok := completedMigrations[string(key)]; !ok {
			allMigrationsCompleted = false
		}
	}
	if allMigrationsCompleted {
		return nil
	}

	// If there are any, grab the migration lease to ensure that only one
	// node is ever doing migrations at a time.
	// Note that we shouldn't ever let client.LeaseNotAvailableErrors cause us
	// to stop trying, because if we return an error the server will be shut down,
	// and this server being down may prevent the leaseholder from finishing.
	var lease *client.Lease
	if log.V(1) {
		log.Info(ctx, "trying to acquire lease")
	}
	for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
		lease, err = m.leaseManager.AcquireLease(ctx, keys.MigrationLease)
		if err == nil {
			break
		}
		log.Errorf(ctx, "failed attempt to acquire migration lease: %s", err)
	}
	if err != nil {
		return errors.Wrapf(err, "failed to acquire lease for running necessary migrations")
	}

	// Ensure that we hold the lease throughout the migration process and release
	// it when we're done.
	done := make(chan interface{}, 1)
	defer func() {
		done <- nil
		if log.V(1) {
			log.Info(ctx, "trying to release the lease")
		}
		if err := m.leaseManager.ReleaseLease(ctx, lease); err != nil {
			log.Errorf(ctx, "failed to release migration lease: %s", err)
		}
	}()
	if err := m.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
		select {
		case <-done:
			return
		case <-time.After(leaseRefreshInterval):
			if err := m.leaseManager.ExtendLease(ctx, lease); err != nil {
				log.Warningf(ctx, "unable to extend ownership of expiration lease: %s", err)
			}
			if m.leaseManager.TimeRemaining(lease) < leaseRefreshInterval {
				// Note that we may be able to do better than this by influencing the
				// deadline of migrations' transactions based on the least expiration
				// time, but simply kill the process for now for the sake of simplicity.
				log.Fatal(ctx, "not enough time left on migration lease, terminating for safety")
			}
		}
	}); err != nil {
		return err
	}

	// Re-get the list of migrations in case any of them were completed between
	// our initial check and our grabbing of the lease.
	completedMigrations, err = m.getCompletedMigrations(ctx)
	if err != nil {
		return err
	}

	startTime := timeutil.Now().String()
	r := runner{
		db:          m.db,
		sqlExecutor: m.sqlExecutor,
	}
	for _, migration := range backwardCompatibleMigrations {
		key := migrationKey(migration)
		if _, ok := completedMigrations[string(key)]; ok {
			continue
		}

		if log.V(1) {
			log.Infof(ctx, "running migration %q", migration.name)
		}
		if err := migration.workFn(ctx, r); err != nil {
			return errors.Wrapf(err, "failed to run migration %q", migration.name)
		}

		if log.V(1) {
			log.Infof(ctx, "trying to persist record of completing migration %s", migration.name)
		}
		if err := m.db.Put(ctx, key, startTime); err != nil {
			return errors.Wrapf(err, "failed to persist record of completing migration %q",
				migration.name)
		}
	}

	return nil
}
Exemple #16
0
// Publish updates a table descriptor. It also maintains the invariant that
// there are at most two versions of the descriptor out in the wild at any time
// by first waiting for all nodes to be on the current (pre-update) version of
// the table desc.
// The update closure is called after the wait, and it provides the new version
// of the descriptor to be written. In a multi-step schema operation, this
// update should perform a single step.
// The closure may be called multiple times if retries occur; make sure it does
// not have side effects.
// Returns the updated version of the descriptor.
func (s LeaseStore) Publish(
	tableID sqlbase.ID, update func(*sqlbase.TableDescriptor) error, logEvent func(*client.Txn) error,
) (*sqlbase.Descriptor, error) {
	errLeaseVersionChanged := errors.New("lease version changed")
	// Retry while getting errLeaseVersionChanged.
	for r := retry.Start(base.DefaultRetryOptions()); r.Next(); {
		// Wait until there are no unexpired leases on the previous version
		// of the table.
		expectedVersion, err := s.waitForOneVersion(tableID, base.DefaultRetryOptions())
		if err != nil {
			return nil, err
		}

		desc := &sqlbase.Descriptor{}
		// There should be only one version of the descriptor, but it's
		// a race now to update to the next version.
		err = s.db.Txn(context.TODO(), func(txn *client.Txn) error {
			descKey := sqlbase.MakeDescMetadataKey(tableID)

			// Re-read the current version of the table descriptor, this time
			// transactionally.
			if err := txn.GetProto(descKey, desc); err != nil {
				return err
			}
			tableDesc := desc.GetTable()
			if tableDesc == nil {
				return errors.Errorf("ID %d is not a table", tableID)
			}
			if expectedVersion != tableDesc.Version {
				// The version changed out from under us. Someone else must be
				// performing a schema change operation.
				if log.V(3) {
					log.Infof(txn.Context, "publish (version changed): %d != %d", expectedVersion, tableDesc.Version)
				}
				return errLeaseVersionChanged
			}

			// Run the update closure.
			if err := update(tableDesc); err != nil {
				return err
			}

			// Bump the version and modification time.
			tableDesc.Version++
			now := s.clock.Now()
			tableDesc.ModificationTime = now
			if log.V(3) {
				log.Infof(txn.Context, "publish: descID=%d (%s) version=%d mtime=%s",
					tableDesc.ID, tableDesc.Name, tableDesc.Version, now.GoTime())
			}
			if err := tableDesc.ValidateTable(); err != nil {
				return err
			}

			// Write the updated descriptor.
			txn.SetSystemConfigTrigger()
			b := txn.NewBatch()
			b.Put(descKey, desc)
			if logEvent != nil {
				// If an event log is required for this update, ensure that the
				// descriptor change occurs first in the transaction. This is
				// necessary to ensure that the System configuration change is
				// gossiped. See the documentation for
				// transaction.SetSystemConfigTrigger() for more information.
				if err := txn.Run(b); err != nil {
					return err
				}
				if err := logEvent(txn); err != nil {
					return err
				}
				return txn.Commit()
			}
			// More efficient batching can be used if no event log message
			// is required.
			return txn.CommitInBatch(b)
		})

		switch err {
		case nil, errDidntUpdateDescriptor:
			return desc, nil
		case errLeaseVersionChanged:
			// will loop around to retry
		default:
			return nil, err
		}
	}

	panic("not reached")
}