Beispiel #1
0
func (sc *SchemaChanger) maybeWriteResumeSpan(
	txn *client.Txn,
	version sqlbase.DescriptorVersion,
	resume roachpb.Span,
	mutationIdx int,
	lastCheckpoint *time.Time,
) error {
	checkpointInterval := checkpointInterval
	if sc.testingKnobs.WriteCheckpointInterval > 0 {
		checkpointInterval = sc.testingKnobs.WriteCheckpointInterval
	}
	if timeutil.Since(*lastCheckpoint) < checkpointInterval {
		return nil
	}
	tableDesc, err := sqlbase.GetTableDescFromID(txn, sc.tableID)
	if err != nil {
		return err
	}
	if tableDesc.Version != version {
		return errVersionMismatch
	}
	tableDesc.Mutations[mutationIdx].ResumeSpan = resume
	txn.SetSystemConfigTrigger()
	if err := txn.Put(sqlbase.MakeDescMetadataKey(tableDesc.GetID()),
		sqlbase.WrapDescriptor(tableDesc)); err != nil {
		return err
	}
	*lastCheckpoint = timeutil.Now()
	return nil
}
Beispiel #2
0
// processReplica processes a single replica. This should not be
// called externally to the queue. bq.mu.Lock must not be held
// while calling this method.
func (bq *baseQueue) processReplica(
	queueCtx context.Context, repl *Replica, clock *hlc.Clock,
) error {
	bq.processMu.Lock()
	defer bq.processMu.Unlock()

	// Load the system config.
	cfg, ok := bq.gossip.GetSystemConfig()
	if !ok {
		log.VEventf(queueCtx, 1, "no system config available, skipping")
		return nil
	}

	if bq.requiresSplit(cfg, repl) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		log.VEventf(queueCtx, 3, "%s: split needed; skipping", repl)
		return nil
	}

	// Putting a span in a context means that events will no longer go to the
	// event log. Use queueCtx for events that are intended for the event log.
	ctx, span := bq.AnnotateCtxWithSpan(queueCtx, bq.name)
	defer span.Finish()
	// Also add the Replica annotations to ctx.
	ctx = repl.AnnotateCtx(ctx)
	ctx, cancel := context.WithTimeout(ctx, bq.processTimeout)
	defer cancel()
	log.Eventf(ctx, "processing replica")

	// If the queue requires a replica to have the range lease in
	// order to be processed, check whether this replica has range lease
	// and renew or acquire if necessary.
	if bq.needsLease {
		// Create a "fake" get request in order to invoke redirectOnOrAcquireLease.
		if err := repl.redirectOnOrAcquireLease(ctx); err != nil {
			if _, harmless := err.GetDetail().(*roachpb.NotLeaseHolderError); harmless {
				log.VEventf(queueCtx, 3, "not holding lease; skipping")
				return nil
			}
			return errors.Wrapf(err.GoError(), "%s: could not obtain lease", repl)
		}
		log.Event(ctx, "got range lease")
	}

	log.VEventf(queueCtx, 3, "processing")
	start := timeutil.Now()
	err := bq.impl.process(ctx, clock.Now(), repl, cfg)
	duration := timeutil.Since(start)
	bq.processingNanos.Inc(duration.Nanoseconds())
	if err != nil {
		return err
	}
	log.VEventf(queueCtx, 2, "done: %s", duration)
	log.Event(ctx, "done")
	bq.successes.Inc(1)
	return nil
}
Beispiel #3
0
func testDecimalSingleArgFunc(
	t *testing.T,
	f func(*inf.Dec, *inf.Dec, inf.Scale) (*inf.Dec, error),
	s inf.Scale,
	tests []decimalOneArgTestCase,
) {
	for _, tc := range tests {
		t.Run(fmt.Sprintf("%s = %s", tc.input, tc.expected), func(t *testing.T) {
			x, exp := new(inf.Dec), new(inf.Dec)
			x.SetString(tc.input)
			exp.SetString(tc.expected)

			// Test allocated return value.
			var z *inf.Dec
			var err error
			done := make(chan struct{}, 1)
			start := timeutil.Now()
			go func() {
				z, err = f(nil, x, s)
				done <- struct{}{}
			}()
			var after <-chan time.Time
			if *flagDurationLimit > 0 {
				after = time.After(*flagDurationLimit)
			}
			select {
			case <-done:
				t.Logf("execute duration: %s", timeutil.Since(start))
			case <-after:
				t.Fatalf("timedout after %s", *flagDurationLimit)
			}
			if err != nil {
				if tc.expected != err.Error() {
					t.Errorf("expected error %s, got %s", tc.expected, err)
				}
				return
			}
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test provided decimal mutation.
			z.SetString("0.0")
			_, _ = f(z, x, s)
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test same arg mutation.
			_, _ = f(x, x, s)
			if exp.Cmp(x) != 0 {
				t.Errorf("expected %s, got %s", exp, x)
			}
			x.SetString(tc.input)
		})
	}
}
Beispiel #4
0
func testPutInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	db, err := c.NewClient(ctx, 0)
	if err != nil {
		t.Fatal(err)
	}

	errs := make(chan error, c.NumNodes())
	start := timeutil.Now()
	deadline := start.Add(cfg.Duration)
	var count int64
	for i := 0; i < c.NumNodes(); i++ {
		go func() {
			r, _ := randutil.NewPseudoRand()
			value := randutil.RandBytes(r, 8192)

			for timeutil.Now().Before(deadline) {
				k := atomic.AddInt64(&count, 1)
				v := value[:r.Intn(len(value))]
				if err := db.Put(ctx, fmt.Sprintf("%08d", k), v); err != nil {
					errs <- err
					return
				}
			}
			errs <- nil
		}()
	}

	for i := 0; i < c.NumNodes(); {
		baseCount := atomic.LoadInt64(&count)
		select {
		case <-stopper.ShouldStop():
			t.Fatalf("interrupted")
		case err := <-errs:
			if err != nil {
				t.Fatal(err)
			}
			i++
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is still
			// running.
			loadedCount := atomic.LoadInt64(&count)
			log.Infof(ctx, "%d (%d/s)", loadedCount, loadedCount-baseCount)
			c.Assert(ctx, t)
			if err := cluster.Consistent(ctx, c, 0); err != nil {
				t.Fatal(err)
			}
		}
	}

	elapsed := timeutil.Since(start)
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
Beispiel #5
0
func (at *allocatorTest) runSchemaChanges(ctx context.Context, t *testing.T) error {
	db, err := gosql.Open("postgres", at.f.PGUrl(ctx, 0))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	const tableName = "datablocks.blocks"
	schemaChanges := []string{
		"ALTER TABLE %s ADD COLUMN newcol DECIMAL DEFAULT (DECIMAL '1.4')",
		"CREATE INDEX foo ON %s (block_id)",
	}
	var wg sync.WaitGroup
	wg.Add(len(schemaChanges))
	for i := range schemaChanges {
		go func(i int) {
			start := timeutil.Now()
			cmd := fmt.Sprintf(schemaChanges[i], tableName)
			log.Infof(ctx, "starting schema change: %s", cmd)
			if _, err := db.Exec(cmd); err != nil {
				log.Infof(ctx, "hit schema change error: %s, for %s, in %s", err, cmd, timeutil.Since(start))
				t.Error(err)
				return
			}
			log.Infof(ctx, "completed schema change: %s, in %s", cmd, timeutil.Since(start))
			wg.Done()
			// TODO(vivek): Monitor progress of schema changes and log progress.
		}(i)
	}
	wg.Wait()

	log.Info(ctx, "validate applied schema changes")
	if err := at.ValidateSchemaChanges(ctx, t); err != nil {
		t.Fatal(err)
	}
	return nil
}
Beispiel #6
0
func testClusterRecoveryInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	num := c.NumNodes()

	// One client for each node.
	initBank(t, c.PGUrl(ctx, 0))

	start := timeutil.Now()
	state := testState{
		t:        t,
		errChan:  make(chan error, num),
		teardown: make(chan struct{}),
		deadline: start.Add(cfg.Duration),
		clients:  make([]testClient, num),
	}

	for i := 0; i < num; i++ {
		state.clients[i].Lock()
		state.initClient(ctx, t, c, i)
		state.clients[i].Unlock()
		go transferMoneyLoop(ctx, i, &state, *numAccounts, *maxTransfer)
	}

	defer func() {
		<-state.teardown
	}()

	// Chaos monkey.
	rnd, seed := randutil.NewPseudoRand()
	log.Warningf(ctx, "monkey starts (seed %d)", seed)
	pickNodes := func() []int {
		return rnd.Perm(num)[:rnd.Intn(num)+1]
	}
	go chaosMonkey(ctx, &state, c, true, pickNodes, 0)

	waitClientsStop(ctx, num, &state, stall)

	// Verify accounts.
	verifyAccounts(t, &state.clients[0])

	elapsed := timeutil.Since(start)
	var count uint64
	counts := state.counts()
	for _, c := range counts {
		count += c
	}
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
Beispiel #7
0
func TestSucceedsSoon(t *testing.T) {
	// Try a method which always succeeds.
	SucceedsSoon(t, func() error { return nil })

	// Try a method which succeeds after a known duration.
	start := timeutil.Now()
	duration := time.Millisecond * 10
	SucceedsSoon(t, func() error {
		elapsed := timeutil.Since(start)
		if elapsed > duration {
			return nil
		}
		return errors.Errorf("%s elapsed, waiting until %s elapses", elapsed, duration)
	})
}
Beispiel #8
0
func (s *server) status() string {
	s.mu.Lock()
	defer s.mu.Unlock()
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "gossip server (%d/%d cur/max conns, %s)\n",
		s.mu.incoming.gauge.Value(), s.mu.incoming.maxSize, s.serverMetrics)
	for addr, info := range s.mu.nodeMap {
		// TODO(peter): Report per connection sent/received statistics. The
		// structure of server.Gossip and server.gossipReceiver makes this
		// irritating to track.
		fmt.Fprintf(&buf, "  %d: %s (%s)\n",
			info.peerID, addr.AddressField, roundSecs(timeutil.Since(info.createdAt)))
	}
	return buf.String()
}
Beispiel #9
0
// waitForFullReplication waits for the cluster to be fully replicated.
func (c *Cluster) waitForFullReplication() {
	for i := 1; true; i++ {
		done, detail := c.isReplicated()
		if (done && i >= 50) || (i%50) == 0 {
			fmt.Print(detail)
			log.Infof(context.Background(), "waiting for replication")
		}
		if done {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}

	log.Infof(context.Background(), "replicated %.3fs", timeutil.Since(c.started).Seconds())
}
Beispiel #10
0
func (g *Gossip) clientStatus() string {
	var buf bytes.Buffer

	g.mu.Lock()
	defer g.mu.Unlock()
	g.clientsMu.Lock()
	defer g.clientsMu.Unlock()

	fmt.Fprintf(&buf, "gossip client (%d/%d cur/max conns)\n", len(g.clientsMu.clients), g.outgoing.maxSize)
	for _, c := range g.clientsMu.clients {
		fmt.Fprintf(&buf, "  %d: %s (%s: %s)\n",
			c.peerID, c.addr, roundSecs(timeutil.Since(c.createdAt)), c.clientMetrics)
	}
	return buf.String()
}
Beispiel #11
0
func testNodeRestartInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	num := c.NumNodes()
	if minNum := 3; num < minNum {
		t.Skipf("need at least %d nodes, got %d", minNum, num)
	}

	// One client for each node.
	initBank(t, c.PGUrl(ctx, 0))

	start := timeutil.Now()
	state := testState{
		t:        t,
		errChan:  make(chan error, 1),
		teardown: make(chan struct{}),
		deadline: start.Add(cfg.Duration),
		clients:  make([]testClient, 1),
	}

	clientIdx := num - 1
	client := &state.clients[0]
	client.Lock()
	client.db = makePGClient(t, c.PGUrl(ctx, clientIdx))
	client.Unlock()
	go transferMoneyLoop(ctx, 0, &state, *numAccounts, *maxTransfer)

	defer func() {
		<-state.teardown
	}()

	// Chaos monkey.
	rnd, seed := randutil.NewPseudoRand()
	log.Warningf(ctx, "monkey starts (seed %d)", seed)
	pickNodes := func() []int {
		return []int{rnd.Intn(clientIdx)}
	}
	go chaosMonkey(ctx, &state, c, false, pickNodes, clientIdx)

	waitClientsStop(ctx, 1, &state, stall)

	// Verify accounts.
	verifyAccounts(t, client)

	elapsed := timeutil.Since(start)
	count := atomic.LoadUint64(&client.count)
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
Beispiel #12
0
// finishSQLTxn closes the root span for the current SQL txn.
// This needs to be called before resetForNewSQLTransaction() is called for
// starting another SQL txn.
// The session context is just used for logging the SQL trace.
func (ts *txnState) finishSQLTxn(sessionCtx context.Context) {
	ts.mon.Stop(ts.Ctx)
	if ts.sp == nil {
		panic("No span in context? Was resetForNewSQLTxn() called previously?")
	}
	sampledFor7881 := (ts.sp.BaggageItem(keyFor7881Sample) != "")
	ts.sp.Finish()
	ts.sp = nil
	if (traceSQL && timeutil.Since(ts.sqlTimestamp) >= traceSQLDuration) ||
		(traceSQLFor7881 && sampledFor7881) {
		dump := tracing.FormatRawSpans(ts.CollectedSpans)
		if len(dump) > 0 {
			log.Infof(sessionCtx, "SQL trace:\n%s", dump)
		}
	}
}
Beispiel #13
0
// scanLoop loops endlessly, scanning through replicas available via
// the replica set, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := rs.AnnotateCtx(context.Background())
		start := timeutil.Now()

		// waitTimer is reset in each call to waitAndProcess.
		defer rs.waitTimer.Stop()

		for {
			if rs.GetDisabled() {
				if done := rs.waitEnabled(stopper); done {
					return
				}
				continue
			}
			var shouldStop bool
			count := 0
			rs.replicas.Visit(func(repl *Replica) bool {
				count++
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, repl)
				return !shouldStop
			})
			if count == 0 {
				// No replicas processed, just wait.
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, nil)
			}

			shouldStop = shouldStop || nil != stopper.RunTask(func() {
				// Increment iteration count.
				rs.mu.Lock()
				defer rs.mu.Unlock()
				rs.mu.scanCount++
				rs.mu.total += timeutil.Since(start)
				if log.V(6) {
					log.Infof(ctx, "reset replica scan iteration")
				}

				// Reset iteration and start time.
				start = timeutil.Now()
			})
			if shouldStop {
				return
			}
		}
	})
}
Beispiel #14
0
// Start starts a cluster. The numWorkers parameter controls the SQL connection
// settings to avoid unnecessary connection creation. The args parameter can be
// used to pass extra arguments to each node.
func (c *Cluster) Start(db string, numWorkers int, args, env []string) {
	c.started = timeutil.Now()

	baseCtx := &base.Config{
		User:     security.NodeUser,
		Insecure: true,
	}
	c.rpcCtx = rpc.NewContext(log.AmbientContext{}, baseCtx, nil, c.stopper)

	for i := range c.Nodes {
		c.Nodes[i] = c.makeNode(i, args, env)
		c.Clients[i] = c.makeClient(i)
		c.Status[i] = c.makeStatus(i)
		c.DB[i] = c.makeDB(i, numWorkers, db)
	}

	log.Infof(context.Background(), "started %.3fs", timeutil.Since(c.started).Seconds())
	c.waitForFullReplication()
}
Beispiel #15
0
// PeriodicallyCheckForUpdates starts a background worker that periodically
// phones home to check for updates and report usage.
func (s *Server) PeriodicallyCheckForUpdates() {
	s.stopper.RunWorker(func() {
		startup := timeutil.Now()

		for {
			// `maybeCheckForUpdates` and `maybeReportUsage` both return the
			// duration until they should next be checked.
			// Wait for the shorter of the durations returned by the two checks.
			wait := s.maybeCheckForUpdates()
			if reportWait := s.maybeReportUsage(timeutil.Since(startup)); reportWait < wait {
				wait = reportWait
			}
			jitter := rand.Intn(updateCheckJitterSeconds) - updateCheckJitterSeconds/2
			wait = wait + (time.Duration(jitter) * time.Second)
			select {
			case <-s.stopper.ShouldQuiesce():
				return
			case <-time.After(wait):
			}
		}
	})
}
Beispiel #16
0
func (r *rocksDBBatch) Commit() error {
	if r.closed() {
		panic("this batch was already committed")
	}

	start := timeutil.Now()
	var count, size int

	r.distinctOpen = false
	if r.flushes > 0 {
		// We've previously flushed mutations to the C++ batch, so we have to flush
		// any remaining mutations as well and then commit the batch.
		r.flushMutations()
		if err := statusToError(C.DBCommitAndCloseBatch(r.batch)); err != nil {
			return err
		}
		r.batch = nil
		count, size = r.flushedCount, r.flushedSize
	} else if r.builder.count > 0 {
		count, size = r.builder.count, len(r.builder.repr)

		// Fast-path which avoids flushing mutations to the C++ batch. Instead, we
		// directly apply the mutations to the database.
		if err := r.parent.ApplyBatchRepr(r.builder.Finish()); err != nil {
			return err
		}
		C.DBClose(r.batch)
		r.batch = nil
	}

	const batchCommitWarnThreshold = 500 * time.Millisecond
	if elapsed := timeutil.Since(start); elapsed >= batchCommitWarnThreshold {
		log.Warningf(context.TODO(), "batch [%d/%d/%d] commit took %s (>%s):\n%s",
			count, size, r.flushes, elapsed, batchCommitWarnThreshold, debug.Stack())
	}

	return nil
}
Beispiel #17
0
func TestConcurrentBatch(t *testing.T) {
	defer leaktest.AfterTest(t)()

	dir, err := ioutil.TempDir("", "TestConcurrentBatch")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := os.RemoveAll(dir); err != nil {
			t.Fatal(err)
		}
	}()

	db, err := NewRocksDB(roachpb.Attributes{}, dir, RocksDBCache{},
		0, DefaultMaxOpenFiles)
	if err != nil {
		t.Fatalf("could not create new rocksdb db instance at %s: %v", dir, err)
	}
	defer db.Close()

	// Prepare 16 4 MB batches containing non-overlapping contents.
	var batches []Batch
	for i := 0; i < 16; i++ {
		batch := db.NewBatch()
		for j := 0; true; j++ {
			key := encoding.EncodeUvarintAscending([]byte("bar"), uint64(i))
			key = encoding.EncodeUvarintAscending(key, uint64(j))
			if err := batch.Put(MakeMVCCMetadataKey(key), nil); err != nil {
				t.Fatal(err)
			}
			if len(batch.Repr()) >= 4<<20 {
				break
			}
		}
		batches = append(batches, batch)
	}

	errChan := make(chan error, len(batches))

	// Concurrently write all the batches.
	for _, batch := range batches {
		go func(batch Batch) {
			errChan <- batch.Commit()
		}(batch)
	}

	// While the batch writes are in progress, try to write another key.
	time.Sleep(100 * time.Millisecond)
	remainingBatches := len(batches)
	for i := 0; remainingBatches > 0; i++ {
		select {
		case err := <-errChan:
			if err != nil {
				t.Fatal(err)
			}
			remainingBatches--
		default:
		}

		// This write can get delayed excessively if we hit the max memtable count
		// or the L0 stop writes threshold.
		start := timeutil.Now()
		key := encoding.EncodeUvarintAscending([]byte("foo"), uint64(i))
		if err := db.Put(MakeMVCCMetadataKey(key), nil); err != nil {
			t.Fatal(err)
		}
		if elapsed := timeutil.Since(start); elapsed >= 10*time.Second {
			t.Fatalf("write took %0.1fs\n", elapsed.Seconds())
		}
	}
}
Beispiel #18
0
// grpcTransportFactory during race builds wraps the implementation and
// intercepts all BatchRequests, reading them in a tight loop. This allows the
// race detector to catch any mutations of a batch passed to the transport.
func grpcTransportFactory(
	opts SendOptions, rpcContext *rpc.Context, replicas ReplicaSlice, args roachpb.BatchRequest,
) (Transport, error) {
	if atomic.AddInt32(&running, 1) <= 1 {
		rpcContext.Stopper.RunWorker(func() {
			var iters int
			var curIdx int
			defer func() {
				atomic.StoreInt32(&running, 0)
				log.Infof(
					context.TODO(),
					"transport race promotion: ran %d iterations on up to %d requests",
					iters, curIdx+1,
				)
			}()
			// Make a fixed-size slice of *BatchRequest. When full, entries
			// are evicted in FIFO order.
			const size = 1000
			bas := make([]*roachpb.BatchRequest, size)
			encoder := gob.NewEncoder(ioutil.Discard)
			for {
				iters++
				start := timeutil.Now()
				for _, ba := range bas {
					if ba != nil {
						if err := encoder.Encode(ba); err != nil {
							panic(err)
						}
					}
				}
				// Prevent the goroutine from spinning too hot as this lets CI
				// times skyrocket. Sleep on average for as long as we worked
				// on the last iteration so we spend no more than half our CPU
				// time on this task.
				jittered := time.After(jitter(timeutil.Since(start)))
				// Collect incoming requests until the jittered timer fires,
				// then access everything we have.
				for {
					select {
					case <-rpcContext.Stopper.ShouldStop():
						return
					case ba := <-incoming:
						bas[curIdx%size] = ba
						curIdx++
						continue
					case <-jittered:
					}
					break
				}
			}
		})
	}
	select {
	// We have a shallow copy here and so the top level scalar fields can't
	// really race, but making more copies doesn't make anything more
	// transparent, so from now on we operate on a pointer.
	case incoming <- &args:
	default:
		// Avoid slowing down the tests if we're backed up.
	}
	return grpcTransportFactoryImpl(opts, rpcContext, replicas, args)
}
Beispiel #19
0
// Test that a connection closed abruptly while a SQL txn is in progress results
// in that txn being rolled back.
func TestSessionFinishRollsBackTxn(t *testing.T) {
	defer leaktest.AfterTest(t)()
	aborter := NewTxnAborter()
	defer aborter.Close(t)
	params, _ := createTestServerParams()
	params.Knobs.SQLExecutor = aborter.executorKnobs()
	s, mainDB, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()
	{
		pgURL, cleanup := sqlutils.PGUrl(
			t, s.ServingAddr(), "TestSessionFinishRollsBackTxn", url.User(security.RootUser))
		defer cleanup()
		if err := aborter.Init(pgURL); err != nil {
			t.Fatal(err)
		}
	}
	if _, err := mainDB.Exec(`
CREATE DATABASE t;
CREATE TABLE t.test (k INT PRIMARY KEY, v TEXT);
`); err != nil {
		t.Fatal(err)
	}

	// We're going to test the rollback of transactions left in various states
	// when the connection closes abruptly.
	// For the state CommitWait, there's no actual rollback we can test for (since
	// the kv-level transaction has already been committed). But we still
	// exercise this state to check that the server doesn't crash (which used to
	// happen - #9879).
	tests := []sql.TxnStateEnum{sql.Open, sql.RestartWait, sql.CommitWait}
	for _, state := range tests {
		t.Run(state.String(), func(t *testing.T) {
			// Create a low-level lib/pq connection so we can close it at will.
			pgURL, cleanupDB := sqlutils.PGUrl(
				t, s.ServingAddr(), state.String(), url.User(security.RootUser))
			defer cleanupDB()
			conn, err := pq.Open(pgURL.String())
			if err != nil {
				t.Fatal(err)
			}
			connClosed := false
			defer func() {
				if connClosed {
					return
				}
				if err := conn.Close(); err != nil {
					t.Fatal(err)
				}
			}()

			txn, err := conn.Begin()
			if err != nil {
				t.Fatal(err)
			}
			tx := txn.(driver.Execer)
			if _, err := tx.Exec("SET TRANSACTION PRIORITY NORMAL", nil); err != nil {
				t.Fatal(err)
			}

			if state == sql.RestartWait || state == sql.CommitWait {
				if _, err := tx.Exec("SAVEPOINT cockroach_restart", nil); err != nil {
					t.Fatal(err)
				}
			}

			insertStmt := "INSERT INTO t.test(k, v) VALUES (1, 'a')"
			if state == sql.RestartWait {
				// To get a txn in RestartWait, we'll use an aborter.
				if err := aborter.QueueStmtForAbortion(
					insertStmt, 1 /* restartCount */, false /* willBeRetriedIbid */); err != nil {
					t.Fatal(err)
				}
			}
			if _, err := tx.Exec(insertStmt, nil); err != nil {
				t.Fatal(err)
			}

			if err := aborter.VerifyAndClear(); err != nil {
				t.Fatal(err)
			}

			if state == sql.RestartWait || state == sql.CommitWait {
				_, err := tx.Exec("RELEASE SAVEPOINT cockroach_restart", nil)
				if state == sql.CommitWait {
					if err != nil {
						t.Fatal(err)
					}
				} else if !testutils.IsError(err, "pq: restart transaction:.*") {
					t.Fatal(err)
				}
			}

			// Abruptly close the connection.
			connClosed = true
			if err := conn.Close(); err != nil {
				t.Fatal(err)
			}

			// Check that the txn we had above was rolled back. We do this by reading
			// after the preceding txn and checking that we don't get an error and
			// that we haven't been blocked by intents (we can't exactly test that we
			// haven't been blocked but we assert that the query didn't take too
			// long).
			// We do the read in an explicit txn so that automatic retries don't hide
			// any errors.
			// TODO(andrei): Figure out a better way to test for non-blocking.
			// Use a trace when the client-side tracing story gets good enough.
			txCheck, err := mainDB.Begin()
			if err != nil {
				t.Fatal(err)
			}
			// Run check at low priority so we don't push the previous transaction and
			// fool ourselves into thinking it had been rolled back.
			if _, err := txCheck.Exec("SET TRANSACTION PRIORITY LOW"); err != nil {
				t.Fatal(err)
			}
			ts := timeutil.Now()
			var count int
			if err := txCheck.QueryRow("SELECT COUNT(1) FROM t.test").Scan(&count); err != nil {
				t.Fatal(err)
			}
			// CommitWait actually committed, so we'll need to clean up.
			if state != sql.CommitWait {
				if count != 0 {
					t.Fatalf("expected no rows, got: %d", count)
				}
			} else {
				if _, err := txCheck.Exec("DELETE FROM t.test"); err != nil {
					t.Fatal(err)
				}
			}
			if err := txCheck.Commit(); err != nil {
				t.Fatal(err)
			}
			if d := timeutil.Since(ts); d > time.Second {
				t.Fatalf("Looks like the checking tx was unexpectedly blocked. "+
					"It took %s to commit.", d)
			}

		})
	}
}
Beispiel #20
0
// TestDumpRandom generates a random number of random rows with all data
// types. This data is dumped, inserted, and dumped again. The two dumps
// are compared for exactness. The data from the inserted dump is then
// SELECT'd and compared to the original generated data to ensure it is
// round-trippable.
func TestDumpRandom(t *testing.T) {
	defer leaktest.AfterTest(t)()

	c, err := newCLITest(t, false)
	if err != nil {
		t.Fatal(err)
	}
	defer c.stop(true)

	url, cleanup := sqlutils.PGUrl(t, c.ServingAddr(), "TestDumpRandom", url.User(security.RootUser))
	defer cleanup()

	conn := makeSQLConn(url.String())
	defer conn.Close()

	if err := conn.Exec(`
		CREATE DATABASE d;
		CREATE DATABASE o;
		CREATE TABLE d.t (
			rowid int,
			i int,
			f float,
			d date,
			m timestamp,
			n interval,
			o bool,
			e decimal,
			s string,
			b bytes,
			PRIMARY KEY (rowid, i, f, d, m, n, o, e, s, b)
		);
	`, nil); err != nil {
		t.Fatal(err)
	}

	rnd, seed := randutil.NewPseudoRand()
	t.Logf("random seed: %v", seed)

	start := timeutil.Now()

	for iteration := 0; timeutil.Since(start) < *randomTestTime; iteration++ {
		if err := conn.Exec(`DELETE FROM d.t`, nil); err != nil {
			t.Fatal(err)
		}
		var generatedRows [][]driver.Value
		count := rnd.Int63n(500)
		t.Logf("random iteration %v: %v rows", iteration, count)
		for _i := int64(0); _i < count; _i++ {
			// Generate a random number of random inserts.
			i := rnd.Int63()
			f := rnd.Float64()
			d := time.Unix(0, rnd.Int63()).Round(time.Hour * 24).UTC()
			m := time.Unix(0, rnd.Int63()).Round(time.Microsecond).UTC()
			n := time.Duration(rnd.Int63()).String()
			o := rnd.Intn(2) == 1
			e := strings.TrimRight(inf.NewDec(rnd.Int63(), inf.Scale(rnd.Int31n(20)-10)).String(), ".0")
			sr := make([]byte, rnd.Intn(500))
			if _, err := rnd.Read(sr); err != nil {
				t.Fatal(err)
			}
			s := make([]byte, 0, len(sr))
			for _, b := range sr {
				r := rune(b)
				if !utf8.ValidRune(r) {
					continue
				}
				s = append(s, []byte(string(r))...)
			}
			b := make([]byte, rnd.Intn(500))
			if _, err := rnd.Read(b); err != nil {
				t.Fatal(err)
			}

			vals := []driver.Value{
				_i,
				i,
				f,
				d,
				m,
				[]byte(n), // intervals come out as `[]byte`s
				o,
				[]byte(e), // decimals come out as `[]byte`s
				string(s),
				b,
			}
			if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)", vals); err != nil {
				t.Fatal(err)
			}
			generatedRows = append(generatedRows, vals[1:])
		}

		check := func(table string) {
			q := fmt.Sprintf("SELECT i, f, d, m, n, o, e, s, b FROM %s ORDER BY rowid", table)
			nrows, err := conn.Query(q, nil)
			if err != nil {
				t.Fatal(err)
			}
			defer func() {
				if err := nrows.Close(); err != nil {
					t.Fatal(err)
				}
			}()
			for gi, generatedRow := range generatedRows {
				fetched := make([]driver.Value, len(nrows.Columns()))
				if err := nrows.Next(fetched); err != nil {
					t.Fatal(err)
				}

				for i, fetchedVal := range fetched {
					generatedVal := generatedRow[i]
					if t, ok := fetchedVal.(time.Time); ok {
						// dates and timestamps come out with offset zero (but
						// not UTC specifically).
						fetchedVal = t.UTC()
					}
					if !reflect.DeepEqual(fetchedVal, generatedVal) {
						t.Errorf("NOT EQUAL: table %s, row %d, col %d\ngenerated (%T): %v\nselected (%T): %v\n", table, gi, i, generatedVal, generatedVal, fetchedVal, fetchedVal)
					}
				}
				if t.Failed() {
					t.FailNow()
				}
			}
		}

		check("d.t")

		var buf bytes.Buffer
		if err := DumpTable(&buf, conn, "d", "t"); err != nil {
			t.Fatal(err)
		}
		dump := buf.String()
		buf.Reset()

		if err := conn.Exec(`
			SET DATABASE = o;
			DROP TABLE IF EXISTS t;
		`, nil); err != nil {
			t.Fatal(err)
		}
		if err := conn.Exec(dump, nil); err != nil {
			t.Fatal(err)
		}

		check("o.t")

		if err := DumpTable(&buf, conn, "o", "t"); err != nil {
			t.Fatal(err)
		}
		dump2 := buf.String()
		if dump != dump2 {
			t.Fatalf("unmatching dumps:\nFIRST:\n%s\n\nSECOND:\n%s", dump, dump2)
		}
	}
}
Beispiel #21
0
// Start starts a goroutine that runs outstanding schema changes
// for tables received in the latest system configuration via gossip.
func (s *SchemaChangeManager) Start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := s.gossip.RegisterSystemConfigChannel()
		timer := &time.Timer{}
		delay := 360 * time.Second
		if s.testingKnobs.AsyncExecQuickly {
			delay = 20 * time.Millisecond
		}
		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := s.gossip.GetSystemConfig()
				// Read all tables and their versions
				if log.V(2) {
					log.Info(context.TODO(), "received a new config")
				}
				schemaChanger := SchemaChanger{
					nodeID:       s.leaseMgr.nodeID.Get(),
					db:           s.db,
					leaseMgr:     s.leaseMgr,
					testingKnobs: s.testingKnobs,
				}
				// Keep track of existing schema changers.
				oldSchemaChangers := make(map[sqlbase.ID]struct{}, len(s.schemaChangers))
				for k := range s.schemaChangers {
					oldSchemaChangers[k] = struct{}{}
				}
				execAfter := timeutil.Now().Add(delay)
				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf(context.TODO(), "%s: unable to unmarshal descriptor %v", kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						table.MaybeUpgradeFormatVersion()
						if err := table.ValidateTable(); err != nil {
							log.Errorf(context.TODO(), "%s: received invalid table descriptor: %v", kv.Key, table)
							continue
						}

						// Keep track of outstanding schema changes.
						// If all schema change commands always set UpVersion, why
						// check for the presence of mutations?
						// A schema change execution might fail soon after
						// unsetting UpVersion, and we still want to process
						// outstanding mutations. Similar with a table marked for deletion.
						if table.UpVersion || table.Dropped() || table.Adding() ||
							table.Renamed() || len(table.Mutations) > 0 {
							if log.V(2) {
								log.Infof(context.TODO(), "%s: queue up pending schema change; table: %d, version: %d",
									kv.Key, table.ID, table.Version)
							}

							// Only track the first schema change. We depend on
							// gossip to renotify us when a schema change has been
							// completed.
							schemaChanger.tableID = table.ID
							if len(table.Mutations) == 0 {
								schemaChanger.mutationID = sqlbase.InvalidMutationID
							} else {
								schemaChanger.mutationID = table.Mutations[0].MutationID
							}
							schemaChanger.execAfter = execAfter
							// Keep track of this schema change.
							// Remove from oldSchemaChangers map.
							delete(oldSchemaChangers, table.ID)
							if sc, ok := s.schemaChangers[table.ID]; ok {
								if sc.mutationID == schemaChanger.mutationID {
									// Ignore duplicate.
									continue
								}
							}
							s.schemaChangers[table.ID] = schemaChanger
						}

					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				// Delete old schema changers.
				for k := range oldSchemaChangers {
					delete(s.schemaChangers, k)
				}
				timer = s.newTimer()

			case <-timer.C:
				if s.testingKnobs.AsyncExecNotification != nil &&
					s.testingKnobs.AsyncExecNotification() != nil {
					timer = s.newTimer()
					continue
				}
				for tableID, sc := range s.schemaChangers {
					if timeutil.Since(sc.execAfter) > 0 {
						err := sc.exec()
						if err != nil {
							if err == errExistingSchemaChangeLease {
							} else if err == sqlbase.ErrDescriptorNotFound {
								// Someone deleted this table. Don't try to run the schema
								// changer again. Note that there's no gossip update for the
								// deletion which would remove this schemaChanger.
								delete(s.schemaChangers, tableID)
							} else {
								// We don't need to act on integrity
								// constraints violations because exec()
								// purges mutations that violate integrity
								// constraints.
								log.Warningf(context.TODO(), "Error executing schema change: %s", err)
							}
						}
						// Advance the execAfter time so that this schema
						// changer doesn't get called again for a while.
						sc.execAfter = timeutil.Now().Add(delay)
					}
					// Only attempt to run one schema changer.
					break
				}
				timer = s.newTimer()

			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Beispiel #22
0
func testDecimalDoubleArgFunc(
	t *testing.T,
	f func(*inf.Dec, *inf.Dec, *inf.Dec, inf.Scale) (*inf.Dec, error),
	s inf.Scale,
	tests []decimalTwoArgsTestCase,
) {
	for _, tc := range tests {
		t.Run(fmt.Sprintf("%s, %s = %s", tc.input1, tc.input2, tc.expected), func(t *testing.T) {
			x, y, exp := new(inf.Dec), new(inf.Dec), new(inf.Dec)
			if _, ok := x.SetString(tc.input1); !ok {
				t.Fatalf("could not set decimal: %s", tc.input1)
			}
			if _, ok := y.SetString(tc.input2); !ok {
				t.Fatalf("could not set decimal: %s", tc.input2)
			}

			// Test allocated return value.
			var z *inf.Dec
			var err error
			done := make(chan struct{}, 1)
			start := timeutil.Now()
			go func() {
				z, err = f(nil, x, y, s)
				done <- struct{}{}
			}()
			var after <-chan time.Time
			if *flagDurationLimit > 0 {
				after = time.After(*flagDurationLimit)
			}
			select {
			case <-done:
				t.Logf("execute duration: %s", timeutil.Since(start))
			case <-after:
				t.Fatalf("timedout after %s", *flagDurationLimit)
			}
			if err != nil {
				if tc.expected != err.Error() {
					t.Errorf("expected error %s, got %s", tc.expected, err)
				}
				return
			}
			if z == nil {
				if tc.expected != "nil" {
					t.Errorf("expected %s, got nil", tc.expected)
				}
				return
			} else if tc.expected == "nil" {
				t.Errorf("expected nil, got %s", z)
				return
			}
			if _, ok := exp.SetString(tc.expected); !ok {
				t.Errorf("could not set decimal: %s", tc.expected)
				return
			}
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test provided decimal mutation.
			z.SetString("0.0")
			_, _ = f(z, x, y, s)
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test first arg mutation.
			_, _ = f(x, x, y, s)
			if exp.Cmp(x) != 0 {
				t.Errorf("expected %s, got %s", exp, x)
			}
			x.SetString(tc.input1)

			// Test second arg mutation.
			_, _ = f(y, x, y, s)
			if exp.Cmp(y) != 0 {
				t.Errorf("expected %s, got %s", exp, y)
			}
			y.SetString(tc.input2)

			// Test both arg mutation, if possible.
			if tc.input1 == tc.input2 {
				_, _ = f(x, x, x, s)
				if exp.Cmp(x) != 0 {
					t.Errorf("expected %s, got %s", exp, x)
				}
				x.SetString(tc.input1)
			}
		})
	}
}
Beispiel #23
0
// Start starts the server on the specified port, starts gossip and initializes
// the node using the engines from the server's context.
//
// The passed context can be used to trace the server startup. The context
// should represent the general startup operation.
func (s *Server) Start(ctx context.Context) error {
	ctx = s.AnnotateCtx(ctx)

	startTime := timeutil.Now()

	tlsConfig, err := s.cfg.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusPermanentRedirect)
	}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. A single port is used to serve all protocols
	// (pg, http, h2) via the following construction:
	//
	// non-TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.

	ln, err := net.Listen("tcp", s.cfg.Addr)
	if err != nil {
		return err
	}
	log.Eventf(ctx, "listening on port %s", s.cfg.Addr)
	unresolvedListenAddr, err := officialAddr(s.cfg.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.Addr = unresolvedListenAddr.String()
	unresolvedAdvertAddr, err := officialAddr(s.cfg.AdvertiseAddr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.AdvertiseAddr = unresolvedAdvertAddr.String()

	s.rpcContext.SetLocalInternalServer(s.node)

	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.cfg.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.cfg.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.cfg.HTTPAddr = unresolvedHTTPAddr.String()

	workersCtx := s.AnnotateCtx(context.Background())

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(workersCtx, err)
		}
	})

	if tlsConfig != nil {
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(anyL.Close())
		<-s.stopper.ShouldStop()
		s.grpc.Stop()
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	s.stopper.RunWorker(func() {
		pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
		netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, pgL, func(conn net.Conn) {
			connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
			if err := s.pgServer.ServeConn(connCtx, conn); err != nil && !netutil.IsClosedConnection(err) {
				// Report the error on this connection's context, so that we
				// know which remote client caused the error when looking at
				// the logs.
				log.Error(connCtx, err)
			}
		}))
	})

	if len(s.cfg.SocketFile) != 0 {
		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.cfg.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(workersCtx, err)
			}
		})

		s.stopper.RunWorker(func() {
			pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
			netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, unixLn, func(conn net.Conn) {
				connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
				if err := s.pgServer.ServeConn(connCtx, conn); err != nil && !netutil.IsClosedConnection(err) {
					// Report the error on this connection's context, so that we
					// know which remote client caused the error when looking at
					// the logs.
					log.Error(connCtx, err)
				}
			}))
		})
	}

	// Enable the debug endpoints first to provide an earlier window
	// into what's going on with the node in advance of exporting node
	// functionality.
	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))

	s.gossip.Start(unresolvedAdvertAddr)
	log.Event(ctx, "started gossip")

	s.engines, err = s.cfg.CreateEngines()
	if err != nil {
		return errors.Wrap(err, "failed to create engines")
	}
	s.stopper.AddCloser(&s.engines)

	// We might have to sleep a bit to protect against this node producing non-
	// monotonic timestamps. Before restarting, its clock might have been driven
	// by other nodes' fast clocks, but when we restarted, we lost all this
	// information. For example, a client might have written a value at a
	// timestamp that's in the future of the restarted node's clock, and if we
	// don't do something, the same client's read would not return the written
	// value. So, we wait up to MaxOffset; we couldn't have served timestamps more
	// than MaxOffset in the future (assuming that MaxOffset was not changed, see
	// #9733).
	//
	// As an optimization for tests, we don't sleep if all the stores are brand
	// new. In this case, the node will not serve anything anyway until it
	// synchronizes with other nodes.
	{
		anyStoreBootstrapped := false
		for _, e := range s.engines {
			if _, err := storage.ReadStoreIdent(ctx, e); err != nil {
				// NotBootstrappedError is expected.
				if _, ok := err.(*storage.NotBootstrappedError); !ok {
					return err
				}
			} else {
				anyStoreBootstrapped = true
				break
			}
		}
		if anyStoreBootstrapped {
			sleepDuration := s.clock.MaxOffset() - timeutil.Since(startTime)
			if sleepDuration > 0 {
				log.Infof(ctx, "sleeping for %s to guarantee HLC monotonicity", sleepDuration)
				time.Sleep(sleepDuration)
			}
		}
	}

	// Now that we have a monotonic HLC wrt previous incarnations of the process,
	// init all the replicas.
	err = s.node.start(
		ctx,
		unresolvedAdvertAddr,
		s.engines,
		s.cfg.NodeAttributes,
		s.cfg.Locality,
	)
	if err != nil {
		return err
	}
	log.Event(ctx, "started node")

	s.nodeLiveness.StartHeartbeat(ctx, s.stopper)

	// We can now add the node registry.
	s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt)

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.cfg.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(
		s.cfg.AmbientCtx, s.recorder, s.cfg.MetricsSampleInterval, ts.Resolution10s, s.stopper,
	)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.cfg.MetricsSampleInterval)

	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	testingKnobs := &sql.SchemaChangerTestingKnobs{}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		testingKnobs = s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	s.distSQLServer.Start()

	log.Infof(ctx, "starting %s server at %s", s.cfg.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof(ctx, "starting grpc/postgres server at %s", unresolvedListenAddr)
	log.Infof(ctx, "advertising CockroachDB node at %s", unresolvedAdvertAddr)
	if len(s.cfg.SocketFile) != 0 {
		log.Infof(ctx, "starting postgres server at unix:%s", s.cfg.SocketFile)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})

	log.Event(ctx, "accepting connections")

	// Initialize grpc-gateway mux and context.
	jsonpb := &protoutil.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       "  ",
	}
	protopb := new(protoutil.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(httputil.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(httputil.AltProtoContentType, protopb),
	)
	gwCtx, gwCancel := context.WithCancel(s.AnnotateCtx(context.Background()))
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	conn, err := s.rpcContext.GRPCDial(s.cfg.Addr)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("COCKROACH_DEBUG_UI", false)
	if uiDebug {
		uiFileSystem = http.Dir("pkg/ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.Handle(adminPrefix, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, gwMux)
	s.mux.Handle("/health", gwMux)
	s.mux.Handle(statusVars, http.HandlerFunc(s.status.handleVars))
	log.Event(ctx, "added http endpoints")

	if err := sdnotify.Ready(); err != nil {
		log.Errorf(ctx, "failed to signal readiness using systemd protocol: %s", err)
	}
	log.Event(ctx, "server ready")

	return nil
}
// applySnapshot updates the replica based on the given snapshot and associated
// HardState (which may be empty, as Raft may apply some snapshots which don't
// require an update to the HardState). All snapshots must pass through Raft
// for correctness, i.e. the parameters to this method must be taken from
// a raft.Ready. It is the caller's responsibility to call
// r.store.processRangeDescriptorUpdate(r) after a successful applySnapshot.
func (r *Replica) applySnapshot(
	ctx context.Context, inSnap IncomingSnapshot, snap raftpb.Snapshot, hs raftpb.HardState,
) error {
	// Extract the updated range descriptor.
	desc := inSnap.RangeDescriptor

	r.mu.Lock()
	replicaID := r.mu.replicaID
	raftLogSize := r.mu.raftLogSize
	r.mu.Unlock()

	isPreemptive := replicaID == 0 // only used for accounting and log format
	var appliedSuccessfully bool
	defer func() {
		if appliedSuccessfully {
			if !isPreemptive {
				r.store.metrics.RangeSnapshotsNormalApplied.Inc(1)
			} else {
				r.store.metrics.RangeSnapshotsPreemptiveApplied.Inc(1)
			}
		}
	}()

	if raft.IsEmptySnap(snap) {
		// Raft discarded the snapshot, indicating that our local state is
		// already ahead of what the snapshot provides. But we count it for
		// stats (see the defer above).
		appliedSuccessfully = true
		return nil
	}

	snapType := "preemptive"
	if !isPreemptive {
		snapType = "Raft"
	}

	log.Infof(ctx, "applying %s snapshot at index %d "+
		"(id=%s, encoded size=%d, %d rocksdb batches, %d log entries)",
		snapType, snap.Metadata.Index, inSnap.SnapUUID.Short(),
		len(snap.Data), len(inSnap.Batches), len(inSnap.LogEntries))
	defer func(start time.Time) {
		log.Infof(ctx, "applied %s snapshot in %.3fs",
			snapType, timeutil.Since(start).Seconds())
	}(timeutil.Now())

	batch := r.store.Engine().NewBatch()
	defer batch.Close()

	// Clear the range using a distinct batch in order to prevent the iteration
	// from forcing the batch to flush from Go to C++.
	distinctBatch := batch.Distinct()

	// Delete everything in the range and recreate it from the snapshot.
	// We need to delete any old Raft log entries here because any log entries
	// that predate the snapshot will be orphaned and never truncated or GC'd.
	iter := NewReplicaDataIterator(&desc, distinctBatch, false /* !replicatedOnly */)
	defer iter.Close()
	for ; iter.Valid(); iter.Next() {
		if err := distinctBatch.Clear(iter.Key()); err != nil {
			return err
		}
	}

	distinctBatch.Close()

	// Write the snapshot into the range.
	for _, batchRepr := range inSnap.Batches {
		if err := batch.ApplyBatchRepr(batchRepr); err != nil {
			return err
		}
	}

	// The log entries are all written to distinct keys so we can use a
	// distinct batch.
	distinctBatch = batch.Distinct()

	logEntries := make([]raftpb.Entry, len(inSnap.LogEntries))
	for i, bytes := range inSnap.LogEntries {
		if err := logEntries[i].Unmarshal(bytes); err != nil {
			return err
		}
	}
	// Write the snapshot's Raft log into the range.
	_, raftLogSize, err := r.append(ctx, distinctBatch, 0, raftLogSize, logEntries)
	if err != nil {
		return err
	}

	if !raft.IsEmptyHardState(hs) {
		if err := setHardState(ctx, distinctBatch, r.RangeID, hs); err != nil {
			return errors.Wrapf(err, "unable to persist HardState %+v", &hs)
		}
	} else {
		// Note that we don't require that Raft supply us with a nonempty
		// HardState on a snapshot. We don't want to make that assumption
		// because it's not guaranteed by the contract. Raft *must* send us
		// a HardState when it increases the committed index as a result of the
		// snapshot, but who is to say it isn't going to accept a snapshot
		// which is identical to the current state?
	}

	// We need to close the distinct batch and start using the normal batch for
	// the read below.
	distinctBatch.Close()

	s, err := loadState(ctx, batch, &desc)
	if err != nil {
		return err
	}

	if s.Desc.RangeID != r.RangeID {
		log.Fatalf(ctx, "unexpected range ID %d", s.Desc.RangeID)
	}

	// As outlined above, last and applied index are the same after applying
	// the snapshot (i.e. the snapshot has no uncommitted tail).
	if s.RaftAppliedIndex != snap.Metadata.Index {
		log.Fatalf(ctx, "snapshot RaftAppliedIndex %d doesn't match its metadata index %d",
			s.RaftAppliedIndex, snap.Metadata.Index)
	}

	if err := batch.Commit(); err != nil {
		return err
	}

	r.mu.Lock()
	// We set the persisted last index to the last applied index. This is
	// not a correctness issue, but means that we may have just transferred
	// some entries we're about to re-request from the leader and overwrite.
	// However, raft.MultiNode currently expects this behaviour, and the
	// performance implications are not likely to be drastic. If our
	// feelings about this ever change, we can add a LastIndex field to
	// raftpb.SnapshotMetadata.
	r.mu.lastIndex = s.RaftAppliedIndex
	r.mu.raftLogSize = raftLogSize
	// Update the range and store stats.
	r.store.metrics.subtractMVCCStats(r.mu.state.Stats)
	r.store.metrics.addMVCCStats(s.Stats)
	r.mu.state = s
	r.assertStateLocked(r.store.Engine())
	r.mu.Unlock()

	// As the last deferred action after committing the batch, update other
	// fields which are uninitialized or need updating. This may not happen
	// if the system config has not yet been loaded. While config update
	// will correctly set the fields, there is no order guarantee in
	// ApplySnapshot.
	// TODO: should go through the standard store lock when adding a replica.
	if err := r.updateRangeInfo(&desc); err != nil {
		panic(err)
	}

	r.setDescWithoutProcessUpdate(&desc)

	appliedSuccessfully = true
	return nil
}
Beispiel #25
0
// processLoop processes the entries in the queue until the provided
// stopper signals exit.
func (bq *baseQueue) processLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := bq.AnnotateCtx(context.Background())
		defer func() {
			bq.mu.Lock()
			bq.mu.stopped = true
			bq.mu.Unlock()
			bq.AmbientContext.FinishEventLog()
		}()

		// nextTime is initially nil; we don't start any timers until the queue
		// becomes non-empty.
		var nextTime <-chan time.Time

		immediately := make(chan time.Time)
		close(immediately)

		for {
			select {
			// Exit on stopper.
			case <-stopper.ShouldStop():
				return

			// Incoming signal sets the next time to process if there were previously
			// no replicas in the queue.
			case <-bq.incoming:
				if nextTime == nil {
					// When a replica is added, wake up immediately. This is mainly
					// to facilitate testing without unnecessary sleeps.
					nextTime = immediately

					// In case we're in a test, still block on the impl.
					bq.impl.timer(0)
				}
			// Process replicas as the timer expires.
			case <-nextTime:
				repl := bq.pop()
				var duration time.Duration
				if repl != nil {
					if stopper.RunTask(func() {
						annotatedCtx := repl.AnnotateCtx(ctx)
						start := timeutil.Now()
						if err := bq.processReplica(annotatedCtx, repl, clock); err != nil {
							// Maybe add failing replica to purgatory if the queue supports it.
							bq.maybeAddToPurgatory(annotatedCtx, repl, err, clock, stopper)
						}
						duration = timeutil.Since(start)
						log.VEventf(annotatedCtx, 2, "done %s", duration)
						bq.processingNanos.Inc(duration.Nanoseconds())
					}) != nil {
						return
					}
				}
				if bq.Length() == 0 {
					nextTime = nil
				} else {
					nextTime = time.After(bq.impl.timer(duration))
				}
			}
		}
	})
}
Beispiel #26
0
func testSingleKeyInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	num := c.NumNodes()

	// Initialize the value for our test key to zero.
	const key = "test-key"
	initDB := c.NewClient(ctx, t, 0)
	if err := initDB.Put(ctx, key, 0); err != nil {
		t.Fatal(err)
	}

	type result struct {
		err        error
		maxLatency time.Duration
	}

	resultCh := make(chan result, num)
	deadline := timeutil.Now().Add(cfg.Duration)
	var expected int64

	// Start up num workers each reading and writing the same
	// key. Each worker is configured to talk to a different node in the
	// cluster.
	for i := 0; i < num; i++ {
		db := c.NewClient(ctx, t, i)
		go func() {
			var r result
			for timeutil.Now().Before(deadline) {
				start := timeutil.Now()
				err := db.Txn(ctx, func(txn *client.Txn) error {
					minExp := atomic.LoadInt64(&expected)
					r, err := txn.Get(key)
					if err != nil {
						return err
					}
					b := txn.NewBatch()
					v := r.ValueInt()
					b.Put(key, v+1)
					err = txn.CommitInBatch(b)
					// Atomic updates after the fact mean that we should read
					// exp or larger (since concurrent writers might have
					// committed but not yet performed their atomic update).
					if err == nil && v < minExp {
						return errors.Errorf("unexpected read: %d, expected >= %d", v, minExp)
					}
					return err
				})
				if err != nil {
					resultCh <- result{err: err}
					return
				}
				atomic.AddInt64(&expected, 1)
				latency := timeutil.Since(start)
				if r.maxLatency < latency {
					r.maxLatency = latency
				}
			}
			resultCh <- r
		}()
	}

	// Verify that none of the workers encountered an error.
	var results []result
	for len(results) < num {
		select {
		case <-stopper.ShouldStop():
			t.Fatalf("interrupted")
		case r := <-resultCh:
			if r.err != nil {
				t.Fatal(r.err)
			}
			results = append(results, r)
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is still
			// running.
			log.Infof(ctx, "%d", atomic.LoadInt64(&expected))
		}
	}

	// Verify the resulting value stored at the key is what we expect.
	r, err := initDB.Get(ctx, key)
	if err != nil {
		t.Fatal(err)
	}
	v := r.ValueInt()
	if expected != v {
		t.Fatalf("expected %d, but found %d", expected, v)
	}
	var maxLatency []time.Duration
	for _, r := range results {
		maxLatency = append(maxLatency, r.maxLatency)
	}
	log.Infof(ctx, "%d increments: %s", v, maxLatency)
}
// Run a particular schema change and run some OLTP operations in parallel, as
// soon as the schema change starts executing its backfill.
func runSchemaChangeWithOperations(
	t *testing.T,
	sqlDB *gosql.DB,
	kvDB *client.DB,
	schemaChange string,
	maxValue int,
	keyMultiple int,
	backfillNotification chan bool,
) {
	tableDesc := sqlbase.GetTableDescriptor(kvDB, "t", "test")

	// Run the schema change in a separate goroutine.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		start := timeutil.Now()
		// Start schema change that eventually runs a backfill.
		if _, err := sqlDB.Exec(schemaChange); err != nil {
			t.Error(err)
		}
		t.Logf("schema change %s took %v", schemaChange, timeutil.Since(start))
		wg.Done()
	}()

	// Wait until the schema change backfill starts.
	<-backfillNotification

	// Run a variety of operations during the backfill.

	// Grabbing a schema change lease on the table will fail, disallowing
	// another schema change from being simultaneously executed.
	sc := csql.NewSchemaChangerForTesting(tableDesc.ID, 0, 0, *kvDB, nil)
	if l, err := sc.AcquireLease(); err == nil {
		t.Fatalf("schema change lease acquisition on table %d succeeded: %v", tableDesc.ID, l)
	}

	// Update some rows.
	var updatedKeys []int
	for i := 0; i < 10; i++ {
		k := rand.Intn(maxValue)
		v := maxValue + i + 1
		if _, err := sqlDB.Exec(`UPDATE t.test SET v = $1 WHERE k = $2`, v, k); err != nil {
			t.Error(err)
		}
		updatedKeys = append(updatedKeys, k)
	}

	// Reupdate updated values back to what they were before.
	for _, k := range updatedKeys {
		if _, err := sqlDB.Exec(`UPDATE t.test SET v = $1 WHERE k = $2`, maxValue-k, k); err != nil {
			t.Error(err)
		}
	}

	// Delete some rows.
	deleteStartKey := rand.Intn(maxValue - 10)
	for i := 0; i < 10; i++ {
		if _, err := sqlDB.Exec(`DELETE FROM t.test WHERE k = $1`, deleteStartKey+i); err != nil {
			t.Error(err)
		}
	}
	// Reinsert deleted rows.
	for i := 0; i < 10; i++ {
		k := deleteStartKey + i
		if _, err := sqlDB.Exec(`INSERT INTO t.test VALUES($1, $2)`, k, maxValue-k); err != nil {
			t.Error(err)
		}
	}

	// Insert some new rows.
	numInserts := 10
	for i := 0; i < numInserts; i++ {
		k := maxValue + i + 1
		if _, err := sqlDB.Exec(`INSERT INTO t.test VALUES($1, $1)`, k); err != nil {
			t.Error(err)
		}
	}

	wg.Wait() // for schema change to complete.

	// Verify the number of keys left behind in the table to validate schema
	// change operations.
	tablePrefix := roachpb.Key(keys.MakeTablePrefix(uint32(tableDesc.ID)))
	tableEnd := tablePrefix.PrefixEnd()
	if kvs, err := kvDB.Scan(context.TODO(), tablePrefix, tableEnd, 0); err != nil {
		t.Fatal(err)
	} else if e := keyMultiple * (maxValue + numInserts + 1); len(kvs) != e {
		for _, kv := range kvs {
			t.Errorf("key %s, value %s", kv.Key, kv.Value)
		}
		t.Fatalf("expected %d key value pairs, but got %d", e, len(kvs))
	}

	// Delete the rows inserted.
	for i := 0; i < numInserts; i++ {
		if _, err := sqlDB.Exec(`DELETE FROM t.test WHERE k = $1`, maxValue+i+1); err != nil {
			t.Error(err)
		}
	}
}
Beispiel #28
0
func (n *Node) batchInternal(
	ctx context.Context, args *roachpb.BatchRequest,
) (*roachpb.BatchResponse, error) {
	// TODO(marc): grpc's authentication model (which gives credential access in
	// the request handler) doesn't really fit with the current design of the
	// security package (which assumes that TLS state is only given at connection
	// time) - that should be fixed.
	if peer, ok := peer.FromContext(ctx); ok {
		if tlsInfo, ok := peer.AuthInfo.(credentials.TLSInfo); ok {
			certUser, err := security.GetCertificateUser(&tlsInfo.State)
			if err != nil {
				return nil, err
			}
			if certUser != security.NodeUser {
				return nil, errors.Errorf("user %s is not allowed", certUser)
			}
		}
	}

	var br *roachpb.BatchResponse

	type snowballInfo struct {
		syncutil.Mutex
		collectedSpans [][]byte
		done           bool
	}
	var snowball *snowballInfo

	if err := n.stopper.RunTaskWithErr(func() error {
		const opName = "node.Batch"
		sp, err := tracing.JoinOrNew(n.storeCfg.AmbientCtx.Tracer, args.TraceContext, opName)
		if err != nil {
			return err
		}
		// If this is a snowball span, it gets special treatment: It skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			sp.LogEvent("delegating to snowball tracing")
			sp.Finish()

			snowball = new(snowballInfo)
			recorder := func(rawSpan basictracer.RawSpan) {
				snowball.Lock()
				defer snowball.Unlock()
				if snowball.done {
					// This is a late span that we must discard because the request was
					// already completed.
					return
				}
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(ctx, err)
				}
				snowball.collectedSpans = append(snowball.collectedSpans, encSp)
			}

			if sp, err = tracing.JoinOrNewSnowball(opName, args.TraceContext, recorder); err != nil {
				return err
			}
		}
		defer sp.Finish()
		traceCtx := opentracing.ContextWithSpan(ctx, sp)
		log.Event(traceCtx, args.Summary())

		tStart := timeutil.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(traceCtx, *args)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			log.ErrEventf(traceCtx, "%T", pErr.GetDetail())
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(timeutil.Since(tStart), pErr)
		br.Error = pErr
		return nil
	}); err != nil {
		return nil, err
	}

	if snowball != nil {
		snowball.Lock()
		br.CollectedSpans = snowball.collectedSpans
		snowball.done = true
		snowball.Unlock()
	}

	return br, nil
}
func (rq *replicateQueue) canTransferLease() bool {
	if lastLeaseTransfer := rq.lastLeaseTransfer.Load(); lastLeaseTransfer != nil {
		return timeutil.Since(lastLeaseTransfer.(time.Time)) > minLeaseTransferInterval
	}
	return true
}
Beispiel #30
0
// snapshot creates an OutgoingSnapshot containing a rocksdb snapshot for the given range.
func snapshot(
	ctx context.Context,
	snap engine.Reader,
	rangeID roachpb.RangeID,
	eCache *raftEntryCache,
	startKey roachpb.RKey,
) (OutgoingSnapshot, error) {
	start := timeutil.Now()

	var desc roachpb.RangeDescriptor
	// We ignore intents on the range descriptor (consistent=false) because we
	// know they cannot be committed yet; operations that modify range
	// descriptors resolve their own intents when they commit.
	ok, err := engine.MVCCGetProto(ctx, snap, keys.RangeDescriptorKey(startKey),
		hlc.MaxTimestamp, false /* !consistent */, nil, &desc)
	if err != nil {
		return OutgoingSnapshot{}, errors.Errorf("failed to get desc: %s", err)
	}
	if !ok {
		return OutgoingSnapshot{}, errors.Errorf("couldn't find range descriptor")
	}

	var snapData roachpb.RaftSnapshotData
	// Store RangeDescriptor as metadata, it will be retrieved by ApplySnapshot()
	snapData.RangeDescriptor = desc

	// Read the range metadata from the snapshot instead of the members
	// of the Range struct because they might be changed concurrently.
	appliedIndex, _, err := loadAppliedIndex(ctx, snap, rangeID)
	if err != nil {
		return OutgoingSnapshot{}, err
	}

	// Synthesize our raftpb.ConfState from desc.
	var cs raftpb.ConfState
	for _, rep := range desc.Replicas {
		cs.Nodes = append(cs.Nodes, uint64(rep.ReplicaID))
	}

	term, err := term(ctx, snap, rangeID, eCache, appliedIndex)
	if err != nil {
		return OutgoingSnapshot{}, errors.Errorf("failed to fetch term of %d: %s", appliedIndex, err)
	}

	// Intentionally let this iterator and the snapshot escape so that the
	// streamer can send chunks from it bit by bit.
	iter := NewReplicaDataIterator(&desc, snap, true /* replicatedOnly */)
	snapUUID := uuid.NewV4()

	log.Infof(ctx, "generated snapshot %s for range %s at index %d in %s.",
		snapUUID.Short(), rangeID, appliedIndex, timeutil.Since(start))
	return OutgoingSnapshot{
		EngineSnap: snap,
		Iter:       iter,
		SnapUUID:   *snapUUID,
		RaftSnap: raftpb.Snapshot{
			Data: snapUUID.GetBytes(),
			Metadata: raftpb.SnapshotMetadata{
				Index:     appliedIndex,
				Term:      term,
				ConfState: cs,
			},
		},
	}, nil
}