Example #1
0
func (at *allocatorTest) Run(ctx context.Context, t *testing.T) {
	at.f = MakeFarmer(t, at.Prefix, stopper)

	if at.CockroachDiskSizeGB != 0 {
		at.f.AddVars["cockroach_disk_size"] = strconv.Itoa(at.CockroachDiskSizeGB)
	}

	log.Infof(ctx, "creating cluster with %d node(s)", at.StartNodes)
	if err := at.f.Resize(at.StartNodes); err != nil {
		t.Fatal(err)
	}
	CheckGossip(ctx, t, at.f, longWaitTime, HasPeers(at.StartNodes))
	at.f.Assert(ctx, t)
	log.Info(ctx, "initial cluster is up")

	// We must stop the cluster because a) `nodectl` pokes at the data directory
	// and, more importantly, b) we don't want the cluster above and the cluster
	// below to ever talk to each other (see #7224).
	log.Info(ctx, "stopping cluster")
	for i := 0; i < at.f.NumNodes(); i++ {
		if err := at.f.Kill(ctx, i); err != nil {
			t.Fatalf("error stopping node %d: %s", i, err)
		}
	}

	log.Info(ctx, "downloading archived stores from Google Cloud Storage in parallel")
	errors := make(chan error, at.f.NumNodes())
	for i := 0; i < at.f.NumNodes(); i++ {
		go func(nodeNum int) {
			errors <- at.f.Exec(nodeNum, "./nodectl download "+at.StoreURL)
		}(i)
	}
	for i := 0; i < at.f.NumNodes(); i++ {
		if err := <-errors; err != nil {
			t.Fatalf("error downloading store %d: %s", i, err)
		}
	}

	log.Info(ctx, "restarting cluster with archived store(s)")
	for i := 0; i < at.f.NumNodes(); i++ {
		if err := at.f.Restart(ctx, i); err != nil {
			t.Fatalf("error restarting node %d: %s", i, err)
		}
	}
	at.f.Assert(ctx, t)

	log.Infof(ctx, "resizing cluster to %d nodes", at.EndNodes)
	if err := at.f.Resize(at.EndNodes); err != nil {
		t.Fatal(err)
	}
	CheckGossip(ctx, t, at.f, longWaitTime, HasPeers(at.EndNodes))
	at.f.Assert(ctx, t)

	log.Info(ctx, "waiting for rebalance to finish")
	if err := at.WaitForRebalance(ctx, t); err != nil {
		t.Fatal(err)
	}
	at.f.Assert(ctx, t)
}
Example #2
0
// Stop signals all live workers to stop and then waits for each to
// confirm it has stopped.
func (s *Stopper) Stop() {
	defer s.Recover()
	defer unregister(s)

	log.Info(context.TODO(), "stop has been called, stopping or quiescing all running tasks")
	// Don't bother doing stuff cleanly if we're panicking, that would likely
	// block. Instead, best effort only. This cleans up the stack traces,
	// avoids stalls and helps some tests in `./cli` finish cleanly (where
	// panics happen on purpose).
	if r := recover(); r != nil {
		go s.Quiesce()
		close(s.stopper)
		close(s.stopped)
		s.mu.Lock()
		for _, c := range s.mu.closers {
			go c.Close()
		}
		s.mu.Unlock()
		panic(r)
	}

	s.Quiesce()
	close(s.stopper)
	s.stop.Wait()
	s.mu.Lock()
	defer s.mu.Unlock()
	for _, c := range s.mu.closers {
		c.Close()
	}
	close(s.stopped)
}
func (rq *replicateQueue) process(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) error {
	retryOpts := retry.Options{
		InitialBackoff: 50 * time.Millisecond,
		MaxBackoff:     1 * time.Second,
		Multiplier:     2,
		MaxRetries:     5,
	}

	// Use a retry loop in order to backoff in the case of preemptive
	// snapshot errors, usually signalling that a rebalancing
	// reservation could not be made with the selected target.
	for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
		if err := rq.processOneChange(ctx, now, repl, sysCfg); err != nil {
			if IsPreemptiveSnapshotError(err) {
				// If ChangeReplicas failed because the preemptive snapshot failed, we
				// log the error but then return success indicating we should retry the
				// operation. The most likely causes of the preemptive snapshot failing are
				// a declined reservation or the remote node being unavailable. In either
				// case we don't want to wait another scanner cycle before reconsidering
				// the range.
				log.Info(ctx, err)
				continue
			}
			return err
		}
		// Enqueue this replica again to see if there are more changes to be made.
		rq.MaybeAdd(repl, rq.clock.Now())
		return nil
	}
	return errors.Errorf("failed to replicate %s after %d retries", repl, retryOpts.MaxRetries)
}
Example #4
0
func (c *sqlConn) Close() {
	if c.conn != nil {
		err := c.conn.Close()
		if err != nil && err != driver.ErrBadConn {
			log.Info(context.TODO(), err)
		}
		c.conn = nil
	}
}
Example #5
0
// deleteAllRows runs the kv operations necessary to delete all sql rows in the
// table passed at construction. This may require a scan.
//
// resume is the resume-span which should be used for the table deletion when
// the table deletion is chunked. The first call to this method should use a
// zero resume-span. After a chunk is deleted a new resume-span is returned.
//
// limit is a limit on either the number of keys or table-rows (for
// interleaved tables) deleted in the operation.
func (td *tableDeleter) deleteAllRows(
	ctx context.Context, resume roachpb.Span, limit int64,
) (roachpb.Span, error) {
	if td.rd.helper.tableDesc.IsInterleaved() {
		if log.V(2) {
			log.Info(ctx, "delete forced to scan: table is interleaved")
		}
		return td.deleteAllRowsScan(ctx, resume, limit)
	}
	return td.deleteAllRowsFast(ctx, resume, limit)
}
Example #6
0
// deleteIndex runs the kv operations necessary to delete all kv entries in the
// given index. This may require a scan.
//
// resume is the resume-span which should be used for the index deletion
// when the index deletion is chunked. The first call to this method should
// use a zero resume-span. After a chunk of the index is deleted a new resume-
// span is returned.
//
// limit is a limit on the number of index entries deleted in the operation.
func (td *tableDeleter) deleteIndex(
	ctx context.Context, idx *sqlbase.IndexDescriptor, resume roachpb.Span, limit int64,
) (roachpb.Span, error) {
	if len(idx.Interleave.Ancestors) > 0 || len(idx.InterleavedBy) > 0 {
		if log.V(2) {
			log.Info(ctx, "delete forced to scan: table is interleaved")
		}
		return td.deleteIndexScan(ctx, idx, resume, limit)
	}
	return td.deleteIndexFast(ctx, idx, resume, limit)
}
Example #7
0
// fastPathAvailable returns true if the fastDelete optimization can be used.
func (td *tableDeleter) fastPathAvailable(ctx context.Context) bool {
	if len(td.rd.helper.indexes) != 0 {
		if log.V(2) {
			log.Infof(ctx, "delete forced to scan: values required to update %d secondary indexes", len(td.rd.helper.indexes))
		}
		return false
	}
	if td.rd.helper.tableDesc.IsInterleaved() {
		if log.V(2) {
			log.Info(ctx, "delete forced to scan: table is interleaved")
		}
		return false
	}
	if len(td.rd.helper.tableDesc.PrimaryIndex.ReferencedBy) > 0 {
		if log.V(2) {
			log.Info(ctx, "delete forced to scan: table is referenced by foreign keys")
		}
		return false
	}
	return true
}
Example #8
0
func (m *Manager) getCompletedMigrations(ctx context.Context) (map[string]struct{}, error) {
	if log.V(1) {
		log.Info(ctx, "trying to get the list of completed migrations")
	}
	keyvals, err := m.db.Scan(ctx, keys.MigrationPrefix, keys.MigrationKeyMax, 0 /* maxRows */)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to get list of completed migrations")
	}
	completedMigrations := make(map[string]struct{})
	for _, keyval := range keyvals {
		completedMigrations[string(keyval.Key)] = struct{}{}
	}
	return completedMigrations, nil
}
Example #9
0
// WaitForRebalance waits until there's been no recent range adds, removes, and
// splits. We wait until the cluster is balanced or until `StableInterval`
// elapses, whichever comes first. Then, it prints stats about the rebalancing
// process. If the replica count appears unbalanced, an error is returned.
//
// This method is crude but necessary. If we were to wait until range counts
// were just about even, we'd miss potential post-rebalance thrashing.
func (at *allocatorTest) WaitForRebalance(ctx context.Context, t *testing.T) error {
	const statsInterval = 20 * time.Second

	db, err := gosql.Open("postgres", at.f.PGUrl(ctx, 0))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	var statsTimer timeutil.Timer
	var assertTimer timeutil.Timer
	defer statsTimer.Stop()
	defer assertTimer.Stop()
	statsTimer.Reset(statsInterval)
	assertTimer.Reset(0)
	for {
		select {
		case <-statsTimer.C:
			statsTimer.Read = true
			stats, err := at.allocatorStats(db)
			if err != nil {
				return err
			}

			log.Info(ctx, stats)
			if StableInterval <= stats.ElapsedSinceLastEvent {
				host := at.f.Hostname(0)
				log.Infof(context.Background(), "replica count = %f, max = %f", stats.ReplicaCountStdDev, *flagATMaxStdDev)
				if stats.ReplicaCountStdDev > *flagATMaxStdDev {
					_ = at.printRebalanceStats(db, host)
					return errors.Errorf(
						"%s elapsed without changes, but replica count standard "+
							"deviation is %.2f (>%.2f)", stats.ElapsedSinceLastEvent,
						stats.ReplicaCountStdDev, *flagATMaxStdDev)
				}
				return at.printRebalanceStats(db, host)
			}
			statsTimer.Reset(statsInterval)
		case <-assertTimer.C:
			assertTimer.Read = true
			at.f.Assert(ctx, t)
			assertTimer.Reset(time.Minute)
		case <-stopper.ShouldStop():
			return errors.New("interrupted")
		}
	}
}
Example #10
0
// addHistory persists a line of input to the readline history
// file.
func (c *cliState) addHistory(line string) {
	if !isInteractive {
		return
	}

	// ins.SaveHistory will push command into memory and try to
	// persist to disk (if ins's config.HistoryFile is set).  err can
	// be not nil only if it got a IO error while trying to persist.
	if err := c.ins.SaveHistory(line); err != nil {
		log.Warningf(context.TODO(), "cannot save command-line history: %s", err)
		log.Info(context.TODO(), "command-line history will not be saved in this session")
		cfg := c.ins.Config.Clone()
		cfg.HistoryFile = ""
		c.ins.SetConfig(cfg)
	}
}
Example #11
0
func (rq *replicateQueue) addReplica(
	ctx context.Context,
	repl *Replica,
	repDesc roachpb.ReplicaDescriptor,
	desc *roachpb.RangeDescriptor,
) error {
	err := repl.ChangeReplicas(ctx, roachpb.ADD_REPLICA, repDesc, desc)
	if IsPreemptiveSnapshotError(err) {
		// If the ChangeReplicas failed because the preemptive snapshot failed, we
		// log the error but then return success indicating we should retry the
		// operation. The most likely causes of the preemptive snapshot failing are
		// a declined reservation or the remote node being unavailable. In either
		// case we don't want to wait another scanner cycle before reconsidering
		// the range.
		log.Info(ctx, err)
		return nil
	}
	return err
}
Example #12
0
func (c *cliState) doStart(nextState cliStateEnum) cliStateEnum {
	// Common initialization.
	c.syntax = parser.Traditional
	c.querySyntax = true
	c.partialLines = []string{}

	if isInteractive {
		c.fullPrompt, c.continuePrompt = preparePrompts(c.conn.url)

		// We only enable history management when the terminal is actually
		// interactive. This saves on memory when e.g. piping a large SQL
		// script through the command-line client.
		userAcct, err := user.Current()
		if err != nil {
			if log.V(2) {
				log.Warningf(context.TODO(), "cannot retrieve user information: %s", err)
				log.Info(context.TODO(), "cannot load or save the command-line history")
			}
		} else {
			histFile := filepath.Join(userAcct.HomeDir, cmdHistFile)
			cfg := c.ins.Config.Clone()
			cfg.HistoryFile = histFile
			cfg.HistorySearchFold = true
			c.ins.SetConfig(cfg)
		}

		// The user only gets to see the info screen on interactive session.
		fmt.Print(infoMessage)

		c.checkSyntax = true
		c.normalizeHistory = true
		c.errExit = false
	} else {
		// When running non-interactive, by default we want errors to stop
		// further processing and all syntax checking to be done
		// server-side.
		c.errExit = true
		c.checkSyntax = false
	}

	return nextState
}
Example #13
0
func (at *allocatorTest) runSchemaChanges(ctx context.Context, t *testing.T) error {
	db, err := gosql.Open("postgres", at.f.PGUrl(ctx, 0))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	const tableName = "datablocks.blocks"
	schemaChanges := []string{
		"ALTER TABLE %s ADD COLUMN newcol DECIMAL DEFAULT (DECIMAL '1.4')",
		"CREATE INDEX foo ON %s (block_id)",
	}
	var wg sync.WaitGroup
	wg.Add(len(schemaChanges))
	for i := range schemaChanges {
		go func(i int) {
			start := timeutil.Now()
			cmd := fmt.Sprintf(schemaChanges[i], tableName)
			log.Infof(ctx, "starting schema change: %s", cmd)
			if _, err := db.Exec(cmd); err != nil {
				log.Infof(ctx, "hit schema change error: %s, for %s, in %s", err, cmd, timeutil.Since(start))
				t.Error(err)
				return
			}
			log.Infof(ctx, "completed schema change: %s, in %s", cmd, timeutil.Since(start))
			wg.Done()
			// TODO(vivek): Monitor progress of schema changes and log progress.
		}(i)
	}
	wg.Wait()

	log.Info(ctx, "validate applied schema changes")
	if err := at.ValidateSchemaChanges(ctx, t); err != nil {
		t.Fatal(err)
	}
	return nil
}
Example #14
0
// EnsureMigrations should be run during node startup to ensure that all
// required migrations have been run (and running all those that are definitely
// safe to run).
func (m *Manager) EnsureMigrations(ctx context.Context) error {
	// First, check whether there are any migrations that need to be run.
	completedMigrations, err := m.getCompletedMigrations(ctx)
	if err != nil {
		return err
	}
	allMigrationsCompleted := true
	for _, migration := range backwardCompatibleMigrations {
		key := migrationKey(migration)
		if _, ok := completedMigrations[string(key)]; !ok {
			allMigrationsCompleted = false
		}
	}
	if allMigrationsCompleted {
		return nil
	}

	// If there are any, grab the migration lease to ensure that only one
	// node is ever doing migrations at a time.
	// Note that we shouldn't ever let client.LeaseNotAvailableErrors cause us
	// to stop trying, because if we return an error the server will be shut down,
	// and this server being down may prevent the leaseholder from finishing.
	var lease *client.Lease
	if log.V(1) {
		log.Info(ctx, "trying to acquire lease")
	}
	for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
		lease, err = m.leaseManager.AcquireLease(ctx, keys.MigrationLease)
		if err == nil {
			break
		}
		log.Errorf(ctx, "failed attempt to acquire migration lease: %s", err)
	}
	if err != nil {
		return errors.Wrapf(err, "failed to acquire lease for running necessary migrations")
	}

	// Ensure that we hold the lease throughout the migration process and release
	// it when we're done.
	done := make(chan interface{}, 1)
	defer func() {
		done <- nil
		if log.V(1) {
			log.Info(ctx, "trying to release the lease")
		}
		if err := m.leaseManager.ReleaseLease(ctx, lease); err != nil {
			log.Errorf(ctx, "failed to release migration lease: %s", err)
		}
	}()
	if err := m.stopper.RunAsyncTask(ctx, func(ctx context.Context) {
		select {
		case <-done:
			return
		case <-time.After(leaseRefreshInterval):
			if err := m.leaseManager.ExtendLease(ctx, lease); err != nil {
				log.Warningf(ctx, "unable to extend ownership of expiration lease: %s", err)
			}
			if m.leaseManager.TimeRemaining(lease) < leaseRefreshInterval {
				// Note that we may be able to do better than this by influencing the
				// deadline of migrations' transactions based on the least expiration
				// time, but simply kill the process for now for the sake of simplicity.
				log.Fatal(ctx, "not enough time left on migration lease, terminating for safety")
			}
		}
	}); err != nil {
		return err
	}

	// Re-get the list of migrations in case any of them were completed between
	// our initial check and our grabbing of the lease.
	completedMigrations, err = m.getCompletedMigrations(ctx)
	if err != nil {
		return err
	}

	startTime := timeutil.Now().String()
	r := runner{
		db:          m.db,
		sqlExecutor: m.sqlExecutor,
	}
	for _, migration := range backwardCompatibleMigrations {
		key := migrationKey(migration)
		if _, ok := completedMigrations[string(key)]; ok {
			continue
		}

		if log.V(1) {
			log.Infof(ctx, "running migration %q", migration.name)
		}
		if err := migration.workFn(ctx, r); err != nil {
			return errors.Wrapf(err, "failed to run migration %q", migration.name)
		}

		if log.V(1) {
			log.Infof(ctx, "trying to persist record of completing migration %s", migration.name)
		}
		if err := m.db.Put(ctx, key, startTime); err != nil {
			return errors.Wrapf(err, "failed to persist record of completing migration %q",
				migration.name)
		}
	}

	return nil
}
Example #15
0
// Start starts the server on the specified port, starts gossip and initializes
// the node using the engines from the server's context.
//
// The passed context can be used to trace the server startup. The context
// should represent the general startup operation.
func (s *Server) Start(ctx context.Context) error {
	ctx = s.AnnotateCtx(ctx)

	startTime := timeutil.Now()

	tlsConfig, err := s.cfg.GetServerTLSConfig()
	if err != nil {
		return err
	}

	httpServer := netutil.MakeServer(s.stopper, tlsConfig, s)
	plainRedirectServer := netutil.MakeServer(s.stopper, tlsConfig, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusPermanentRedirect)
	}))

	// The following code is a specialization of util/net.go's ListenAndServe
	// which adds pgwire support. A single port is used to serve all protocols
	// (pg, http, h2) via the following construction:
	//
	// non-TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// TLS case:
	// net.Listen -> cmux.New
	//               |
	//               -  -> pgwire.Match -> pgwire.Server.ServeConn
	//               -  -> cmux.Any -> grpc.(*Server).Serve
	//
	// Note that the difference between the TLS and non-TLS cases exists due to
	// Go's lack of an h2c (HTTP2 Clear Text) implementation. See inline comments
	// in util.ListenAndServe for an explanation of how h2c is implemented there
	// and here.

	ln, err := net.Listen("tcp", s.cfg.Addr)
	if err != nil {
		return err
	}
	log.Eventf(ctx, "listening on port %s", s.cfg.Addr)
	unresolvedListenAddr, err := officialAddr(s.cfg.Addr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.Addr = unresolvedListenAddr.String()
	unresolvedAdvertAddr, err := officialAddr(s.cfg.AdvertiseAddr, ln.Addr())
	if err != nil {
		return err
	}
	s.cfg.AdvertiseAddr = unresolvedAdvertAddr.String()

	s.rpcContext.SetLocalInternalServer(s.node)

	m := cmux.New(ln)
	pgL := m.Match(pgwire.Match)
	anyL := m.Match(cmux.Any())

	httpLn, err := net.Listen("tcp", s.cfg.HTTPAddr)
	if err != nil {
		return err
	}
	unresolvedHTTPAddr, err := officialAddr(s.cfg.HTTPAddr, httpLn.Addr())
	if err != nil {
		return err
	}
	s.cfg.HTTPAddr = unresolvedHTTPAddr.String()

	workersCtx := s.AnnotateCtx(context.Background())

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		if err := httpLn.Close(); err != nil {
			log.Fatal(workersCtx, err)
		}
	})

	if tlsConfig != nil {
		httpMux := cmux.New(httpLn)
		clearL := httpMux.Match(cmux.HTTP1())
		tlsL := httpMux.Match(cmux.Any())

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(httpMux.Serve())
		})

		s.stopper.RunWorker(func() {
			netutil.FatalIfUnexpected(plainRedirectServer.Serve(clearL))
		})

		httpLn = tls.NewListener(tlsL, tlsConfig)
	}

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(httpServer.Serve(httpLn))
	})

	s.stopper.RunWorker(func() {
		<-s.stopper.ShouldQuiesce()
		netutil.FatalIfUnexpected(anyL.Close())
		<-s.stopper.ShouldStop()
		s.grpc.Stop()
	})

	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(s.grpc.Serve(anyL))
	})

	// Running the SQL migrations safely requires that we aren't serving SQL
	// requests at the same time -- to ensure that, block the serving of SQL
	// traffic until the migrations are done, as indicated by this channel.
	serveSQL := make(chan bool)

	s.stopper.RunWorker(func() {
		select {
		case <-serveSQL:
		case <-s.stopper.ShouldQuiesce():
			return
		}
		pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
		netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, pgL, func(conn net.Conn) {
			connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
			if err := s.pgServer.ServeConn(connCtx, conn); err != nil && !netutil.IsClosedConnection(err) {
				// Report the error on this connection's context, so that we
				// know which remote client caused the error when looking at
				// the logs.
				log.Error(connCtx, err)
			}
		}))
	})

	// Enable the debug endpoints first to provide an earlier window
	// into what's going on with the node in advance of exporting node
	// functionality.
	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.HandleFunc(debugEndpoint, http.HandlerFunc(handleDebug))

	s.gossip.Start(unresolvedAdvertAddr)
	log.Event(ctx, "started gossip")

	s.engines, err = s.cfg.CreateEngines()
	if err != nil {
		return errors.Wrap(err, "failed to create engines")
	}
	s.stopper.AddCloser(&s.engines)

	// We might have to sleep a bit to protect against this node producing non-
	// monotonic timestamps. Before restarting, its clock might have been driven
	// by other nodes' fast clocks, but when we restarted, we lost all this
	// information. For example, a client might have written a value at a
	// timestamp that's in the future of the restarted node's clock, and if we
	// don't do something, the same client's read would not return the written
	// value. So, we wait up to MaxOffset; we couldn't have served timestamps more
	// than MaxOffset in the future (assuming that MaxOffset was not changed, see
	// #9733).
	//
	// As an optimization for tests, we don't sleep if all the stores are brand
	// new. In this case, the node will not serve anything anyway until it
	// synchronizes with other nodes.
	{
		anyStoreBootstrapped := false
		for _, e := range s.engines {
			if _, err := storage.ReadStoreIdent(ctx, e); err != nil {
				// NotBootstrappedError is expected.
				if _, ok := err.(*storage.NotBootstrappedError); !ok {
					return err
				}
			} else {
				anyStoreBootstrapped = true
				break
			}
		}
		if anyStoreBootstrapped {
			sleepDuration := s.clock.MaxOffset() - timeutil.Since(startTime)
			if sleepDuration > 0 {
				log.Infof(ctx, "sleeping for %s to guarantee HLC monotonicity", sleepDuration)
				time.Sleep(sleepDuration)
			}
		}
	}

	// Now that we have a monotonic HLC wrt previous incarnations of the process,
	// init all the replicas.
	err = s.node.start(
		ctx,
		unresolvedAdvertAddr,
		s.engines,
		s.cfg.NodeAttributes,
		s.cfg.Locality,
	)
	if err != nil {
		return err
	}
	log.Event(ctx, "started node")

	// We can now add the node registry.
	s.recorder.AddNode(s.registry, s.node.Descriptor, s.node.startedAt)

	// Begin recording runtime statistics.
	s.startSampleEnvironment(s.cfg.MetricsSampleInterval)

	// Begin recording time series data collected by the status monitor.
	s.tsDB.PollSource(
		s.cfg.AmbientCtx, s.recorder, s.cfg.MetricsSampleInterval, ts.Resolution10s, s.stopper,
	)

	// Begin recording status summaries.
	s.node.startWriteSummaries(s.cfg.MetricsSampleInterval)

	// Create and start the schema change manager only after a NodeID
	// has been assigned.
	testingKnobs := &sql.SchemaChangerTestingKnobs{}
	if s.cfg.TestingKnobs.SQLSchemaChanger != nil {
		testingKnobs = s.cfg.TestingKnobs.SQLSchemaChanger.(*sql.SchemaChangerTestingKnobs)
	}
	sql.NewSchemaChangeManager(testingKnobs, *s.db, s.gossip, s.leaseMgr).Start(s.stopper)

	s.sqlExecutor.Start(ctx, &s.adminMemMetrics, s.node.Descriptor)
	s.distSQLServer.Start()

	log.Infof(ctx, "starting %s server at %s", s.cfg.HTTPRequestScheme(), unresolvedHTTPAddr)
	log.Infof(ctx, "starting grpc/postgres server at %s", unresolvedListenAddr)
	log.Infof(ctx, "advertising CockroachDB node at %s", unresolvedAdvertAddr)
	s.stopper.RunWorker(func() {
		netutil.FatalIfUnexpected(m.Serve())
	})

	if len(s.cfg.SocketFile) != 0 {
		log.Infof(ctx, "starting postgres server at unix:%s", s.cfg.SocketFile)

		// Unix socket enabled: postgres protocol only.
		unixLn, err := net.Listen("unix", s.cfg.SocketFile)
		if err != nil {
			return err
		}

		s.stopper.RunWorker(func() {
			<-s.stopper.ShouldQuiesce()
			if err := unixLn.Close(); err != nil {
				log.Fatal(workersCtx, err)
			}
		})

		s.stopper.RunWorker(func() {
			select {
			case <-serveSQL:
			case <-s.stopper.ShouldQuiesce():
				return
			}
			pgCtx := s.pgServer.AmbientCtx.AnnotateCtx(context.Background())
			netutil.FatalIfUnexpected(httpServer.ServeWith(s.stopper, unixLn, func(conn net.Conn) {
				connCtx := log.WithLogTagStr(pgCtx, "client", conn.RemoteAddr().String())
				if err := s.pgServer.ServeConn(connCtx, conn); err != nil && !netutil.IsClosedConnection(err) {
					// Report the error on this connection's context, so that we
					// know which remote client caused the error when looking at
					// the logs.
					log.Error(connCtx, err)
				}
			}))
		})
	}

	log.Event(ctx, "accepting connections")

	s.nodeLiveness.StartHeartbeat(ctx, s.stopper)

	// Before serving SQL requests, we have to make sure the database is
	// in an acceptable form for this version of the software.
	// We have to do this after actually starting up the server to be able to
	// seamlessly use the kv client against other nodes in the cluster.
	migMgr := migrations.NewManager(
		s.stopper, s.db, s.sqlExecutor, s.clock, s.NodeID().String())
	if err := migMgr.EnsureMigrations(ctx); err != nil {
		log.Fatal(ctx, err)
	}
	log.Infof(ctx, "done ensuring all necessary migrations have run")
	close(serveSQL)
	log.Info(ctx, "serving sql connections")

	// Initialize grpc-gateway mux and context.
	jsonpb := &protoutil.JSONPb{
		EnumsAsInts:  true,
		EmitDefaults: true,
		Indent:       "  ",
	}
	protopb := new(protoutil.ProtoPb)
	gwMux := gwruntime.NewServeMux(
		gwruntime.WithMarshalerOption(gwruntime.MIMEWildcard, jsonpb),
		gwruntime.WithMarshalerOption(httputil.JSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.AltJSONContentType, jsonpb),
		gwruntime.WithMarshalerOption(httputil.ProtoContentType, protopb),
		gwruntime.WithMarshalerOption(httputil.AltProtoContentType, protopb),
	)
	gwCtx, gwCancel := context.WithCancel(s.AnnotateCtx(context.Background()))
	s.stopper.AddCloser(stop.CloserFn(gwCancel))

	// Setup HTTP<->gRPC handlers.
	conn, err := s.rpcContext.GRPCDial(s.cfg.Addr)
	if err != nil {
		return errors.Errorf("error constructing grpc-gateway: %s; are your certificates valid?", err)
	}

	for _, gw := range []grpcGatewayServer{s.admin, s.status, &s.tsServer} {
		if err := gw.RegisterGateway(gwCtx, gwMux, conn); err != nil {
			return err
		}
	}

	var uiFileSystem http.FileSystem
	uiDebug := envutil.EnvOrDefaultBool("COCKROACH_DEBUG_UI", false)
	if uiDebug {
		uiFileSystem = http.Dir("pkg/ui")
	} else {
		uiFileSystem = &assetfs.AssetFS{
			Asset:     ui.Asset,
			AssetDir:  ui.AssetDir,
			AssetInfo: ui.AssetInfo,
		}
	}
	uiFileServer := http.FileServer(uiFileSystem)

	s.mux.HandleFunc("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			if uiDebug {
				r.URL.Path = "debug.html"
			} else {
				r.URL.Path = "release.html"
			}
		}
		uiFileServer.ServeHTTP(w, r)
	}))

	// TODO(marc): when cookie-based authentication exists,
	// apply it for all web endpoints.
	s.mux.Handle(adminPrefix, gwMux)
	s.mux.Handle(ts.URLPrefix, gwMux)
	s.mux.Handle(statusPrefix, gwMux)
	s.mux.Handle("/health", gwMux)
	s.mux.Handle(statusVars, http.HandlerFunc(s.status.handleVars))
	log.Event(ctx, "added http endpoints")

	if err := sdnotify.Ready(); err != nil {
		log.Errorf(ctx, "failed to signal readiness using systemd protocol: %s", err)
	}
	log.Event(ctx, "server ready")

	return nil
}
Example #16
0
// RefreshLeases starts a goroutine that refreshes the lease manager
// leases for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := gossip.RegisterSystemConfigChannel()
		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := gossip.GetSystemConfig()
				if m.testingKnobs.GossipUpdateEvent != nil {
					m.testingKnobs.GossipUpdateEvent(cfg)
				}
				// Read all tables and their versions
				if log.V(2) {
					log.Info(context.TODO(), "received a new config; will refresh leases")
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf(context.TODO(), "%s: unable to unmarshal descriptor %v", kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						table.MaybeUpgradeFormatVersion()
						if err := table.ValidateTable(); err != nil {
							log.Errorf(context.TODO(), "%s: received invalid table descriptor: %v", kv.Key, table)
							continue
						}
						if log.V(2) {
							log.Infof(context.TODO(), "%s: refreshing lease table: %d (%s), version: %d, deleted: %t",
								kv.Key, table.ID, table.Name, table.Version, table.Dropped())
						}
						// Try to refresh the table lease to one >= this version.
						if t := m.findTableState(table.ID, false /* create */); t != nil {
							if err := t.purgeOldLeases(
								db, table.Dropped(), table.Version, m.LeaseStore); err != nil {
								log.Warningf(context.TODO(), "error purging leases for table %d(%s): %s",
									table.ID, table.Name, err)
							}
						}
					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				if m.testingKnobs.TestingLeasesRefreshedEvent != nil {
					m.testingKnobs.TestingLeasesRefreshedEvent(cfg)
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
Example #17
0
// Start starts a goroutine that runs outstanding schema changes
// for tables received in the latest system configuration via gossip.
func (s *SchemaChangeManager) Start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := s.gossip.RegisterSystemConfigChannel()
		timer := &time.Timer{}
		delay := 360 * time.Second
		if s.testingKnobs.AsyncExecQuickly {
			delay = 20 * time.Millisecond
		}
		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := s.gossip.GetSystemConfig()
				// Read all tables and their versions
				if log.V(2) {
					log.Info(context.TODO(), "received a new config")
				}
				schemaChanger := SchemaChanger{
					nodeID:       s.leaseMgr.nodeID.Get(),
					db:           s.db,
					leaseMgr:     s.leaseMgr,
					testingKnobs: s.testingKnobs,
				}
				// Keep track of existing schema changers.
				oldSchemaChangers := make(map[sqlbase.ID]struct{}, len(s.schemaChangers))
				for k := range s.schemaChangers {
					oldSchemaChangers[k] = struct{}{}
				}
				execAfter := timeutil.Now().Add(delay)
				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf(context.TODO(), "%s: unable to unmarshal descriptor %v", kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						table.MaybeUpgradeFormatVersion()
						if err := table.ValidateTable(); err != nil {
							log.Errorf(context.TODO(), "%s: received invalid table descriptor: %v", kv.Key, table)
							continue
						}

						// Keep track of outstanding schema changes.
						// If all schema change commands always set UpVersion, why
						// check for the presence of mutations?
						// A schema change execution might fail soon after
						// unsetting UpVersion, and we still want to process
						// outstanding mutations. Similar with a table marked for deletion.
						if table.UpVersion || table.Dropped() || table.Adding() ||
							table.Renamed() || len(table.Mutations) > 0 {
							if log.V(2) {
								log.Infof(context.TODO(), "%s: queue up pending schema change; table: %d, version: %d",
									kv.Key, table.ID, table.Version)
							}

							// Only track the first schema change. We depend on
							// gossip to renotify us when a schema change has been
							// completed.
							schemaChanger.tableID = table.ID
							if len(table.Mutations) == 0 {
								schemaChanger.mutationID = sqlbase.InvalidMutationID
							} else {
								schemaChanger.mutationID = table.Mutations[0].MutationID
							}
							schemaChanger.execAfter = execAfter
							// Keep track of this schema change.
							// Remove from oldSchemaChangers map.
							delete(oldSchemaChangers, table.ID)
							if sc, ok := s.schemaChangers[table.ID]; ok {
								if sc.mutationID == schemaChanger.mutationID {
									// Ignore duplicate.
									continue
								}
							}
							s.schemaChangers[table.ID] = schemaChanger
						}

					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				// Delete old schema changers.
				for k := range oldSchemaChangers {
					delete(s.schemaChangers, k)
				}
				timer = s.newTimer()

			case <-timer.C:
				if s.testingKnobs.AsyncExecNotification != nil &&
					s.testingKnobs.AsyncExecNotification() != nil {
					timer = s.newTimer()
					continue
				}
				for tableID, sc := range s.schemaChangers {
					if timeutil.Since(sc.execAfter) > 0 {
						err := sc.exec()
						if err != nil {
							if err == errExistingSchemaChangeLease {
							} else if err == sqlbase.ErrDescriptorNotFound {
								// Someone deleted this table. Don't try to run the schema
								// changer again. Note that there's no gossip update for the
								// deletion which would remove this schemaChanger.
								delete(s.schemaChangers, tableID)
							} else {
								// We don't need to act on integrity
								// constraints violations because exec()
								// purges mutations that violate integrity
								// constraints.
								log.Warningf(context.TODO(), "Error executing schema change: %s", err)
							}
						}
						// Advance the execAfter time so that this schema
						// changer doesn't get called again for a while.
						sc.execAfter = timeutil.Now().Add(delay)
					}
					// Only attempt to run one schema changer.
					break
				}
				timer = s.newTimer()

			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
Example #18
0
func TestReplicateQueueRebalance(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Set the gossip stores interval lower to speed up rebalancing. With the
	// default of 5s we have to wait ~5s for the rebalancing to start.
	defer func(v time.Duration) {
		gossip.GossipStoresInterval = v
	}(gossip.GossipStoresInterval)
	gossip.GossipStoresInterval = 100 * time.Millisecond

	// TODO(peter): Remove when lease rebalancing is the default.
	defer func(v bool) {
		storage.EnableLeaseRebalancing = v
	}(storage.EnableLeaseRebalancing)
	storage.EnableLeaseRebalancing = true

	const numNodes = 5
	tc := testcluster.StartTestCluster(t, numNodes,
		base.TestClusterArgs{ReplicationMode: base.ReplicationAuto},
	)
	defer tc.Stopper().Stop()

	const newRanges = 5
	for i := 0; i < newRanges; i++ {
		tableID := keys.MaxReservedDescID + i + 1
		splitKey := keys.MakeRowSentinelKey(keys.MakeTablePrefix(uint32(tableID)))
		for {
			if _, _, err := tc.SplitRange(splitKey); err != nil {
				if testutils.IsError(err, "split at key .* failed: conflict updating range descriptors") ||
					testutils.IsError(err, "range is already split at key") {
					continue
				}
				t.Fatal(err)
			}
			break
		}
	}

	countReplicas := func() []int {
		counts := make([]int, len(tc.Servers))
		for _, s := range tc.Servers {
			err := s.Stores().VisitStores(func(s *storage.Store) error {
				counts[s.StoreID()-1] += s.ReplicaCount()
				return nil
			})
			if err != nil {
				t.Fatal(err)
			}
		}
		return counts
	}

	numRanges := newRanges + server.ExpectedInitialRangeCount()
	numReplicas := numRanges * 3
	const minThreshold = 0.9
	minReplicas := int(math.Floor(minThreshold * (float64(numReplicas) / numNodes)))

	util.SucceedsSoon(t, func() error {
		counts := countReplicas()
		for _, c := range counts {
			if c < minReplicas {
				err := errors.Errorf("not balanced: %d", counts)
				log.Info(context.Background(), err)
				return err
			}
		}
		return nil
	})
}
Example #19
0
func (s *server) gossipReceiver(
	ctx context.Context,
	argsPtr **Request,
	senderFn func(*Response) error,
	receiverFn func() (*Request, error),
) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	reply := new(Response)

	// This loop receives gossip from the client. It does not attempt to send the
	// server's gossip to the client.
	for {
		args := *argsPtr
		if args.NodeID != 0 {
			// Decide whether or not we can accept the incoming connection
			// as a permanent peer.
			if args.NodeID == s.NodeID.Get() {
				// This is an incoming loopback connection which should be closed by
				// the client.
				if log.V(2) {
					log.Infof(ctx, "ignoring gossip from node %d (loopback)", args.NodeID)
				}
			} else if s.mu.incoming.hasNode(args.NodeID) {
				// Do nothing.
				if log.V(2) {
					log.Infof(ctx, "gossip received from node %d", args.NodeID)
				}
			} else if s.mu.incoming.hasSpace() {
				if log.V(2) {
					log.Infof(ctx, "adding node %d to incoming set", args.NodeID)
				}

				s.mu.incoming.addNode(args.NodeID)
				s.mu.nodeMap[args.Addr] = serverInfo{
					peerID:    args.NodeID,
					createdAt: timeutil.Now(),
				}

				defer func(nodeID roachpb.NodeID, addr util.UnresolvedAddr) {
					if log.V(2) {
						log.Infof(ctx, "removing node %d from incoming set", args.NodeID)
					}

					s.mu.incoming.removeNode(nodeID)
					delete(s.mu.nodeMap, addr)
				}(args.NodeID, args.Addr)
			} else {
				var alternateAddr util.UnresolvedAddr
				var alternateNodeID roachpb.NodeID
				// Choose a random peer for forwarding.
				altIdx := rand.Intn(len(s.mu.nodeMap))
				for addr, info := range s.mu.nodeMap {
					if altIdx == 0 {
						alternateAddr = addr
						alternateNodeID = info.peerID
						break
					}
					altIdx--
				}

				log.Infof(ctx, "refusing gossip from node %d (max %d conns); forwarding to %d (%s)",
					args.NodeID, s.mu.incoming.maxSize, alternateNodeID, alternateAddr)

				*reply = Response{
					NodeID:          s.NodeID.Get(),
					AlternateAddr:   &alternateAddr,
					AlternateNodeID: alternateNodeID,
				}

				s.mu.Unlock()
				err := senderFn(reply)
				s.mu.Lock()
				// Naively, we would return err here unconditionally, but that
				// introduces a race. Specifically, the client may observe the
				// end of the connection before it has a chance to receive and
				// process this message, which instructs it to hang up anyway.
				// Instead, we send the message and proceed to gossip
				// normally, depending on the client to end the connection.
				if err != nil {
					return err
				}
			}
		} else {
			log.Info(ctx, "received gossip from unknown node")
		}

		bytesReceived := int64(args.Size())
		infosReceived := int64(len(args.Delta))
		s.nodeMetrics.BytesReceived.Inc(bytesReceived)
		s.nodeMetrics.InfosReceived.Inc(infosReceived)
		s.serverMetrics.BytesReceived.Inc(bytesReceived)
		s.serverMetrics.InfosReceived.Inc(infosReceived)

		freshCount, err := s.mu.is.combine(args.Delta, args.NodeID)
		if err != nil {
			log.Warningf(ctx, "failed to fully combine gossip delta from node %d: %s", args.NodeID, err)
		}
		if log.V(1) {
			log.Infof(ctx, "received %s from node %d (%d fresh)", extractKeys(args.Delta), args.NodeID, freshCount)
		}
		s.maybeTightenLocked()

		*reply = Response{
			NodeID:          s.NodeID.Get(),
			HighWaterStamps: s.mu.is.getHighWaterStamps(),
		}

		s.mu.Unlock()
		err = senderFn(reply)
		s.mu.Lock()
		if err != nil {
			return err
		}

		if cycler := s.simulationCycler; cycler != nil {
			cycler.Wait()
		}

		s.mu.Unlock()
		recvArgs, err := receiverFn()
		s.mu.Lock()
		if err != nil {
			return err
		}

		// *argsPtr holds the remote peer state; we need to update it whenever we
		// receive a new non-nil request. We avoid assigning to *argsPtr directly
		// because the gossip sender above has closed over *argsPtr and will NPE if
		// *argsPtr were set to nil.
		*argsPtr = recvArgs
	}
}