Beispiel #1
0
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// This method checks whether all gossip bootstrap hosts are
// connected, and whether the node itself is a bootstrap host, but
// there is still no sentinel gossip.
func (g *Gossip) maybeWarnAboutInit(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			InitialBackoff: 5 * time.Second,      // first backoff at 5s
			MaxBackoff:     60 * time.Second,     // max backoff is 60s
			Multiplier:     2,                    // doubles
			Closer:         stopper.ShouldStop(), // stop no matter what on stopper
		}
		// This will never error because of infinite retries.
		for r := retry.Start(retryOptions); r.Next(); {
			g.mu.Lock()
			hasConnections := g.outgoing.len()+g.incoming.len() > 0
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			triedAll := g.triedAll
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				break
			}
			if !hasConnections {
				log.Warningf("not connected to gossip; check that gossip flag is set appropriately")
			} else if triedAll {
				log.Warningf("missing gossip sentinel; first range unavailable or cluster not initialized")
			}
		}
	})
}
Beispiel #2
0
// RefreshLeases starts a goroutine that refreshes the lease manager
// leases for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := gossip.RegisterSystemConfigChannel()
		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := gossip.GetSystemConfig()
				if m.testingKnobs.GossipUpdateEvent != nil {
					m.testingKnobs.GossipUpdateEvent(cfg)
				}
				// Read all tables and their versions
				if log.V(2) {
					log.Info("received a new config; will refresh leases")
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf("%s: unable to unmarshal descriptor %v", kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						if err := table.Validate(); err != nil {
							log.Errorf("%s: received invalid table descriptor: %v", kv.Key, table)
							continue
						}
						if log.V(2) {
							log.Infof("%s: refreshing lease table: %d (%s), version: %d",
								kv.Key, table.ID, table.Name, table.Version)
						}
						// Try to refresh the table lease to one >= this version.
						if t := m.findTableState(table.ID, false /* create */, nil); t != nil {
							if err := t.purgeOldLeases(
								db, table.Deleted(), table.Version, m.LeaseStore); err != nil {
								log.Warningf("error purging leases for table %d(%s): %s",
									table.ID, table.Name, err)
							}
						}
					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				if m.testingKnobs.TestingLeasesRefreshedEvent != nil {
					m.testingKnobs.TestingLeasesRefreshedEvent(cfg)
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
Beispiel #3
0
// runStart starts the cockroach node using --stores as the list of
// storage devices ("stores") on this machine and --gossip as the list
// of "well-known" hosts used to join this node to the cockroach
// cluster via the gossip network.
func runStart(cmd *cobra.Command, args []string) {
	info := util.GetBuildInfo()
	log.Infof("build Vers: %s", info.Vers)
	log.Infof("build Tag:  %s", info.Tag)
	log.Infof("build Time: %s", info.Time)
	log.Infof("build Deps: %s", info.Deps)

	// Default user for servers.
	Context.User = security.NodeUser
	// First initialize the Context as it is used in other places.
	err := Context.Init("start")
	if err != nil {
		log.Errorf("failed to initialize context: %s", err)
		return
	}

	log.Info("starting cockroach cluster")
	stopper := util.NewStopper()
	stopper.AddWorker()
	s, err := server.NewServer(Context, stopper)
	if err != nil {
		log.Errorf("failed to start Cockroach server: %s", err)
		return
	}

	err = s.Start(false)
	if err != nil {
		log.Errorf("cockroach server exited with error: %s", err)
		return
	}

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt, os.Kill)
	// TODO(spencer): move this behind a build tag.
	signal.Notify(signalCh, syscall.SIGTERM)

	// Block until one of the signals above is received or the stopper
	// is stopped externally (for example, via the quit endpoint).
	select {
	case <-stopper.ShouldStop():
		stopper.SetStopped()
	case <-signalCh:
		log.Infof("initiating graceful shutdown of server")
		stopper.SetStopped()
		go func() {
			s.Stop()
		}()
	}

	select {
	case <-signalCh:
		log.Warningf("second signal received, initiating hard shutdown")
	case <-time.After(time.Minute):
		log.Warningf("time limit reached, initiating hard shutdown")
		return
	case <-stopper.IsStopped():
		log.Infof("server drained and shutdown completed")
	}
	log.Flush()
}
Beispiel #4
0
func cutNetwork(t *testing.T, c cluster.Cluster, closer <-chan struct{}, partitions ...[]int) {
	addrs, addrsToNode := mustGetHosts(t, c)
	ipPartitions := make([][]iptables.IP, 0, len(partitions))
	for _, partition := range partitions {
		ipPartition := make([]iptables.IP, 0, len(partition))
		for _, nodeIndex := range partition {
			ipPartition = append(ipPartition, addrs[nodeIndex])
		}
		ipPartitions = append(ipPartitions, ipPartition)
	}
	log.Warningf("partitioning: %v (%v)", partitions, ipPartitions)
	for host, cmds := range iptables.Rules(iptables.Bidirectional(ipPartitions...)) {
		for _, cmd := range cmds {
			if err := c.ExecRoot(addrsToNode[host], cmd); err != nil {
				t.Fatal(err)
			}
		}
	}
	<-closer
	for i := 0; i < c.NumNodes(); i++ {
		for _, cmd := range iptables.Reset() {
			if err := c.ExecRoot(i, cmd); err != nil {
				t.Fatal(err)
			}
		}
	}
	log.Warningf("resolved all partitions")
}
Beispiel #5
0
// MIGRATION(tschottdorf): As of #7310, we make sure that a Replica always has
// a complete Raft state on disk. Prior versions may not have that, which
// causes issues due to the fact that we used to synthesize a TruncatedState
// and do so no more. To make up for that, write a missing TruncatedState here.
// That key is in the replicated state, but since during a cluster upgrade, all
// nodes do it, it's fine (and we never CPut on that key, so anything in the
// Raft pipeline will simply overwrite it).
//
// Migration(tschottdorf): See #6991. It's possible that the HardState is
// missing after a snapshot was applied (so there is a TruncatedState). In this
// case, synthesize a HardState (simply setting everything that was in the
// snapshot to committed). Having lost the original HardState can theoretically
// mean that the replica was further ahead or had voted, and so there's no
// guarantee that this will be correct. But it will be correct in the majority
// of cases, and some state *has* to be recovered.
func migrate7310And6991(ctx context.Context, batch engine.ReadWriter, desc roachpb.RangeDescriptor) error {
	state, err := loadState(ctx, batch, &desc)
	if err != nil {
		return errors.Wrap(err, "could not migrate TruncatedState: %s")
	}
	if (*state.TruncatedState == roachpb.RaftTruncatedState{}) {
		state.TruncatedState.Term = raftInitialLogTerm
		state.TruncatedState.Index = raftInitialLogIndex
		state.RaftAppliedIndex = raftInitialLogIndex
		if _, err := saveState(ctx, batch, state); err != nil {
			return errors.Wrapf(err, "could not migrate TruncatedState to %+v", &state.TruncatedState)
		}
		log.Warningf(ctx, "migration: synthesized TruncatedState for %+v", desc)
	}

	hs, err := loadHardState(ctx, batch, desc.RangeID)
	if err != nil {
		return errors.Wrap(err, "unable to load HardState")
	}
	// Only update the HardState when there is a nontrivial Commit field. We
	// don't have a snapshot here, so we could wind up lowering the commit
	// index (which would error out and fatal us).
	if hs.Commit == 0 {
		log.Warningf(ctx, "migration: synthesized HardState for %+v", desc)
		if err := synthesizeHardState(ctx, batch, state, hs); err != nil {
			return errors.Wrap(err, "could not migrate HardState")
		}
	}
	return nil
}
Beispiel #6
0
func cutNetwork(t *testing.T, c cluster.Cluster, closer <-chan struct{}, partitions ...[]int) {
	defer func() {
		if errs := restoreNetwork(t, c); len(errs) > 0 {
			t.Fatalf("errors restoring the network: %+v", errs)
		}
	}()
	addrs, addrsToNode := mustGetHosts(t, c)
	ipPartitions := make([][]iptables.IP, 0, len(partitions))
	for _, partition := range partitions {
		ipPartition := make([]iptables.IP, 0, len(partition))
		for _, nodeIndex := range partition {
			ipPartition = append(ipPartition, addrs[nodeIndex])
		}
		ipPartitions = append(ipPartitions, ipPartition)
	}
	log.Warningf(context.TODO(), "partitioning: %v (%v)", partitions, ipPartitions)
	for host, cmds := range iptables.Rules(iptables.Bidirectional(ipPartitions...)) {
		for _, cmd := range cmds {
			if err := c.ExecRoot(addrsToNode[host], cmd); err != nil {
				t.Fatal(err)
			}
		}
	}
	<-closer
	log.Warningf(context.TODO(), "resolved all partitions")
}
Beispiel #7
0
// maybeSignalStatusChangeLocked checks whether gossip should transition its
// internal state from connected to stalled or vice versa.
func (g *Gossip) maybeSignalStatusChangeLocked() {
	orphaned := g.outgoing.len()+g.mu.incoming.len() == 0
	stalled := orphaned || g.mu.is.getInfo(KeySentinel) == nil
	if stalled {
		// We employ the stalled boolean to avoid filling logs with warnings.
		if !g.stalled {
			log.Eventf(g.ctx, "now stalled")
			if orphaned {
				if len(g.resolvers) == 0 {
					log.Warningf(g.ctx, "no resolvers found; use --join to specify a connected node")
				} else {
					log.Warningf(g.ctx, "no incoming or outgoing connections")
				}
			} else if len(g.resolversTried) == len(g.resolvers) {
				log.Warningf(g.ctx, "first range unavailable; resolvers exhausted")
			} else {
				log.Warningf(g.ctx, "first range unavailable; trying remaining resolvers")
			}
		}
		if len(g.resolvers) > 0 {
			g.signalStalledLocked()
		}
	} else {
		if g.stalled {
			log.Eventf(g.ctx, "connected")
			log.Infof(g.ctx, "node has connected to cluster via gossip")
			g.signalConnectedLocked()
		}
		g.maybeCleanupBootstrapAddressesLocked()
	}
	g.stalled = stalled
}
Beispiel #8
0
// Send sends call to Cockroach via an RPC request. Errors which are retryable
// are retried with backoff in a loop using the default retry options. Other
// errors sending the request are retried indefinitely using the same client
// command ID to avoid reporting failure when in fact the command may have gone
// through and been executed successfully. We retry here to eventually get
// through with the same client command ID and be given the cached response.
func (s *Sender) Send(_ context.Context, call proto.Call) {
	var err error
	for r := retry.Start(s.retryOpts); r.Next(); {
		if !s.client.IsHealthy() {
			log.Warningf("client %s is unhealthy; retrying", s.client)
			continue
		}

		method := call.Args.Method().String()
		c := s.client.Go("Server."+method, call.Args, call.Reply, nil)
		<-c.Done
		err = c.Error
		if err != nil {
			// Assume all errors sending request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visiblity that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %v", method, err)
			continue
		}

		// On successful post, we're done with retry loop.
		break
	}
	if err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
Beispiel #9
0
// warnAboutStall attempts to diagnose the cause of a gossip network
// not being connected to the sentinel. This could happen in a network
// partition, or because of misconfiguration. It's impossible to tell,
// but we can warn appropriately. If there are no incoming or outgoing
// connections, we warn about the --join flag being set. If we've
// connected, and all resolvers have been tried, we warn about either
// the first range not being available or else possible the cluster
// never having been initialized.
func (g *Gossip) warnAboutStall() {
	if g.outgoing.len()+g.incoming.len() == 0 {
		log.Warningf("not connected to cluster; use --join to specify a connected node")
	} else if len(g.resolversTried) == len(g.resolvers) {
		log.Warningf("first range unavailable or cluster not initialized")
	} else {
		log.Warningf("partition in gossip network; attempting new connection")
	}
}
Beispiel #10
0
// warnAboutStall attempts to diagnose the cause of a gossip network
// not being connected to the sentinel. This could happen in a network
// partition, or because of misconfiguration. It's impossible to tell,
// but we can warn appropriately. If there are no incoming or outgoing
// connections, we warn about the --gossip flag being set. If we've
// connected, and all resolvers have been tried, we warn about either
// the first range not being available or else possible the cluster
// never having been initialized.
func (g *Gossip) warnAboutStall() {
	if g.outgoing.len()+g.incoming.len() == 0 {
		log.Warningf("not connected to gossip; check that gossip flag is set appropriately")
	} else if len(g.resolversTried) == len(g.resolvers) {
		log.Warningf("first range unavailable or cluster not initialized")
	} else {
		log.Warningf("partition in gossip network; attempting new connection")
	}
}
Beispiel #11
0
func logLinuxStats() {
	if !log.V(1) {
		return
	}

	// We don't know which fields in struct mallinfo are most relevant to us yet,
	// so log it all for now.
	//
	// A major caveat is that mallinfo() returns stats only for the main arena.
	// glibc uses multiple allocation arenas to increase malloc performance for
	// multithreaded processes, so mallinfo may not report on significant parts
	// of the heap.
	mi := C.mallinfo()
	log.Infof("mallinfo stats: ordblks=%s, smblks=%s, hblks=%s, hblkhd=%s, usmblks=%s, fsmblks=%s, "+
		"uordblks=%s, fordblks=%s, keepcost=%s",
		humanize.IBytes(uint64(mi.ordblks)),
		humanize.IBytes(uint64(mi.smblks)),
		humanize.IBytes(uint64(mi.hblks)),
		humanize.IBytes(uint64(mi.hblkhd)),
		humanize.IBytes(uint64(mi.usmblks)),
		humanize.IBytes(uint64(mi.fsmblks)),
		humanize.IBytes(uint64(mi.uordblks)),
		humanize.IBytes(uint64(mi.fordblks)),
		humanize.IBytes(uint64(mi.fsmblks)))

	// malloc_info() emits a *lot* of XML, partly because it generates stats for
	// all arenas, unlike mallinfo().
	//
	// TODO(cdo): extract the useful bits and record to time-series DB.
	if !log.V(2) {
		return
	}

	// Create a memstream and make malloc_info() output to it.
	var buf *C.char
	var bufSize C.size_t
	memstream := C.open_memstream(&buf, &bufSize)
	if memstream == nil {
		log.Warning("couldn't open memstream")
		return
	}
	defer func() {
		C.fclose(memstream)
		C.free(unsafe.Pointer(buf))
	}()
	if rc := C.malloc_info(0, memstream); rc != 0 {
		log.Warningf("malloc_info returned %d", rc)
		return
	}
	if rc := C.fflush(memstream); rc != 0 {
		log.Warningf("fflush returned %d", rc)
		return
	}
	log.Infof("malloc_info: %s", C.GoString(buf))
}
Beispiel #12
0
func (s *state) handleMessage(req *RaftMessageRequest) {
	// We only want to lazily create the group if it's not heartbeat-related;
	// our heartbeats are coalesced and contain a dummy GroupID.
	switch req.Message.Type {
	case raftpb.MsgHeartbeat:
		s.fanoutHeartbeat(req)
		return
	case raftpb.MsgHeartbeatResp:
		s.fanoutHeartbeatResponse(req)
		return
	}

	s.CacheReplicaDescriptor(req.GroupID, req.FromReplica)
	s.CacheReplicaDescriptor(req.GroupID, req.ToReplica)
	if g, ok := s.groups[req.GroupID]; ok {
		if g.replicaID > req.ToReplica.ReplicaID {
			log.Warningf("node %v: got message for group %s with stale replica ID %s (expected %s)",
				s.nodeID, req.GroupID, req.ToReplica.ReplicaID, g.replicaID)
			return
		} else if g.replicaID < req.ToReplica.ReplicaID {
			// The message has a newer ReplicaID than we know about. This
			// means that this node has been removed from a group and
			// re-added to it, before our GC process was able to remove the
			// remnants of the old group.
			log.Infof("node %v: got message for group %s with newer replica ID (%s vs %s), recreating group",
				s.nodeID, req.GroupID, req.ToReplica.ReplicaID, g.replicaID)
			if err := s.removeGroup(req.GroupID); err != nil {
				log.Warningf("Error removing group %d (in response to incoming message): %s",
					req.GroupID, err)
				return
			}
			if err := s.createGroup(req.GroupID, req.ToReplica.ReplicaID); err != nil {
				log.Warningf("Error recreating group %d (in response to incoming message): %s",
					req.GroupID, err)
				return
			}
		}
	} else {
		if log.V(1) {
			log.Infof("node %v: got message for unknown group %d; creating it", s.nodeID, req.GroupID)
		}
		if err := s.createGroup(req.GroupID, req.ToReplica.ReplicaID); err != nil {
			log.Warningf("Error creating group %d (in response to incoming message): %s",
				req.GroupID, err)
			return
		}
	}

	if err := s.multiNode.Step(context.Background(), uint64(req.GroupID), req.Message); err != nil {
		if log.V(4) {
			log.Infof("node %v: multinode step to group %v failed for message %.200s", s.nodeID, req.GroupID,
				raft.DescribeMessage(req.Message, s.EntryFormatter))
		}
	}
}
Beispiel #13
0
// RefreshLeases starts a goroutine that refreshes the lease manager
// leases for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(DescriptorTable.ID))
		gossipUpdateC := gossip.RegisterSystemConfigChannel()
		for {
			select {
			case <-gossipUpdateC:
				cfg := *gossip.GetSystemConfig()
				m.updateSystemConfig(cfg)

				// Read all tables and their versions
				if log.V(2) {
					log.Info("received a new config %v", cfg)
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf("%s: unable to unmarshal descriptor %v", kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *Descriptor_Table:
						table := union.Table
						if err := table.Validate(); err != nil {
							log.Errorf("%s: received invalid table descriptor: %v", kv.Key, table)
							continue
						}
						if log.V(2) {
							log.Infof("%s: refreshing lease table: %d, version: %d",
								kv.Key, table.ID, table.Version)
						}
						// Try to refresh the table lease to one >= this version.
						if err := m.refreshLease(db, table.ID, table.Version); err != nil {
							log.Warningf("%s: %v", kv.Key, err)
						}

					case *Descriptor_Database:
						// Ignore.
					}
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
Beispiel #14
0
// t.mu needs to be locked.
func (t *tableState) removeLease(lease *LeaseState, store LeaseStore) {
	t.active.remove(lease)
	t.tableNameCache.remove(lease)
	// Release to the store asynchronously, without the tableState lock.
	err := t.stopper.RunAsyncTask(func() {
		if err := store.Release(lease); err != nil {
			log.Warningf(context.TODO(), "error releasing lease %q: %s", lease, err)
		}
	})
	if log.V(1) && err != nil {
		log.Warningf(context.TODO(), "error removing lease from store: %s", err)
	}
}
Beispiel #15
0
// ShouldRebalance returns whether the specified store should attempt to
// rebalance a replica to another store.
func (a Allocator) ShouldRebalance(storeID roachpb.StoreID) bool {
	if !a.options.AllowRebalance {
		return false
	}
	// In production, add some random jitter to shouldRebalance.
	if !a.options.Deterministic && a.randGen.Float32() > rebalanceShouldRebalanceChance {
		return false
	}
	if log.V(2) {
		log.Infof("ShouldRebalance from store %d", storeID)
	}
	storeDesc := a.storePool.getStoreDescriptor(storeID)
	if storeDesc == nil {
		if log.V(2) {
			log.Warningf(
				"ShouldRebalance couldn't find store with id %d in StorePool",
				storeID)
		}
		return false
	}

	sl := a.storePool.getStoreList(*storeDesc.CombinedAttrs(), []roachpb.NodeID{storeDesc.Node.NodeID}, a.options.Deterministic)

	// ShouldRebalance is true if a suitable replacement can be found.
	return a.balancer.improve(storeDesc, sl) != nil
}
Beispiel #16
0
func parseOptions(data []byte) (sql.SessionArgs, error) {
	args := sql.SessionArgs{}
	buf := readBuffer{msg: data}
	for {
		key, err := buf.getString()
		if err != nil {
			return sql.SessionArgs{}, util.Errorf("error reading option key: %s", err)
		}
		if len(key) == 0 {
			break
		}
		value, err := buf.getString()
		if err != nil {
			return sql.SessionArgs{}, util.Errorf("error reading option value: %s", err)
		}
		switch key {
		case "database":
			args.Database = value
		case "user":
			args.User = value
		default:
			if log.V(1) {
				log.Warningf("unrecognized configuration parameter %q", key)
			}
		}
	}
	return args, nil
}
Beispiel #17
0
// Batch sends a request to Cockroach via RPC. Errors which are retryable are
// retried with backoff in a loop using the default retry options. Other errors
// sending the request are retried indefinitely using the same client command
// ID to avoid reporting failure when in fact the command may have gone through
// and been executed successfully. We retry here to eventually get through with
// the same client command ID and be given the cached response.
func (s *rpcSender) Send(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) {
	var err error
	var br proto.BatchResponse
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, &ba, &br); err != nil {
			br.Reset() // don't trust anyone.
			// Assume all errors sending request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visiblity that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with retry loop.
		break
	}
	if err != nil {
		return nil, proto.NewError(err)
	}
	pErr := br.Error
	br.Error = nil
	return &br, pErr
}
Beispiel #18
0
// pushTxn attempts to abort the txn via push. If the transaction
// cannot be aborted, the oldestIntentNanos value is atomically
// updated to the min of oldestIntentNanos and the intent's
// timestamp. The wait group is signaled on completion.
func (gcq *gcQueue) pushTxn(repl *Replica, now roachpb.Timestamp, txn *roachpb.Transaction, updateOldestIntent func(int64), wg *sync.WaitGroup) {
	defer wg.Done() // signal wait group always on completion
	if log.V(1) {
		log.Infof("pushing txn %s ts=%s", txn, txn.OrigTimestamp)
	}

	// Attempt to push the transaction which created the intent.
	pushArgs := &roachpb.PushTxnRequest{
		RequestHeader: roachpb.RequestHeader{
			Key: txn.Key,
		},
		Now:       now,
		PusherTxn: roachpb.Transaction{Priority: roachpb.MaxPriority},
		PusheeTxn: *txn,
		PushType:  roachpb.ABORT_TXN,
	}
	b := &client.Batch{}
	b.InternalAddRequest(pushArgs)
	br, err := repl.rm.DB().RunWithResponse(b)
	if err != nil {
		log.Warningf("push of txn %s failed: %s", txn, err)
		updateOldestIntent(txn.OrigTimestamp.WallTime)
		return
	}
	// Update the supplied txn on successful push.
	*txn = *br.Responses[0].GetInner().(*roachpb.PushTxnResponse).PusheeTxn
}
Beispiel #19
0
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// This method checks whether all gossip bootstrap hosts are
// connected, and whether the node itself is a bootstrap host, but
// there is still no sentinel gossip.
func (g *Gossip) maybeWarnAboutInit(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			InitialBackoff: 5 * time.Second,  // first backoff at 5s
			MaxBackoff:     60 * time.Second, // max backoff is 60s
			Multiplier:     2,                // doubles
			Stopper:        stopper,          // stop no matter what on stopper
		}
		// This will never error because of infinite retries.
		for r := retry.Start(retryOptions); r.Next(); {
			g.mu.Lock()
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			triedAll := g.triedAll
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				break
			}
			// Otherwise, if all bootstrap hosts are connected, warn.
			if triedAll {
				log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " +
					"Use \"cockroach init\" to initialize.")
			}
		}
	})
}
Beispiel #20
0
// lookupReplica looks up replica by key [range]. Lookups are done
// by consulting each store in turn via Store.LookupRange(key).
// Returns RangeID and replica on success; RangeKeyMismatch error
// if not found.
// This is only for testing usage; performance doesn't matter.
func (ls *Stores) lookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, replica *roachpb.ReplicaDescriptor, err error) {
	ls.mu.RLock()
	defer ls.mu.RUnlock()
	var rng *Replica
	var partialDesc *roachpb.RangeDescriptor
	for _, store := range ls.storeMap {
		rng = store.LookupReplica(start, end)
		if rng == nil {
			if tmpRng := store.LookupReplica(start, nil); tmpRng != nil {
				log.Warningf("range not contained in one range: [%s,%s), but have [%s,%s)",
					start, end, tmpRng.Desc().StartKey, tmpRng.Desc().EndKey)
				partialDesc = tmpRng.Desc()
				break
			}
			continue
		}
		if replica == nil {
			rangeID = rng.RangeID
			replica, err = rng.GetReplica()
			if err != nil {
				if _, ok := err.(*errReplicaNotInRange); !ok {
					return 0, nil, err
				}
			}
			continue
		}
		// Should never happen outside of tests.
		return 0, nil, util.Errorf(
			"range %+v exists on additional store: %+v", rng, store)
	}
	if replica == nil {
		err = roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), partialDesc)
	}
	return rangeID, replica, err
}
Beispiel #21
0
// Send sends call to Cockroach via an RPC request. Errors which are retryable
// are retried with backoff in a loop using the default retry options. Other
// errors sending the request are retried indefinitely using the same client
// command ID to avoid reporting failure when in fact the command may have gone
// through and been executed successfully. We retry here to eventually get
// through with the same client command ID and be given the cached response.
func (s *rpcSender) Send(_ context.Context, call proto.Call) {
	method := fmt.Sprintf("Server.%s", call.Args.Method())

	var err error
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, call.Args, call.Reply); err != nil {
			// Assume all errors sending request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visiblity that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with retry loop.
		break
	}
	if err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
Beispiel #22
0
// EvictCachedRangeDescriptor will evict any cached range descriptors
// for the given key. It is intended that this method be called from a
// consumer of rangeDescriptorCache if the returned range descriptor is
// discovered to be stale.
// seenDesc should always be passed in and is used as the basis of a
// compare-and-evict (as pointers); if it is nil, eviction is unconditional
// but a warning will be logged.
func (rdc *rangeDescriptorCache) EvictCachedRangeDescriptor(descKey proto.Key, seenDesc *proto.RangeDescriptor) {
	if seenDesc == nil {
		log.Warningf("compare-and-evict for key %s with nil descriptor; clearing unconditionally", descKey)
	}

	rdc.rangeCacheMu.Lock()
	defer rdc.rangeCacheMu.Unlock()

	rngKey, cachedDesc := rdc.getCachedRangeDescriptorLocked(descKey)
	// Note that we're doing a "compare-and-erase": If seenDesc is not nil,
	// we want to clean the cache only if it equals the cached range
	// descriptor as a pointer. If not, then likely some other caller
	// already evicted previously, and we can save work by not doing it
	// again (which would prompt another expensive lookup).
	if seenDesc != nil && seenDesc != cachedDesc {
		return
	}

	for !bytes.Equal(descKey, proto.KeyMin) {
		if log.V(2) {
			log.Infof("evict cached descriptor: key=%s desc=%s\n%s", descKey, cachedDesc, rdc.stringLocked())
		} else if log.V(1) {
			log.Infof("evict cached descriptor: key=%s desc=%s", descKey, cachedDesc)
		}
		rdc.rangeCache.Del(rngKey)

		// Retrieve the metadata range key for the next level of metadata, and
		// evict that key as well. This loop ends after the meta1 range, which
		// returns KeyMin as its metadata key.
		descKey = keys.RangeMetaKey(descKey)
		rngKey, cachedDesc = rdc.getCachedRangeDescriptorLocked(descKey)
	}
}
Beispiel #23
0
func (l *LocalCluster) createNetwork() {
	l.panicOnStop()

	net, err := l.client.NetworkInspect(context.Background(), networkName)
	if err == nil {
		// We need to destroy the network and any running containers inside of it.
		for containerID := range net.Containers {
			// This call could fail if the container terminated on its own after we call
			// NetworkInspect, but the likelihood of this seems low. If this line creates
			// a lot of panics we should do more careful error checking.
			maybePanic(l.client.ContainerKill(context.Background(), containerID, "9"))
		}
		maybePanic(l.client.NetworkRemove(context.Background(), networkName))
	} else if !client.IsErrNotFound(err) {
		panic(err)
	}

	resp, err := l.client.NetworkCreate(context.Background(), networkName, types.NetworkCreate{
		Driver: "bridge",
		// Docker gets very confused if two networks have the same name.
		CheckDuplicate: true,
	})
	maybePanic(err)
	if resp.Warning != "" {
		log.Warningf("creating network: %s", resp.Warning)
	}
	l.networkID = resp.ID
}
Beispiel #24
0
// pushTxn attempts to abort the txn via push. The wait group is signaled on
// completion.
func (gcq *gcQueue) pushTxn(repl *Replica, now roachpb.Timestamp, txn *roachpb.Transaction,
	typ roachpb.PushTxnType, wg *sync.WaitGroup) {
	defer wg.Done() // signal wait group always on completion
	if log.V(1) {
		gcq.eventLog.Infof(true, "pushing txn %s ts=%s", txn, txn.OrigTimestamp)
	}

	// Attempt to push the transaction which created the intent.
	pushArgs := &roachpb.PushTxnRequest{
		Span: roachpb.Span{
			Key: txn.Key,
		},
		Now:       now,
		PusherTxn: roachpb.Transaction{Priority: roachpb.MaxUserPriority},
		PusheeTxn: txn.TxnMeta,
		PushType:  typ,
	}
	b := &client.Batch{}
	b.InternalAddRequest(pushArgs)
	br, err := repl.store.DB().RunWithResponse(b)
	if err != nil {
		log.Warningf("push of txn %s failed: %s", txn, err)
		return
	}
	// Update the supplied txn on successful push.
	*txn = br.Responses[0].GetInner().(*roachpb.PushTxnResponse).PusheeTxn
}
Beispiel #25
0
// pushTxn attempts to abort the txn via push. If the transaction
// cannot be aborted, the oldestIntentNanos value is atomically
// updated to the min of oldestIntentNanos and the intent's
// timestamp. The wait group is signaled on completion.
func (gcq *gcQueue) pushTxn(repl *Replica, now proto.Timestamp, txn *proto.Transaction, updateOldestIntent func(int64), wg *sync.WaitGroup) {
	defer wg.Done() // signal wait group always on completion
	if log.V(1) {
		log.Infof("pushing txn %s ts=%s", txn, txn.OrigTimestamp)
	}

	// Attempt to push the transaction which created the intent.
	pushArgs := &proto.PushTxnRequest{
		RequestHeader: proto.RequestHeader{
			Timestamp:    now,
			Key:          txn.Key,
			User:         security.RootUser,
			UserPriority: gogoproto.Int32(proto.MaxPriority),
			Txn:          nil,
		},
		Now:       now,
		PusheeTxn: *txn,
		PushType:  proto.ABORT_TXN,
	}
	pushReply := &proto.PushTxnResponse{}
	b := &client.Batch{}
	b.InternalAddCall(proto.Call{Args: pushArgs, Reply: pushReply})
	if err := repl.rm.DB().Run(b); err != nil {
		log.Warningf("push of txn %s failed: %s", txn, err)
		updateOldestIntent(txn.OrigTimestamp.WallTime)
		return
	}
	// Update the supplied txn on successful push.
	*txn = *pushReply.PusheeTxn
}
Beispiel #26
0
// deleteIndexMutationsWithReversedColumns deletes index mutations with a
// different mutationID than the schema changer and a reference to one of the
// reversed columns.
func (sc *SchemaChanger) deleteIndexMutationsWithReversedColumns(
	desc *sqlbase.TableDescriptor, columns map[string]struct{},
) {
	newMutations := make([]sqlbase.DescriptorMutation, 0, len(desc.Mutations))
	for _, mutation := range desc.Mutations {
		if mutation.MutationID != sc.mutationID {
			if idx := mutation.GetIndex(); idx != nil {
				deleteMutation := false
				for _, name := range idx.ColumnNames {
					if _, ok := columns[name]; ok {
						// Such an index mutation has to be with direction ADD and
						// in the DELETE_ONLY state. Live indexes referencing live
						// columns cannot be deleted and thus never have direction
						// DROP. All mutations with the ADD direction start off in
						// the DELETE_ONLY state.
						if mutation.Direction != sqlbase.DescriptorMutation_ADD ||
							mutation.State != sqlbase.DescriptorMutation_DELETE_ONLY {
							panic(fmt.Sprintf("mutation in bad state: %+v", mutation))
						}
						log.Warningf(context.TODO(), "delete schema change mutation: %+v", mutation)
						deleteMutation = true
						break
					}
				}
				if deleteMutation {
					continue
				}
			}
		}
		newMutations = append(newMutations, mutation)
	}
	// Reset mutations.
	desc.Mutations = newMutations
}
Beispiel #27
0
// lookupReplica looks up replica by key [range]. Lookups are done
// by consulting each store in turn via Store.LookupRange(key).
// Returns RangeID and replica on success; RangeKeyMismatch error
// if not found.
// This is only for testing usage; performance doesn't matter.
func (ls *Stores) lookupReplica(start, end roachpb.RKey) (rangeID roachpb.RangeID, replica *roachpb.ReplicaDescriptor, pErr *roachpb.Error) {
	ls.mu.RLock()
	defer ls.mu.RUnlock()
	var rng *Replica
	for _, store := range ls.storeMap {
		rng = store.LookupReplica(start, end)
		if rng == nil {
			if tmpRng := store.LookupReplica(start, nil); tmpRng != nil {
				log.Warningf(fmt.Sprintf("range not contained in one range: [%s,%s), but have [%s,%s)", start, end, tmpRng.Desc().StartKey, tmpRng.Desc().EndKey))
			}
			continue
		}
		if replica == nil {
			rangeID = rng.RangeID
			replica = rng.GetReplica()
			continue
		}
		// Should never happen outside of tests.
		return 0, nil, roachpb.NewErrorf(
			"range %+v exists on additional store: %+v", rng, store)
	}
	if replica == nil {
		pErr = roachpb.NewError(roachpb.NewRangeKeyMismatchError(start.AsRawKey(), end.AsRawKey(), nil))
	}
	return rangeID, replica, pErr
}
Beispiel #28
0
func (c *v3Conn) parseOptions(data []byte) error {
	buf := readBuffer{msg: data}
	for {
		key, err := buf.getString()
		if err != nil {
			return util.Errorf("error reading option key: %s", err)
		}
		if len(key) == 0 {
			return nil
		}
		value, err := buf.getString()
		if err != nil {
			return util.Errorf("error reading option value: %s", err)
		}
		switch key {
		case "database":
			c.opts.database = value
		case "user":
			c.opts.user = value
		default:
			if log.V(1) {
				log.Warningf("unrecognized configuration parameter %q", key)
			}
		}
	}
}
// updateRangeInfo is called whenever a range is updated by ApplySnapshot
// or is created by range splitting to setup the fields which are
// uninitialized or need updating.
func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error {
	// RangeMaxBytes should be updated by looking up Zone Config in two cases:
	// 1. After snapshot applying, if no updating of zone config
	// for this key range, then maxBytes of this range will not
	// be updated.
	// 2. After a new range is created by range splition, just
	// copying maxBytes from the original range does not work
	// since the original range and the new range might belong
	// to different zones.
	// Load the system config.
	cfg, ok := r.store.Gossip().GetSystemConfig()
	if !ok {
		// This could be before the system config was ever gossiped,
		// or it expired. Let the gossip callback set the info.
		log.Warningf("no system config available, cannot determine range MaxBytes")
		return nil
	}

	// Find zone config for this range.
	zone, err := cfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return util.Errorf("failed to lookup zone config for Range %s: %s", r, err)
	}

	r.SetMaxBytes(zone.RangeMaxBytes)
	return nil
}
// updateRangeInfo is called whenever a range is updated by ApplySnapshot
// or is created by range splitting to setup the fields which are
// uninitialized or need updating.
func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error {
	// RangeMaxBytes should be updated by looking up Zone Config in two cases:
	// 1. After applying a snapshot, if the zone config was not updated for
	// this key range, then maxBytes of this range will not be updated either.
	// 2. After a new range is created by a split, only copying maxBytes from
	// the original range wont work as the original and new ranges might belong
	// to different zones.
	// Load the system config.
	cfg, ok := r.store.Gossip().GetSystemConfig()
	if !ok {
		// This could be before the system config was ever gossiped,
		// or it expired. Let the gossip callback set the info.
		log.Warningf(context.TODO(), "%s: no system config available, cannot determine range MaxBytes", r)
		return nil
	}

	// Find zone config for this range.
	zone, err := cfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		return errors.Errorf("%s: failed to lookup zone config: %s", r, err)
	}

	r.SetMaxBytes(zone.RangeMaxBytes)
	return nil
}