// checkAllocatorStable returns whether the replica distribution within the // cluster has been stable for at least `StableInterval`. Only unrecoverable // errors are returned. func (at *allocatorTest) checkAllocatorStable(db *gosql.DB) (bool, error) { q := `SELECT NOW()-timestamp, rangeID, storeID, eventType FROM rangelog WHERE ` + `timestamp=(SELECT MAX(timestamp) FROM rangelog WHERE eventType IN ($1, $2, $3))` eventTypes := []interface{}{ string(storage.RangeEventLogSplit), string(storage.RangeEventLogAdd), string(storage.RangeEventLogRemove), } var elapsedStr string var rangeID int64 var storeID int64 var eventType string row := db.QueryRow(q, eventTypes...) if row == nil { log.Errorf("couldn't find any range events") return false, nil } if err := row.Scan(&elapsedStr, &rangeID, &storeID, &eventType); err != nil { // Log but don't return errors, to increase resilience against transient // errors. log.Errorf("error checking rebalancer: %s", err) return false, nil } elapsedSinceLastRangeEvent, err := time.ParseDuration(elapsedStr) if err != nil { return false, err } log.Infof("last range event: %s for range %d/store %d (%s ago)", eventType, rangeID, storeID, elapsedSinceLastRangeEvent) return elapsedSinceLastRangeEvent >= StableInterval, nil }
// runGetZone retrieves the zone config for a given object id, // and if present, outputs its YAML representation. // TODO(marc): accept db/table names rather than IDs. func runGetZone(cmd *cobra.Command, args []string) { if len(args) != 1 { mustUsage(cmd) return } id, err := strconv.Atoi(args[0]) if err != nil { log.Errorf("could not parse object ID %s", args[0]) return } db, _ := makeSQLClient() defer func() { _ = db.Close() }() // TODO(marc): switch to placeholders once they work with pgwire. _, rows, err := runQueryWithFormat(db, fmtMap{"config": formatZone}, fmt.Sprintf(`SELECT * FROM system.zones WHERE id=%d`, id)) if err != nil { log.Error(err) return } if len(rows) == 0 { log.Errorf("Object %d: no zone config found", id) return } fmt.Println(rows[0][1]) }
// runStart starts the cockroach node using -stores as the list of // storage devices ("stores") on this machine and -gossip as the list // of "well-known" hosts used to join this node to the cockroach // cluster via the gossip network. func runStart(cmd *commander.Command, args []string) { log.Info("Starting cockroach cluster") s, err := newServer() if err != nil { log.Errorf("Failed to start Cockroach server: %v", err) return } // Init engines from -stores. engines, err := initEngines(*stores) if err != nil { log.Errorf("Failed to initialize engines from -stores=%q: %v", *stores, err) return } if len(engines) == 0 { log.Errorf("No valid engines specified after initializing from -stores=%q", *stores) return } err = s.start(engines, false) defer s.stop() if err != nil { log.Errorf("Cockroach server exited with error: %v", err) return } c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt, os.Kill) // Block until one of the signals above is received. <-c }
func runInit(cmd *commander.Command, args []string) { // Initialize the engine based on the first argument and // then verify it's not in-memory. err := Context.Init() if err != nil { log.Errorf("Failed to initialize context: %v", err) return } e := Context.Engines[0] if _, ok := e.(*engine.InMem); ok { log.Errorf("Cannot initialize a cluster using an in-memory store") return } // Generate a new UUID for cluster ID and bootstrap the cluster. clusterID := uuid.New() localDB, err := server.BootstrapCluster(clusterID, e) if err != nil { log.Errorf("Failed to bootstrap cluster: %v", err) return } // Close localDB and bootstrap engine. localDB.Close() e.Stop() fmt.Printf("Cockroach cluster %s has been initialized\n", clusterID) if Context.BootstrapOnly { fmt.Printf("To start the cluster, run \"cockroach start\"\n") return } runStart(cmd, args) }
func runInit(cmd *commander.Command, args []string) { // Initialize the engine based on the first argument and // then verify it's not in-memory. engines, err := initEngines(*stores) if err != nil { log.Errorf("Failed to initialize engines from -stores=%s: %v", *stores, err) return } if len(engines) == 0 { log.Errorf("No valid engines specified after initializing from -stores=%s", *stores) return } e := engines[0] if _, ok := e.(*engine.InMem); ok { log.Errorf("Cannot initialize a cluster using an in-memory store") return } // Generate a new UUID for cluster ID and bootstrap the cluster. clusterID := uuid.New() localDB, err := BootstrapCluster(clusterID, e) if err != nil { log.Errorf("Failed to bootstrap cluster: %v", err) return } // Close localDB and bootstrap engine. localDB.Close() e.Stop() fmt.Printf("Cockroach cluster %s has been initialized\n", clusterID) if *bootstrapOnly { fmt.Printf("To start the cluster, run \"cockroach start\"\n") return } runStart(cmd, args) }
// computeSplitKeys returns an array of keys at which the supplied // range should be split, as computed by intersecting the range with // accounting and zone config map boundaries. func computeSplitKeys(g *gossip.Gossip, rng *Range) []proto.Key { // Now split the range into pieces by intersecting it with the // boundaries of the config map. splitKeys := proto.KeySlice{} for _, configKey := range []string{gossip.KeyConfigAccounting, gossip.KeyConfigZone} { info, err := g.GetInfo(configKey) if err != nil { log.Errorf("unable to fetch %s config from gossip: %s", configKey, err) continue } configMap := info.(PrefixConfigMap) splits, err := configMap.SplitRangeByPrefixes(rng.Desc().StartKey, rng.Desc().EndKey) if err != nil { log.Errorf("unable to split %s by prefix map %s", rng, configMap) continue } // Gather new splits. for _, split := range splits { if split.end.Less(rng.Desc().EndKey) { splitKeys = append(splitKeys, split.end) } } } // Sort and unique the combined split keys from intersections with // both the accounting and zone config maps. sort.Sort(splitKeys) var unique []proto.Key for i, key := range splitKeys { if i == 0 || !key.Equal(splitKeys[i-1]) { unique = append(unique, key) } } return unique }
// runStart starts the cockroach node using -stores as the list of // storage devices ("stores") on this machine and -gossip as the list // of "well-known" hosts used to join this node to the cockroach // cluster via the gossip network. func runStart(cmd *commander.Command, args []string) { info := util.GetBuildInfo() log.Infof("Build Vers: %s", info.Vers) log.Infof("Build Tag: %s", info.Tag) log.Infof("Build Time: %s", info.Time) log.Infof("Build Deps: %s", info.Deps) log.Info("Starting cockroach cluster") s, err := server.NewServer(Context) if err != nil { log.Errorf("Failed to start Cockroach server: %v", err) return } err = Context.Init() if err != nil { log.Errorf("Failed to initialize context: %v", err) return } err = s.Start(false) defer s.Stop() if err != nil { log.Errorf("Cockroach server exited with error: %v", err) return } c := make(chan os.Signal, 1) signal.Notify(c, os.Interrupt, os.Kill) // Block until one of the signals above is received. <-c }
func (r *Replica) verifyChecksumTrigger( ctx context.Context, args roachpb.VerifyChecksumRequest, ) { id := args.ChecksumID c, ok := r.getChecksum(ctx, id) if !ok { log.Errorf(ctx, "consistency check skipped: checksum for id = %v doesn't exist", id) // Return success because a checksum might be missing only on // this replica. A checksum might be missing because of a // number of reasons: GC-ed, server restart, and ComputeChecksum // version incompatibility. return } if c.checksum != nil && !bytes.Equal(c.checksum, args.Checksum) { // Replication consistency problem! logFunc := log.Errorf // Collect some more debug information. if args.Snapshot == nil { // No debug information; run another consistency check to deliver // more debug information. if err := r.store.stopper.RunAsyncTask(func() { log.Errorf(ctx, "%s: consistency check failed; fetching details", r) desc := r.Desc() startKey := desc.StartKey.AsRawKey() // Can't use a start key less than LocalMax. if bytes.Compare(startKey, keys.LocalMax) < 0 { startKey = keys.LocalMax } if err := r.store.db.CheckConsistency(startKey, desc.EndKey.AsRawKey(), true /* withDiff */); err != nil { log.Errorf(ctx, "couldn't rerun consistency check: %s", err) } }); err != nil { log.Error(ctx, errors.Wrap(err, "could not rerun consistency check")) } } else { // Compute diff. diff := diffRange(args.Snapshot, c.snapshot) if diff != nil { for _, d := range diff { l := "leader" if d.LeaseHolder { l = "replica" } log.Errorf(ctx, "consistency check failed: k:v = (%s (%x), %s, %x) not present on %s", d.Key, d.Key, d.Timestamp, d.Value, l) } } if r.store.ctx.ConsistencyCheckPanicOnFailure { if p := r.store.ctx.TestingKnobs.BadChecksumPanic; p != nil { p(diff) } else { logFunc = log.Fatalf } } } logFunc(ctx, "consistency check failed on replica: %s, checksum mismatch: e = %x, v = %x", args.Checksum, c.checksum) } }
// runStart starts the cockroach node using --stores as the list of // storage devices ("stores") on this machine and --gossip as the list // of "well-known" hosts used to join this node to the cockroach // cluster via the gossip network. func runStart(cmd *cobra.Command, args []string) { info := util.GetBuildInfo() log.Infof("build Vers: %s", info.Vers) log.Infof("build Tag: %s", info.Tag) log.Infof("build Time: %s", info.Time) log.Infof("build Deps: %s", info.Deps) // Default user for servers. Context.User = security.NodeUser // First initialize the Context as it is used in other places. err := Context.Init("start") if err != nil { log.Errorf("failed to initialize context: %s", err) return } log.Info("starting cockroach cluster") stopper := util.NewStopper() stopper.AddWorker() s, err := server.NewServer(Context, stopper) if err != nil { log.Errorf("failed to start Cockroach server: %s", err) return } err = s.Start(false) if err != nil { log.Errorf("cockroach server exited with error: %s", err) return } signalCh := make(chan os.Signal, 1) signal.Notify(signalCh, os.Interrupt, os.Kill) // TODO(spencer): move this behind a build tag. signal.Notify(signalCh, syscall.SIGTERM) // Block until one of the signals above is received or the stopper // is stopped externally (for example, via the quit endpoint). select { case <-stopper.ShouldStop(): stopper.SetStopped() case <-signalCh: log.Infof("initiating graceful shutdown of server") stopper.SetStopped() go func() { s.Stop() }() } select { case <-signalCh: log.Warningf("second signal received, initiating hard shutdown") case <-time.After(time.Minute): log.Warningf("time limit reached, initiating hard shutdown") return case <-stopper.IsStopped(): log.Infof("server drained and shutdown completed") } log.Flush() }
// runGetZone retrieves the zone config for a given object id, // and if present, outputs its YAML representation. // TODO(marc): accept db/table names rather than IDs. func runGetZone(cmd *cobra.Command, args []string) { if len(args) != 1 { mustUsage(cmd) return } id, err := strconv.Atoi(args[0]) if err != nil { log.Errorf("could not parse object ID %s", args[0]) return } db := makeSQLClient() _, rows, err := runQueryWithFormat(db, fmtMap{"config": formatZone}, `SELECT * FROM system.zones WHERE id=$1`, id) if err != nil { log.Error(err) return } if len(rows) == 0 { log.Errorf("Object %d: no zone config found", id) return } fmt.Fprintln(osStdout, rows[0][1]) }
// TestStoreRangeMergeDistantRanges attempts to merge two ranges // that are not not next to each other. func TestStoreRangeMergeDistantRanges(t *testing.T) { store := createTestStore(t) defer store.Stop() // Split into 3 ranges argsSplit, replySplit := adminSplitArgs(engine.KeyMin, []byte("d"), 1, store.StoreID()) if err := store.ExecuteCmd(proto.AdminSplit, argsSplit, replySplit); err != nil { t.Fatalf("Can't split range %s", err) } argsSplit, replySplit = adminSplitArgs(engine.KeyMin, []byte("b"), 1, store.StoreID()) if err := store.ExecuteCmd(proto.AdminSplit, argsSplit, replySplit); err != nil { t.Fatalf("Can't split range %s", err) } rangeA := store.LookupRange([]byte("a"), nil) rangeB := store.LookupRange([]byte("c"), nil) rangeC := store.LookupRange([]byte("e"), nil) if bytes.Equal(rangeA.Desc().StartKey, rangeB.Desc().StartKey) { log.Errorf("split ranges keys are equal %q!=%q", rangeA.Desc().StartKey, rangeB.Desc().StartKey) } if bytes.Equal(rangeB.Desc().StartKey, rangeC.Desc().StartKey) { log.Errorf("split ranges keys are equal %q!=%q", rangeB.Desc().StartKey, rangeC.Desc().StartKey) } if bytes.Equal(rangeA.Desc().StartKey, rangeC.Desc().StartKey) { log.Errorf("split ranges keys are equal %q!=%q", rangeA.Desc().StartKey, rangeC.Desc().StartKey) } argsMerge, replyMerge := adminMergeArgs(rangeC.Desc().StartKey, *rangeC.Desc(), 1, store.StoreID()) rangeA.AdminMerge(argsMerge, replyMerge) if replyMerge.Error == nil { t.Fatal("Should not be able to merge two ranges that are not adjacent.") } }
func runAddNodes(cmd *cobra.Command, args []string) { if len(args) != 1 { cmd.Usage() return } numNodes, err := strconv.Atoi(args[0]) if err != nil || numNodes < 1 { log.Errorf("argument %s must be an integer > 0", args) return } driver, err := NewDriver(Context) if err != nil { log.Errorf("could not create driver: %v", err) return } for i := 1; i <= numNodes; i++ { log.Infof("adding node %d of %d", i, numNodes) err := AddOneNode(driver) if err != nil { log.Errorf("problem adding node: %v", err) return } } }
// computeSplitKeys returns an array of keys at which the supplied // range should be split, as computed by intersecting the range with // zone config map boundaries. func computeSplitKeys(g *gossip.Gossip, repl *Replica) []proto.Key { // Now split the range into pieces by intersecting it with the // boundaries of the config map. configMap, err := repl.rm.Gossip().GetZoneConfig() if err != nil { log.Errorf("unable to fetch zone config from gossip: %s", err) return nil } desc := repl.Desc() splits, err := configMap.SplitRangeByPrefixes(desc.StartKey, desc.EndKey) if err != nil { log.Errorf("unable to split %s by prefix map %s", repl, configMap) return nil } // Gather new splits. var splitKeys proto.KeySlice for _, split := range splits { if split.End.Less(desc.EndKey) { splitKeys = append(splitKeys, split.End) } } // Sort and unique the combined split keys from intersections with // the zone config maps. sort.Sort(splitKeys) var unique []proto.Key for i, key := range splitKeys { if i == 0 || !key.Equal(splitKeys[i-1]) { unique = append(unique, key) } } return unique }
// writeSummaries retrieves status summaries from the supplied // NodeStatusRecorder and persists them to the cockroach data store. func (s *Server) writeSummaries() error { nodeStatus, storeStatuses := s.recorder.GetStatusSummaries() if nodeStatus != nil { key := keys.NodeStatusKey(int32(nodeStatus.Desc.NodeID)) if err := s.db.Put(key, nodeStatus); err != nil { return err } if log.V(1) { statusJSON, err := json.Marshal(nodeStatus) if err != nil { log.Errorf("error marshaling nodeStatus to json: %s", err) } log.Infof("node %d status: %s", nodeStatus.Desc.NodeID, statusJSON) } } for _, ss := range storeStatuses { key := keys.StoreStatusKey(int32(ss.Desc.StoreID)) if err := s.db.Put(key, &ss); err != nil { return err } if log.V(1) { statusJSON, err := json.Marshal(&ss) if err != nil { log.Errorf("error marshaling storeStatus to json: %s", err) } log.Infof("store %d status: %s", ss.Desc.StoreID, statusJSON) } } return nil }
// runExterminate destroys the data held in the specified stores. func runExterminate(cmd *cobra.Command, args []string) { if err := context.InitStores(); err != nil { log.Errorf("failed to initialize context: %s", err) return } // First attempt to shutdown the server. Note that an error of EOF just // means the HTTP server shutdown before the request to quit returned. admin := client.NewAdminClient(&context.Context, context.Addr, client.Quit) body, err := admin.Get() if err != nil { log.Infof("shutdown node %s: %s", context.Addr, err) } else { log.Infof("shutdown node in anticipation of data extermination: %s", body) } // Exterminate all data held in specified stores. for _, e := range context.Engines { if rocksdb, ok := e.(*engine.RocksDB); ok { log.Infof("exterminating data from store %s", e) if err := rocksdb.Destroy(); err != nil { log.Errorf("unable to destroy store %s: %s", e, err) osExit(1) } } } log.Infof("exterminated all data from stores %s", context.Engines) }
// runInit. func runInit(cmd *commander.Command, args []string) { if len(args) != 1 { cmd.Usage() return } // Initialize the engine based on the first argument and // then verify it's not in-memory. engines, err := initEngines(args[0]) if err != nil { log.Errorf("Failed to initialize engine %q: %v", args[0], err) return } e := engines[0] if _, ok := e.(*engine.InMem); ok { log.Errorf("Cannot initialize a cluster using an in-memory store") return } // Generate a new UUID for cluster ID and bootstrap the cluster. clusterID := uuid.New() localDB, err := BootstrapCluster(clusterID, e) if err != nil { log.Errorf("Failed to bootstrap cluster: %v", err) return } defer localDB.Close() fmt.Fprintf(os.Stdout, "Cockroach cluster %s has been initialized\n", clusterID) fmt.Fprintf(os.Stdout, "To start the cluster, run \"cockroach start\"\n") }
// Filter makes decisions about garbage collection based on the // garbage collection policy for batches of values for the same key. // The GC policy is determined via the policyFn specified when the // GarbageCollector was created. Returns a slice of deletions, one // per incoming keys. If an index in the returned array is set to // true, then that value will be garbage collected. func (gc *GarbageCollector) Filter(keys []Key, values [][]byte) []bool { if len(keys) == 1 { return nil } // Look up the policy which applies to this set of MVCC values. _, decKey := encoding.DecodeBinary(keys[0]) policy := gc.policyFn(decKey) if policy == nil || policy.TTLSeconds <= 0 { return nil } toDelete := make([]bool, len(keys)) expiration := gc.now expiration.WallTime -= int64(policy.TTLSeconds) * 1E9 var survivors bool for i, key := range keys { _, ts, isValue := mvccDecodeKey(key) if i == 0 { if isValue { log.Errorf("unexpected MVCC value encountered: %q", key) return make([]bool, len(keys)) } continue } if !isValue { log.Errorf("unexpected MVCC metadata encountered: %q", key) return make([]bool, len(keys)) } mvccVal := proto.MVCCValue{} if err := gogoproto.Unmarshal(values[i], &mvccVal); err != nil { log.Errorf("unable to unmarshal MVCC value %q: %v", key, err) return make([]bool, len(keys)) } if i == 1 { // If the first value isn't a deletion tombstone, set survivors to true. if !mvccVal.Deleted { survivors = true } } else { if ts.Less(expiration) { // If we encounter a version older than our GC timestamp, mark for deletion. toDelete[i] = true } else if !mvccVal.Deleted { // Otherwise, if not marked for GC and not a tombstone, set survivors true. survivors = true } } } // If there are no remaining non-deleted, versioned entries, mark // all keys for deletion, including the MVCC metadata entry. if !survivors { for i := range keys { toDelete[i] = true } } return toDelete }
// Filter makes decisions about garbage collection based on the // garbage collection policy for batches of values for the same key. // Returns the timestamp including, and after which, all values should // be garbage collected. If no values should be GC'd, returns // roachpb.ZeroTimestamp. func (gc *GarbageCollector) Filter(keys []MVCCKey, values [][]byte) roachpb.Timestamp { if gc.policy.TTLSeconds <= 0 { return roachpb.ZeroTimestamp } if len(keys) == 0 { return roachpb.ZeroTimestamp } // Loop over values. All should be MVCC versions. delTS := roachpb.ZeroTimestamp survivors := false for i, key := range keys { _, ts, isValue, err := MVCCDecodeKey(key) if err != nil { log.Errorf("unable to decode MVCC key: %q: %v", key, err) return roachpb.ZeroTimestamp } if !isValue { log.Errorf("unexpected MVCC metadata encountered: %q", key) return roachpb.ZeroTimestamp } mvccVal := MVCCValue{} if err := proto.Unmarshal(values[i], &mvccVal); err != nil { log.Errorf("unable to unmarshal MVCC value %q: %v", key, err) return roachpb.ZeroTimestamp } if i == 0 { // If the first value isn't a deletion tombstone, don't consider // it for GC. It should always survive if non-deleted. if !mvccVal.Deleted { survivors = true continue } } // If we encounter a version older than our GC timestamp, mark for deletion. if ts.Less(gc.expiration) { delTS = ts break } else if !mvccVal.Deleted { survivors = true } } // If there are no non-deleted survivors, return timestamp of first key // to delete all entries. if !survivors { _, ts, _, err := MVCCDecodeKey(keys[0]) if err != nil { // TODO(tschottdorf): Perhaps we should be propagating an error // (e.g. ReplicaCorruptionError) up to the caller. log.Errorf("unable to decode MVCC key: %q: %v", keys[0], err) return roachpb.ZeroTimestamp } return ts } return delTS }
func runInit(cmd *cobra.Command, args []string) { driver, err := NewDriver(Context) if err != nil { log.Errorf("could not create driver: %v", err) return } nodes, err := docker.ListCockroachNodes() if err != nil { log.Errorf("failed to get list of existing cockroach nodes: %v", err) return } if len(nodes) != 0 { log.Errorf("init called but docker-machine has %d existing cockroach nodes: %v", len(nodes), nodes) return } nodeName := docker.MakeNodeName(0) // Create first node. err = docker.CreateMachine(driver, nodeName) if err != nil { log.Errorf("could not create machine %s: %v", nodeName, err) return } // Run driver steps after first-node creation. err = driver.AfterFirstNode() if err != nil { log.Errorf("could not run AfterFirstNode steps for: %v", err) return } // Lookup node info. nodeConfig, err := driver.GetNodeConfig(nodeName) if err != nil { log.Errorf("could not get node config for %s: %v", nodeName, err) return } // Initialize cockroach node. err = docker.RunDockerInit(driver, nodeName, nodeConfig) if err != nil { log.Errorf("could not initialize first cockroach node %s: %v", nodeName, err) return } // Do "start node" logic. err = driver.StartNode(nodeName, nodeConfig) if err != nil { log.Errorf("could not run StartNode steps for %s: %v", nodeName, err) return } // Start the cockroach node. err = docker.RunDockerStart(driver, nodeName, nodeConfig) if err != nil { log.Errorf("could not initialize first cockroach node %s: %v", nodeName, err) } }
// processEvent handles a single docker event (or monitoring error) while
// holding l.mu. It returns true only when the event belongs to one of the
// cluster's nodes, in which case the event is forwarded on l.events.
//
// A monitoring error forwards an eventDie event with NodeIndex -1 and
// returns false. "pull" events are ignored. An event for any other
// container closes l.stopper, unless l.stopper or monitorStopper is already
// readable, after copying that container's logs to stderr.
func (l *LocalCluster) processEvent(e dockerclient.EventOrError, monitorStopper chan struct{}) bool {
	l.mu.Lock()
	defer l.mu.Unlock()

	if e.Error != nil {
		log.Errorf("monitoring error: %s", e.Error)
		// NodeIndex -1 is used because the error is not tied to a node.
		l.events <- Event{NodeIndex: -1, Status: eventDie}
		return false
	}

	// Statuses that are ignored outright.
	switch e.Status {
	case "pull":
		return false
	}

	// Forward the event if it belongs to one of our node containers.
	for i, n := range l.Nodes {
		if n != nil && n.ID == e.Id {
			// NOTE(review): this verbose-only message is emitted at error
			// level — confirm that is intentional.
			if log.V(1) {
				log.Errorf("node=%d status=%s", i, e.Status)
			}
			l.events <- Event{NodeIndex: i, Status: e.Status}
			return true
		}
	}

	// TODO(pmattis): When we add the ability to start/stop/restart nodes we'll
	// need to keep around a map of old node container ids in order to ignore
	// events on those containers.

	// An event on any other container is unexpected. Die.
	select {
	case <-l.stopper:
		// Already stopping; nothing more to do.
	case <-monitorStopper:
		// The monitor is being stopped; nothing more to do.
	default:
		// There is a very tiny race here: the signal handler might be closing the
		// stopper simultaneously.
		log.Errorf("stopping due to unexpected event: %+v", e)
		// Best effort: dump the unexpected container's output to stderr. A
		// failure to fetch the logs is silently skipped.
		if r, err := l.client.ContainerLogs(e.Id, &dockerclient.LogOptions{
			Stdout: true,
			Stderr: true,
		}); err == nil {
			if _, err := io.Copy(os.Stderr, r); err != nil {
				log.Infof("error listing logs: %s", err)
			}
			r.Close()
		}
		close(l.stopper)
	}
	return false
}
// runRmConfig invokes the REST API with DELETE action and key prefix as path. // The type of config that is removed is based on the passed in prefix. func runRmConfig(ctx *Context, prefix, keyPrefix string) { friendlyName := getFriendlyNameFromPrefix(prefix) req, err := http.NewRequest("DELETE", fmt.Sprintf("%s://%s%s/%s", ctx.RequestScheme(), ctx.Addr, prefix, keyPrefix), nil) if err != nil { log.Errorf("unable to create request to admin REST endpoint: %s", err) return } _, err = sendAdminRequest(ctx, req) if err != nil { log.Errorf("admin REST request failed: %s", err) return } fmt.Fprintf(os.Stdout, "removed %s config for key prefix %q\n", friendlyName, keyPrefix) }
// runGetConfig invokes the REST API with GET action and key prefix as path. func runGetConfig(ctx *Context, prefix, keyPrefix string) { friendlyName := getFriendlyNameFromPrefix(prefix) req, err := http.NewRequest("GET", fmt.Sprintf("%s://%s%s/%s", ctx.RequestScheme(), ctx.Addr, prefix, keyPrefix), nil) if err != nil { log.Errorf("unable to create request to admin REST endpoint: %s", err) return } req.Header.Add(util.AcceptHeader, util.YAMLContentType) b, err := sendAdminRequest(ctx, req) if err != nil { log.Errorf("admin REST request failed: %s", err) return } fmt.Fprintf(os.Stdout, "%s config for key prefix %q:\n%s\n", friendlyName, keyPrefix, string(b)) }
// Cleanup cleans up the transaction as appropriate based on err. func (txn *Txn) Cleanup(err error) { if err != nil { if replyErr := txn.Rollback(); replyErr != nil { log.Errorf("failure aborting transaction: %s; abort caused by: %s", replyErr, err) } } }
func (bq *baseQueue) processOne(clock *hlc.Clock) { start := time.Now() bq.Lock() repl := bq.pop() bq.Unlock() if repl != nil { now := clock.Now() if log.V(1) { log.Infof("processing replica %s from %s queue...", repl, bq.name) } // If the queue requires a replica to have the range leader lease in // order to be processed, check whether this replica has leader lease // and renew or acquire if necessary. if bq.impl.needsLeaderLease() { // Create a "fake" get request in order to invoke redirectOnOrAcquireLease. args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}} if err := repl.redirectOnOrAcquireLeaderLease(nil /* Trace */, args.Header().Timestamp); err != nil { if log.V(1) { log.Infof("this replica of %s could not acquire leader lease; skipping...", repl) } return } } if err := bq.impl.process(now, repl); err != nil { log.Errorf("failure processing replica %s from %s queue: %s", repl, bq.name, err) } else if log.V(2) { log.Infof("processed replica %s from %s queue in %s", repl, bq.name, time.Now().Sub(start)) } } }
// ChangeGroupMembership submits a proposed membership change to the cluster. // Payload is an opaque blob that will be returned in EventMembershipChangeCommitted. func (m *MultiRaft) ChangeGroupMembership(groupID proto.RaftID, commandID string, changeType raftpb.ConfChangeType, nodeID proto.RaftNodeID, payload []byte) <-chan error { if log.V(6) { log.Infof("node %v proposing membership change to group %v", m.nodeID, groupID) } ch := make(chan error, 1) m.proposalChan <- &proposal{ groupID: groupID, commandID: commandID, fn: func() { if err := m.multiNode.ProposeConfChange(context.Background(), uint64(groupID), raftpb.ConfChange{ Type: changeType, NodeID: uint64(nodeID), Context: encodeCommand(commandID, payload), }, ); err != nil { log.Errorf("node %v: error proposing membership change to node %v: %s", m.nodeID, groupID, err) } }, ch: ch, } return ch }
// start dials the remote addr and commences gossip once connected. Upon exit, // the client is sent on the disconnected channel. This method starts client // processing in a goroutine and returns immediately. func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) { stopper.RunWorker(func() { defer func() { disconnected <- c }() // Note: avoid using `grpc.WithBlock` here. This code is already // asynchronous from the caller's perspective, so the only effect of // `WithBlock` here is blocking shutdown - at the time of this writing, // that ends ups up making `kv` tests take twice as long. conn, err := ctx.GRPCDial(c.addr.String()) if err != nil { log.Errorf("failed to dial: %v", err) return } // Start gossiping. if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil { if !grpcutil.IsClosedConnection(err) { g.mu.Lock() peerID := c.peerID g.mu.Unlock() if peerID != 0 { log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err) } else { log.Infof("closing client to %s: %s", c.addr, err) } } } }) }
// runExterminate destroys the data held in the specified stores. func runExterminate(cmd *cobra.Command, args []string) { err := Context.Init("exterminate") if err != nil { log.Errorf("failed to initialize context: %s", err) return } // First attempt to shutdown the server. Note that an error of EOF just // means the HTTP server shutdown before the request to quit returned. if err := server.SendQuit(Context); err != nil { log.Infof("shutdown node %s: %s", Context.Addr, err) } else { log.Infof("shutdown node in anticipation of data extermination") } // Exterminate all data held in specified stores. for _, e := range Context.Engines { if rocksdb, ok := e.(*engine.RocksDB); ok { log.Infof("exterminating data from store %s", e) if err := rocksdb.Destroy(); err != nil { log.Fatalf("unable to destroy store %s: %s", e, err) } } } log.Infof("exterminated all data from stores %s", Context.Engines) }
// shouldQueue determines whether a replica should be queued for garbage // collection, and if so, at what priority. Returns true for shouldQ // in the event that the cumulative ages of GC'able bytes or extant // intents exceed thresholds. func (gcq *gcQueue) shouldQueue(now roachpb.Timestamp, repl *Replica, sysCfg *config.SystemConfig) (shouldQ bool, priority float64) { desc := repl.Desc() zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey) if err != nil { log.Errorf("could not find GC policy for range %s: %s", repl, err) return } policy := zone.GC // GC score is the total GC'able bytes age normalized by 1 MB * the replica's TTL in seconds. gcScore := float64(repl.stats.GetGCBytesAge(now.WallTime)) / float64(policy.TTLSeconds) / float64(gcByteCountNormalization) // Intent score. This computes the average age of outstanding intents // and normalizes. intentScore := repl.stats.GetAvgIntentAge(now.WallTime) / float64(intentAgeNormalization.Nanoseconds()/1E9) // Compute priority. if gcScore > 1 { priority += gcScore } if intentScore > 1 { priority += intentScore } shouldQ = priority > 0 return }
func (g *Gossip) doCheckTimeout(stopper *stop.Stopper) { g.mu.Lock() defer g.mu.Unlock() // Check whether the graph needs to be tightened to // accommodate distant infos. distant := g.filterExtant(g.is.distant(g.maxToleratedHops())) if distant.len() > 0 { // If we have space, start a client immediately. if g.outgoing.hasSpace() { nodeID := distant.selectRandom() if nodeAddr, err := g.getNodeIDAddressLocked(nodeID); err != nil { log.Errorf("node %d: %s", nodeID, err) } else { g.startClient(nodeAddr, g.RPCContext, stopper) } } else { // Otherwise, find least useful peer and close it. Make sure // here that we only consider outgoing clients which are // connected. nodeID := g.is.leastUseful(g.outgoing) if nodeID != 0 { log.Infof("closing least useful client %d to tighten network graph", nodeID) g.closeClient(nodeID) } } } g.maybeSignalStalledLocked() }
// getNextBootstrapAddress returns the next available bootstrap // address by consulting the first non-exhausted resolver from the // slice supplied to the constructor or set using setBootstrap(). // The lock is assumed held. func (g *Gossip) getNextBootstrapAddress() net.Addr { if len(g.resolvers) == 0 { log.Fatalf("no resolvers specified for gossip network") } // Run through resolvers round robin starting at last resolved index. for i := 0; i < len(g.resolvers); i++ { g.resolverIdx = (g.resolverIdx + 1) % len(g.resolvers) if g.resolverIdx == len(g.resolvers)-1 { g.triedAll = true } resolver := g.resolvers[g.resolverIdx] addr, err := resolver.GetAddress() if err != nil { log.Errorf("invalid bootstrap address: %+v, %v", resolver, err) continue } else if addr.String() == g.is.NodeAddr.String() { // Skip our own node address. continue } _, addrActive := g.bootstrapping[addr.String()] if !resolver.IsExhausted() || !addrActive { g.bootstrapping[addr.String()] = struct{}{} return addr } } return nil }