// processEvent processes a single docker event for the cluster's containers.
// It returns true if the event belongs to a known container (including an
// expected "die" from the current oneshot container), and false if the event
// is unexpected, in which case the cluster is shut down via l.stopper.
func (l *LocalCluster) processEvent(event events.Message) bool {
	l.mu.Lock()
	defer l.mu.Unlock()

	// If there's currently a oneshot container, ignore any die messages from
	// it because those are expected.
	if l.oneshot != nil && event.ID == l.oneshot.id && event.Status == eventDie {
		return true
	}

	for i, n := range l.Nodes {
		if n != nil && n.id == event.ID {
			if log.V(1) {
				log.Errorf(context.Background(), "node=%d status=%s", i, event.Status)
			}
			// Non-blocking send: the events channel is expected to be
			// drained by the consumer; a full channel indicates a bug.
			select {
			case l.events <- Event{NodeIndex: i, Status: event.Status}:
			default:
				panic("events channel filled up")
			}
			return true
		}
	}

	log.Infof(context.Background(), "received docker event for unrecognized container: %+v",
		event)

	// An event on any other container is unexpected. Die.
	select {
	case <-l.stopper:
	case <-l.monitorCtx.Done():
	default:
		// There is a very tiny race here: the signal handler might be closing the
		// stopper simultaneously.
		log.Errorf(context.Background(), "stopping due to unexpected event: %+v", event)
		// Best effort: dump the offending container's logs to stderr before
		// shutting down, to aid debugging.
		if rc, err := l.client.ContainerLogs(context.Background(), event.Actor.ID, types.ContainerLogsOptions{
			ShowStdout: true,
			ShowStderr: true,
		}); err == nil {
			defer rc.Close()
			if _, err := io.Copy(os.Stderr, rc); err != nil {
				log.Infof(context.Background(), "error listing logs: %s", err)
			}
		}
		close(l.stopper)
	}
	return false
}
// OneShot runs a container, expecting it to successfully run to completion // and die, after which it is removed. Not goroutine safe: only one OneShot // can be running at once. // Adds the same binds as the cluster containers (certs, binary, etc). func (l *LocalCluster) OneShot( ctx context.Context, ref string, ipo types.ImagePullOptions, containerConfig container.Config, hostConfig container.HostConfig, name string, ) error { if err := pullImage(ctx, l, ref, ipo); err != nil { return err } hostConfig.VolumesFrom = []string{l.vols.id} container, err := createContainer(ctx, l, containerConfig, hostConfig, name) if err != nil { return err } l.oneshot = container defer func() { if err := l.oneshot.Remove(ctx); err != nil { log.Errorf(ctx, "ContainerRemove: %s", err) } l.oneshot = nil }() if err := l.oneshot.Start(ctx); err != nil { return err } if err := l.oneshot.Wait(ctx); err != nil { return err } return nil }
// shouldQueue determines whether a replica should be queued for garbage
// collection, and if so, at what priority. Returns true for shouldQ
// in the event that the cumulative ages of GC'able bytes or extant
// intents exceed thresholds.
func (gcq *gcQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) (shouldQ bool, priority float64) {
	desc := repl.Desc()
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		// Without a zone config we can't normalize by the TTL; log and
		// return the zero values (i.e. don't queue).
		log.Errorf(ctx, "could not find zone config for range %s: %s", repl, err)
		return
	}

	ms := repl.GetMVCCStats()
	// GC score is the total GC'able bytes age normalized by 1 MB * the replica's TTL in seconds.
	gcScore := float64(ms.GCByteAge(now.WallTime)) / float64(zone.GC.TTLSeconds) / float64(gcByteCountNormalization)

	// Intent score. This computes the average age of outstanding intents
	// and normalizes.
	intentScore := ms.AvgIntentAge(now.WallTime) / float64(intentAgeNormalization.Nanoseconds()/1E9)

	// Compute priority. Each score contributes only once it crosses its
	// consideration threshold; contributions are additive.
	if gcScore >= considerThreshold {
		priority += gcScore
	}
	if intentScore >= considerThreshold {
		priority += intentScore
	}
	shouldQ = priority > 0
	return
}
// WriteStatusSummary generates a summary and immediately writes it to the given // client. func (mr *MetricsRecorder) WriteStatusSummary(ctx context.Context, db *client.DB) error { mr.writeSummaryMu.Lock() defer mr.writeSummaryMu.Unlock() nodeStatus := mr.GetStatusSummary() if nodeStatus != nil { key := keys.NodeStatusKey(nodeStatus.Desc.NodeID) // We use PutInline to store only a single version of the node status. // There's not much point in keeping the historical versions as we keep // all of the constituent data as timeseries. Further, due to the size // of the build info in the node status, writing one of these every 10s // will generate more versions than will easily fit into a range over // the course of a day. if err := db.PutInline(ctx, key, nodeStatus); err != nil { return err } if log.V(2) { statusJSON, err := json.Marshal(nodeStatus) if err != nil { log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err) } log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON) } } return nil }
// AddReplicas adds replicas for a range on a set of stores. // It's illegal to have multiple replicas of the same range on stores of a single // node. // The method blocks until a snapshot of the range has been copied to all the // new replicas and the new replicas become part of the Raft group. func (tc *TestCluster) AddReplicas( startKey roachpb.Key, targets ...ReplicationTarget, ) (*roachpb.RangeDescriptor, error) { rKey := keys.MustAddr(startKey) rangeDesc, err := tc.changeReplicas( roachpb.ADD_REPLICA, rKey, targets..., ) if err != nil { return nil, err } // Wait for the replication to complete on all destination nodes. if err := util.RetryForDuration(time.Second*5, func() error { for _, target := range targets { // Use LookupReplica(keys) instead of GetRange(rangeID) to ensure that the // snapshot has been transferred and the descriptor initialized. store, err := tc.findMemberStore(target.StoreID) if err != nil { log.Errorf(context.TODO(), "unexpected error: %s", err) return err } if store.LookupReplica(rKey, nil) == nil { return errors.Errorf("range not found on store %d", target) } } return nil }); err != nil { return nil, err } return rangeDesc, nil }
// shouldQueue determines whether a replica should be queued for GC,
// and if so at what priority. To be considered for possible GC, a
// replica's range lease must not have been active for longer than
// ReplicaGCQueueInactivityThreshold. Further, the last replica GC
// check must have occurred more than ReplicaGCQueueInactivityThreshold
// in the past.
func (q *replicaGCQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, rng *Replica, _ config.SystemConfig,
) (bool, float64) {
	lastCheck, err := rng.getLastReplicaGCTimestamp(ctx)
	if err != nil {
		log.Errorf(ctx, "could not read last replica GC timestamp: %s", err)
		return false, 0
	}
	// Start from the store's start time as a floor for "last activity",
	// then push it forward to cover any current or pending lease.
	lastActivity := hlc.ZeroTimestamp.Add(rng.store.startedAt, 0)

	lease, nextLease := rng.getLease()
	if lease != nil {
		lastActivity.Forward(lease.Expiration)
	}
	if nextLease != nil {
		lastActivity.Forward(nextLease.Expiration)
	}

	// A replica currently campaigning (candidate) is treated specially by
	// the queueing heuristic.
	var isCandidate bool
	if raftStatus := rng.RaftStatus(); raftStatus != nil {
		isCandidate = (raftStatus.SoftState.RaftState == raft.StateCandidate)
	}
	return replicaGCShouldQueueImpl(now, lastCheck, lastActivity, isCandidate)
}
// GetSnapshot returns a snapshot of the replica appropriate for sending to a
// replica. If this method returns without error, callers must eventually call
// OutgoingSnapshot.Close.
func (r *Replica) GetSnapshot(ctx context.Context, snapType string) (*OutgoingSnapshot, error) {
	// The replica lock is held for the duration so that the descriptor and
	// stats read below are consistent with the engine snapshot taken.
	r.mu.Lock()
	defer r.mu.Unlock()
	rangeID := r.RangeID

	// Refuse to snapshot replicas that have grown past twice the split size;
	// such a snapshot would be prohibitively large to send.
	if r.exceedsDoubleSplitSizeLocked() {
		maxBytes := r.mu.maxBytes
		size := r.mu.state.Stats.Total()
		err := errors.Errorf(
			"%s: not generating %s snapshot because replica is too large: %d > 2 * %d",
			r, snapType, size, maxBytes)
		return &OutgoingSnapshot{}, err
	}

	startKey := r.mu.state.Desc.StartKey
	ctx, sp := r.AnnotateCtxWithSpan(ctx, "snapshot")
	defer sp.Finish()
	snap := r.store.NewSnapshot()
	log.Eventf(ctx, "new engine snapshot for replica %s", r)

	// Delegate to a static function to make sure that we do not depend
	// on any indirect calls to r.store.Engine() (or other in-memory
	// state of the Replica). Everything must come from the snapshot.
	snapData, err := snapshot(ctx, snapType, snap, rangeID, r.store.raftEntryCache, startKey)
	if err != nil {
		log.Errorf(ctx, "error generating snapshot: %s", err)
		return nil, err
	}
	return &snapData, nil
}
// flush sends the rows accumulated so far in a StreamMessage. func (m *outbox) flush(last bool, err error) error { if !last && m.numRows == 0 { return nil } msg := m.encoder.FormMessage(last, err) if log.V(3) { log.Infof(m.flowCtx.Context, "flushing outbox") } var sendErr error if m.stream != nil { sendErr = m.stream.Send(msg) } else { sendErr = m.syncFlowStream.Send(msg) } if sendErr != nil { if log.V(1) { log.Errorf(m.flowCtx.Context, "outbox flush error: %s", sendErr) } } else if log.V(3) { log.Infof(m.flowCtx.Context, "outbox flushed") } if sendErr != nil { return sendErr } m.numRows = 0 return nil }
// writeSummaries retrieves status summaries from the supplied
// NodeStatusRecorder and persists them to the cockroach data store.
func (n *Node) writeSummaries(ctx context.Context) error {
	var err error
	if runErr := n.stopper.RunTask(func() {
		nodeStatus := n.recorder.GetStatusSummary()
		if nodeStatus != nil {
			key := keys.NodeStatusKey(nodeStatus.Desc.NodeID)
			// We use PutInline to store only a single version of the node
			// status. There's not much point in keeping the historical
			// versions as we keep all of the constituent data as
			// timeseries. Further, due to the size of the build info in the
			// node status, writing one of these every 10s will generate
			// more versions than will easily fit into a range over the
			// course of a day.
			if err = n.storeCfg.DB.PutInline(ctx, key, nodeStatus); err != nil {
				return
			}
			if log.V(2) {
				// NOTE: this inner err deliberately shadows the outer err;
				// a marshaling failure is only logged, never returned.
				statusJSON, err := json.Marshal(nodeStatus)
				if err != nil {
					log.Errorf(ctx, "error marshaling nodeStatus to json: %s", err)
				}
				log.Infof(ctx, "node %d status: %s", nodeStatus.Desc.NodeID, statusJSON)
			}
		}
	}); runErr != nil {
		err = runErr
	}
	return err
}
// TestStoreRangeMergeNonCollocated attempts to merge two ranges // that are not on the same stores. func TestStoreRangeMergeNonCollocated(t *testing.T) { defer leaktest.AfterTest(t)() mtc := startMultiTestContext(t, 4) defer mtc.Stop() store := mtc.stores[0] // Split into 3 ranges argsSplit := adminSplitArgs(roachpb.KeyMin, []byte("d")) if _, pErr := client.SendWrapped(context.Background(), rg1(store), &argsSplit); pErr != nil { t.Fatalf("Can't split range %s", pErr) } argsSplit = adminSplitArgs(roachpb.KeyMin, []byte("b")) if _, pErr := client.SendWrapped(context.Background(), rg1(store), &argsSplit); pErr != nil { t.Fatalf("Can't split range %s", pErr) } rangeA := store.LookupReplica([]byte("a"), nil) rangeADesc := rangeA.Desc() rangeB := store.LookupReplica([]byte("c"), nil) rangeBDesc := rangeB.Desc() rangeC := store.LookupReplica([]byte("e"), nil) rangeCDesc := rangeC.Desc() if bytes.Equal(rangeADesc.StartKey, rangeBDesc.StartKey) { log.Errorf(context.TODO(), "split ranges keys are equal %q!=%q", rangeADesc.StartKey, rangeBDesc.StartKey) } if bytes.Equal(rangeBDesc.StartKey, rangeCDesc.StartKey) { log.Errorf(context.TODO(), "split ranges keys are equal %q!=%q", rangeBDesc.StartKey, rangeCDesc.StartKey) } if bytes.Equal(rangeADesc.StartKey, rangeCDesc.StartKey) { log.Errorf(context.TODO(), "split ranges keys are equal %q!=%q", rangeADesc.StartKey, rangeCDesc.StartKey) } // Replicate the ranges to different sets of stores. Ranges A and C // are collocated, but B is different. mtc.replicateRange(rangeA.RangeID, 1, 2) mtc.replicateRange(rangeB.RangeID, 1, 3) mtc.replicateRange(rangeC.RangeID, 1, 2) // Attempt to merge. rangeADesc = rangeA.Desc() argsMerge := adminMergeArgs(roachpb.Key(rangeADesc.StartKey)) if _, pErr := rangeA.AdminMerge(context.Background(), argsMerge, rangeADesc); !testutils.IsPError(pErr, "ranges not collocated") { t.Fatalf("did not got expected error; got %s", pErr) } }
// Start starts a node. func (n *Node) Start() { n.Lock() defer n.Unlock() if n.cmd != nil { return } n.cmd = exec.Command(n.args[0], n.args[1:]...) n.cmd.Env = os.Environ() n.cmd.Env = append(n.cmd.Env, n.env...) stdoutPath := filepath.Join(n.logDir, "stdout") stdout, err := os.OpenFile(stdoutPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf(context.Background(), "unable to open file %s: %s", stdoutPath, err) } n.cmd.Stdout = stdout stderrPath := filepath.Join(n.logDir, "stderr") stderr, err := os.OpenFile(stderrPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf(context.Background(), "unable to open file %s: %s", stderrPath, err) } n.cmd.Stderr = stderr err = n.cmd.Start() if n.cmd.Process != nil { log.Infof(context.Background(), "process %d started: %s", n.cmd.Process.Pid, strings.Join(n.args, " ")) } if err != nil { log.Infof(context.Background(), "%v", err) _ = stdout.Close() _ = stderr.Close() return } go func(cmd *exec.Cmd) { if err := cmd.Wait(); err != nil { log.Errorf(context.Background(), "waiting for command: %v", err) } _ = stdout.Close() _ = stderr.Close() ps := cmd.ProcessState sy := ps.Sys().(syscall.WaitStatus) log.Infof(context.Background(), "Process %d exited with status %d", ps.Pid(), sy.ExitStatus()) log.Infof(context.Background(), ps.String()) n.Lock() n.cmd = nil n.Unlock() }(n.cmd) }
// CleanupOnError cleans up the transaction as a result of an error. func (txn *Txn) CleanupOnError(err error) { if err == nil { panic("no error") } if replyErr := txn.Rollback(); replyErr != nil { log.Errorf(txn.Context, "failure aborting transaction: %s; abort caused by: %s", replyErr, err) } }
// GetStatusSummary returns a status summary messages for the node. The summary
// includes the recent values of metrics for both the node and all of its
// component stores. Returns nil if the recorder has not yet been initialized
// with a node registry.
func (mr *MetricsRecorder) GetStatusSummary() *NodeStatus {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "attempt to generate status summary before NodeID allocation.")
		}
		return nil
	}

	now := mr.mu.clock.PhysicalNow()

	// Generate an node status with no store data. The last*Count values from
	// the previous run serve purely as capacity hints for the allocations.
	nodeStat := &NodeStatus{
		Desc:          mr.mu.desc,
		BuildInfo:     build.GetInfo(),
		UpdatedAt:     now,
		StartedAt:     mr.mu.startedAt,
		StoreStatuses: make([]StoreStatus, 0, mr.mu.lastSummaryCount),
		Metrics:       make(map[string]float64, mr.mu.lastNodeMetricCount),
	}

	eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) {
		nodeStat.Metrics[name] = val
	})

	// Generate status summaries for stores.
	for storeID, r := range mr.mu.storeRegistries {
		storeMetrics := make(map[string]float64, mr.mu.lastStoreMetricCount)
		eachRecordableValue(r, func(name string, val float64) {
			storeMetrics[name] = val
		})

		// Gather descriptor from store.
		descriptor, err := mr.mu.stores[storeID].Descriptor()
		if err != nil {
			// Skip this store but keep summarizing the others.
			log.Errorf(context.TODO(), "Could not record status summaries: Store %d could not return descriptor, error: %s", storeID, err)
			continue
		}

		nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, StoreStatus{
			Desc:    *descriptor,
			Metrics: storeMetrics,
		})
	}

	// Remember the sizes so the next summary can pre-size its allocations.
	mr.mu.lastSummaryCount = len(nodeStat.StoreStatuses)
	mr.mu.lastNodeMetricCount = len(nodeStat.Metrics)
	if len(nodeStat.StoreStatuses) > 0 {
		mr.mu.lastStoreMetricCount = len(nodeStat.StoreStatuses[0].Metrics)
	}

	return nodeStat
}
// DrainQueue locks the queue and processes the remaining queued replicas. It // processes the replicas in the order they're queued in, one at a time. // Exposed for testing only. func (bq *baseQueue) DrainQueue(clock *hlc.Clock) { ctx := bq.AnnotateCtx(context.TODO()) repl := bq.pop() for repl != nil { if err := bq.processReplica(ctx, repl, clock); err != nil { bq.failures.Inc(1) log.Errorf(ctx, "failed processing replica %s: %s", repl, err) } repl = bq.pop() } }
// EnvOrDefaultBytes returns the value set by the specified environment // variable, if any, otherwise the specified default value. func EnvOrDefaultBytes(name string, value int64) int64 { if str, present := getEnv(name, 1); present { v, err := humanizeutil.ParseBytes(str) if err != nil { log.Errorf(context.Background(), "error parsing %s: %s", name, err) return value } return v } return value }
// EnvOrDefaultDuration returns the value set by the specified environment // variable, if any, otherwise the specified default value. func EnvOrDefaultDuration(name string, value time.Duration) time.Duration { if str, present := getEnv(name, 1); present { v, err := time.ParseDuration(str) if err != nil { log.Errorf(context.Background(), "error parsing %s: %s", name, err) return value } return v } return value }
// EnvOrDefaultInt returns the value set by the specified environment // variable, if any, otherwise the specified default value. func EnvOrDefaultInt(name string, value int) int { if str, present := getEnv(name, 1); present { v, err := strconv.ParseInt(str, 0, 0) if err != nil { log.Errorf(context.Background(), "error parsing %s: %s", name, err) return value } return int(v) } return value }
// EnvOrDefaultFloat returns the value set by the specified environment // variable, if any, otherwise the specified default value. func EnvOrDefaultFloat(name string, value float64) float64 { if str, present := getEnv(name, 1); present { v, err := strconv.ParseFloat(str, 64) if err != nil { log.Errorf(context.Background(), "error parsing %s: %s", name, err) return value } return v } return value }
// EnvOrDefaultBool returns the value set by the specified environment // variable, if any, otherwise the specified default value. func EnvOrDefaultBool(name string, value bool) bool { if str, present := getEnv(name, 1); present { v, err := strconv.ParseBool(str) if err != nil { log.Errorf(context.Background(), "error parsing %s: %s", name, err) return value } return v } return value }
// shortTestTimeout returns the string form of a time.Duration stripped of // trailing time units that have 0 values. For example, 6*time.Hour normally // stringifies as "6h0m0s". This regex converts it into a more readable "6h". func (cl continuousLoadTest) shortTestTimeout() string { fl := flag.Lookup("test.timeout") if fl == nil { return "" } timeout, err := time.ParseDuration(fl.Value.String()) if err != nil { log.Errorf(context.Background(), "couldn't parse test timeout %s", fl.Value.String()) return "" } return regexp.MustCompile(`([a-z])0[0a-z]+`).ReplaceAllString(timeout.String(), `$1`) }
// AddDescriptor adds a new non-config descriptor to the system schema. func (ms *MetadataSchema) AddDescriptor(parentID ID, desc DescriptorProto) { if id := desc.GetID(); id > keys.MaxReservedDescID { panic(fmt.Sprintf("invalid reserved table ID: %d > %d", id, keys.MaxReservedDescID)) } for _, d := range ms.descs { if d.desc.GetID() == desc.GetID() { log.Errorf(context.TODO(), "adding descriptor with duplicate ID: %v", desc) return } } ms.descs = append(ms.descs, metadataDescriptor{parentID, desc}) }
func (ctx *Context) removeConnLocked(key string, meta *connMeta) { if log.V(1) { log.Infof(ctx.masterCtx, "closing %s", key) } if conn := meta.conn; conn != nil { if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) { if log.V(1) { log.Errorf(ctx.masterCtx, "failed to close client connection: %s", err) } } } delete(ctx.conns.cache, key) }
// StartHeartbeat starts a periodic heartbeat to refresh this node's
// last heartbeat in the node liveness table. The worker runs until the
// stopper signals shutdown.
func (nl *NodeLiveness) StartHeartbeat(ctx context.Context, stopper *stop.Stopper) {
	log.VEventf(ctx, 1, "starting liveness heartbeat")
	retryOpts := base.DefaultRetryOptions()
	// Abort retries promptly on shutdown.
	retryOpts.Closer = stopper.ShouldQuiesce()

	stopper.RunWorker(func() {
		ambient := nl.ambientCtx
		ambient.AddLogTag("hb", nil)
		ticker := time.NewTicker(nl.heartbeatInterval)
		defer ticker.Stop()
		for {
			if !nl.pauseHeartbeat.Load().(bool) {
				// NOTE: this ctx deliberately shadows the outer ctx; each
				// heartbeat attempt gets its own span and its own timeout
				// bounded by the heartbeat interval.
				ctx, sp := ambient.AnnotateCtxWithSpan(context.Background(), "heartbeat")
				ctx, cancel := context.WithTimeout(ctx, nl.heartbeatInterval)
				// Retry heartbeat in the event the conditional put fails.
				for r := retry.StartWithCtx(ctx, retryOpts); r.Next(); {
					liveness, err := nl.Self()
					if err != nil && err != ErrNoLivenessRecord {
						log.Errorf(ctx, "unexpected error getting liveness: %v", err)
					}
					if err := nl.Heartbeat(ctx, liveness); err != nil {
						if err == errSkippedHeartbeat {
							// Lost the conditional put race; retry.
							continue
						}
						log.Errorf(ctx, "failed liveness heartbeat: %v", err)
					}
					break
				}
				// cancel/Finish are invoked explicitly rather than deferred
				// because this body runs once per loop iteration.
				cancel()
				sp.Finish()
			}
			select {
			case <-ticker.C:
			case <-stopper.ShouldStop():
				return
			}
		}
	})
}
// tightenNetwork "tightens" the network by starting a new gossip // client to the most distant node as measured in required gossip hops // to propagate info from the distant node to this node. func (g *Gossip) tightenNetwork(distantNodeID roachpb.NodeID) { g.mu.Lock() defer g.mu.Unlock() if g.outgoing.hasSpace() { ctx := g.AnnotateCtx(context.TODO()) if nodeAddr, err := g.getNodeIDAddressLocked(distantNodeID); err != nil { log.Errorf(ctx, "unable to get address for node %d: %s", distantNodeID, err) } else { log.Infof(ctx, "starting client to distant node %d to tighten network graph", distantNodeID) log.Eventf(ctx, "tightening network with new client to %s", nodeAddr) g.startClient(nodeAddr, g.NodeID.Get()) } } }
// computeChecksumPostApply starts an asynchronous computation of the replica's
// checksum in response to a ComputeChecksumRequest. The result is recorded in
// r.mu.checksums keyed by the request's checksum ID; waiters registered under
// that ID are notified when the computation finishes.
func (r *Replica) computeChecksumPostApply(
	ctx context.Context, args roachpb.ComputeChecksumRequest,
) {
	stopper := r.store.Stopper()
	id := args.ChecksumID
	now := timeutil.Now()
	r.mu.Lock()
	var notify chan struct{}
	if c, ok := r.mu.checksums[id]; !ok {
		// There is no record of this ID. Make a new notification.
		notify = make(chan struct{})
	} else if !c.started {
		// A CollectChecksumRequest is waiting on the existing notification.
		notify = c.notify
	} else {
		// A previous attempt was made to compute the checksum.
		r.mu.Unlock()
		return
	}

	r.gcOldChecksumEntriesLocked(now)

	// Create an entry with checksum == nil and gcTimestamp unset.
	r.mu.checksums[id] = replicaChecksum{started: true, notify: notify}
	// Capture the descriptor by value while still holding the lock so the
	// async task sees a consistent view.
	desc := *r.mu.state.Desc
	r.mu.Unlock()
	// The engine snapshot is taken outside the lock; the async task (or the
	// failure path below) owns closing it.
	snap := r.store.NewSnapshot()

	// Compute SHA asynchronously and store it in a map by UUID.
	if err := stopper.RunAsyncTask(ctx, func(ctx context.Context) {
		defer snap.Close()
		var snapshot *roachpb.RaftSnapshotData
		if args.Snapshot {
			snapshot = &roachpb.RaftSnapshotData{}
		}
		sha, err := r.sha512(desc, snap, snapshot)
		if err != nil {
			log.Errorf(ctx, "%v", err)
			sha = nil
		}
		r.computeChecksumDone(ctx, id, sha, snapshot)
	}); err != nil {
		// The task could not be started (e.g. server shutting down); clean
		// up the snapshot ourselves and record a nil checksum so waiters
		// are released.
		defer snap.Close()
		log.Error(ctx, errors.Wrapf(err, "could not run async checksum computation (ID = %s)", id))
		// Set checksum to nil.
		r.computeChecksumDone(ctx, id, nil, nil)
	}
}
// AddStore adds the specified store to the store map. func (ls *Stores) AddStore(s *Store) { ls.mu.Lock() defer ls.mu.Unlock() if _, ok := ls.storeMap[s.Ident.StoreID]; ok { panic(fmt.Sprintf("cannot add store twice: %+v", s.Ident)) } ls.storeMap[s.Ident.StoreID] = s // If we've already read the gossip bootstrap info, ensure that // all stores have the most recent values. if !ls.biLatestTS.Equal(hlc.ZeroTimestamp) { if err := ls.updateBootstrapInfo(ls.latestBI); err != nil { ctx := ls.AnnotateCtx(context.TODO()) log.Errorf(ctx, "failed to update bootstrap info on newly added store: %s", err) } } }
// Run is part of the processor interface. It scans the reader's spans,
// pushes each produced row to the output RowReceiver, and closes the
// output exactly once on every exit path (error, end of rows, consumer
// disinterest, or hard limit reached).
func (tr *tableReader) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(tr.ctx, "table reader")
	defer tracing.FinishSpan(span)

	txn := tr.flowCtx.setupTxn(ctx)

	log.VEventf(ctx, 1, "starting (filter: %s)", &tr.filter)
	if log.V(1) {
		defer log.Infof(ctx, "exiting")
	}

	if err := tr.fetcher.StartScan(
		txn, tr.spans, true /* limit batches */, tr.getLimitHint(),
	); err != nil {
		log.Errorf(ctx, "scan error: %s", err)
		tr.output.Close(err)
		return
	}
	var rowIdx int64
	for {
		outRow, err := tr.nextRow()
		if err != nil || outRow == nil {
			// Either a row error (propagated via Close) or end of rows
			// (err == nil, outRow == nil).
			tr.output.Close(err)
			return
		}
		if log.V(3) {
			log.Infof(ctx, "pushing row %s", outRow)
		}
		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !tr.output.PushRow(outRow) {
			log.VEventf(ctx, 1, "no more rows required")
			tr.output.Close(nil)
			return
		}
		rowIdx++
		if tr.hardLimit != 0 && rowIdx == tr.hardLimit {
			// We sent tr.hardLimit rows.
			tr.output.Close(nil)
			return
		}
	}
}
// snapshotWithContext is the main implementation for Snapshot() but it takes
// a context to allow tracing. If this method returns without error, callers
// must eventually call CloseOutSnap to ready this replica for more snapshots.
// r.mu must be held.
func (r *Replica) snapshotWithContext(
	ctx context.Context, snapType string,
) (*OutgoingSnapshot, error) {
	r.mu.AssertHeld()
	rangeID := r.RangeID

	// Refuse to snapshot a replica that has grown past twice the split size;
	// the snapshot would be prohibitively large.
	if r.exceedsDoubleSplitSizeLocked() {
		maxBytes := r.mu.maxBytes
		size := r.mu.state.Stats.Total()
		log.Infof(ctx,
			"not generating %s snapshot because replica is too large: %d > 2 * %d",
			snapType, size, maxBytes)
		return &OutgoingSnapshot{}, raft.ErrSnapshotTemporarilyUnavailable
	}

	// See if there is already a snapshot running for this store.
	// outSnapDone is closed when no snapshot is outstanding; a blocked
	// receive means one is still in flight.
	select {
	case <-r.mu.outSnapDone:
	default:
		log.Event(ctx, "snapshot already running")
		return nil, raft.ErrSnapshotTemporarilyUnavailable
	}
	if !r.store.AcquireRaftSnapshot() {
		log.Event(ctx, "snapshot already running")
		return nil, raft.ErrSnapshotTemporarilyUnavailable
	}

	startKey := r.mu.state.Desc.StartKey
	ctx, sp := r.AnnotateCtxWithSpan(ctx, "snapshot")
	defer sp.Finish()
	snap := r.store.NewSnapshot()
	log.Eventf(ctx, "new engine snapshot for replica %s", r)

	// Delegate to a static function to make sure that we do not depend
	// on any indirect calls to r.store.Engine() (or other in-memory
	// state of the Replica). Everything must come from the snapshot.
	snapData, err := snapshot(ctx, snapType, snap, rangeID, r.store.raftEntryCache, startKey)
	if err != nil {
		log.Errorf(ctx, "error generating snapshot: %s", err)
		return nil, err
	}
	log.Event(ctx, "snapshot generated")
	r.store.metrics.RangeSnapshotsGenerated.Inc(1)
	// Record the outstanding snapshot; outSnapDone stays open until
	// CloseOutSnap is called.
	r.mu.outSnap = snapData
	r.mu.outSnapDone = make(chan struct{})
	return &r.mu.outSnap, nil
}
func createSplitRanges( store *storage.Store, ) (*roachpb.RangeDescriptor, *roachpb.RangeDescriptor, *roachpb.Error) { args := adminSplitArgs(roachpb.KeyMin, []byte("b")) if _, err := client.SendWrapped(context.Background(), rg1(store), &args); err != nil { return nil, nil, err } rangeADesc := store.LookupReplica([]byte("a"), nil).Desc() rangeBDesc := store.LookupReplica([]byte("c"), nil).Desc() if bytes.Equal(rangeADesc.StartKey, rangeBDesc.StartKey) { log.Errorf(context.TODO(), "split ranges keys are equal %q!=%q", rangeADesc.StartKey, rangeBDesc.StartKey) } return rangeADesc, rangeBDesc, nil }
// MaybeAdd adds the specified replica if bq.shouldQueue specifies it
// should be queued. Replicas are added to the queue using the priority
// returned by bq.shouldQueue. If the queue is too full, the replica may
// not be added, as the replica with the lowest priority will be
// dropped.
func (bq *baseQueue) MaybeAdd(repl *Replica, now hlc.Timestamp) {
	// Load the system config before taking the queue lock.
	cfg, cfgOk := bq.gossip.GetSystemConfig()
	requiresSplit := cfgOk && bq.requiresSplit(cfg, repl)

	bq.mu.Lock()
	defer bq.mu.Unlock()

	if bq.mu.stopped {
		return
	}

	if !repl.IsInitialized() {
		return
	}

	ctx := repl.AnnotateCtx(bq.AnnotateCtx(context.TODO()))

	if !cfgOk {
		log.VEvent(ctx, 1, "no system config available. skipping")
		return
	}

	if requiresSplit {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		log.VEventf(ctx, 1, "split needed; not adding")
		return
	}

	if bq.needsLease {
		// Check to see if either we own the lease or do not know who the lease
		// holder is.
		if lease, _ := repl.getLease(); repl.IsLeaseValid(lease, now) &&
			!lease.OwnedBy(repl.store.StoreID()) {
			log.VEventf(ctx, 1, "needs lease; not adding: %+v", lease)
			return
		}
	}

	should, priority := bq.impl.shouldQueue(ctx, now, repl, cfg)
	// Expected queue errors (e.g. already queued) are not worth logging.
	if _, err := bq.addInternal(ctx, repl.Desc(), should, priority); !isExpectedQueueError(err) {
		log.Errorf(ctx, "unable to add: %s", err)
	}
}