// processReplica processes a single replica. This should not be
// called externally to the queue. bq.mu.Lock should not be held
// while calling this method.
func (bq *baseQueue) processReplica(repl *Replica, clock *hlc.Clock) error {
	// Load the system config.
	cfg := bq.gossip.GetSystemConfig()
	if cfg == nil {
		bq.eventLog.Infof(log.V(1), "no system config available. skipping")
		return nil
	}

	desc := repl.Desc()
	if !bq.impl.acceptsUnsplitRanges() && cfg.NeedsSplit(desc.StartKey, desc.EndKey) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		bq.eventLog.Infof(log.V(3), "%s: split needed; skipping", repl)
		return nil
	}

	// If the queue requires a replica to have the range leader lease in
	// order to be processed, check whether this replica has leader lease
	// and renew or acquire if necessary.
	if bq.impl.needsLeaderLease() {
		// Create a "fake" get request in order to invoke redirectOnOrAcquireLease.
		if err := repl.redirectOnOrAcquireLeaderLease(tracing.NilSpan()); err != nil {
			bq.eventLog.Infof(log.V(3), "%s: could not acquire leader lease; skipping", repl)
			return nil
		}
	}

	bq.eventLog.Infof(log.V(3), "%s: processing", repl)
	start := time.Now()
	if err := bq.impl.process(clock.Now(), repl, cfg); err != nil {
		return err
	}
	bq.eventLog.Infof(log.V(2), "%s: done: %s", repl, time.Since(start))
	return nil
}
// writeSummaries retrieves status summaries from the supplied
// NodeStatusRecorder and persists them to the cockroach data store.
func (s *Server) writeSummaries() error {
	nodeStatus, storeStatuses := s.recorder.GetStatusSummaries()
	if nodeStatus != nil {
		key := keys.NodeStatusKey(int32(nodeStatus.Desc.NodeID))
		if err := s.db.Put(key, nodeStatus); err != nil {
			return err
		}
		if log.V(1) {
			statusJSON, err := json.Marshal(nodeStatus)
			if err != nil {
				log.Errorf("error marshaling nodeStatus to json: %s", err)
			}
			log.Infof("node %d status: %s", nodeStatus.Desc.NodeID, statusJSON)
		}
	}

	for _, ss := range storeStatuses {
		key := keys.StoreStatusKey(int32(ss.Desc.StoreID))
		if err := s.db.Put(key, &ss); err != nil {
			return err
		}
		if log.V(1) {
			statusJSON, err := json.Marshal(&ss)
			if err != nil {
				log.Errorf("error marshaling storeStatus to json: %s", err)
			}
			log.Infof("store %d status: %s", ss.Desc.StoreID, statusJSON)
		}
	}
	return nil
}
// wrap the supplied planNode with the sortNode if sorting is required.
func (n *sortNode) wrap(plan planNode) planNode {
	if n != nil {
		// Check to see if the requested ordering is compatible with the existing
		// ordering.
		existingOrdering := plan.Ordering()
		if log.V(2) {
			log.Infof("Sort: existing=%d desired=%d", existingOrdering, n.ordering)
		}
		match := computeOrderingMatch(n.ordering, existingOrdering, false)
		if match < len(n.ordering) {
			n.plan = plan
			n.needSort = true
			return n
		}

		if len(n.columns) < len(plan.Columns()) {
			// No sorting required, but we have to strip off the extra render
			// expressions we added.
			n.plan = plan
			return n
		}
	}

	if log.V(2) {
		log.Infof("Sort: no sorting required")
	}
	return plan
}
// waitAndProcess waits for the pace interval and processes the replica
// if repl is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a replica from queues when it
// is signaled via the removed channel.
func (rs *replicaScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica) bool {
	waitInterval := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(waitInterval)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", waitInterval)
	}
	for {
		select {
		case <-rs.waitTimer.C:
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}
			return !stopper.RunTask(func() {
				// Try adding replica to all queues.
				for _, q := range rs.queues {
					q.MaybeAdd(repl, clock.Now())
				}
			})

		case repl := <-rs.removed:
			// Remove replica from all queues as applicable.
			for _, q := range rs.queues {
				q.MaybeRemove(repl)
			}
			if log.V(6) {
				log.Infof("removed replica %s", repl)
			}

		case <-stopper.ShouldStop():
			return true
		}
	}
}
// ShouldRebalance returns whether the specified store should attempt to
// rebalance a replica to another store.
func (a Allocator) ShouldRebalance(storeID roachpb.StoreID) bool {
	if !a.options.AllowRebalance {
		return false
	}
	// In production, add some random jitter to shouldRebalance.
	if !a.options.Deterministic && a.randGen.Float32() > rebalanceShouldRebalanceChance {
		return false
	}
	if log.V(2) {
		log.Infof("ShouldRebalance from store %d", storeID)
	}

	storeDesc := a.storePool.getStoreDescriptor(storeID)
	if storeDesc == nil {
		if log.V(2) {
			log.Warningf("ShouldRebalance couldn't find store with id %d in StorePool", storeID)
		}
		return false
	}

	sl := a.storePool.getStoreList(*storeDesc.CombinedAttrs(),
		[]roachpb.NodeID{storeDesc.Node.NodeID}, a.options.Deterministic)

	// ShouldRebalance is true if a suitable replacement can be found.
	return a.balancer.improve(storeDesc, sl) != nil
}
// maybeGossipFirstRange adds the sentinel and first range metadata to gossip
// if this is the first range and a leader lease can be obtained. The Store
// calls this periodically on first range replicas.
func (r *Replica) maybeGossipFirstRange() error {
	if !r.IsFirstRange() {
		return nil
	}

	ctx := r.context()

	// Gossip the cluster ID from all replicas of the first range.
	if log.V(1) {
		log.Infoc(ctx, "gossiping cluster id %s from store %d, range %d",
			r.rm.ClusterID(), r.rm.StoreID(), r.Desc().RangeID)
	}
	if err := r.rm.Gossip().AddInfo(gossip.KeyClusterID, r.rm.ClusterID(), clusterIDGossipTTL); err != nil {
		log.Errorc(ctx, "failed to gossip cluster ID: %s", err)
	}

	if ok, err := r.getLeaseForGossip(ctx); !ok || err != nil {
		return err
	}
	if log.V(1) {
		log.Infoc(ctx, "gossiping sentinel from store %d, range %d", r.rm.StoreID(), r.Desc().RangeID)
	}
	if err := r.rm.Gossip().AddInfo(gossip.KeySentinel, r.rm.ClusterID(), clusterIDGossipTTL); err != nil {
		log.Errorc(ctx, "failed to gossip sentinel: %s", err)
	}
	if log.V(1) {
		log.Infoc(ctx, "gossiping first range from store %d, range %d", r.rm.StoreID(), r.Desc().RangeID)
	}
	if err := r.rm.Gossip().AddInfo(gossip.KeyFirstRangeDescriptor, *r.Desc(), configGossipTTL); err != nil {
		log.Errorc(ctx, "failed to gossip first range metadata: %s", err)
	}
	return nil
}
func (s *state) handleWriteResponse(response *writeResponse) {
	log.V(6).Infof("node %v got write response: %#v", s.nodeID, *response)
	for groupID, persistedGroup := range response.groups {
		g := s.groups[groupID]
		if persistedGroup.electionState != nil {
			g.persistedElectionState = persistedGroup.electionState
		}
		if persistedGroup.lastIndex != -1 {
			log.V(6).Infof("node %v: updating persisted log index to %v", s.nodeID, persistedGroup.lastIndex)
			s.broadcastEntries(g, persistedGroup.entries)
			g.persistedLastIndex = persistedGroup.lastIndex
			g.persistedLastTerm = persistedGroup.lastTerm
		}

		// If we are catching up, commit any newly-persisted entries that the leader
		// already considers committed.
		s.commitEntries(g, g.leaderCommitIndex)

		// Resolve any pending RPCs that have been waiting for persistence to catch up.
		var toDelete []*list.Element
		for e := g.pendingCalls.Front(); e != nil; e = e.Next() {
			call := e.Value.(*pendingCall)
			if !s.resolvePendingCall(g, call) {
				continue
			}
			call.call.Done <- call.call
			toDelete = append(toDelete, e)
		}
		for _, e := range toDelete {
			g.pendingCalls.Remove(e)
		}
		s.updateDirtyStatus(g)
	}
}
func (rq replicateQueue) shouldQueue(now proto.Timestamp, repl *Replica) (shouldQ bool, priority float64) {
	// If the replica's range spans multiple zones, ignore it until the split
	// queue has processed it.
	if len(computeSplitKeys(rq.gossip, repl)) > 0 {
		return
	}

	// Load the zone config to find the desired replica attributes.
	zone, err := lookupZoneConfig(rq.gossip, repl)
	if err != nil {
		log.Error(err)
		return
	}

	delta := rq.replicaDelta(zone, repl, repl.Desc())
	if delta == 0 {
		if log.V(1) {
			log.Infof("%s has the correct number of nodes", repl)
		}
		return false, 0
	}
	if delta > 0 {
		if log.V(1) {
			log.Infof("%s needs to add %d nodes", repl, delta)
		}
		// For ranges which need additional replicas, increase the priority.
		return true, float64(delta + 10)
	}
	if log.V(1) {
		log.Infof("%s needs to remove %d nodes", repl, 0-delta)
	}
	// For ranges which have too many replicas, priority is absolute value of
	// the delta.
	return true, float64(0 - delta)
}
// waitAndProcess waits for the pace interval and processes the range
// if rng is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a range from queues when it
// is signaled via the removed channel.
func (rs *rangeScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *util.Stopper, rng *Range) bool {
	waitInterval := rs.paceInterval(start, time.Now())
	nextTime := time.After(waitInterval)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", waitInterval)
	}
	for {
		select {
		case <-nextTime:
			if rng == nil {
				return false
			}
			if !stopper.StartTask() {
				return true
			}
			// Try adding range to all queues.
			for _, q := range rs.queues {
				q.MaybeAdd(rng, clock.Now())
			}
			stopper.FinishTask()
			return false

		case rng := <-rs.removed:
			// Remove range from all queues as applicable.
			for _, q := range rs.queues {
				q.MaybeRemove(rng)
			}
			if log.V(6) {
				log.Infof("removed range %s", rng)
			}

		case <-stopper.ShouldStop():
			return true
		}
	}
}
// writeSummaries retrieves status summaries from the supplied
// NodeStatusRecorder and persists them to the cockroach data store.
func (s *Server) writeSummaries() (err error) {
	s.stopper.RunTask(func() {
		nodeStatus, storeStatuses := s.recorder.GetStatusSummaries()
		if nodeStatus != nil {
			key := keys.NodeStatusKey(int32(nodeStatus.Desc.NodeID))
			if err = s.db.Put(key, nodeStatus); err != nil {
				return
			}
			if log.V(1) {
				log.Infof("recorded status for node %d", nodeStatus.Desc.NodeID)
			}
		}

		for _, ss := range storeStatuses {
			key := keys.StoreStatusKey(int32(ss.Desc.StoreID))
			if err = s.db.Put(key, &ss); err != nil {
				return
			}
		}
		if log.V(1) {
			log.Infof("recorded status for %d stores", len(storeStatuses))
		}
	})
	// Return any error captured inside the task rather than discarding it.
	return err
}
// improve returns a candidate StoreDescriptor to rebalance a replica to. The
// strategy is to always converge on the mean range count. If that isn't
// possible, we don't return any candidate.
func (rcb rangeCountBalancer) improve(
	sl StoreList, excluded nodeIDSet,
) *roachpb.StoreDescriptor {
	// Attempt to select a better candidate from the supplied list.
	sl.stores = selectRandom(rcb.rand, allocatorRandomCount, sl, excluded)
	candidate := rcb.selectBest(sl)
	if candidate == nil {
		if log.V(2) {
			log.Infof(context.TODO(), "not rebalancing: no valid candidate targets: %s",
				formatCandidates(nil, sl.stores))
		}
		return nil
	}

	// Adding a replica to the candidate must make its range count converge on the
	// mean range count.
	if math.Abs(float64(candidate.Capacity.RangeCount+1)-sl.candidateCount.mean) >=
		math.Abs(float64(candidate.Capacity.RangeCount)-sl.candidateCount.mean) {
		if log.V(2) {
			log.Infof(context.TODO(), "not rebalancing: %s wouldn't converge on the mean %.1f",
				formatCandidates(candidate, sl.stores), sl.candidateCount.mean)
		}
		return nil
	}

	if log.V(2) {
		log.Infof(context.TODO(), "rebalancing: mean=%.1f %s",
			sl.candidateCount.mean, formatCandidates(candidate, sl.stores))
	}
	return candidate
}
// runHistoryWithRetry intercepts retry errors. If one is encountered,
// alternate histories are generated which all contain the exact
// history prefix which encountered the error, but which recombine the
// remaining commands with all of the commands from the retrying
// history.
//
// This process continues recursively if there are further retries.
func (hv *historyVerifier) runHistoryWithRetry(
	priorities []int32, isolations []enginepb.IsolationType, cmds []*cmd, db *client.DB, t *testing.T,
) error {
	if err := hv.runHistory(priorities, isolations, cmds, db, t); err != nil {
		if log.V(1) {
			log.Infof(context.Background(), "got an error running history %s: %s", historyString(cmds), err)
		}
		retry, ok := err.(*retryError)
		if !ok {
			return err
		}

		if _, hasRetried := hv.retriedTxns[retry.txnIdx]; hasRetried {
			if log.V(1) {
				log.Infof(context.Background(), "retried txn %d twice; skipping history", retry.txnIdx+1)
			}
			return nil
		}
		hv.retriedTxns[retry.txnIdx] = struct{}{}

		// Randomly subsample 5% of histories for reduced execution time.
		enumHis := sampleHistories(enumerateHistoriesAfterRetry(retry, cmds), 0.05)
		for i, h := range enumHis {
			if log.V(1) {
				log.Infof(context.Background(), "after retry, running alternate history %d of %d", i, len(enumHis))
			}
			if err := hv.runHistoryWithRetry(priorities, isolations, h, db, t); err != nil {
				return err
			}
		}
	}
	return nil
}
// RefreshLeases starts a goroutine that refreshes the lease manager
// leases for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(sqlbase.DescriptorTable.ID))
		gossipUpdateC := gossip.RegisterSystemConfigChannel()
		for {
			select {
			case <-gossipUpdateC:
				cfg, _ := gossip.GetSystemConfig()
				if m.testingKnobs.GossipUpdateEvent != nil {
					m.testingKnobs.GossipUpdateEvent(cfg)
				}
				// Read all tables and their versions.
				if log.V(2) {
					log.Info("received a new config; will refresh leases")
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor sqlbase.Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf("%s: unable to unmarshal descriptor %v", kv.Key, kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *sqlbase.Descriptor_Table:
						table := union.Table
						if err := table.Validate(); err != nil {
							log.Errorf("%s: received invalid table descriptor: %v", kv.Key, table)
							continue
						}
						if log.V(2) {
							log.Infof("%s: refreshing lease table: %d (%s), version: %d",
								kv.Key, table.ID, table.Name, table.Version)
						}
						// Try to refresh the table lease to one >= this version.
						if t := m.findTableState(table.ID, false /* create */, nil); t != nil {
							if err := t.purgeOldLeases(
								db, table.Deleted(), table.Version, m.LeaseStore); err != nil {
								log.Warningf("error purging leases for table %d(%s): %s",
									table.ID, table.Name, err)
							}
						}
					case *sqlbase.Descriptor_Database:
						// Ignore.
					}
				}
				if m.testingKnobs.TestingLeasesRefreshedEvent != nil {
					m.testingKnobs.TestingLeasesRefreshedEvent(cfg)
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
// Repeatedly poll the given operation until its status is DONE, then return its Error.
// We determine whether it's a zone or global operation by parsing its resource link.
// TODO(marc): give up after a while.
func (g *Google) waitForInstanceGroupOperation(op *resourceviews.Operation) error {
	// Early out for finished ops.
	if op.Status == "DONE" {
		if log.V(1) {
			log.Infof("Operation %s %s: DONE, err=%v", op.OperationType, op.TargetLink,
				errorFromInstanceGroupOperationError(op.Error))
		}
		return errorFromInstanceGroupOperationError(op.Error)
	}

	for {
		liveOp, err := g.instanceGroupsService.ZoneOperations.Get(g.project, g.zone, op.Name).Do()
		// This usually indicates a bad operation object.
		if err != nil {
			return util.Errorf("could not lookup operation %+v: %s", op, err)
		}
		if log.V(1) {
			log.Infof("Operation %s %s: %s, err=%v", liveOp.OperationType, liveOp.TargetLink,
				liveOp.Status, errorFromInstanceGroupOperationError(liveOp.Error))
		}
		if liveOp.Status == "DONE" {
			return errorFromInstanceGroupOperationError(liveOp.Error)
		}
		time.Sleep(time.Second)
	}
}
// clearOverlappingCachedRangeDescriptors looks up and clears any
// cache entries which overlap the specified key or descriptor.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(key, metaKey proto.Key, desc *proto.RangeDescriptor) {
	if desc.StartKey.Equal(desc.EndKey) {
		// True for some unittests.
		return
	}
	// Clear out any descriptors which subsume the key which we're going
	// to cache. For example, an existing KeyMin->KeyMax descriptor
	// should be cleared out in favor of a KeyMin->"m" descriptor.
	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if ok {
		descriptor := v.(*proto.RangeDescriptor)
		addrKey := keys.KeyAddress(key)
		if !addrKey.Less(descriptor.StartKey) && !descriptor.EndKey.Less(addrKey) {
			if log.V(1) {
				log.Infof("clearing overlapping descriptor: key=%s desc=%s", k, descriptor)
			}
			rdc.rangeCache.Del(k.(rangeCacheKey))
		}
	}
	// Also clear any descriptors which are subsumed by the one we're
	// going to cache. This could happen on a merge (and also happens
	// when there's a lot of concurrency). Iterate from StartKey.Next().
	rdc.rangeCache.DoRange(func(k, v interface{}) {
		if log.V(1) {
			log.Infof("clearing subsumed descriptor: key=%s desc=%s", k, v.(*proto.RangeDescriptor))
		}
		rdc.rangeCache.Del(k.(rangeCacheKey))
	}, rangeCacheKey(keys.RangeMetaKey(desc.StartKey.Next())),
		rangeCacheKey(keys.RangeMetaKey(desc.EndKey)))
}
// waitAndProcess waits for the pace interval and processes the replica
// if repl is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a replica from queues when it
// is signaled via the removed channel.
func (rs *replicaScanner) waitAndProcess(
	start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica,
) bool {
	waitInterval := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(waitInterval)
	if log.V(6) {
		log.Infof(context.TODO(), "wait timer interval set to %s", waitInterval)
	}
	for {
		select {
		case <-rs.waitTimer.C:
			if log.V(6) {
				log.Infof(context.TODO(), "wait timer fired")
			}
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}
			return nil != stopper.RunTask(func() {
				// Try adding replica to all queues.
				for _, q := range rs.queues {
					q.MaybeAdd(repl, clock.Now())
				}
			})

		case repl := <-rs.removed:
			rs.removeReplica(repl)

		case <-stopper.ShouldStop():
			return true
		}
	}
}
// MaybeAdd adds the specified replica if bq.shouldQueue specifies it
// should be queued. Replicas are added to the queue using the priority
// returned by bq.shouldQueue. If the queue is too full, the replica may
// not be added, as the replica with the lowest priority will be
// dropped.
func (bq *baseQueue) MaybeAdd(repl *Replica, now roachpb.Timestamp) {
	// Load the system config.
	cfg := bq.gossip.GetSystemConfig()
	if cfg == nil {
		log.Infof("no system config available. skipping...")
		return
	}

	desc := repl.Desc()
	if !bq.impl.acceptsUnsplitRanges() && cfg.NeedsSplit(desc.StartKey, desc.EndKey) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		if log.V(3) {
			log.Infof("range %s needs to be split; not adding", repl)
		}
		return
	}

	bq.Lock()
	defer bq.Unlock()
	should, priority := bq.impl.shouldQueue(now, repl, cfg)
	if err := bq.addInternal(repl, should, priority); err != nil && log.V(3) {
		log.Infof("couldn't add %s to queue %s: %s", repl, bq.name, err)
	}
}
func (s *state) commitEntries(g *group, leaderCommitIndex int) {
	if leaderCommitIndex == g.commitIndex {
		return
	} else if leaderCommitIndex < g.commitIndex {
		// Commit index cannot actually move backwards, but a newly-elected leader might
		// report stale positions for a short time so just ignore them.
		log.V(6).Infof("node %v: ignoring commit index %v because it is behind existing commit %v",
			s.nodeID, leaderCommitIndex, g.commitIndex)
		return
	}
	g.leaderCommitIndex = leaderCommitIndex
	index := leaderCommitIndex
	if index > g.persistedLastIndex {
		// If we are not caught up with the leader, just commit as far as we can.
		// We'll continue to commit new entries as we receive AppendEntriesRequests.
		log.V(6).Infof("node %v: leader is committed to %v, but capping to %v",
			s.nodeID, index, g.persistedLastIndex)
		index = g.persistedLastIndex
	}
	log.V(6).Infof("node %v advancing commit position for group %v from %v to %v",
		s.nodeID, g.groupID, g.commitIndex, index)
	// TODO(bdarnell): move storage access (incl. the channel iteration) to a goroutine.
	entries := make(chan *LogEntryState, 100)
	go s.Storage.GetLogEntries(g.groupID, g.commitIndex+1, index, entries)
	for entry := range entries {
		log.V(6).Infof("node %v: committing %+v", s.nodeID, entry)
		if entry.Entry.Type == LogEntryCommand {
			s.sendEvent(&EventCommandCommitted{entry.Entry.Payload})
		}
	}
	g.commitIndex = index
	s.broadcastEntries(g, nil)
}
// GetJSON retrieves the URL specified by https://Addr(<port>)<path>
// and unmarshals the result as JSON.
func (c *Container) GetJSON(port, path string, v interface{}) error {
	client := &http.Client{
		Timeout: 200 * time.Millisecond,
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{
				InsecureSkipVerify: true,
			},
		}}
	resp, err := client.Get(fmt.Sprintf("https://%s%s", c.Addr(port), path))
	if err != nil {
		if log.V(1) {
			log.Info(err)
		}
		return err
	}
	defer resp.Body.Close()
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		if log.V(1) {
			log.Info(err)
		}
		return err
	}
	if err := json.Unmarshal(b, v); err != nil {
		if log.V(1) {
			log.Info(err)
		}
		// Propagate the unmarshal error instead of silently dropping it.
		return err
	}
	return nil
}
// AddMetricStruct examines all fields of metricStruct and adds
// all Iterable or metricGroup objects to the registry.
func (r *Registry) AddMetricStruct(metricStruct interface{}) {
	v := reflect.ValueOf(metricStruct)
	if v.Kind() == reflect.Ptr {
		v = v.Elem()
	}
	t := v.Type()

	for i := 0; i < v.NumField(); i++ {
		vfield, tfield := v.Field(i), t.Field(i)
		if !vfield.CanInterface() {
			if log.V(2) {
				log.Infof(context.TODO(), "Skipping unexported field %s", tfield.Name)
			}
			continue
		}
		val := vfield.Interface()
		switch typ := val.(type) {
		case metricGroup:
			r.AddMetricGroup(typ)
		case Iterable:
			r.AddMetric(typ)
		default:
			if log.V(2) {
				log.Infof(context.TODO(), "Skipping non-metric field %s", tfield.Name)
			}
		}
	}
}
// GetClientTLSConfig returns the context client TLS config, initializing it if needed.
// If Insecure is true, return a nil config, otherwise load a config based
// on the Certs directory. If Certs is empty, use a very permissive config.
// TODO(marc): empty Certs dir should fail when client certificates are required.
func (ctx *Context) GetClientTLSConfig() (*tls.Config, error) {
	// Early out.
	if ctx.Insecure {
		return nil, nil
	}

	ctx.tlsConfigMu.Lock()
	defer ctx.tlsConfigMu.Unlock()

	if ctx.clientTLSConfig != nil {
		return ctx.clientTLSConfig, nil
	}

	if ctx.Certs != "" {
		if log.V(1) {
			log.Infof("setting up TLS from certificates directory: %s", ctx.Certs)
		}
		cfg, err := security.LoadClientTLSConfig(ctx.Certs, ctx.User)
		if err != nil {
			return nil, util.Errorf("error setting up client TLS config: %s", err)
		}
		ctx.clientTLSConfig = cfg
	} else {
		if log.V(1) {
			log.Infof("no certificates directory specified: using insecure TLS")
		}
		ctx.clientTLSConfig = security.LoadInsecureClientTLSConfig()
	}

	return ctx.clientTLSConfig, nil
}
// Reserve a new replica. Returns true on a successful reservation.
// TODO(bram): either here or in the store, prevent taking too many
// reservations at once.
func (b *bookie) Reserve(res reservation) bool {
	b.mu.Lock()
	defer b.mu.Unlock()

	if oldRes, ok := b.mu.resByRangeID[res.rangeID]; ok {
		// If the reservation is a repeat of an already existing one, just
		// update it. This can occur when an RPC repeats.
		if oldRes.nodeID == res.nodeID && oldRes.storeID == res.storeID {
			// To update the reservation, fill the original one and add the
			// new one.
			if log.V(2) {
				log.Infof("updating existing reservation for rangeID:%d, %v", res.rangeID, oldRes)
			}
			b.fillBookingLocked(oldRes)
		} else {
			if log.V(2) {
				log.Infof("there is a pre-existing reservation %v, can't update with %v", oldRes, res)
			}
			return false
		}
	}

	newBooking := &booking{
		reservation: res,
		expireAt:    b.clock.Now().Add(b.reservationTimeout.Nanoseconds(), 0),
	}

	b.mu.resByRangeID[res.rangeID] = newBooking
	b.mu.queue.enqueue(newBooking)
	b.mu.size += res.size
	return true
}
// clearOverlappingCachedRangeDescriptors looks up and clears any
// cache entries which overlap the specified descriptor.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(desc *roachpb.RangeDescriptor) {
	key := desc.EndKey
	metaKey := meta(key)

	// Clear out any descriptors which subsume the key which we're going
	// to cache. For example, an existing KeyMin->KeyMax descriptor
	// should be cleared out in favor of a KeyMin->"m" descriptor.
	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if ok {
		descriptor := v.(*roachpb.RangeDescriptor)
		if descriptor.StartKey.Less(key) && !descriptor.EndKey.Less(key) {
			if log.V(1) {
				log.Infof("clearing overlapping descriptor: key=%s desc=%s", k, descriptor)
			}
			rdc.rangeCache.Del(k.(rangeCacheKey))
		}
	}

	// Also clear any descriptors which are subsumed by the one we're
	// going to cache. This could happen on a merge (and also happens
	// when there's a lot of concurrency). Iterate from the range meta key
	// after RangeMetaKey(desc.StartKey) to the range meta key for desc.EndKey.
	rdc.rangeCache.DoRange(func(k, v interface{}) {
		if log.V(1) {
			log.Infof("clearing subsumed descriptor: key=%s desc=%s", k, v.(*roachpb.RangeDescriptor))
		}
		rdc.rangeCache.Del(k.(rangeCacheKey))
	}, rangeCacheKey(meta(desc.StartKey).Next()),
		rangeCacheKey(meta(desc.EndKey)))
}
// wrap the supplied planNode with the sortNode if sorting is required.
func (n *sortNode) wrap(plan planNode) planNode {
	if n != nil {
		// Check to see if the requested ordering is compatible with the existing
		// ordering.
		existingOrdering := plan.Ordering()
		for i := range n.ordering {
			if i >= len(existingOrdering) || n.ordering[i] != existingOrdering[i] {
				if log.V(2) {
					log.Infof("Sort: %d != %d", existingOrdering, n.ordering)
				}
				n.plan = plan
				n.needSort = true
				return n
			}
		}

		if len(n.columns) < len(plan.Columns()) {
			// No sorting required, but we have to strip off the extra render
			// expressions we added.
			n.plan = plan
			return n
		}
	}

	if log.V(2) {
		log.Infof("Sort: no sorting required")
	}
	return plan
}
func (txn *Txn) exec(retryable func(txn *Txn) *roachpb.Error) *roachpb.Error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var pErr *roachpb.Error
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		pErr = retryable(txn)
		if pErr == nil && txn.Proto.Status == roachpb.PENDING {
			// retryable succeeded, but didn't commit.
			pErr = txn.commit(nil)
		}

		if pErr != nil {
			switch pErr.TransactionRestart {
			case roachpb.TransactionRestart_IMMEDIATE:
				if log.V(2) {
					log.Warning(pErr)
				}
				r.Reset()
				continue
			case roachpb.TransactionRestart_BACKOFF:
				if log.V(2) {
					log.Warning(pErr)
				}
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	txn.Cleanup(pErr)
	return pErr
}
// deleteRow adds to the batch the kv operations necessary to delete a table row
// with the given values.
func (rd *rowDeleter) deleteRow(b *client.Batch, values []parser.Datum) error {
	if err := rd.fks.checkAll(values); err != nil {
		return err
	}

	primaryIndexKey, secondaryIndexEntries, err := rd.helper.encodeIndexes(rd.fetchColIDtoRowIndex, values)
	if err != nil {
		return err
	}

	for _, secondaryIndexEntry := range secondaryIndexEntries {
		if log.V(2) {
			log.Infof("Del %s", secondaryIndexEntry.Key)
		}
		b.Del(secondaryIndexEntry.Key)
	}

	// Delete the row.
	rd.startKey = roachpb.Key(primaryIndexKey)
	rd.endKey = roachpb.Key(encoding.EncodeNotNullDescending(primaryIndexKey))
	if log.V(2) {
		log.Infof("DelRange %s - %s", rd.startKey, rd.endKey)
	}
	b.DelRange(&rd.startKey, &rd.endKey, false)
	rd.startKey, rd.endKey = nil, nil

	return nil
}
// handleWriteReady converts a set of raft.Ready structs into a writeRequest
// to be persisted, marks the group as writing and sends it to the writeTask.
func (s *state) handleWriteReady(readyGroups map[uint64]raft.Ready) {
	if log.V(6) {
		log.Infof("node %v write ready, preparing request", s.nodeID)
	}
	writeRequest := newWriteRequest()
	for groupID, ready := range readyGroups {
		raftGroupID := proto.RaftID(groupID)
		g, ok := s.groups[raftGroupID]
		if !ok {
			if log.V(6) {
				log.Infof("dropping write request to group %d", groupID)
			}
			continue
		}
		g.writing = true

		gwr := &groupWriteRequest{}
		if !raft.IsEmptyHardState(ready.HardState) {
			gwr.state = ready.HardState
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			gwr.snapshot = ready.Snapshot
		}
		if len(ready.Entries) > 0 {
			gwr.entries = ready.Entries
		}
		writeRequest.groups[raftGroupID] = gwr
	}
	s.writeTask.in <- writeRequest
}
// EvictCachedRangeDescriptor will evict any cached range descriptors
// for the given key. It is intended that this method be called from a
// consumer of rangeDescriptorCache if the returned range descriptor is
// discovered to be stale.
// seenDesc should always be passed in and is used as the basis of a
// compare-and-evict (as pointers); if it is nil, eviction is unconditional
// but a warning will be logged.
func (rdc *rangeDescriptorCache) EvictCachedRangeDescriptor(descKey proto.Key, seenDesc *proto.RangeDescriptor) {
	if seenDesc == nil {
		log.Warningf("compare-and-evict for key %s with nil descriptor; clearing unconditionally", descKey)
	}

	rdc.rangeCacheMu.Lock()
	defer rdc.rangeCacheMu.Unlock()

	rngKey, cachedDesc := rdc.getCachedRangeDescriptorLocked(descKey)
	// Note that we're doing a "compare-and-erase": If seenDesc is not nil,
	// we want to clean the cache only if it equals the cached range
	// descriptor as a pointer. If not, then likely some other caller
	// already evicted previously, and we can save work by not doing it
	// again (which would prompt another expensive lookup).
	if seenDesc != nil && seenDesc != cachedDesc {
		return
	}

	for !bytes.Equal(descKey, proto.KeyMin) {
		if log.V(2) {
			log.Infof("evict cached descriptor: key=%s desc=%s\n%s", descKey, cachedDesc, rdc.stringLocked())
		} else if log.V(1) {
			log.Infof("evict cached descriptor: key=%s desc=%s", descKey, cachedDesc)
		}
		rdc.rangeCache.Del(rngKey)

		// Retrieve the metadata range key for the next level of metadata, and
		// evict that key as well. This loop ends after the meta1 range, which
		// returns KeyMin as its metadata key.
		descKey = keys.RangeMetaKey(descKey)
		rngKey, cachedDesc = rdc.getCachedRangeDescriptorLocked(descKey)
	}
}
func (bq *baseQueue) processOne(clock *hlc.Clock) {
	start := time.Now()
	bq.Lock()
	repl := bq.pop()
	bq.Unlock()
	if repl != nil {
		now := clock.Now()
		if log.V(1) {
			log.Infof("processing replica %s from %s queue...", repl, bq.name)
		}
		// If the queue requires a replica to have the range leader lease in
		// order to be processed, check whether this replica has leader lease
		// and renew or acquire if necessary.
		if bq.impl.needsLeaderLease() {
			// Create a "fake" get request in order to invoke redirectOnOrAcquireLease.
			args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}}
			if err := repl.redirectOnOrAcquireLeaderLease(nil /* Trace */, args.Header().Timestamp); err != nil {
				if log.V(1) {
					log.Infof("this replica of %s could not acquire leader lease; skipping...", repl)
				}
				return
			}
		}
		if err := bq.impl.process(now, repl); err != nil {
			log.Errorf("failure processing replica %s from %s queue: %s", repl, bq.name, err)
		} else if log.V(2) {
			log.Infof("processed replica %s from %s queue in %s", repl, bq.name, time.Now().Sub(start))
		}
	}
}
func (bq *baseQueue) processOne(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunTask(func() {
		start := time.Now()
		bq.Lock()
		rng := bq.pop()
		bq.Unlock()
		if rng != nil {
			now := clock.Now()
			if log.V(1) {
				log.Infof("processing range %s from %s queue...", rng, bq.name)
			}
			// If the queue requires the leader lease to process the
			// range, check whether this replica has leader lease and
			// renew or acquire if necessary.
			if bq.impl.needsLeaderLease() {
				// Create a "fake" get request in order to invoke redirectOnOrAcquireLease.
				args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}}
				if err := rng.redirectOnOrAcquireLeaderLease(nil /* Trace */, args.Header().Timestamp); err != nil {
					if log.V(1) {
						log.Infof("this replica of %s could not acquire leader lease; skipping...", rng)
					}
					return
				}
			}
			if err := bq.impl.process(now, rng); err != nil {
				log.Errorf("failure processing range %s from %s queue: %s", rng, bq.name, err)
			}
			if log.V(1) {
				log.Infof("processed range %s from %s queue in %s", rng, bq.name, time.Now().Sub(start))
			}
		}
	})
}