// append the given entries to the raft log.
func (r *Replica) append(batch engine.Engine, entries []raftpb.Entry) error {
	if len(entries) == 0 {
		return nil
	}
	for _, ent := range entries {
		err := engine.MVCCPutProto(batch, nil, keys.RaftLogKey(r.RangeID, ent.Index),
			roachpb.ZeroTimestamp, nil, &ent)
		if err != nil {
			return err
		}
	}
	lastIndex := entries[len(entries)-1].Index
	prevLastIndex := atomic.LoadUint64(&r.lastIndex)
	// Delete any previously appended log entries which never committed.
	for i := lastIndex + 1; i <= prevLastIndex; i++ {
		err := engine.MVCCDelete(batch, nil, keys.RaftLogKey(r.RangeID, i),
			roachpb.ZeroTimestamp, nil)
		if err != nil {
			return err
		}
	}

	// Commit the batch and update the last index.
	if err := setLastIndex(batch, r.RangeID, lastIndex); err != nil {
		return err
	}
	batch.Defer(func() {
		atomic.StoreUint64(&r.lastIndex, lastIndex)
	})
	return nil
}
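// The second loop above is the subtle part: when the new entries overwrite a
// shorter, conflicting tail, previously appended entries past the new last
// index must be removed so an uncommitted suffix never survives. A minimal
// standalone sketch of that invariant follows; the map-backed log and the
// names below are illustrative assumptions, not CockroachDB code.
func appendAndTruncate(log map[uint64]string, prevLastIndex, first uint64, entries []string) uint64 {
	if len(entries) == 0 {
		return prevLastIndex
	}
	// Write the new entries at their indexes.
	for i, e := range entries {
		log[first+uint64(i)] = e
	}
	lastIndex := first + uint64(len(entries)) - 1
	// Delete any stale entries beyond the new last index.
	for i := lastIndex + 1; i <= prevLastIndex; i++ {
		delete(log, i)
	}
	return lastIndex
}

// Example: overwriting index 3 of a 4-entry log drops the old entry 4.
//   log := map[uint64]string{1: "a", 2: "b", 3: "c", 4: "d"}
//   appendAndTruncate(log, 4, 3, []string{"c'"}) // returns 3; log is {1:a 2:b 3:c'}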
func newRangeDataIterator(d *proto.RangeDescriptor, e engine.Engine) *rangeDataIterator {
	// The first range in the keyspace starts at KeyMin, which includes the node-local
	// space. We need the original StartKey to find the range metadata, but the
	// actual data starts at LocalMax.
	dataStartKey := d.StartKey
	if d.StartKey.Equal(proto.KeyMin) {
		dataStartKey = keys.LocalMax
	}
	ri := &rangeDataIterator{
		ranges: []keyRange{
			{
				start: engine.MVCCEncodeKey(keys.MakeKey(keys.LocalRangeIDPrefix, encoding.EncodeUvarint(nil, uint64(d.RangeID)))),
				end:   engine.MVCCEncodeKey(keys.MakeKey(keys.LocalRangeIDPrefix, encoding.EncodeUvarint(nil, uint64(d.RangeID+1)))),
			},
			{
				start: engine.MVCCEncodeKey(keys.MakeKey(keys.LocalRangePrefix, encoding.EncodeBytes(nil, d.StartKey))),
				end:   engine.MVCCEncodeKey(keys.MakeKey(keys.LocalRangePrefix, encoding.EncodeBytes(nil, d.EndKey))),
			},
			{
				start: engine.MVCCEncodeKey(dataStartKey),
				end:   engine.MVCCEncodeKey(d.EndKey),
			},
		},
		iter: e.NewIterator(),
	}
	ri.iter.Seek(ri.ranges[ri.curIndex].start)
	ri.advance()
	return ri
}
func newRangeDataIterator(r *Range, e engine.Engine) *rangeDataIterator {
	r.RLock()
	startKey := r.Desc().StartKey
	if startKey.Equal(engine.KeyMin) {
		startKey = engine.KeyLocalMax
	}
	endKey := r.Desc().EndKey
	r.RUnlock()
	ri := &rangeDataIterator{
		ranges: []keyRange{
			{
				start: engine.MVCCEncodeKey(engine.MakeKey(engine.KeyLocalRangeIDPrefix, encoding.EncodeUvarint(nil, uint64(r.Desc().RaftID)))),
				end:   engine.MVCCEncodeKey(engine.MakeKey(engine.KeyLocalRangeIDPrefix, encoding.EncodeUvarint(nil, uint64(r.Desc().RaftID+1)))),
			},
			{
				start: engine.MVCCEncodeKey(engine.MakeKey(engine.KeyLocalRangeKeyPrefix, encoding.EncodeBytes(nil, startKey))),
				end:   engine.MVCCEncodeKey(engine.MakeKey(engine.KeyLocalRangeKeyPrefix, encoding.EncodeBytes(nil, endKey))),
			},
			{
				start: engine.MVCCEncodeKey(startKey),
				end:   engine.MVCCEncodeKey(endKey),
			},
		},
		iter: e.NewIterator(),
	}
	ri.iter.Seek(ri.ranges[ri.curIndex].start)
	ri.advance()
	return ri
}
// CopyFrom copies all the cached results from the originRangeID
// response cache into this one. Note that the cache will not be
// locked while copying is in progress. Failures decoding individual
// cache entries return an error. The copy is done directly using the
// engine instead of interpreting values through MVCC for efficiency.
func (rc *ResponseCache) CopyFrom(e engine.Engine, originRangeID proto.RangeID) error {
	prefix := keys.ResponseCacheKey(originRangeID, nil) // response cache prefix
	start := engine.MVCCEncodeKey(prefix)
	end := engine.MVCCEncodeKey(prefix.PrefixEnd())

	return e.Iterate(start, end, func(kv proto.RawKeyValue) (bool, error) {
		// Decode the key into a cmd, skipping on error. Otherwise,
		// write it to the corresponding key in the new cache.
		cmdID, err := rc.decodeResponseCacheKey(kv.Key)
		if err != nil {
			return false, util.Errorf("could not decode a response cache key %s: %s",
				proto.Key(kv.Key), err)
		}
		key := keys.ResponseCacheKey(rc.rangeID, &cmdID)
		encKey := engine.MVCCEncodeKey(key)
		// Decode the value, update the checksum and re-encode.
		meta := &engine.MVCCMetadata{}
		if err := gogoproto.Unmarshal(kv.Value, meta); err != nil {
			return false, util.Errorf("could not decode response cache value %s [% x]: %s",
				proto.Key(kv.Key), kv.Value, err)
		}
		meta.Value.Checksum = nil
		meta.Value.InitChecksum(key)
		_, _, err = engine.PutProto(e, encKey, meta)
		return false, err
	})
}
func newReplicaDataIterator(d *roachpb.RangeDescriptor, e engine.Engine) *replicaDataIterator {
	// The first range in the keyspace starts at KeyMin, which includes the node-local
	// space. We need the original StartKey to find the range metadata, but the
	// actual data starts at LocalMax.
	dataStartKey := d.StartKey.AsRawKey()
	if d.StartKey.Equal(roachpb.RKeyMin) {
		dataStartKey = keys.LocalMax
	}
	ri := &replicaDataIterator{
		ranges: []keyRange{
			{
				start: engine.MVCCEncodeKey(keys.MakeRangeIDPrefix(d.RangeID)),
				end:   engine.MVCCEncodeKey(keys.MakeRangeIDPrefix(d.RangeID + 1)),
			},
			{
				start: engine.MVCCEncodeKey(keys.MakeRangeKeyPrefix(d.StartKey)),
				end:   engine.MVCCEncodeKey(keys.MakeRangeKeyPrefix(d.EndKey)),
			},
			{
				start: engine.MVCCEncodeKey(dataStartKey),
				end:   engine.MVCCEncodeKey(d.EndKey.AsRawKey()),
			},
		},
		iter: e.NewIterator(),
	}
	ri.iter.Seek(ri.ranges[ri.curIndex].start)
	ri.advance()
	return ri
}
// CopyInto copies all the cached results from this response cache
// into the destRangeID response cache. Failures decoding individual
// cache entries return an error.
func (rc *ResponseCache) CopyInto(e engine.Engine, destRangeID roachpb.RangeID) error {
	start := engine.MVCCEncodeKey(
		keys.ResponseCacheKey(rc.rangeID, roachpb.KeyMin))
	end := engine.MVCCEncodeKey(
		keys.ResponseCacheKey(rc.rangeID, roachpb.KeyMax))

	return e.Iterate(start, end, func(kv engine.MVCCKeyValue) (bool, error) {
		// Decode the key into a cmd, skipping on error. Otherwise,
		// write it to the corresponding key in the new cache.
		family, err := rc.decodeResponseCacheKey(kv.Key)
		if err != nil {
			return false, util.Errorf("could not decode a response cache key %s: %s",
				roachpb.Key(kv.Key), err)
		}
		key := keys.ResponseCacheKey(destRangeID, family)
		encKey := engine.MVCCEncodeKey(key)
		// Decode the value, update the checksum and re-encode.
		meta := &engine.MVCCMetadata{}
		if err := proto.Unmarshal(kv.Value, meta); err != nil {
			return false, util.Errorf("could not decode response cache value %s [% x]: %s",
				roachpb.Key(kv.Key), kv.Value, err)
		}
		meta.Value.Checksum = nil
		meta.Value.InitChecksum(key)
		_, _, err = engine.PutProto(e, encKey, meta)
		return false, err
	})
}
// InternalTruncateLog discards a prefix of the raft log.
func (r *Range) InternalTruncateLog(batch engine.Engine, ms *engine.MVCCStats, args *proto.InternalTruncateLogRequest, reply *proto.InternalTruncateLogResponse) {
	// args.Index is the first index to keep.
	term, err := r.Term(args.Index - 1)
	if err != nil {
		reply.SetGoError(err)
		return
	}
	start := keys.RaftLogKey(r.Desc().RaftID, 0)
	end := keys.RaftLogKey(r.Desc().RaftID, args.Index)
	err = batch.Iterate(engine.MVCCEncodeKey(start), engine.MVCCEncodeKey(end),
		func(kv proto.RawKeyValue) (bool, error) {
			err := batch.Clear(kv.Key)
			return false, err
		})
	if err != nil {
		reply.SetGoError(err)
		return
	}
	ts := proto.RaftTruncatedState{
		Index: args.Index - 1,
		Term:  term,
	}
	err = engine.MVCCPutProto(batch, ms, keys.RaftTruncatedStateKey(r.Desc().RaftID),
		proto.ZeroTimestamp, nil, &ts)
	reply.SetGoError(err)
}
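// InternalTruncateLog records the (index, term) of the last discarded entry in
// RaftTruncatedState so that term lookups at or below the truncation point can
// still be answered after the prefix is gone. A self-contained sketch of that
// bookkeeping; the map-based log and names are illustrative assumptions only.
type truncatedState struct {
	index, term uint64
}

// truncatePrefix drops every entry below firstKept and remembers the index and
// term of the last entry it removed.
func truncatePrefix(log map[uint64]uint64 /* index -> term */, firstKept uint64) truncatedState {
	ts := truncatedState{index: firstKept - 1, term: log[firstKept-1]}
	for i := range log {
		if i < firstKept {
			delete(log, i)
		}
	}
	return ts
}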
func loadRangeDescriptor(
	db engine.Engine, rangeID roachpb.RangeID,
) (roachpb.RangeDescriptor, error) {
	var desc roachpb.RangeDescriptor
	handleKV := func(kv engine.MVCCKeyValue) (bool, error) {
		if kv.Key.Timestamp == hlc.ZeroTimestamp {
			// We only want values, not MVCCMetadata.
			return false, nil
		}
		if err := checkRangeDescriptorKey(kv.Key); err != nil {
			// Range descriptor keys are interleaved with others, so if it
			// doesn't parse as a range descriptor just skip it.
			return false, nil
		}
		if err := getProtoValue(kv.Value, &desc); err != nil {
			return false, err
		}
		return desc.RangeID == rangeID, nil
	}

	// Range descriptors are stored by key, so we have to scan over the
	// range-local data to find the one for this RangeID.
	start := engine.MakeMVCCMetadataKey(keys.LocalRangePrefix)
	end := engine.MakeMVCCMetadataKey(keys.LocalRangeMax)
	if err := db.Iterate(start, end, handleKV); err != nil {
		return roachpb.RangeDescriptor{}, err
	}
	if desc.RangeID == rangeID {
		return desc, nil
	}
	return roachpb.RangeDescriptor{}, fmt.Errorf("range descriptor %d not found", rangeID)
}
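// A hedged usage sketch for loadRangeDescriptor: only its signature is taken
// from the function above; the helper name, the already-open eng, and the use
// of fmt are assumptions for illustration.
func printRangeBounds(eng engine.Engine, rangeID roachpb.RangeID) error {
	desc, err := loadRangeDescriptor(eng, rangeID)
	if err != nil {
		return err
	}
	fmt.Printf("r%d: [%s, %s)\n", desc.RangeID, desc.StartKey, desc.EndKey)
	return nil
}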
func copySeqCache(e engine.Engine, srcID, dstID roachpb.RangeID, keyMin, keyMax engine.MVCCKey) error {
	var scratch [64]byte
	return e.Iterate(keyMin, keyMax,
		func(kv engine.MVCCKeyValue) (bool, error) {
			// Decode the key into a cmd, skipping on error. Otherwise,
			// write it to the corresponding key in the new cache.
			id, epoch, seq, err := decodeSequenceCacheMVCCKey(kv.Key, scratch[:0])
			if err != nil {
				return false, util.Errorf("could not decode a sequence cache key %s: %s",
					kv.Key, err)
			}
			key := keys.SequenceCacheKey(dstID, id, epoch, seq)
			encKey := engine.MakeMVCCMetadataKey(key)
			// Decode the value, update the checksum and re-encode.
			meta := &engine.MVCCMetadata{}
			if err := proto.Unmarshal(kv.Value, meta); err != nil {
				return false, util.Errorf("could not decode sequence cache value %s [% x]: %s",
					kv.Key, kv.Value, err)
			}
			value := meta.Value()
			value.ClearChecksum()
			value.InitChecksum(key)
			meta.RawBytes = value.RawBytes
			_, _, err = engine.PutProto(e, encKey, meta)
			return false, err
		})
}
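// The detail that makes copySeqCache more than a byte copy is that the value's
// checksum covers the key it is stored under, so re-keying an entry under the
// destination range requires recomputing it. A standalone sketch of that idea,
// with hash/crc32 from the standard library standing in for the real checksum
// and all names hypothetical:
type cacheEntry struct {
	payload  []byte
	checksum uint32
}

// checksumFor covers both key and payload, so it goes stale when the key changes.
func checksumFor(key string, payload []byte) uint32 {
	return crc32.ChecksumIEEE(append([]byte(key), payload...))
}

// rekey copies an entry to a new key and recomputes the checksum for it;
// copying the bytes verbatim would leave a checksum that no longer verifies.
func rekey(store map[string]cacheEntry, oldKey, newKey string) {
	e := store[oldKey]
	e.checksum = checksumFor(newKey, e.payload)
	store[newKey] = e
}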
func newReplicaDataIterator(d *roachpb.RangeDescriptor, e engine.Engine) *replicaDataIterator {
	ri := &replicaDataIterator{
		ranges:   makeReplicaKeyRanges(d),
		Iterator: e.NewIterator(false),
	}
	ri.Seek(ri.ranges[ri.curIndex].start)
	return ri
}
// addStore creates a new store on the same Transport but doesn't create any ranges.
func (m *multiTestContext) addStore() {
	idx := len(m.stores)
	var clock *hlc.Clock
	if len(m.clocks) > idx {
		clock = m.clocks[idx]
	} else {
		clock = m.clock
		m.clocks = append(m.clocks, clock)
	}
	var eng engine.Engine
	var needBootstrap bool
	if len(m.engines) > idx {
		eng = m.engines[idx]
	} else {
		eng = engine.NewInMem(proto.Attributes{}, 1<<20)
		m.engines = append(m.engines, eng)
		needBootstrap = true
		// Add an extra refcount to the engine so the underlying rocksdb instances
		// aren't closed when stopping and restarting the stores.
		// These refcounts are removed in Stop().
		if err := eng.Open(); err != nil {
			m.t.Fatal(err)
		}
	}

	stopper := stop.NewStopper()
	ctx := m.makeContext(idx)
	store := storage.NewStore(ctx, eng, &proto.NodeDescriptor{NodeID: proto.NodeID(idx + 1)})
	if needBootstrap {
		err := store.Bootstrap(proto.StoreIdent{
			NodeID:  proto.NodeID(idx + 1),
			StoreID: proto.StoreID(idx + 1),
		}, stopper)
		if err != nil {
			m.t.Fatal(err)
		}
		// Bootstrap the initial range on the first store.
		if idx == 0 {
			if err := store.BootstrapRange(nil); err != nil {
				m.t.Fatal(err)
			}
		}
	}
	if err := store.Start(stopper); err != nil {
		m.t.Fatal(err)
	}
	store.WaitForInit()
	m.stores = append(m.stores, store)
	if len(m.senders) == idx {
		m.senders = append(m.senders, kv.NewLocalSender())
	}
	m.senders[idx].AddStore(store)
	// Save the store identities for later so we can use them in
	// replication operations even while the store is stopped.
	m.idents = append(m.idents, store.Ident)
	m.stoppers = append(m.stoppers, stopper)
}
// ClearData removes all persisted items stored in the cache.
func (sc *AbortCache) ClearData(e engine.Engine) error {
	b := e.NewBatch()
	defer b.Close()
	_, err := engine.ClearRange(b, engine.MakeMVCCMetadataKey(sc.min()), engine.MakeMVCCMetadataKey(sc.max()))
	if err != nil {
		return err
	}
	return b.Commit()
}
func newReplicaDataIterator(d *roachpb.RangeDescriptor, e engine.Engine, replicatedOnly bool) *replicaDataIterator {
	rangeFunc := makeAllKeyRanges
	if replicatedOnly {
		rangeFunc = makeReplicatedKeyRanges
	}
	ri := &replicaDataIterator{
		ranges:   rangeFunc(d),
		Iterator: e.NewIterator(nil),
	}
	ri.Seek(ri.ranges[ri.curIndex].start)
	return ri
}
// ComputeStatsForRange computes the stats for a given range by
// iterating over all key ranges for the given range that should
// be accounted for in its stats.
func ComputeStatsForRange(d *roachpb.RangeDescriptor, e engine.Engine, nowNanos int64) (engine.MVCCStats, error) {
	iter := e.NewIterator(nil)
	defer iter.Close()

	ms := engine.MVCCStats{}
	for _, r := range makeReplicatedKeyRanges(d) {
		msDelta, err := iter.ComputeStats(r.start, r.end, nowNanos)
		if err != nil {
			return engine.MVCCStats{}, err
		}
		ms.Add(msDelta)
	}
	return ms, nil
}
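// A hedged usage sketch for ComputeStatsForRange: a consistency check that
// recomputes stats from the data and compares them with a stored copy. Only
// the function's signature comes from above; verifyStats, the stored argument,
// and the use of fmt/reflect are illustrative assumptions.
func verifyStats(d *roachpb.RangeDescriptor, e engine.Engine, stored engine.MVCCStats, nowNanos int64) error {
	computed, err := ComputeStatsForRange(d, e, nowNanos)
	if err != nil {
		return err
	}
	if !reflect.DeepEqual(computed, stored) {
		return fmt.Errorf("stats diverged: computed %+v, stored %+v", computed, stored)
	}
	return nil
}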
// mergeTrigger is called on a successful commit of an AdminMerge
// transaction. It recomputes stats for the receiving range.
func (r *Range) mergeTrigger(batch engine.Engine, merge *proto.MergeTrigger) error {
	if !bytes.Equal(r.Desc().StartKey, merge.UpdatedDesc.StartKey) {
		return util.Errorf("range and updated range start keys do not match: %s != %s",
			r.Desc().StartKey, merge.UpdatedDesc.StartKey)
	}
	if !r.Desc().EndKey.Less(merge.UpdatedDesc.EndKey) {
		return util.Errorf("range end key is not less than the post merge end key: %s >= %s",
			r.Desc().EndKey, merge.UpdatedDesc.EndKey)
	}
	if merge.SubsumedRaftID <= 0 {
		return util.Errorf("subsumed raft ID must be provided: %d", merge.SubsumedRaftID)
	}

	// Copy the subsumed range's response cache to the subsuming one.
	if err := r.respCache.CopyFrom(batch, merge.SubsumedRaftID); err != nil {
		return util.Errorf("unable to copy response cache to subsuming range: %s", err)
	}

	// Compute stats for updated range.
	now := r.rm.Clock().Timestamp()
	iter := newRangeDataIterator(&merge.UpdatedDesc, batch)
	ms, err := engine.MVCCComputeStats(iter, now.WallTime)
	iter.Close()
	if err != nil {
		return util.Errorf("unable to compute stats for the range after merge: %s", err)
	}
	if err = r.stats.SetMVCCStats(batch, ms); err != nil {
		return util.Errorf("unable to write MVCC stats: %s", err)
	}

	// Clear the timestamp cache. In the case that this replica and the
	// subsumed replica each held their respective leader leases, we
	// could merge the timestamp caches for efficiency. But it's unlikely
	// and not worth the extra logic and potential for error.
	r.Lock()
	r.tsCache.Clear(r.rm.Clock())
	r.Unlock()

	batch.Defer(func() {
		if err := r.rm.MergeRange(r, merge.UpdatedDesc.EndKey, merge.SubsumedRaftID); err != nil {
			// Our in-memory state has diverged from the on-disk state.
			log.Fatalf("failed to update store after merging range: %s", err)
		}
	})
	return nil
}
// CopyFrom copies all the cached results from another response cache
// into this one. Note that the cache will not be locked while copying
// is in progress. Failures decoding individual cache entries return an
// error. The copy is done directly using the engine instead of interpreting
// values through MVCC for efficiency.
func (rc *ResponseCache) CopyFrom(e engine.Engine, originRaftID int64) error {
	prefix := engine.ResponseCacheKey(originRaftID, nil) // response cache prefix
	start := engine.MVCCEncodeKey(prefix)
	end := engine.MVCCEncodeKey(prefix.PrefixEnd())

	return e.Iterate(start, end, func(kv proto.RawKeyValue) (bool, error) {
		// Decode the key into a cmd, skipping on error. Otherwise,
		// write it to the corresponding key in the new cache.
		cmdID, err := rc.decodeResponseCacheKey(kv.Key)
		if err != nil {
			return false, util.Errorf("could not decode a response cache key %q: %s", kv.Key, err)
		}
		encKey := engine.MVCCEncodeKey(engine.ResponseCacheKey(rc.raftID, &cmdID))
		return false, rc.engine.Put(encKey, kv.Value)
	})
}
func verifyCleanup(key proto.Key, coord *TxnCoordSender, eng engine.Engine, t *testing.T) {
	if len(coord.txns) != 0 {
		t.Errorf("expected empty transactions map; got %d", len(coord.txns))
	}
	if err := util.IsTrueWithin(func() bool {
		meta := &engine.MVCCMetadata{}
		ok, _, _, err := eng.GetProto(engine.MVCCEncodeKey(key), meta)
		if err != nil {
			t.Errorf("error getting MVCC metadata: %s", err)
		}
		return !ok || meta.Txn == nil
	}, 500*time.Millisecond); err != nil {
		t.Errorf("expected intents to be cleaned up within 500ms")
	}
}
func copySeqCache(
	e engine.Engine, ms *engine.MVCCStats, srcID, dstID roachpb.RangeID, keyMin, keyMax engine.MVCCKey,
) (int, error) {
	var scratch [64]byte
	var count int
	var meta engine.MVCCMetadata
	// TODO(spencer): look into making this an MVCCIteration and writing
	// the values using MVCC so we can avoid the ugliness of updating
	// the MVCCStats by hand below.
	err := e.Iterate(keyMin, keyMax, func(kv engine.MVCCKeyValue) (bool, error) {
		// Decode the key, skipping on error. Otherwise, write it to the
		// corresponding key in the new cache.
		txnID, err := decodeAbortCacheMVCCKey(kv.Key, scratch[:0])
		if err != nil {
			return false, util.Errorf("could not decode an abort cache key %s: %s", kv.Key, err)
		}
		key := keys.AbortCacheKey(dstID, txnID)
		encKey := engine.MakeMVCCMetadataKey(key)
		// Decode the MVCCMetadata value.
		if err := proto.Unmarshal(kv.Value, &meta); err != nil {
			return false, util.Errorf("could not decode mvcc metadata %s [% x]: %s",
				kv.Key, kv.Value, err)
		}
		value := meta.Value()
		value.ClearChecksum()
		value.InitChecksum(key)
		meta.RawBytes = value.RawBytes

		keyBytes, valBytes, err := engine.PutProto(e, encKey, &meta)
		if err != nil {
			return false, err
		}
		count++
		if ms != nil {
			ms.SysBytes += keyBytes + valBytes
			ms.SysCount++
		}
		return false, nil
	})
	return count, err
}
func verifyCleanup(key proto.Key, coord *TxnCoordSender, eng engine.Engine, t *testing.T) {
	util.SucceedsWithin(t, 500*time.Millisecond, func() error {
		coord.Lock()
		l := len(coord.txns)
		coord.Unlock()
		if l != 0 {
			return fmt.Errorf("expected empty transactions map; got %d", l)
		}
		meta := &engine.MVCCMetadata{}
		ok, _, _, err := eng.GetProto(engine.MVCCEncodeKey(key), meta)
		if err != nil {
			return fmt.Errorf("error getting MVCC metadata: %s", err)
		}
		if !ok || meta.Txn == nil {
			return nil
		}
		return errors.New("intents not cleaned up")
	})
}
func verifyCleanup(key roachpb.Key, coord *TxnCoordSender, eng engine.Engine, t *testing.T) {
	util.SucceedsWithin(t, 500*time.Millisecond, func() error {
		coord.Lock()
		l := len(coord.txns)
		coord.Unlock()
		if l != 0 {
			return fmt.Errorf("expected empty transactions map; got %d", l)
		}
		meta := &engine.MVCCMetadata{}
		ok, _, _, err := eng.GetProto(engine.MakeMVCCMetadataKey(key), meta)
		if err != nil {
			return fmt.Errorf("error getting MVCC metadata: %s", err)
		}
		if ok && meta.Txn != nil {
			return fmt.Errorf("found unexpected write intent: %s", meta)
		}
		return nil
	})
}
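// The three verifyCleanup variants above converge on the same pattern: poll a
// condition (coordinator map empty, no intent left on the key) until it holds
// or a deadline passes. A standalone sketch of that retry loop, roughly the
// shape of what util.SucceedsWithin provides (stdlib only, hypothetical names):
func succeedsWithin(timeout time.Duration, fn func() error) error {
	deadline := time.Now().Add(timeout)
	var err error
	for time.Now().Before(deadline) {
		if err = fn(); err == nil {
			return nil
		}
		time.Sleep(10 * time.Millisecond)
	}
	return err
}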
// applySnapshot updates the replica based on the given snapshot.
// Returns the new last index.
func (r *Replica) applySnapshot(batch engine.Engine, snap raftpb.Snapshot) (uint64, error) {
	snapData := roachpb.RaftSnapshotData{}
	err := proto.Unmarshal(snap.Data, &snapData)
	if err != nil {
		return 0, err
	}

	rangeID := r.RangeID

	// First, save the HardState. The HardState must not be changed
	// because it may record a previous vote cast by this node. This is
	// usually unnecessary because a snapshot is nearly always
	// accompanied by a new HardState which incorporates both our former
	// state and new information from the leader, but in the event that
	// the HardState has not changed, we want to use our own previous
	// HardState and not one that was transmitted via the snapshot.
	hardStateKey := keys.RaftHardStateKey(rangeID)
	hardState, _, err := engine.MVCCGet(batch, hardStateKey, roachpb.ZeroTimestamp, true /* consistent */, nil)
	if err != nil {
		return 0, err
	}

	// Extract the updated range descriptor.
	desc := snapData.RangeDescriptor

	// Delete everything in the range and recreate it from the snapshot.
	// We need to delete any old Raft log entries here because any log entries
	// that predate the snapshot will be orphaned and never truncated or GC'd.
	iter := newReplicaDataIterator(&desc, batch, false /* !replicatedOnly */)
	defer iter.Close()
	for ; iter.Valid(); iter.Next() {
		if err := batch.Clear(iter.Key()); err != nil {
			return 0, err
		}
	}

	// Determine the unreplicated key prefix so we can drop any
	// unreplicated keys from the snapshot.
	unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(desc.RangeID)

	// Write the snapshot into the range.
	for _, kv := range snapData.KV {
		if bytes.HasPrefix(kv.Key, unreplicatedPrefix) {
			continue
		}
		mvccKey := engine.MVCCKey{
			Key:       kv.Key,
			Timestamp: kv.Timestamp,
		}
		if err := batch.Put(mvccKey, kv.Value); err != nil {
			return 0, err
		}
	}

	// Write the snapshot's Raft log into the range.
	if _, err := r.append(batch, 0, snapData.LogEntries); err != nil {
		return 0, err
	}

	// Restore the saved HardState.
	if hardState == nil {
		err := engine.MVCCDelete(batch, nil, hardStateKey, roachpb.ZeroTimestamp, nil)
		if err != nil {
			return 0, err
		}
	} else {
		err := engine.MVCCPut(batch, nil, hardStateKey, roachpb.ZeroTimestamp, *hardState, nil)
		if err != nil {
			return 0, err
		}
	}

	// Read the leader lease.
	lease, err := loadLeaderLease(batch, desc.RangeID)
	if err != nil {
		return 0, err
	}

	// Load updated range stats. The local newStats variable will be assigned
	// to r.stats after the batch commits.
	newStats, err := newRangeStats(desc.RangeID, batch)
	if err != nil {
		return 0, err
	}

	// The next line sets the persisted last index to the last applied index.
	// This is not a correctness issue, but means that we may have just
	// transferred some entries we're about to re-request from the leader and
	// overwrite.
	// However, raft.MultiNode currently expects this behaviour, and the
	// performance implications are not likely to be drastic. If our feelings
	// about this ever change, we can add a LastIndex field to
	// raftpb.SnapshotMetadata.
	if err := setLastIndex(batch, rangeID, snap.Metadata.Index); err != nil {
		return 0, err
	}

	batch.Defer(func() {
		// Update the range stats.
		r.stats.Replace(newStats)

		r.mu.Lock()
		// As outlined above, last and applied index are the same after applying
		// the snapshot.
		r.mu.appliedIndex = snap.Metadata.Index
		r.mu.leaderLease = lease
		r.mu.Unlock()

		// Update other fields which are uninitialized or need updating.
		// This may not happen if the system config has not yet been loaded.
		// While config update will correctly set the fields, there is no order
		// guarantee in ApplySnapshot.
		// TODO: should go through the standard store lock when adding a replica.
		if err := r.updateRangeInfo(&desc); err != nil {
			panic(err)
		}

		// Update the range descriptor. This is done last as this is the step that
		// makes the Replica visible in the Store.
		if err := r.setDesc(&desc); err != nil {
			panic(err)
		}
	})
	return snap.Metadata.Index, nil
}
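// The HardState handling in applySnapshot boils down to: read the existing
// value before wiping the range, clear everything, apply the snapshot, then
// restore the old value (or keep the key absent if there was none). A
// standalone sketch of that read-wipe-restore pattern; the map-backed store
// and names are illustrative assumptions, not CockroachDB code.
func applySnapshotSketch(store map[string]string, hardStateKey string, snapshot map[string]string) {
	saved, hadHardState := store[hardStateKey]
	// Wipe all existing keys for the replica.
	for k := range store {
		delete(store, k)
	}
	// Write the snapshot contents.
	for k, v := range snapshot {
		store[k] = v
	}
	// Restore the pre-existing HardState, which may record a vote that must
	// not be lost; if there was none, make sure the key stays absent.
	if hadHardState {
		store[hardStateKey] = saved
	} else {
		delete(store, hardStateKey)
	}
}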
// splitTrigger is called on a successful commit of an AdminSplit
// transaction. It copies the response cache for the new range and
// recomputes stats for both the existing, updated range and the new
// range.
func (r *Range) splitTrigger(batch engine.Engine, split *proto.SplitTrigger) error {
	if !bytes.Equal(r.Desc().StartKey, split.UpdatedDesc.StartKey) ||
		!bytes.Equal(r.Desc().EndKey, split.NewDesc.EndKey) {
		return util.Errorf("range does not match splits: (%s-%s) + (%s-%s) != %s",
			split.UpdatedDesc.StartKey, split.UpdatedDesc.EndKey,
			split.NewDesc.StartKey, split.NewDesc.EndKey, r)
	}

	// Copy the GC metadata.
	gcMeta, err := r.GetGCMetadata()
	if err != nil {
		return util.Errorf("unable to fetch GC metadata: %s", err)
	}
	if err := engine.MVCCPutProto(batch, nil, keys.RangeGCMetadataKey(split.NewDesc.RaftID),
		proto.ZeroTimestamp, nil, gcMeta); err != nil {
		return util.Errorf("unable to copy GC metadata: %s", err)
	}

	// Copy the last verification timestamp.
	verifyTS, err := r.GetLastVerificationTimestamp()
	if err != nil {
		return util.Errorf("unable to fetch last verification timestamp: %s", err)
	}
	if err := engine.MVCCPutProto(batch, nil, keys.RangeLastVerificationTimestampKey(split.NewDesc.RaftID),
		proto.ZeroTimestamp, nil, &verifyTS); err != nil {
		return util.Errorf("unable to copy last verification timestamp: %s", err)
	}

	// Compute stats for updated range.
	now := r.rm.Clock().Timestamp()
	iter := newRangeDataIterator(&split.UpdatedDesc, batch)
	ms, err := engine.MVCCComputeStats(iter, now.WallTime)
	iter.Close()
	if err != nil {
		return util.Errorf("unable to compute stats for updated range after split: %s", err)
	}
	if err := r.stats.SetMVCCStats(batch, ms); err != nil {
		return util.Errorf("unable to write MVCC stats: %s", err)
	}

	// Initialize the new range's response cache by copying the original's.
	if err = r.respCache.CopyInto(batch, split.NewDesc.RaftID); err != nil {
		return util.Errorf("unable to copy response cache to new split range: %s", err)
	}

	// Add the new split range to the store. This step atomically
	// updates the EndKey of the updated range and also adds the
	// new range to the store's range map.
	newRng, err := NewRange(&split.NewDesc, r.rm)
	if err != nil {
		return err
	}

	// Compute stats for new range.
	iter = newRangeDataIterator(&split.NewDesc, batch)
	ms, err = engine.MVCCComputeStats(iter, now.WallTime)
	iter.Close()
	if err != nil {
		return util.Errorf("unable to compute stats for new range after split: %s", err)
	}
	if err = newRng.stats.SetMVCCStats(batch, ms); err != nil {
		return util.Errorf("unable to write MVCC stats: %s", err)
	}

	// Copy the timestamp cache into the new range.
	r.Lock()
	r.tsCache.MergeInto(newRng.tsCache, true /* clear */)
	r.Unlock()

	batch.Defer(func() {
		if err := r.rm.SplitRange(r, newRng); err != nil {
			// Our in-memory state has diverged from the on-disk state.
			log.Fatalf("failed to update Store after split: %s", err)
		}
	})
	return nil
}