// CopyInto copies all the cached results from one response cache into
// another. The cache will be locked while copying is in progress;
// failures decoding individual cache entries return an error. The
// copy is done directly using the engine instead of interpreting
// values through MVCC for efficiency.
func (rc *ResponseCache) CopyInto(e engine.Engine, destRaftID int64) error {
	rc.Lock()
	defer rc.Unlock()

	prefix := engine.ResponseCacheKey(rc.raftID, nil) // response cache prefix
	start := engine.MVCCEncodeKey(prefix)
	end := engine.MVCCEncodeKey(prefix.PrefixEnd())

	return rc.engine.Iterate(start, end, func(kv proto.RawKeyValue) (bool, error) {
		// Decode the key into a command ID; a decoding failure aborts the
		// copy with an error. Otherwise, write the value to the
		// corresponding key in the new cache.
		cmdID, err := rc.decodeResponseCacheKey(kv.Key)
		if err != nil {
			return false, util.Errorf("could not decode a response cache key %q: %s", kv.Key, err)
		}
		encKey := engine.MVCCEncodeKey(engine.ResponseCacheKey(destRaftID, &cmdID))
		return false, e.Put(encKey, kv.Value)
	})
}
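// copyResponseCacheForSplit is a hedged usage sketch, not part of the
// original source: it shows how CopyInto might be invoked during a range
// split so that the new range inherits the parent's cached responses and
// retried commands remain idempotent on both halves. The newRaftID
// parameter and the caller-supplied batch engine are assumptions for
// illustration only.
func copyResponseCacheForSplit(rc *ResponseCache, batch engine.Engine, newRaftID int64) error {
	// CopyInto holds the cache lock, decodes each cached entry, and writes
	// it under the destination range's response cache prefix.
	if err := rc.CopyInto(batch, newRaftID); err != nil {
		return util.Errorf("unable to copy response cache into new range %d: %s", newRaftID, err)
	}
	return nil
}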
// applySnapshot updates the replica based on the given snapshot.
// Returns the new last index.
func (r *Replica) applySnapshot(batch engine.Engine, snap raftpb.Snapshot) (uint64, error) {
	snapData := roachpb.RaftSnapshotData{}
	err := proto.Unmarshal(snap.Data, &snapData)
	if err != nil {
		return 0, err
	}

	rangeID := r.RangeID

	// First, save the HardState. The HardState must not be changed
	// because it may record a previous vote cast by this node. This is
	// usually unnecessary because a snapshot is nearly always
	// accompanied by a new HardState which incorporates both our former
	// state and new information from the leader, but in the event that
	// the HardState has not changed, we want to use our own previous
	// HardState and not one that was transmitted via the snapshot.
	hardStateKey := keys.RaftHardStateKey(rangeID)
	hardState, _, err := engine.MVCCGet(batch, hardStateKey, roachpb.ZeroTimestamp, true /* consistent */, nil)
	if err != nil {
		return 0, err
	}

	// Extract the updated range descriptor.
	desc := snapData.RangeDescriptor

	// Delete everything in the range and recreate it from the snapshot.
	// We need to delete any old Raft log entries here because any log
	// entries that predate the snapshot will be orphaned and never
	// truncated or GC'd.
	iter := newReplicaDataIterator(&desc, batch, false /* !replicatedOnly */)
	defer iter.Close()
	for ; iter.Valid(); iter.Next() {
		if err := batch.Clear(iter.Key()); err != nil {
			return 0, err
		}
	}

	// Determine the unreplicated key prefix so we can drop any
	// unreplicated keys from the snapshot.
	unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(desc.RangeID)

	// Write the snapshot into the range.
	for _, kv := range snapData.KV {
		if bytes.HasPrefix(kv.Key, unreplicatedPrefix) {
			continue
		}
		mvccKey := engine.MVCCKey{
			Key:       kv.Key,
			Timestamp: kv.Timestamp,
		}
		if err := batch.Put(mvccKey, kv.Value); err != nil {
			return 0, err
		}
	}

	// Write the snapshot's Raft log into the range.
	if _, err := r.append(batch, 0, snapData.LogEntries); err != nil {
		return 0, err
	}

	// Restore the saved HardState.
	if hardState == nil {
		err := engine.MVCCDelete(batch, nil, hardStateKey, roachpb.ZeroTimestamp, nil)
		if err != nil {
			return 0, err
		}
	} else {
		err := engine.MVCCPut(batch, nil, hardStateKey, roachpb.ZeroTimestamp, *hardState, nil)
		if err != nil {
			return 0, err
		}
	}

	// Read the leader lease.
	lease, err := loadLeaderLease(batch, desc.RangeID)
	if err != nil {
		return 0, err
	}

	// Load updated range stats. The local newStats variable will be
	// assigned to r.stats after the batch commits.
	newStats, err := newRangeStats(desc.RangeID, batch)
	if err != nil {
		return 0, err
	}

	// The next line sets the persisted last index to the last applied
	// index. This is not a correctness issue, but it means that we may
	// have just transferred some entries we're about to re-request from
	// the leader and overwrite. However, raft.MultiNode currently expects
	// this behaviour, and the performance implications are not likely to
	// be drastic. If our feelings about this ever change, we can add a
	// LastIndex field to raftpb.SnapshotMetadata.
	if err := setLastIndex(batch, rangeID, snap.Metadata.Index); err != nil {
		return 0, err
	}

	batch.Defer(func() {
		// Update the range stats.
		r.stats.Replace(newStats)

		r.mu.Lock()
		// As outlined above, last and applied index are the same after
		// applying the snapshot.
		r.mu.appliedIndex = snap.Metadata.Index
		r.mu.leaderLease = lease
		r.mu.Unlock()

		// Update other fields which are uninitialized or need updating.
		// This may not happen if the system config has not yet been loaded.
		// While config update will correctly set the fields, there is no
		// ordering guarantee in ApplySnapshot.
		// TODO: should go through the standard store lock when adding a
		// replica.
		if err := r.updateRangeInfo(&desc); err != nil {
			panic(err)
		}

		// Update the range descriptor. This is done last as this step makes
		// the Replica visible in the Store.
		if err := r.setDesc(&desc); err != nil {
			panic(err)
		}
	})

	return snap.Metadata.Index, nil
}
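// handleRaftSnapshot is a hedged sketch, not part of the original source: it
// illustrates the intended calling pattern around applySnapshot, in which all
// on-disk changes are staged in a write batch and the updates registered via
// batch.Defer (stats, applied index, lease, descriptor) only take effect once
// the batch commits. The r.store.Engine() accessor and the batch's
// NewBatch/Commit/Close methods are assumed from context rather than taken
// from this file.
func (r *Replica) handleRaftSnapshot(snap raftpb.Snapshot) (uint64, error) {
	batch := r.store.Engine().NewBatch()
	defer batch.Close()

	// Stage the snapshot's range data, Raft log, and HardState in the batch.
	lastIndex, err := r.applySnapshot(batch, snap)
	if err != nil {
		return 0, err
	}
	// Committing the batch persists the snapshot atomically and triggers the
	// deferred in-memory updates.
	if err := batch.Commit(); err != nil {
		return 0, err
	}
	return lastIndex, nil
}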