// InternalTruncateLog discards a prefix of the raft log. func (r *Range) InternalTruncateLog(batch engine.Engine, ms *engine.MVCCStats, args *proto.InternalTruncateLogRequest, reply *proto.InternalTruncateLogResponse) { // args.Index is the first index to keep. term, err := r.Term(args.Index - 1) if err != nil { reply.SetGoError(err) return } start := keys.RaftLogKey(r.Desc().RaftID, 0) end := keys.RaftLogKey(r.Desc().RaftID, args.Index) err = batch.Iterate(engine.MVCCEncodeKey(start), engine.MVCCEncodeKey(end), func(kv proto.RawKeyValue) (bool, error) { err := batch.Clear(kv.Key) return false, err }) if err != nil { reply.SetGoError(err) return } ts := proto.RaftTruncatedState{ Index: args.Index - 1, Term: term, } err = engine.MVCCPutProto(batch, ms, keys.RaftTruncatedStateKey(r.Desc().RaftID), proto.ZeroTimestamp, nil, &ts) reply.SetGoError(err) }
// applySnapshot updates the replica based on the given snapshot. // Returns the new last index. func (r *Replica) applySnapshot(batch engine.Engine, snap raftpb.Snapshot) (uint64, error) { snapData := roachpb.RaftSnapshotData{} err := proto.Unmarshal(snap.Data, &snapData) if err != nil { return 0, err } rangeID := r.RangeID // First, save the HardState. The HardState must not be changed // because it may record a previous vote cast by this node. This is // usually unnecessary because a snapshot is nearly always // accompanied by a new HardState which incorporates both our former // state and new information from the leader, but in the event that // the HardState has not changed, we want to use our own previous // HardState and not one that was transmitted via the snapshot. hardStateKey := keys.RaftHardStateKey(rangeID) hardState, _, err := engine.MVCCGet(batch, hardStateKey, roachpb.ZeroTimestamp, true /* consistent */, nil) if err != nil { return 0, err } // Extract the updated range descriptor. desc := snapData.RangeDescriptor // Delete everything in the range and recreate it from the snapshot. // We need to delete any old Raft log entries here because any log entries // that predate the snapshot will be orphaned and never truncated or GC'd. iter := newReplicaDataIterator(&desc, batch, false /* !replicatedOnly */) defer iter.Close() for ; iter.Valid(); iter.Next() { if err := batch.Clear(iter.Key()); err != nil { return 0, err } } // Determine the unreplicated key prefix so we can drop any // unreplicated keys from the snapshot. unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(desc.RangeID) // Write the snapshot into the range. for _, kv := range snapData.KV { if bytes.HasPrefix(kv.Key, unreplicatedPrefix) { continue } mvccKey := engine.MVCCKey{ Key: kv.Key, Timestamp: kv.Timestamp, } if err := batch.Put(mvccKey, kv.Value); err != nil { return 0, err } } // Write the snapshot's Raft log into the range. if _, err := r.append(batch, 0, snapData.LogEntries); err != nil { return 0, err } // Restore the saved HardState. if hardState == nil { err := engine.MVCCDelete(batch, nil, hardStateKey, roachpb.ZeroTimestamp, nil) if err != nil { return 0, err } } else { err := engine.MVCCPut(batch, nil, hardStateKey, roachpb.ZeroTimestamp, *hardState, nil) if err != nil { return 0, err } } // Read the leader lease. lease, err := loadLeaderLease(batch, desc.RangeID) if err != nil { return 0, err } // Load updated range stats. The local newStats variable will be assigned // to r.stats after the batch commits. newStats, err := newRangeStats(desc.RangeID, batch) if err != nil { return 0, err } // The next line sets the persisted last index to the last applied index. // This is not a correctness issue, but means that we may have just // transferred some entries we're about to re-request from the leader and // overwrite. // However, raft.MultiNode currently expects this behaviour, and the // performance implications are not likely to be drastic. If our feelings // about this ever change, we can add a LastIndex field to // raftpb.SnapshotMetadata. if err := setLastIndex(batch, rangeID, snap.Metadata.Index); err != nil { return 0, err } batch.Defer(func() { // Update the range stats. r.stats.Replace(newStats) r.mu.Lock() // As outlined above, last and applied index are the same after applying // the snapshot. r.mu.appliedIndex = snap.Metadata.Index r.mu.leaderLease = lease r.mu.Unlock() // Update other fields which are uninitialized or need updating. // This may not happen if the system config has not yet been loaded. // While config update will correctly set the fields, there is no order // guarantee in ApplySnapshot. // TODO: should go through the standard store lock when adding a replica. if err := r.updateRangeInfo(&desc); err != nil { panic(err) } // Update the range descriptor. This is done last as this is the step that // makes the Replica visible in the Store. if err := r.setDesc(&desc); err != nil { panic(err) } }) return snap.Metadata.Index, nil }