// InitialState implements the raft.Storage interface. func (r *Range) InitialState() (raftpb.HardState, raftpb.ConfState, error) { var hs raftpb.HardState found, err := engine.MVCCGetProto(r.rm.Engine(), keys.RaftHardStateKey(r.Desc().RaftID), proto.ZeroTimestamp, true, nil, &hs) if err != nil { return raftpb.HardState{}, raftpb.ConfState{}, err } if !found { // We don't have a saved HardState, so set up the defaults. if r.isInitialized() { // Set the initial log term. hs.Term = raftInitialLogTerm hs.Commit = raftInitialLogIndex atomic.StoreUint64(&r.lastIndex, raftInitialLogIndex) } else { // This is a new range we are receiving from another node. Start // from zero so we will receive a snapshot. atomic.StoreUint64(&r.lastIndex, 0) } } var cs raftpb.ConfState // For uninitalized ranges, membership is unknown at this point. if found || r.isInitialized() { for _, rep := range r.Desc().Replicas { cs.Nodes = append(cs.Nodes, uint64(proto.MakeRaftNodeID(rep.NodeID, rep.StoreID))) } } return hs, cs, nil }
// synthesizeHardState synthesizes a HardState from the given ReplicaState and // any existing on-disk HardState in the context of a snapshot, while verifying // that the application of the snapshot does not violate Raft invariants. It // must be called after the supplied state and ReadWriter have been updated // with the result of the snapshot. // If there is an existing HardState, we must respect it and we must not apply // a snapshot that would move the state backwards. func synthesizeHardState( ctx context.Context, eng engine.ReadWriter, s storagebase.ReplicaState, oldHS raftpb.HardState, ) error { newHS := raftpb.HardState{ Term: s.TruncatedState.Term, // Note that when applying a Raft snapshot, the applied index is // equal to the Commit index represented by the snapshot. Commit: s.RaftAppliedIndex, } if oldHS.Commit > newHS.Commit { return errors.Errorf("can't decrease HardState.Commit from %d to %d", oldHS.Commit, newHS.Commit) } if oldHS.Term > newHS.Term { // The existing HardState is allowed to be ahead of us, which is // relevant in practice for the split trigger. We already checked above // that we're not rewinding the acknowledged index, and we haven't // updated votes yet. newHS.Term = oldHS.Term } // If the existing HardState voted in this term, remember that. if oldHS.Term == newHS.Term { newHS.Vote = oldHS.Vote } return errors.Wrapf(setHardState(ctx, eng, s.Desc.RangeID, newHS), "writing HardState %+v", &newHS) }
func updateHardState(eng engine.ReadWriter, s storagebase.ReplicaState) error { // Load a potentially existing HardState as we may need to preserve // information about cast votes. For example, during a Split for which // another node's new right-hand side has contacted us before our left-hand // side called in here to create the group. rangeID := s.Desc.RangeID oldHS, err := loadHardState(eng, rangeID) if err != nil { return err } newHS := raftpb.HardState{ Term: s.TruncatedState.Term, Commit: s.RaftAppliedIndex, } if !raft.IsEmptyHardState(oldHS) { if oldHS.Commit > newHS.Commit { newHS.Commit = oldHS.Commit } if oldHS.Term > newHS.Term { newHS.Term = oldHS.Term } newHS.Vote = oldHS.Vote } return setHardState(eng, rangeID, newHS) }
// writeInitialState bootstraps a new Raft group (i.e. it is called when we // bootstrap a Range, or when setting up the right hand side of a split). // Its main task is to persist a consistent Raft (and associated Replica) state // which does not start from zero but presupposes a few entries already having // applied. // The supplied MVCCStats are used for the Stats field after adjusting for // persisting the state itself, and the updated stats are returned. func writeInitialState( eng engine.ReadWriter, ms enginepb.MVCCStats, desc roachpb.RangeDescriptor, ) (enginepb.MVCCStats, error) { rangeID := desc.RangeID var s storagebase.ReplicaState s.TruncatedState = &roachpb.RaftTruncatedState{ Term: raftInitialLogTerm, Index: raftInitialLogIndex, } s.RaftAppliedIndex = s.TruncatedState.Index s.Desc = &roachpb.RangeDescriptor{ RangeID: rangeID, } s.Stats = ms newMS, err := saveState(eng, s) if err != nil { return enginepb.MVCCStats{}, err } // Load a potentially existing HardState as we may need to preserve // information about cast votes. For example, during a Split for which // another node's new right-hand side has contacted us before our left-hand // side called in here to create the group. oldHS, err := loadHardState(eng, rangeID) if err != nil { return enginepb.MVCCStats{}, err } newHS := raftpb.HardState{ Term: s.TruncatedState.Term, Commit: s.TruncatedState.Index, } if !raft.IsEmptyHardState(oldHS) { if oldHS.Commit > newHS.Commit { newHS.Commit = oldHS.Commit } if oldHS.Term > newHS.Term { newHS.Term = oldHS.Term } newHS.Vote = oldHS.Vote } if err := setHardState(eng, rangeID, newHS); err != nil { return enginepb.MVCCStats{}, err } if err := setLastIndex(eng, rangeID, s.TruncatedState.Index); err != nil { return enginepb.MVCCStats{}, err } return newMS, nil }
// InitialState implements the raft.Storage interface. func (r *Replica) InitialState() (raftpb.HardState, raftpb.ConfState, error) { var hs raftpb.HardState desc := r.Desc() found, err := engine.MVCCGetProto(r.store.Engine(), keys.RaftHardStateKey(desc.RangeID), roachpb.ZeroTimestamp, true, nil, &hs) if err != nil { return raftpb.HardState{}, raftpb.ConfState{}, err } initialized := r.isInitialized() if !found { // We don't have a saved HardState, so set up the defaults. if initialized { // Set the initial log term. hs.Term = raftInitialLogTerm hs.Commit = raftInitialLogIndex atomic.StoreUint64(&r.lastIndex, raftInitialLogIndex) } else { // This is a new range we are receiving from another node. Start // from zero so we will receive a snapshot. atomic.StoreUint64(&r.lastIndex, 0) } } else if initialized && hs.Commit == 0 { // Normally, when the commit index changes, raft gives us a new // commit index to persist, however, during initialization, which // occurs entirely in cockroach, raft has no knowledge of this. // By setting this to the initial log index, we avoid a panic in // raft caused by this inconsistency. hs.Commit = raftInitialLogIndex } var cs raftpb.ConfState // For uninitalized ranges, membership is unknown at this point. if found || initialized { for _, rep := range desc.Replicas { cs.Nodes = append(cs.Nodes, uint64(rep.ReplicaID)) } } return hs, cs, nil }