// InitialState implements the raft.Storage interface.
func (r *Range) InitialState() (raftpb.HardState, raftpb.ConfState, error) {
	var hs raftpb.HardState
	found, err := engine.MVCCGetProto(r.rm.Engine(), keys.RaftHardStateKey(r.Desc().RaftID),
		proto.ZeroTimestamp, true, nil, &hs)
	if err != nil {
		return raftpb.HardState{}, raftpb.ConfState{}, err
	}
	if !found {
		// We don't have a saved HardState, so set up the defaults.
		if r.isInitialized() {
			// Set the initial log term.
			hs.Term = raftInitialLogTerm
			hs.Commit = raftInitialLogIndex

			atomic.StoreUint64(&r.lastIndex, raftInitialLogIndex)
		} else {
			// This is a new range we are receiving from another node. Start
			// from zero so we will receive a snapshot.
			atomic.StoreUint64(&r.lastIndex, 0)
		}
	}

	var cs raftpb.ConfState
	// For uninitalized ranges, membership is unknown at this point.
	if found || r.isInitialized() {
		for _, rep := range r.Desc().Replicas {
			cs.Nodes = append(cs.Nodes, uint64(proto.MakeRaftNodeID(rep.NodeID, rep.StoreID)))
		}
	}

	return hs, cs, nil
}
Exemple #2
0
// synthesizeHardState synthesizes a HardState from the given ReplicaState and
// any existing on-disk HardState in the context of a snapshot, while verifying
// that the application of the snapshot does not violate Raft invariants. It
// must be called after the supplied state and ReadWriter have been updated
// with the result of the snapshot.
// If there is an existing HardState, we must respect it and we must not apply
// a snapshot that would move the state backwards.
func synthesizeHardState(
	ctx context.Context, eng engine.ReadWriter, s storagebase.ReplicaState, oldHS raftpb.HardState,
) error {
	newHS := raftpb.HardState{
		Term: s.TruncatedState.Term,
		// Note that when applying a Raft snapshot, the applied index is
		// equal to the Commit index represented by the snapshot.
		Commit: s.RaftAppliedIndex,
	}

	if oldHS.Commit > newHS.Commit {
		return errors.Errorf("can't decrease HardState.Commit from %d to %d",
			oldHS.Commit, newHS.Commit)
	}
	if oldHS.Term > newHS.Term {
		// The existing HardState is allowed to be ahead of us, which is
		// relevant in practice for the split trigger. We already checked above
		// that we're not rewinding the acknowledged index, and we haven't
		// updated votes yet.
		newHS.Term = oldHS.Term
	}
	// If the existing HardState voted in this term, remember that.
	if oldHS.Term == newHS.Term {
		newHS.Vote = oldHS.Vote
	}
	return errors.Wrapf(setHardState(ctx, eng, s.Desc.RangeID, newHS), "writing HardState %+v", &newHS)
}
Exemple #3
0
func updateHardState(eng engine.ReadWriter, s storagebase.ReplicaState) error {
	// Load a potentially existing HardState as we may need to preserve
	// information about cast votes. For example, during a Split for which
	// another node's new right-hand side has contacted us before our left-hand
	// side called in here to create the group.
	rangeID := s.Desc.RangeID
	oldHS, err := loadHardState(eng, rangeID)
	if err != nil {
		return err
	}

	newHS := raftpb.HardState{
		Term:   s.TruncatedState.Term,
		Commit: s.RaftAppliedIndex,
	}

	if !raft.IsEmptyHardState(oldHS) {
		if oldHS.Commit > newHS.Commit {
			newHS.Commit = oldHS.Commit
		}
		if oldHS.Term > newHS.Term {
			newHS.Term = oldHS.Term
		}
		newHS.Vote = oldHS.Vote
	}

	return setHardState(eng, rangeID, newHS)
}
Exemple #4
0
// writeInitialState bootstraps a new Raft group (i.e. it is called when we
// bootstrap a Range, or when setting up the right hand side of a split).
// Its main task is to persist a consistent Raft (and associated Replica) state
// which does not start from zero but presupposes a few entries already having
// applied.
// The supplied MVCCStats are used for the Stats field after adjusting for
// persisting the state itself, and the updated stats are returned.
func writeInitialState(
	eng engine.ReadWriter, ms enginepb.MVCCStats, desc roachpb.RangeDescriptor,
) (enginepb.MVCCStats, error) {
	rangeID := desc.RangeID
	var s storagebase.ReplicaState

	s.TruncatedState = &roachpb.RaftTruncatedState{
		Term:  raftInitialLogTerm,
		Index: raftInitialLogIndex,
	}
	s.RaftAppliedIndex = s.TruncatedState.Index
	s.Desc = &roachpb.RangeDescriptor{
		RangeID: rangeID,
	}
	s.Stats = ms

	newMS, err := saveState(eng, s)
	if err != nil {
		return enginepb.MVCCStats{}, err
	}

	// Load a potentially existing HardState as we may need to preserve
	// information about cast votes. For example, during a Split for which
	// another node's new right-hand side has contacted us before our left-hand
	// side called in here to create the group.
	oldHS, err := loadHardState(eng, rangeID)
	if err != nil {
		return enginepb.MVCCStats{}, err
	}

	newHS := raftpb.HardState{
		Term:   s.TruncatedState.Term,
		Commit: s.TruncatedState.Index,
	}

	if !raft.IsEmptyHardState(oldHS) {
		if oldHS.Commit > newHS.Commit {
			newHS.Commit = oldHS.Commit
		}
		if oldHS.Term > newHS.Term {
			newHS.Term = oldHS.Term
		}
		newHS.Vote = oldHS.Vote
	}

	if err := setHardState(eng, rangeID, newHS); err != nil {
		return enginepb.MVCCStats{}, err
	}

	if err := setLastIndex(eng, rangeID, s.TruncatedState.Index); err != nil {
		return enginepb.MVCCStats{}, err
	}

	return newMS, nil
}
// InitialState implements the raft.Storage interface.
func (r *Replica) InitialState() (raftpb.HardState, raftpb.ConfState, error) {
	var hs raftpb.HardState
	desc := r.Desc()
	found, err := engine.MVCCGetProto(r.store.Engine(), keys.RaftHardStateKey(desc.RangeID),
		roachpb.ZeroTimestamp, true, nil, &hs)
	if err != nil {
		return raftpb.HardState{}, raftpb.ConfState{}, err
	}
	initialized := r.isInitialized()
	if !found {
		// We don't have a saved HardState, so set up the defaults.
		if initialized {
			// Set the initial log term.
			hs.Term = raftInitialLogTerm
			hs.Commit = raftInitialLogIndex

			atomic.StoreUint64(&r.lastIndex, raftInitialLogIndex)
		} else {
			// This is a new range we are receiving from another node. Start
			// from zero so we will receive a snapshot.
			atomic.StoreUint64(&r.lastIndex, 0)
		}
	} else if initialized && hs.Commit == 0 {
		// Normally, when the commit index changes, raft gives us a new
		// commit index to persist, however, during initialization, which
		// occurs entirely in cockroach, raft has no knowledge of this.
		// By setting this to the initial log index, we avoid a panic in
		// raft caused by this inconsistency.
		hs.Commit = raftInitialLogIndex
	}

	var cs raftpb.ConfState
	// For uninitalized ranges, membership is unknown at this point.
	if found || initialized {
		for _, rep := range desc.Replicas {
			cs.Nodes = append(cs.Nodes, uint64(rep.ReplicaID))
		}
	}

	return hs, cs, nil
}