// Phase1RPC handles ClassicPaxos.Phase1 rpc. func (this *Paxos) Phase1RPC(header *msgpb.Header, request *thispb.Phase1Request) (status error) { if !this.IsAcceptor() { this.Errorf("this paxos instance is not an acceptor; rejecting %s", header) return errs.ErrInvalid } lock, errLock := this.ctlr.TimedLock(msg.RequestTimeout(header), "acceptor") if errLock != nil { return errLock } defer lock.Unlock() clientID := header.GetMessengerId() respond := func() error { response := thispb.Phase1Response{} response.PromisedBallot = proto.Int64(this.promisedBallot) if this.votedBallot >= 0 { response.VotedBallot = proto.Int64(this.votedBallot) response.VotedValue = this.votedValue } message := thispb.PaxosMessage{} message.Phase1Response = &response errSend := msg.SendResponseProto(this.msn, header, &message) if errSend != nil { this.Errorf("could not send phase1 response to %s: %v", clientID, errSend) return errSend } return nil } ballot := request.GetBallotNumber() if ballot < this.promisedBallot { this.Warningf("phase1 request from %s is ignored due to stale ballot %d", clientID, ballot) return respond() } if ballot == this.promisedBallot { this.Warningf("duplicate phase1 request from client %s with an already "+ "promised ballot number %d", clientID, ballot) return respond() } // Save the promise into the wal. change := thispb.AcceptorChange{} change.PromisedBallot = proto.Int64(ballot) if err := this.doUpdateAcceptor(&change); err != nil { this.Errorf("could not update acceptor state: %v", err) return err } this.Infof("this acceptor has now promised higher ballot %d from %s", ballot, clientID) return respond() }
// LearnRPC handles ClassicPaxos.Learn rpc. func (this *Paxos) LearnRPC(header *msgpb.Header, request *thispb.LearnRequest) (status error) { if !this.IsLearner() { this.Errorf("this paxos instance is not a learner; rejecting %s", header) return errs.ErrInvalid } lock, errLock := this.ctlr.TimedLock(msg.RequestTimeout(header), "learner") if errLock != nil { return errLock } defer lock.Unlock() acceptor := header.GetMessengerId() if this.chosenValue == nil { change := thispb.LearnerChange{} change.VotedBallotList = request.VotedBallotList change.VotedValueList = request.VotedValueList change.VotedAcceptor = proto.String(acceptor) if err := this.doUpdateLearner(&change); err != nil { this.Errorf("could not update learner state: %v", err) return err } } response := thispb.LearnResponse{} if this.chosenValue != nil { response.KnowsChosenValue = proto.Bool(true) } message := thispb.PaxosMessage{} message.LearnResponse = &response errSend := msg.SendResponseProto(this.msn, header, &message) if errSend != nil { this.Errorf("could not respond to learn request from %s: %v", acceptor, errSend) return errSend } return nil }
// Propose proposes given value for a consensus. // // value: The value to propose for consensus. // // timeout: Maximum time duration for the propose operation. // // Returns the chosen value on success. func (this *Paxos) Propose(value []byte, timeout time.Duration) ( []byte, error) { // If local instance is not a proposer, find a random proposer. proposer := this.msn.UID() if !this.IsProposer() { proposer = this.proposerList[rand.Intn(len(this.proposerList))] this.Infof("using %s as the proposer", proposer) } // Send the propose request. request := thispb.ProposeRequest{} request.ProposedValue = value message := thispb.PaxosMessage{} message.ProposeRequest = &request reqHeader := this.msn.NewRequest(this.namespace, this.uid, "ClassicPaxos.Propose", timeout) errSend := msg.SendProto(this.msn, proposer, reqHeader, &message) if errSend != nil { this.Errorf("could not send propose request to %s: %v", proposer, errSend) return nil, errSend } // Wait for the response. _, errRecv := msg.ReceiveProto(this.msn, reqHeader, &message) if errRecv != nil { this.Errorf("could not receive propose response from %s: %v", proposer, errRecv) return nil, errRecv } if message.ProposeResponse == nil { this.Errorf("propose response from %s is empty", proposer) return nil, errs.ErrCorrupt } response := message.GetProposeResponse() return response.GetChosenValue(), nil }
// NotifyAllLearners sends the current vote to all learners. func (this *Paxos) NotifyAllLearners() (status error) { if !this.IsConfigured() || !this.IsAcceptor() { return nil } defer func() { if status != nil && !errs.IsClosed(status) { now := time.Now() next := now.Add(this.opts.LearnRetryInterval) _ = this.alarm.ScheduleAt(this.uid, next, this.NotifyAllLearners) } }() rlock := this.ctlr.ReadLock("acceptor", "config") // Stop notifications when all learners know the consensus value. if len(this.doneLearnerSet) == len(this.learnerList) { rlock.Unlock() return nil } // Make a copy of what we need: learners and the vote map. numLearners := len(this.learnerList) learnerList := append([]string{}, this.learnerList...) votedValueMap := make(map[int64][]byte) for ballot, value := range this.votedValueMap { if ackMap := this.learnerAckMap[ballot]; len(ackMap) < numLearners { votedValueMap[ballot] = value } } rlock.Unlock() request := thispb.LearnRequest{} for ballot, value := range votedValueMap { request.VotedBallotList = append(request.VotedBallotList, ballot) request.VotedValueList = append(request.VotedValueList, value) } message := thispb.PaxosMessage{} message.LearnRequest = &request // Send notification to all learners. reqHeader := this.msn.NewRequest(this.namespace, this.uid, "ClassicPaxos.Learn", this.opts.LearnTimeout) defer this.msn.CloseMessage(reqHeader) count, errSend := msg.SendAllProto(this.msn, learnerList, reqHeader, &message) if errSend != nil { this.Errorf("could not send learn request to all learners: %v", errSend) return errSend } // Wait for responses from all learners. for ii := 0; ii < count; ii++ { message := thispb.PaxosMessage{} resHeader, errRecv := msg.ReceiveProto(this.msn, reqHeader, &message) if errRecv != nil { this.Warningf("could not receive learner responses: %v", errRecv) break } learner := resHeader.GetMessengerId() if message.LearnResponse == nil { continue } response := message.GetLearnResponse() // Save the learner acknowledgment to the wal. change := thispb.AcceptorChange{} change.AckedLearner = proto.String(learner) if response.GetKnowsChosenValue() { change.AckedChosenValue = proto.Bool(true) } else { for ballot := range votedValueMap { change.AckedBallotList = append(change.AckedBallotList, ballot) } } if err := this.UpdateAcceptor(&change); err != nil { this.Errorf("could not update acceptor state: %v", err) return err } } return nil }
// Phase2RPC handles ClassicPaxos.Phase2 rpc. func (this *Paxos) Phase2RPC(header *msgpb.Header, request *thispb.Phase2Request) (status error) { if !this.IsAcceptor() { this.Errorf("this paxos instance is not an acceptor; rejecting %s", header) return errs.ErrInvalid } lock, errLock := this.ctlr.TimedLock(msg.RequestTimeout(header), "acceptor") if errLock != nil { return errLock } defer lock.Unlock() clientID := header.GetMessengerId() respond := func() error { response := thispb.Phase2Response{} response.PromisedBallot = proto.Int64(this.promisedBallot) if this.votedBallot >= 0 { response.VotedBallot = proto.Int64(this.votedBallot) response.VotedValue = this.votedValue } message := thispb.PaxosMessage{} message.Phase2Response = &response errSend := msg.SendResponseProto(this.msn, header, &message) if errSend != nil { this.Errorf("could not send phase2 response to %s: %v", clientID, errSend) return errSend } return nil } ballot := request.GetBallotNumber() if ballot < this.promisedBallot { this.Warningf("phase2 request from %s is ignored due to stale ballot %d", clientID, ballot) return respond() } if ballot > this.promisedBallot { this.Errorf("phase2 request from client %s without acquiring a prior "+ "promise", clientID) return respond() } value := request.GetProposedValue() // Save the phase2 vote into the wal. change := thispb.AcceptorChange{} change.VotedBallot = proto.Int64(ballot) change.VotedValue = value if err := this.doUpdateAcceptor(&change); err != nil { this.Errorf("could not update acceptor state: %v", err) return err } this.Infof("this acceptor has voted for %d in ballot %s", ballot, value) if err := respond(); err != nil { return err } // Schedule a notification to all learners. _ = this.alarm.ScheduleAt(this.uid, time.Now(), this.NotifyAllLearners) return nil }
// ProposeRPC handles ClassicPaxos.Propose rpc. func (this *Paxos) ProposeRPC(header *msgpb.Header, request *thispb.ProposeRequest) (status error) { if !this.IsProposer() { this.Errorf("this paxos instance is not a proposer; rejecting %s", header) return errs.ErrInvalid } // OPTIMIZATION If this object is also a learner and already knows the // consensus result, we don't need to perform expensive proposal. if this.IsLearner() { lock := this.ctlr.ReadLock("learner") defer lock.Unlock() if this.chosenValue != nil { response := thispb.ProposeResponse{} response.ChosenValue = this.chosenValue message := thispb.PaxosMessage{} message.ProposeResponse = &response errSend := msg.SendResponseProto(this.msn, header, &message) if errSend != nil { this.Errorf("could not send known chosen value as the propose "+ "response: %v", errSend) return errSend } return nil } lock.Unlock() } var chosen []byte proposal := request.GetProposedValue() for ii := 0; chosen == nil && msg.RequestTimeout(header) > 0; ii++ { if ii > 0 { time.Sleep(this.opts.ProposeRetryInterval) } // Get the next proposal ballot number. ballot, errNext := this.GetNextProposalBallot(msg.RequestTimeout(header)) if errNext != nil { this.Errorf("could not select higher ballot: %v", errNext) return errNext } this.Infof("using ballot number %d for the proposal", ballot) lock := this.ctlr.ReadLock("config") phase1AcceptorList := this.getPhase1AcceptorList(ballot) lock.Unlock() // Collect phase1 promises from majority number of acceptors. votedValue, acceptorList, errPhase1 := this.doPhase1(header, ballot, phase1AcceptorList) if errPhase1 != nil { this.Warningf("could not complete paxos phase1: %v", errPhase1) continue } // If a value was already voted, it may have been chosen, so propose it // instead. value := proposal if votedValue != nil { value = votedValue } // Collect phase2 votes from majority number of acceptors. errPhase2 := this.doPhase2(header, ballot, value, acceptorList) if errPhase2 != nil { this.Warningf("could not complete paxos phase2: %v", errPhase2) continue } // A value is chosen, break out of the loop. chosen = value break } if chosen == nil { this.Errorf("could not propose value %s", proposal) return errs.ErrRetry } // If local node is a learner, update him with the consensus result directly. defer func() { if this.IsLearner() { lock, errLock := this.ctlr.Lock("learner") if errLock != nil { return } defer lock.Unlock() change := thispb.LearnerChange{} change.ChosenValue = chosen if err := this.doUpdateLearner(&change); err != nil { this.Warningf("could not update local learner with the consensus "+ "result (ignored): %v", err) } } }() // Send propose response with chosen value. response := thispb.ProposeResponse{} response.ChosenValue = chosen message := thispb.PaxosMessage{} message.ProposeResponse = &response errSend := msg.SendResponseProto(this.msn, header, &message) if errSend != nil { this.Errorf("could not send propose response: %v", errSend) return errSend } return nil }
// doPhase2 performs classic paxos phase2 steps. func (this *Paxos) doPhase2(ctxHeader *msgpb.Header, ballot int64, value []byte, acceptorList []string) error { phase2request := thispb.Phase2Request{} phase2request.BallotNumber = proto.Int64(ballot) phase2request.ProposedValue = value message := thispb.PaxosMessage{} message.Phase2Request = &phase2request header := msg.NewNestedRequest(this.msn, ctxHeader, this.namespace, this.uid, "ClassicPaxos.Phase2") defer this.msn.CloseMessage(header) count, errSend := msg.SendAllProto(this.msn, acceptorList, header, &message) if errSend != nil && count < this.MajoritySize() { this.Errorf("could not send phase2 request to majority acceptors: %v", errSend) return errSend } this.Infof("send phase2 request %s to acceptors: %v", header, acceptorList) responseMap := make(map[string]*thispb.Phase2Response) for ii := 0; ii < count && len(responseMap) < this.MajoritySize(); ii++ { message := thispb.PaxosMessage{} resHeader, errRecv := msg.ReceiveProto(this.msn, header, &message) if errRecv != nil { break } acceptor := resHeader.GetMessengerId() if _, ok := responseMap[acceptor]; ok { this.Warningf("duplicate phase2 response from %s (ignored)", acceptor) continue } if message.Phase2Response == nil { this.Warningf("phase2 response data is empty from %s (ignored)", acceptor) continue } response := message.GetPhase2Response() promisedBallot := response.GetPromisedBallot() if promisedBallot < ballot { this.Errorf("as per phase2 response, acceptor %s seems to have rolled "+ "back on his phase1 promise to ballot %d", acceptor, ballot) continue } if promisedBallot > ballot { this.Warningf("acceptor %s has moved on to higher ballot %d", acceptor, promisedBallot) break } responseMap[acceptor] = response } if len(responseMap) < this.MajoritySize() { this.Warningf("could not get majority phase2 votes %v for value [%s] "+ "ballot %d", responseMap, value, ballot) return errs.ErrRetry } this.Infof("value [%s] is chosen by phase2 responses %v", value, responseMap) return nil }
// doPhase1 performs classic paxos phase1 steps. func (this *Paxos) doPhase1(ctxHeader *msgpb.Header, ballot int64, phase1AcceptorList []string) ([]byte, []string, error) { phase1request := thispb.Phase1Request{} phase1request.BallotNumber = proto.Int64(ballot) message := thispb.PaxosMessage{} message.Phase1Request = &phase1request reqHeader := msg.NewNestedRequest(this.msn, ctxHeader, this.namespace, this.uid, "ClassicPaxos.Phase1") defer this.msn.CloseMessage(reqHeader) count, errSend := msg.SendAllProto(this.msn, phase1AcceptorList, reqHeader, &message) if errSend != nil && count < this.MajoritySize() { this.Errorf("could not send phase1 request to majority nodes: %v", errSend) return nil, nil, errSend } this.Infof("sent phase1 request %s to acceptors %v", reqHeader, phase1AcceptorList) var acceptorList []string maxVotedBallot := int64(-1) var maxVotedValue []byte responseMap := make(map[string]*thispb.Phase1Response) for ii := 0; ii < count && len(responseMap) < this.MajoritySize(); ii++ { message := thispb.PaxosMessage{} resHeader, errRecv := msg.ReceiveProto(this.msn, reqHeader, &message) if errRecv != nil { this.Warningf("could not receive more phase1 responses for %s: %v", reqHeader, errRecv) break } acceptor := resHeader.GetMessengerId() if _, ok := responseMap[acceptor]; ok { this.Warningf("duplicate phase1 response from %s (ignored)", acceptor) continue } if message.Phase1Response == nil { this.Warningf("phase1 response data is empty from %s (ignored)", acceptor) continue } response := message.GetPhase1Response() if response.PromisedBallot == nil { this.Warningf("phase1 response from %s has no promise ballot", acceptor) continue } promisedBallot := response.GetPromisedBallot() if promisedBallot > ballot { this.Warningf("acceptor %s has moved on to ballot %d", acceptor, promisedBallot) break } if promisedBallot < ballot { this.Errorf("acceptor %s did not promise this ballot %d", acceptor, ballot) continue } // We received a promise from this acceptor. acceptorList = append(acceptorList, acceptor) responseMap[acceptor] = response // If there was a voted value already, we need to pick the max voted value. if response.VotedBallot != nil { votedBallot := response.GetVotedBallot() if votedBallot > maxVotedBallot { maxVotedBallot = votedBallot maxVotedValue = response.GetVotedValue() } } } if len(responseMap) < this.MajoritySize() { this.Warningf("could not get majority phase1 votes %v for ballot %d", responseMap, ballot) return nil, nil, errs.ErrRetry } if maxVotedValue == nil { this.Infof("no prior value was chosen as per phase1 responses %v", responseMap) } else { this.Infof("value [%s] could have been chosen as per phase1 responses %v", maxVotedValue, responseMap) } return maxVotedValue, acceptorList, nil }