//
// Abort the LeaderSyncProxy.
//
func (l *LeaderSyncProxy) abort() {

	voter := l.GetFid()

	common.SafeRun("LeaderSyncProxy.abort()",
		func() {
			// terminate any on-going messaging with follower. This will force
			// the follower to go through election again
			l.follower.Close()
		})

	common.SafeRun("LeaderSyncProxy.abort()",
		func() {
			// clean up the ConsentState
			l.state.removeAcceptedEpoch(voter)
			l.state.removeEpochAck(voter)
			l.state.removeNewLeaderAck(voter)
		})

	// donech should never be closed. But just to be safe ...
	common.SafeRun("LeaderSyncProxy.abort()",
		func() {
			l.donech <- false
		})
}
//
// Cleanup internal state upon exit
//
func (s *Coordinator) cleanupState() {

	// tell that coordinator is no longer ready
	s.markNotReady()

	s.state.mutex.Lock()
	defer s.state.mutex.Unlock()

	common.SafeRun("Coordinator.cleanupState()",
		func() {
			if s.listener != nil {
				s.listener.Close()
			}
		})

	common.SafeRun("Coordinator.cleanupState()",
		func() {
			if s.site != nil {
				s.site.Close()
			}
		})

	for len(s.state.incomings) > 0 {
		request := <-s.state.incomings
		request.Err = fmt.Errorf("Terminate Request due to server termination")

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.pendings {
		request.Err = fmt.Errorf("Terminate Request due to server termination")

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.proposals {
		request.Err = fmt.Errorf("Terminate Request due to server termination")

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}
}
//
// Cleanup internal state upon exit
//
func (s *EmbeddedServer) cleanupState() {

	s.state.mutex.Lock()
	defer s.state.mutex.Unlock()

	common.SafeRun("EmbeddedServer.cleanupState()",
		func() {
			if s.listener != nil {
				s.listener.Close()
			}
		})

	common.SafeRun("EmbeddedServer.cleanupState()",
		func() {
			if s.repo != nil {
				s.repo.Close()
			}
		})

	for len(s.state.incomings) > 0 {
		request := <-s.state.incomings
		request.Err = common.NewError(common.SERVER_ERROR, "Terminate Request due to server termination")

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.pendings {
		request.Err = common.NewError(common.SERVER_ERROR, "Terminate Request due to server termination")

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.proposals {
		request.Err = common.NewError(common.SERVER_ERROR, "Terminate Request due to server termination")

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}
}
//
// Abort the FollowerSyncProxy. By closing the leader's PeerPipe,
// the execution goroutine will eventually error out and terminate by itself.
//
func (f *FollowerSyncProxy) abort() {

	common.SafeRun("FollowerSyncProxy.abort()",
		func() {
			// terminate any on-going messaging with the leader
			f.leader.Close()
		})

	common.SafeRun("FollowerSyncProxy.abort()",
		func() {
			f.donech <- false
		})
}
func runOnce(peer string,
	requestMgr RequestMgr,
	handler ActionHandler,
	factory MsgFactory,
	killch <-chan bool,
	readych chan<- bool,
	alivech chan<- bool,
	pingch <-chan bool,
	once *sync.Once) (isKilled bool) {

	// Catch panic at the main entry point for WatcherServer
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in WatcherServer.runOnce() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("WatcherServer.runOnce() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		if requestMgr != nil {
			requestMgr.CleanupOnError()
		}
	}()

	// create connection with a peer
	conn, err := createConnection(peer)
	if err != nil {
		log.Current.Errorf("WatcherServer.runOnce() error : %s", err)
		return false
	}
	pipe := common.NewPeerPipe(conn)
	log.Current.Debugf("WatcherServer.runOnce() : Watcher successfully created TCP connection to peer %s", peer)

	// Close the connection to the peer. If the connection is closed,
	// the sync proxy and watcher will also terminate by err-ing out.
	// If the sync proxy and watcher close the pipe upon termination,
	// it is ok to close it again here.
	defer common.SafeRun("WatcherServer.runOnce()",
		func() {
			pipe.Close()
		})

	// start synchronizing with the metadata server
	success, isKilled := syncWithPeer(pipe, handler, factory, killch)

	// run watcher after synchronization
	if success {
		if !runWatcher(pipe, requestMgr, handler, factory, killch, readych, alivech, pingch, once) {
			log.Current.Errorf("WatcherServer.runOnce() : Watcher terminated unexpectedly.")
			return false
		}
	} else if !isKilled {
		log.Current.Errorf("WatcherServer.runOnce() : Watcher failed to synchronize with peer %s", peer)
		return false
	}

	return true
}
//
// Run the server until it stops. Will not attempt to re-run.
//
func (s *EmbeddedServer) runOnce() {

	log.Current.Debugf("EmbeddedServer.runOnce() : Start Running Server")

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in EmbeddedServer.runOnce() : %v\n", r)
			log.Current.Errorf("Diagnostic Stack ...")
			log.Current.Errorf("%s", log.Current.StackTrace())
		}

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				s.cleanupState()
			})
	}()

	// Check if the server has been terminated explicitly. If so, don't run.
	if !s.IsDone() {

		// runServer() is done if there is an error or it is terminated explicitly (killch)
		s.state.setStatus(protocol.LEADING)
		if err := protocol.RunLeaderServerWithCustomHandler(
			s.msgAddr, s.listener, s.state, s.handler, s.factory, s.reqHandler, s.skillch); err != nil {
			log.Current.Errorf("EmbeddedServer.RunOnce() : Error Encountered From Server : %s", err.Error())
		}
	} else {
		log.Current.Debugf("EmbeddedServer.RunOnce(): Server has been terminated explicitly. Terminate.")
	}
}
//
// Find which peer to connect to
//
func findPeerToConnect(host string,
	peerUDP []string,
	peerTCP []string,
	factory MsgFactory,
	handler ActionHandler,
	killch <-chan bool) (leader string, isKilled bool) {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in findPeerToConnect() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("findPeerToConnect() terminates : Diagnostic Stack ...")
			log.Current.LazyDebug(log.Current.StackTrace)
		}
	}()

	// Run master election to figure out who is the leader. Only connect to the leader for now.
	site, err := CreateElectionSite(host, peerUDP, factory, handler, true)
	if err != nil {
		log.Current.Errorf("WatcherServer.findPeerToConnect() error : %s", err)
		return "", false
	}

	defer func() {
		common.SafeRun("Server.cleanupState()",
			func() {
				site.Close()
			})
	}()

	resultCh := site.StartElection()
	if resultCh == nil {
		log.Current.Errorf("WatcherServer.findPeerToConnect: Election Site is in progress or is closed.")
		return "", false
	}

	select {
	case leader, ok := <-resultCh:
		if !ok {
			log.Current.Errorf("WatcherServer.findPeerToConnect: Election Fails")
			return "", false
		}

		for i, peer := range peerUDP {
			if peer == leader {
				return peerTCP[i], false
			}
		}

		log.Current.Errorf("WatcherServer.findPeerToConnect : Cannot find matching port for peer. Peer UDP port = %s", leader)
		return "", false

	case <-killch:
		return "", true
	}
}
//
// Goroutine. Start a listener to listen to messages from the follower.
// Note that each follower has its own receive queue. This
// is to ensure that if the queue fills up for a single follower,
// only the connection of that follower may get affected.
// The listener can be killed by calling terminate() or by closing
// the PeerPipe.
//
func (l *messageListener) start() {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in messageListener.start() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("leader's messageListener.start() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("messageListener.start()",
			func() {
				l.leader.removeListener(l)
			})

		common.SafeRun("messageListener.start()",
			func() {
				l.pipe.Close()
			})
	}()

	log.Current.Debugf("messageListener.start(): start listening to message from peer %s", l.fid)
	reqch := l.pipe.ReceiveChannel()

	for {
		select {
		case req, ok := <-reqch:
			if ok {
				// TODO: If this send blocks because l.notifications is full, will it
				// become unblocked once leader.notifications drains?
				l.leader.QueueRequest(l.fid, req)
			} else {
				// The channel is closed. Need to shut down the listener.
				log.Current.Infof("messageListener.start(): message channel closed. Remove peer %s as follower.", l.fid)
				return
			}
		case <-l.killch:
			log.Current.Debugf("messageListener.start(): Listener for %s receive kill signal. Terminate.", l.fid)
			return
		}
	}
}
func (m *IndexManager) runTimestampKeeper() {

	defer logging.Debugf("IndexManager.runTimestampKeeper() : terminate")

	inboundch := m.timer.getOutputChannel()

	persistTimestamp := true // save the first timestamp always
	lastPersistTime := uint64(time.Now().UnixNano())

	timestamps, err := m.repo.GetStabilityTimestamps()
	if err != nil {
		// TODO : Determine timestamp not exist versus forestdb error
		logging.Errorf("IndexManager.runTimestampKeeper() : cannot get stability timestamp from repository. Create a new one.")
		timestamps = createTimestampListSerializable()
	}

	for {
		select {
		case <-m.timekeeperStopCh:
			return

		case timestamp, ok := <-inboundch:
			if !ok {
				return
			}

			gometaC.SafeRun("IndexManager.runTimestampKeeper()",
				func() {
					timestamps.addTimestamp(timestamp)

					persistTimestamp = persistTimestamp ||
						uint64(time.Now().UnixNano())-lastPersistTime > m.timestampPersistInterval
					if persistTimestamp {
						if err := m.repo.SetStabilityTimestamps(timestamps); err != nil {
							logging.Errorf("IndexManager.runTimestampKeeper() : cannot set stability timestamp into repository.")
						} else {
							logging.Debugf("IndexManager.runTimestampKeeper() : saved stability timestamp to repository")
							persistTimestamp = false
							lastPersistTime = uint64(time.Now().UnixNano())
						}
					}

					data, err := marshallTimestampSerializable(timestamp)
					if err != nil {
						logging.Debugf(
							"IndexManager.runTimestampKeeper(): error when marshalling timestamp. Ignore timestamp. Error=%s",
							err.Error())
					} else {
						m.coordinator.NewRequest(uint32(OPCODE_NOTIFY_TIMESTAMP), "Stability Timestamp", data)
					}
				})
		}
	}
}
//
// Run the server until it stops. Will not attempt to re-run.
//
func RunOnce() int {

	log.Current.Debugf("Server.RunOnce() : Start Running Server")

	pauseTime := 0
	gServer = new(Server)

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in Server.runOnce() : %s\n", r)
		}

		log.Current.Debugf("RunOnce() terminates : Diagnostic Stack ...")
		log.Current.LazyDebug(log.Current.StackTrace)

		common.SafeRun("Server.cleanupState()",
			func() {
				gServer.cleanupState()
			})
	}()

	err := gServer.bootstrap()
	if err != nil {
		pauseTime = 200
	}

	// Check if the server has been terminated explicitly. If so, don't run.
	if !gServer.IsDone() {

		// runElection() finishes if there is an error, the election result is known, or
		// it is being terminated. Unless killed explicitly, a goroutine
		// will continue to run to respond to other peers' election requests.
		leader, err := gServer.runElection()
		if err != nil {
			log.Current.Errorf("Server.RunOnce() : Error Encountered During Election : %s", err.Error())
			pauseTime = 100
		} else {
			// Check if the server has been terminated explicitly. If so, don't run.
			if !gServer.IsDone() {
				// runServer() is done if there is an error or it is terminated explicitly (killch)
				err := gServer.runServer(leader)
				if err != nil {
					log.Current.Errorf("Server.RunOnce() : Error Encountered From Server : %s", err.Error())
				}
			}
		}
	} else {
		log.Current.Debugf("Server.RunOnce(): Server has been terminated explicitly. Terminate.")
	}

	return pauseTime
}
//
// Create a new FollowerServer. This is a blocking call until
// the FollowerServer terminates. Make sure the killch is a buffered
// channel such that if the goroutine running RunFollowerServer goes
// away, the sender won't get blocked.
//
func RunFollowerServer(naddr string,
	leader string,
	ss RequestMgr,
	handler ActionHandler,
	factory MsgFactory,
	killch <-chan bool) (err error) {

	// Catch panic at the main entry point for FollowerServer
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in RunFollowerServer() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
			err = r.(error)
		} else {
			log.Current.Debugf("%s", "RunFollowerServer terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}
	}()

	// create connection to the leader
	conn, err := createConnection(leader)
	if err != nil {
		return err
	}

	pipe := common.NewPeerPipe(conn)
	log.Current.Debugf("FollowerServer.RunFollowerServer() : Follower %s successfully "+
		"created TCP connection to leader %s, local address %s", naddr, leader, conn.LocalAddr())

	// Close the connection to the leader. If the connection is closed,
	// the sync proxy and follower will also terminate by err-ing out.
	// If the sync proxy and follower close the pipe upon termination,
	// it is ok to close it again here.
	defer common.SafeRun("FollowerServer.runFollowerServer()",
		func() {
			pipe.Close()
		})

	// start synchronizing with the leader
	success := syncWithLeader(naddr, pipe, handler, factory, killch)

	// run the follower after synchronization
	if success {
		runFollower(pipe, ss, handler, factory, killch)
		log.Current.Debugf("FollowerServer.RunFollowerServer() : Follower Server %s terminate", naddr)
		err = nil
	} else {
		err = common.NewError(common.SERVER_ERROR,
			fmt.Sprintf("Follower %s failed to synchronize with leader %s", naddr, leader))
	}

	return err
}
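// Illustrative usage sketch (an editorial addition, not part of the original
// source): it shows how a caller might honor the buffered-killch requirement
// described in the comment above RunFollowerServer. The exampleRunFollower
// name is hypothetical, and requestMgr, handler, and factory are assumed to be
// the caller's own implementations of RequestMgr, ActionHandler, and MsgFactory.
func exampleRunFollower(naddr string, leader string, requestMgr RequestMgr,
	handler ActionHandler, factory MsgFactory) chan<- bool {

	// Buffered so that sending the kill signal never blocks, even if the
	// goroutine running RunFollowerServer has already returned.
	killch := make(chan bool, 1)

	go func() {
		if err := RunFollowerServer(naddr, leader, requestMgr, handler, factory, killch); err != nil {
			log.Current.Errorf("follower server terminated : %s", err.Error())
		}
	}()

	return killch
}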
func (o *observer) send(msg common.Packet) {

	defer common.SafeRun("observer.Send()",
		func() {
			select {
			case o.packets <- msg: // no-op
			case <-o.killch:
				// if killch is closed, this is non-blocking.
				return
			}
		})
}
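// Editorial note (not part of the original source): observer.send relies on
// the fact that a receive from a closed channel is always ready, so once
// killch is closed the select above can no longer block on o.packets. A
// minimal, self-contained demonstration of that channel property:
func closedKillchUnblocksSend() {
	packets := make(chan int) // unbuffered: a bare send would block forever
	killch := make(chan bool)
	close(killch) // a receive from a closed channel never blocks

	select {
	case packets <- 1:
		// never taken here: there is no receiver for packets
	case <-killch:
		// taken immediately because killch is closed
	}
}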
//
// Run the coordinator until it stops. Will not attempt to re-run.
//
func (c *Coordinator) runOnce(config string) int {

	logging.Debugf("Coordinator.runOnce() : Start Running Coordinator")

	pauseTime := 0

	defer func() {
		if r := recover(); r != nil {
			logging.Warnf("panic in Coordinator.runOnce() : %s\n", r)
		}

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				c.cleanupState()
			})
	}()

	err := c.bootstrap(config)
	if err != nil {
		pauseTime = 200
	}

	// Check if the coordinator has been terminated explicitly. If so, don't run.
	if !c.IsDone() {

		// runElection() finishes if there is an error, the election result is known, or
		// it is being terminated. Unless killed explicitly, a goroutine
		// will continue to run to respond to other peers' election requests.
		leader, err := c.runElection()
		if err != nil {
			logging.Warnf("Coordinator.runOnce() : Error Encountered During Election : %s", err.Error())
			pauseTime = 100
		} else {
			// Check if the coordinator has been terminated explicitly. If so, don't run.
			if !c.IsDone() {
				// runProtocol() is done if there is an error or it is terminated explicitly (killch)
				err := c.runProtocol(leader)
				if err != nil {
					logging.Warnf("Coordinator.RunOnce() : Error Encountered From Coordinator : %s", err.Error())
				}
			}
		}
	} else {
		logging.Infof("Coordinator.RunOnce(): Coordinator has been terminated explicitly. Terminate.")
	}

	return pauseTime
}
//
// Terminate the leader. It is a no-op if the leader has already
// completed successfully.
//
func (l *Leader) Terminate() {

	l.mutex.Lock()
	defer l.mutex.Unlock()

	if !l.isClosed {
		l.isClosed = true

		for _, listener := range l.followers {
			listener.terminate()
		}

		for _, listener := range l.watchers {
			listener.terminate()
		}

		common.SafeRun("Leader.Terminate()",
			func() {
				close(l.notifications)
			})
	}
}
//
// Main message processing loop for the leader.
//
func (l *Leader) listen() {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in Leader.listen() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("Leader.listen() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("Leader.listen()",
			func() {
				l.Terminate()
			})
	}()

	log.Current.Debugf("Leader.listen(): start listening to message for leader")

	for {
		select {
		case msg, ok := <-l.notifications:
			if ok {
				if !l.IsClosed() {
					err := l.handleMessage(msg.payload, msg.fid)
					if err != nil {
						log.Current.Errorf("Leader.listen(): Encounter error when processing message %s. Error %s. Terminate",
							msg.fid, err.Error())
						return
					}
				} else {
					log.Current.Debugf("Leader.listen(): Leader is closed. Terminate message processing loop.")
					return
				}
			} else {
				// The channel is closed.
				log.Current.Debugf("Leader.listen(): message channel closed. Terminate message processing loop for leader.")
				return
			}
		}
	}
}
func safeSend(header string, donech chan bool, result bool) {
	common.SafeRun(header,
		func() {
			donech <- result
		})
}
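// Editorial sketch (not part of the original source): the reason donech sends
// go through common.SafeRun is that sending on a closed channel panics. A
// recover-based guard of roughly the following shape keeps such a panic from
// killing the calling goroutine; the exact behavior of common.SafeRun (such as
// its logging) is assumed here, not taken from this listing.
func safeSendSketch(donech chan bool, result bool) {
	defer func() {
		if r := recover(); r != nil {
			// donech was already closed; swallow the panic and move on.
		}
	}()
	donech <- result
}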
//
// Goroutine for processing each request one-by-one
//
func (s *LeaderServer) processRequest(killch <-chan bool,
	listenerState *ListenerState,
	reqHandler CustomRequestHandler) (err error) {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in LeaderServer.processRequest() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
			err = r.(error)
		} else {
			log.Current.Debugf("LeaderServer.processRequest() : Terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("LeaderServer.processRequest()",
			func() {
				listenerState.killch <- true
			})
	}()

	// Start the processing loop only after I am confirmed as the leader (there
	// is a quorum of followers that have sync'ed with me).
	if !s.waitTillReady() {
		return common.NewError(common.ELECTION_ERROR,
			"LeaderServer.processRequest(): Leader times out waiting for quorum of followers. Terminate")
	}

	// At this point, the leader has gotten a majority of followers to follow, so it
	// can proceed. It is possible that it may later lose its quorum of followers. But in that
	// case, the leader will not be able to process any request.
	log.Current.Debugf("LeaderServer.processRequest(): Leader Server is ready to process requests")

	// The leader is ready at this time. This implies that a quorum of followers has
	// followed this leader. Get the change channel to keep track of the number of followers.
	// If the leader no longer has quorum, it needs to let go of its leadership.
	leaderchangech := s.leader.GetEnsembleChangeChannel()
	ensembleSize := s.handler.GetEnsembleSize()

	// notify the request processor to start processing new requests
	incomings := s.state.requestMgr.GetRequestChannel()

	var outgoings <-chan common.Packet = nil
	if reqHandler != nil {
		outgoings = reqHandler.GetResponseChannel()
	} else {
		outgoings = make(<-chan common.Packet)
	}

	for {
		select {
		case handle, ok := <-incomings:
			if ok {
				// de-queue the request
				s.state.requestMgr.AddPendingRequest(handle)

				// forward the request to the leader
				s.leader.QueueRequest(s.leader.GetFollowerId(), handle.Request)
			} else {
				// server shutdown.
				log.Current.Debugf("LeaderServer.processRequest(): channel for receiving client request is closed. Terminate.")
				return nil
			}
		case msg, ok := <-outgoings:
			if ok {
				// forward the message to the leader
				s.leader.QueueResponse(msg)
			} else {
				log.Current.Infof("LeaderServer.processRequest(): channel for receiving custom response is closed. Ignore.")
			}
		case <-killch:
			// server shutdown
			log.Current.Debugf("LeaderServer.processRequest(): receive kill signal. Stop Client request processing.")
			return nil
		case <-listenerState.donech:
			// The listener is down. Terminate this request processing loop as well.
			log.Current.Infof("LeaderServer.processRequest(): follower listener terminates. Stop client request processing.")
			return nil
		case <-leaderchangech:
			// Listen to any change to the leader's active ensemble, and ensure that the leader maintains a majority.
			// The active ensemble is the set of running followers connected to the leader.
			numFollowers := s.leader.GetActiveEnsembleSize()
			if numFollowers <= int(ensembleSize/2) {
				// The leader has lost its majority of followers.
				log.Current.Infof("LeaderServer.processRequest(): leader loses majority of followers. Stop client request processing.")
				return nil
			}
		}
	}

	return nil
}
//
// Listen to new connection requests from the follower/peer.
// Start a new LeaderSyncProxy to synchronize the state
// between the leader and the peer.
//
func (l *LeaderServer) listenFollower(listenerState *ListenerState) {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in LeaderServer.listenFollower() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("LeaderServer.listenFollower() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("LeaderServer.listenFollower()",
			func() {
				l.terminateAllOutstandingProxies()
			})

		common.SafeRun("LeaderServer.listenFollower()",
			func() {
				listenerState.donech <- true
			})
	}()

	connCh := l.listener.ConnChannel()
	if connCh == nil {
		// It should not happen unless the listener is closed
		return
	}

	// If there is a single server, then we don't need to wait for a follower
	// for the server to be ready to process requests.
	if l.handler.GetEnsembleSize() == 1 {
		if err := l.incrementEpoch(); err != nil {
			log.Current.Errorf("LeaderServer.listenFollower(): Error when bootstrapping leader with ensembleSize=1. Error = %s", err)
			return
		}

		l.notifyReady()
	}

	for {
		select {
		case conn, ok := <-connCh:
			{
				if !ok {
					// channel close. Simply return.
					return
				}

				// There is a new peer connection request from the follower. Start a proxy to synchronize with the follower.
				// The leader does not proactively connect to the follower:
				// 1) The ensemble is stable, but a follower may just reboot and needs to connect to the leader
				// 2) Even if the leader receives votes from the follower, the leader cannot tell for sure that the follower does
				//    not change its vote. Only if the follower connects can the leader confirm the follower's allegiance.
				//
				log.Current.Debugf("LeaderServer.listenFollower(): Receive connection request from follower %s", conn.RemoteAddr())
				if l.registerOutstandingProxy(conn.RemoteAddr().String()) {
					pipe := common.NewPeerPipe(conn)
					go l.startProxy(pipe)
				} else {
					log.Current.Infof("LeaderServer.listenFollower(): Sync Proxy already running for %s. Ignore new request.", conn.RemoteAddr())
					conn.Close()
				}
			}
		case <-listenerState.killch:
			log.Current.Debugf("LeaderServer.listenFollower(): Receive kill signal. Terminate.")
			return
		}
	}
}
//
// Goroutine. Listen to votes coming from the peers for a
// particular ballot. This is the only goroutine that
// handles all incoming requests.
//
// Voter -> the peer that replies to the ballot with a vote
// Candidate -> the peer that is voted for by the voter.
// It is the peer (CndId) that is inside the vote.
//
func (w *pollWorker) listen() {

	// If this loop terminates (e.g. due to panic), then make sure
	// there is no outstanding ballot waiting for a result. Close
	// any channel for an outstanding ballot such that the caller
	// won't get blocked forever.
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in pollWorker.listen() : %s\n", r)
		}

		// Make sure we close the ElectionSite first such that
		// there is no new ballot coming while we are shutting
		// down the pollWorker. If not, then some goroutine
		// may be waiting forever for the new ballot to complete.
		common.SafeRun("pollWorker.listen()",
			func() {
				w.site.Close()
			})

		// unlock anyone waiting for the existing ballot to complete.
		common.SafeRun("pollWorker.listen()",
			func() {
				if w.ballot != nil {
					close(w.ballot.resultch)
					w.ballot = nil
				}
			})
	}()

	// Get the channel for receiving votes from the peers.
	reqch := w.site.messenger.DefaultReceiveChannel()

	timeout := common.NewBackoffTimer(
		common.BALLOT_TIMEOUT*time.Millisecond,
		common.BALLOT_MAX_TIMEOUT*time.Millisecond,
		2,
	)

	inFinalize := false
	finalizeTimer := common.NewStoppedResettableTimer(common.BALLOT_FINALIZE_WAIT * time.Millisecond)

	for {
		select {
		case w.ballot = <-w.listench: // listench should never close
			{
				// Before listening to any vote, see if we have reached quorum already.
				// This should only happen if there is only one server in the
				// ensemble. If this election is for solicit purpose, then
				// run the election all the time.
				if !w.site.solicitOnly &&
					w.checkQuorum(w.ballot.result.receivedVotes, w.ballot.result.proposed) {
					w.site.master.setWinner(w.ballot.result)
					w.ballot.resultch <- true
					w.ballot = nil
				} else {
					// There is a new ballot.
					timeout.Reset()
					inFinalize = false
					finalizeTimer.Stop()
				}
			}
		// Receiving a vote
		case msg, ok := <-reqch:
			{
				if !ok {
					return
				}

				// Receive a new vote. The voter is identified by its UDP port,
				// which must remain the same during the election phase.
				vote := msg.Content.(VoteMsg)
				voter := msg.Peer

				// If I am receiving a vote that is just for soliciting my response,
				// then respond with my winning vote only after I am confirmed as
				// either a leader or follower. This ensures that the watcher will
				// only find a leader from a stable ensemble. This also ensures
				// that the watcher will only count the votes from active participants;
				// therefore, it will not count votes from other watchers or its
				// own vote (the code path for handling votes from electing members
				// is never called for a watcher).
				if vote.GetSolicit() {
					status := w.site.handler.GetStatus()
					if status == LEADING || status == FOLLOWING {
						w.respondInquiry(voter, vote)
					}
					continue
				}

				// Check if the voter is in the ensemble
				if !w.site.inEnsemble(voter) {
					continue
				}

				if w.ballot == nil {
					// If there is no ballot or the vote is from a watcher,
					// then just respond if I have a winner.
					w.respondInquiry(voter, vote)
					continue
				}

				timeout.Reset()

				proposed := w.cloneProposedVote()
				if w.handleVote(voter, vote) {
					proposedUpdated :=
						w.compareVote(w.ballot.result.proposed, proposed) != common.EQUAL

					if !inFinalize || proposedUpdated {
						inFinalize = true
						finalizeTimer.Reset()
					}
				} else {
					if inFinalize {
						// we had a quorum but not anymore
						inFinalize = false
						finalizeTimer.Stop()
					}
				}
			}
		case <-finalizeTimer.C:
			{
				// We have achieved quorum; set the winner.
				// Setting the winner and unsetting the ballot
				// should be done together.
				// NOTE: ZK does not notify other peers when this node has
				// selected a leader.
				w.site.master.setWinner(w.ballot.result)
				w.ballot.resultch <- true
				w.ballot = nil
				timeout.Stop()
			}
		case <-timeout.GetChannel():
			{
				// If there is a timeout but no response, send the vote again.
				if w.ballot != nil {
					w.site.messenger.Multicast(w.cloneProposedVote(), w.site.ensemble)
					timeout.Backoff()
				}
			}
		case <-w.killch:
			{
				return
			}
		}
	}
}
//
// Start a new round of ballot.
//
func (b *ballotMaster) castBallot(winnerch chan string) {

	// Close the channel to make sure that the caller won't be
	// blocked forever. If the ballot is successful, a value would
	// have been sent to the channel before it is closed. Otherwise,
	// a closed channel without a value means the ballot was not
	// successful.
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in ballotMaster.castBallot() : %s\n", r)

			common.SafeRun("ballotMaster.castBallot()",
				func() {
					b.site.Close()
				})
		}

		common.SafeRun("ballotMaster.castBallot()",
			func() {
				close(winnerch) // unblock caller

				// balloting complete
				b.setBallotInProg(false)
			})
	}()

	// Create a channel to receive the ballot result.
	// It should only be closed by the Poll Worker. Make
	// it buffered so the sender won't block.
	resultch := make(chan bool, 1)

	// Create a new ballot
	ballot := b.createInitialBallot(resultch)

	// Tell the worker to observe this ballot. This forces
	// the worker to start collecting new ballot results.
	b.site.worker.observe(ballot)

	// Let the peers know about this ballot. It is expected
	// that the peers will reply with a vote.
	b.site.messenger.Multicast(ballot.result.proposed, b.site.ensemble)

	success, ok := <-resultch
	if !ok {
		// channel close. Ballot done
		success = false
	}

	// Announce the winner
	if success {
		winner, ok := b.GetWinner()
		if ok {
			common.SafeRun("ballotMaster.castBallot()",
				func() {
					// Remember the last round.
					gElectionRound = b.round

					// Announce the result
					winnerch <- winner
				})
		}
	}
}
//
// Bootstrap
//
func (s *EmbeddedServer) bootstrap() (err error) {

	defer func() {
		r := recover()
		if r != nil {
			log.Current.Errorf("panic in EmbeddedServer.bootstrap() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		}

		if err != nil || r != nil {
			common.SafeRun("EmbeddedServer.bootstrap()",
				func() {
					s.cleanupState()
				})
		}
	}()

	// Initialize server state
	s.state = newServerState()

	// Create and initialize new txn state.
	s.txn = common.NewTxnState()

	// Initialize repository service
	s.repo, err = r.OpenRepositoryWithName(s.repoName, s.quota)
	if err != nil {
		return err
	}

	// Initialize server config
	s.srvConfig = r.NewServerConfig(s.repo)

	// Initialize the current transaction id to the lastLoggedTxid. This
	// is the txid that this node has seen so far. If this node becomes
	// the leader, a new epoch will be used and a new current txid will
	// be generated. So there is no need to initialize the epoch at this point.
	lastLoggedTxid, err := s.srvConfig.GetLastLoggedTxnId()
	if err != nil {
		return err
	}
	s.txn.InitCurrentTxnid(common.Txnid(lastLoggedTxid))

	// Initialize commit log
	lastCommittedTxid, err := s.srvConfig.GetLastCommittedTxnId()
	if err != nil {
		return err
	}
	s.log, err = r.NewTransientCommitLog(s.repo, lastCommittedTxid)
	if err != nil {
		return err
	}

	// Initialize various callback facilities for leader election and
	// the voting protocol.
	s.factory = message.NewConcreteMsgFactory()
	s.handler = action.NewServerActionWithNotifier(
		s.repo, s.log, s.srvConfig, s, s.notifier, s.txn, s.factory, s)
	s.skillch = make(chan bool, 1) // make it buffered to unblock the sender

	// Need to start the peer listener before election. A follower may
	// finish its election before a leader finishes its election. Therefore,
	// a follower node can request a connection to the leader node before that
	// node knows it is a leader. By starting the listener now, it allows the
	// follower to establish the connection and lets the leader handle this
	// connection at a later time (when it is ready to be a leader).
	s.listener, err = common.StartPeerListener(s.msgAddr)
	if err != nil {
		err = common.WrapError(common.SERVER_ERROR, "Fail to start PeerListener. err = %v", err)
		return
	}

	return nil
}