Example #1
//
// Abort the LeaderSyncProxy.
//
func (l *LeaderSyncProxy) abort() {

	voter := l.GetFid()

	common.SafeRun("LeaderSyncProxy.abort()",
		func() {
			// terminate any on-going messaging with follower.  This will force
			// the follower to go through election again
			l.follower.Close()
		})

	common.SafeRun("LeaderSyncProxy.abort()",
		func() {
			// clean up the ConsentState
			l.state.removeAcceptedEpoch(voter)
			l.state.removeEpochAck(voter)
			l.state.removeNewLeaderAck(voter)
		})

	// donech should never be closed.  But just to be safe ...
	common.SafeRun("LeaderSyncProxy.abort()",
		func() {
			l.donech <- false
		})
}
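Every example on this page wraps its cleanup steps in common.SafeRun so that a panic in one step (for instance, a send on an already-closed donech) does not abort the remaining shutdown work. The wrapper itself is not shown on this page; below is a minimal sketch of what its call sites imply, assuming a recover-and-log helper (standard-library logging is used here for illustration; the real package may differ).

package common

import "log"

// SafeRun runs f and swallows any panic it raises, logging it under the given
// label.  This is a sketch inferred from the call sites on this page, not the
// library's actual implementation.
func SafeRun(funcName string, f func()) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("panic in %s : %v (ignored)", funcName, r)
		}
	}()
	f()
}

With this shape, each SafeRun call in abort() above is an independent recovery scope: a panic while closing the follower pipe still lets the ConsentState cleanup and the donech signal run.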
Example #2
//
// Cleanup internal state upon exit
//
func (s *Coordinator) cleanupState() {

	// tell that coordinator is no longer ready
	s.markNotReady()

	s.state.mutex.Lock()
	defer s.state.mutex.Unlock()

	common.SafeRun("Coordinator.cleanupState()",
		func() {
			if s.listener != nil {
				s.listener.Close()
			}
		})

	common.SafeRun("Coordinator.cleanupState()",
		func() {
			if s.site != nil {
				s.site.Close()
			}
		})

	for len(s.state.incomings) > 0 {
		request := <-s.state.incomings
		request.Err = fmt.Errorf("Terminate Request due to server termination")

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.pendings {
		request.Err = fmt.Errorf("Terminate Request due to server termination")

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.proposals {
		request.Err = fmt.Errorf("Terminate Request due to server termination")

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}
}
Example #3
//
// Cleanup internal state upon exit
//
func (s *EmbeddedServer) cleanupState() {

	s.state.mutex.Lock()
	defer s.state.mutex.Unlock()

	common.SafeRun("EmbeddedServer.cleanupState()",
		func() {
			if s.listener != nil {
				s.listener.Close()
			}
		})

	common.SafeRun("EmbeddedServer.cleanupState()",
		func() {
			if s.repo != nil {
				s.repo.Close()
			}
		})

	for len(s.state.incomings) > 0 {
		request := <-s.state.incomings
		request.Err = common.NewError(common.SERVER_ERROR, "Terminate Request due to server termination")

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.pendings {
		request.Err = common.NewError(common.SERVER_ERROR, "Terminate Request due to server termination")

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}

	for _, request := range s.state.proposals {
		request.Err = common.NewError(common.SERVER_ERROR, "Terminate Request due to server termination")

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				request.CondVar.L.Lock()
				defer request.CondVar.L.Unlock()
				request.CondVar.Signal()
			})
	}
}
Example #4
//
// Abort the FollowerSyncProxy.  By killing the leader's PeerPipe,
// the execution goroutine will eventually error out and terminate by itself.
//
func (f *FollowerSyncProxy) abort() {

	common.SafeRun("FollowerSyncProxy.abort()",
		func() {
			// terminate any on-going messaging with follower
			f.leader.Close()
		})

	common.SafeRun("FollowerSyncProxy.abort()",
		func() {
			f.donech <- false
		})
}
Example #5
func runOnce(peer string,
	requestMgr RequestMgr,
	handler ActionHandler,
	factory MsgFactory,
	killch <-chan bool,
	readych chan<- bool,
	alivech chan<- bool,
	pingch <-chan bool,
	once *sync.Once) (isKilled bool) {

	// Catch panic at the main entry point for WatcherServer
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in WatcherServer.runOnce() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("WatcherServer.runOnce() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		if requestMgr != nil {
			requestMgr.CleanupOnError()
		}
	}()

	// create connection with a peer
	conn, err := createConnection(peer)
	if err != nil {
		log.Current.Errorf("WatcherServer.runOnce() error : %s", err)
		return false
	}
	pipe := common.NewPeerPipe(conn)
	log.Current.Debugf("WatcherServer.runOnce() : Watcher successfully created TCP connection to peer %s", peer)

	// close the connection to the peer. If connection is closed,
	// sync proxy and watcher will also terminate by err-ing out.
	// If sync proxy and watcher terminates the pipe upon termination,
	// it is ok to close it again here.
	defer common.SafeRun("WatcherServer.runOnce()",
		func() {
			pipe.Close()
		})

	// start synchronizing with the metadata server
	success, isKilled := syncWithPeer(pipe, handler, factory, killch)

	// run watcher after synchronization
	if success {
		if !runWatcher(pipe, requestMgr, handler, factory, killch, readych, alivech, pingch, once) {
			log.Current.Errorf("WatcherServer.runOnce() : Watcher terminated unexpectedly.")
			return false
		}

	} else if !isKilled {
		log.Current.Errorf("WatcherServer.runOnce() : Watcher fail to synchronized with peer %s", peer)
		return false
	}

	return true
}
Example #6
//
// Run the server until it stops.  Will not attempt to re-run.
//
func (s *EmbeddedServer) runOnce() {

	log.Current.Debugf("EmbeddedServer.runOnce() : Start Running Server")

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in EmbeddedServer.runOnce() : %v\n", r)
			log.Current.Errorf("Diagnostic Stack ...")
			log.Current.Errorf("%s", log.Current.StackTrace())
		}

		common.SafeRun("EmbeddedServer.cleanupState()",
			func() {
				s.cleanupState()
			})
	}()

	// Check if the server has been terminated explicitly. If so, don't run.
	if !s.IsDone() {

		// runServer() is done if there is an error or it is terminated explicitly (killch)
		s.state.setStatus(protocol.LEADING)
		if err := protocol.RunLeaderServerWithCustomHandler(
			s.msgAddr, s.listener, s.state, s.handler, s.factory, s.reqHandler, s.skillch); err != nil {
			log.Current.Errorf("EmbeddedServer.RunOnce() : Error Encountered From Server : %s", err.Error())
		}
	} else {
		log.Current.Debugf("EmbeddedServer.RunOnce(): Server has been terminated explicitly. Terminate.")
	}
}
Example #7
//
// Find which peer to connect to
//
func findPeerToConnect(host string,
	peerUDP []string,
	peerTCP []string,
	factory MsgFactory,
	handler ActionHandler,
	killch <-chan bool) (leader string, isKilled bool) {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in findPeerToConnect() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("findPeerToConnect() terminates : Diagnostic Stack ...")
			log.Current.LazyDebug(log.Current.StackTrace)
		}
	}()

	// Run master election to figure out who is the leader.  Only connect to leader for now.
	site, err := CreateElectionSite(host, peerUDP, factory, handler, true)
	if err != nil {
		log.Current.Errorf("WatcherServer.findPeerToConnect() error : %s", err)
		return "", false
	}

	defer func() {
		common.SafeRun("Server.cleanupState()",
			func() {
				site.Close()
			})
	}()

	resultCh := site.StartElection()
	if resultCh == nil {
		log.Current.Errorf("WatcherServer.findPeerToConnect: Election Site is in progress or is closed.")
		return "", false
	}

	select {
	case leader, ok := <-resultCh:
		if !ok {
			log.Current.Errorf("WatcherServer.findPeerToConnect: Election Fails")
			return "", false
		}

		for i, peer := range peerUDP {
			if peer == leader {
				return peerTCP[i], false
			}
		}

		log.Current.Errorf("WatcherServer.findPeerToConnect : Cannot find matching port for peer. Peer UPD port = %s", leader)
		return "", false

	case <-killch:
		return "", true
	}
}
Example #8
//
// Goroutine.  Start a listener to listen to messages from the follower.
// Note that each follower has its own receive queue.  This
// is to ensure that if the queue fills up for a single follower,
// only that follower's connection may get affected.
// The listener can be killed by calling terminate() or closing
// the PeerPipe.
//
func (l *messageListener) start() {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in messageListener.start() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("leader's messageListener.start() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("messageListener.start()",
			func() {
				l.leader.removeListener(l)
			})

		common.SafeRun("messageListener.start()",
			func() {
				l.pipe.Close()
			})
	}()

	log.Current.Debugf("messageListener.start(): start listening to message from peer %s", l.fid)
	reqch := l.pipe.ReceiveChannel()

	for {
		select {
		case req, ok := <-reqch:
			if ok {
				// TODO: If this send is blocked because l.notifications is full, will it become
				// unblocked when leader.notifications is unblocked?
				l.leader.QueueRequest(l.fid, req)
			} else {
				// The channel is closed.  Need to shutdown the listener.
				log.Current.Infof("messageListener.start(): message channel closed. Remove peer %s as follower.", l.fid)
				return
			}
		case <-l.killch:
			log.Current.Debugf("messageListener.start(): Listener for %s receive kill signal. Terminate.", l.fid)
			return

		}
	}
}
Example #9
func (m *IndexManager) runTimestampKeeper() {

	defer logging.Debugf("IndexManager.runTimestampKeeper() : terminate")

	inboundch := m.timer.getOutputChannel()

	persistTimestamp := true // save the first timestamp always
	lastPersistTime := uint64(time.Now().UnixNano())

	timestamps, err := m.repo.GetStabilityTimestamps()
	if err != nil {
		// TODO : Distinguish a missing timestamp from a forestdb error
		logging.Errorf("IndexManager.runTimestampKeeper() : cannot get stability timestamp from repository. Create a new one.")
		timestamps = createTimestampListSerializable()
	}

	for {
		select {
		case <-m.timekeeperStopCh:
			return

		case timestamp, ok := <-inboundch:

			if !ok {
				return
			}

			gometaC.SafeRun("IndexManager.runTimestampKeeper()",
				func() {
					timestamps.addTimestamp(timestamp)
					persistTimestamp = persistTimestamp ||
						uint64(time.Now().UnixNano())-lastPersistTime > m.timestampPersistInterval
					if persistTimestamp {
						if err := m.repo.SetStabilityTimestamps(timestamps); err != nil {
							logging.Errorf("IndexManager.runTimestampKeeper() : cannot set stability timestamp into repository.")
						} else {
							logging.Debugf("IndexManager.runTimestampKeeper() : saved stability timestamp to repository")
							persistTimestamp = false
							lastPersistTime = uint64(time.Now().UnixNano())
						}
					}

					data, err := marshallTimestampSerializable(timestamp)
					if err != nil {
						logging.Debugf(
							"IndexManager.runTimestampKeeper(): error when marshalling timestamp. Ignore timestamp.  Error=%s",
							err.Error())
					} else {
						m.coordinator.NewRequest(uint32(OPCODE_NOTIFY_TIMESTAMP), "Stability Timestamp", data)
					}
				})
		}
	}
}
Example #10
//
// Run the server until it stops.  Will not attempt to re-run.
//
func RunOnce() int {

	log.Current.Debugf("Server.RunOnce() : Start Running Server")

	pauseTime := 0
	gServer = new(Server)

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in Server.runOnce() : %s\n", r)
		}

		log.Current.Debugf("RunOnce() terminates : Diagnostic Stack ...")
		log.Current.LazyDebug(log.Current.StackTrace)

		common.SafeRun("Server.cleanupState()",
			func() {
				gServer.cleanupState()
			})
	}()

	err := gServer.bootstrap()
	if err != nil {
		pauseTime = 200
	}

	// Check if the server has been terminated explicitly. If so, don't run.
	if !gServer.IsDone() {

		// runElection() finishes if there is an error, the election result is known, or
		// it is terminated. Unless killed explicitly, a goroutine
		// will continue to run to respond to other peers' election requests
		leader, err := gServer.runElection()
		if err != nil {
			log.Current.Errorf("Server.RunOnce() : Error Encountered During Election : %s", err.Error())
			pauseTime = 100
		} else {

			// Check if the server has been terminated explicitly. If so, don't run.
			if !gServer.IsDone() {
				// runServer() is done if there is an error or it is terminated explicitly (killch)
				err := gServer.runServer(leader)
				if err != nil {
					log.Current.Errorf("Server.RunOnce() : Error Encountered From Server : %s", err.Error())
				}
			}
		}
	} else {
		log.Current.Debugf("Server.RunOnce(): Server has been terminated explicitly. Terminate.")
	}

	return pauseTime
}
Example #11
//
// Create a new FollowerServer. This is a blocking call until
// the FollowerServer terminates. Make sure the killch is a buffered
// channel such that if the goroutine running RunFollowerServer goes
// away, the sender won't get blocked.
//
func RunFollowerServer(naddr string,
	leader string,
	ss RequestMgr,
	handler ActionHandler,
	factory MsgFactory,
	killch <-chan bool) (err error) {

	// Catch panic at the main entry point for FollowerServer
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in RunFollowerServer() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
			err = r.(error)
		} else {
			log.Current.Debugf("%s", "RunFollowerServer terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}
	}()

	// create connection to leader
	conn, err := createConnection(leader)
	if err != nil {
		return err
	}

	pipe := common.NewPeerPipe(conn)
	log.Current.Debugf("FollowerServer.RunFollowerServer() : Follower %s successfully "+
		"created TCP connection to leader %s, local address %s", naddr, leader, conn.LocalAddr())

	// close the connection to the leader. If connection is closed,
	// sync proxy and follower will also terminate by err-ing out.
	// If sync proxy and follower terminates the pipe upon termination,
	// it is ok to close it again here.
	defer common.SafeRun("FollowerServer.runFollowerServer()",
		func() {
			pipe.Close()
		})

	// start synchronizing with the leader
	success := syncWithLeader(naddr, pipe, handler, factory, killch)

	// run server after synchronization
	if success {
		runFollower(pipe, ss, handler, factory, killch)
		log.Current.Debugf("FollowerServer.RunFollowerServer() : Follower Server %s terminate", naddr)
		err = nil
	} else {
		err = common.NewError(common.SERVER_ERROR, fmt.Sprintf("Follower %s fail to synchronized with leader %s",
			naddr, leader))
	}

	return err
}
Example #12
func (o *observer) send(msg common.Packet) {

	defer common.SafeRun("observer.Send()",
		func() {
			select {
			case o.packets <- msg: //no-op
			case <-o.killch:
				// if killch is closed, this is non-blocking.
				return
			}
		})
}
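Example #12 combines two safeguards: the select makes the send non-blocking once killch is closed, and the SafeRun wrapper absorbs the panic that would occur if o.packets had already been closed. A self-contained sketch of the same pattern, with all names hypothetical and a plain recover helper standing in for common.SafeRun:

package main

import "fmt"

// safeRun plays the role common.SafeRun plays above (assumed behavior):
// run f and recover from any panic it raises.
func safeRun(name string, f func()) {
	defer func() {
		if r := recover(); r != nil {
			fmt.Printf("panic in %s : %v (ignored)\n", name, r)
		}
	}()
	f()
}

// send delivers msg if the observer is still alive; once killch is closed it
// drops the message instead of blocking, and the wrapper would also absorb a
// panic from sending on an already-closed packets channel.
func send(packets chan string, killch chan bool, msg string) {
	safeRun("observer.send",
		func() {
			select {
			case packets <- msg:
			case <-killch:
			}
		})
}

func main() {
	packets := make(chan string, 1)
	killch := make(chan bool)

	send(packets, killch, "first") // fits in the buffer
	close(killch)                  // simulate observer shutdown

	send(packets, killch, "second") // buffer is full; returns via killch instead of blocking
	fmt.Println(<-packets)          // only "first" was delivered
	fmt.Println("all sends returned")
}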
Example #13
//
// Run the server until it stops.  Will not attempt to re-run.
//
func (c *Coordinator) runOnce(config string) int {

	logging.Debugf("Coordinator.runOnce() : Start Running Coordinator")

	pauseTime := 0

	defer func() {
		if r := recover(); r != nil {
			logging.Warnf("panic in Coordinator.runOnce() : %s\n", r)
		}

		common.SafeRun("Coordinator.cleanupState()",
			func() {
				c.cleanupState()
			})
	}()

	err := c.bootstrap(config)
	if err != nil {
		pauseTime = 200
	}

	// Check if the server has been terminated explicitly. If so, don't run.
	if !c.IsDone() {

		// runElection() finishes if there is an error, the election result is known, or
		// it is terminated. Unless killed explicitly, a goroutine
		// will continue to run to respond to other peers' election requests
		leader, err := c.runElection()
		if err != nil {
			logging.Warnf("Coordinator.runOnce() : Error Encountered During Election : %s", err.Error())
			pauseTime = 100
		} else {

			// Check if the server has been terminated explicitly. If so, don't run.
			if !c.IsDone() {
				// runCoordinator() is done if there is an error or it is terminated explicitly (killch)
				err := c.runProtocol(leader)
				if err != nil {
					logging.Warnf("Coordinator.RunOnce() : Error Encountered From Coordinator : %s", err.Error())
				}
			}
		}
	} else {
		logging.Infof("Coordinator.RunOnce(): Coordinator has been terminated explicitly. Terminate.")
	}

	return pauseTime
}
Example #14
//
// Terminate the leader. It is a no-op if the leader has already
// completed successfully.
//
func (l *Leader) Terminate() {

	l.mutex.Lock()
	defer l.mutex.Unlock()

	if !l.isClosed {
		l.isClosed = true
		for _, listener := range l.followers {
			listener.terminate()
		}
		for _, listener := range l.watchers {
			listener.terminate()
		}
		common.SafeRun("Leader.Terminate()",
			func() {
				close(l.notifications)
			})
	}
}
Example #15
//
// Main processing message loop for leader.
//
func (l *Leader) listen() {
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in Leader.listen() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("Leader.listen() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("Leader.listen()",
			func() {
				l.Terminate()
			})
	}()

	log.Current.Debugf("Leader.listen(): start listening to message for leader")

	for {
		select {
		case msg, ok := <-l.notifications:
			if ok {
				if !l.IsClosed() {
					err := l.handleMessage(msg.payload, msg.fid)
					if err != nil {
						log.Current.Errorf("Leader.listen(): Encounter error when processing message %s. Error %s. Terminate",
							msg.fid, err.Error())
						return
					}
				} else {
					log.Current.Debugf("Leader.listen(): Leader is closed. Terminate message processing loop.")
					return
				}
			} else {
				// The channel is closed.
				log.Current.Debugf("Leader.listen(): message channel closed. Terminate message processing loop for leader.")
				return
			}
		}
	}
}
Example #16
func safeSend(header string, donech chan bool, result bool) {
	common.SafeRun(header,
		func() {
			donech <- result
		})
}
Example #17
//
// Goroutine for processing each request one-by-one
//
func (s *LeaderServer) processRequest(killch <-chan bool,
	listenerState *ListenerState,
	reqHandler CustomRequestHandler) (err error) {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in LeaderServer.processRequest() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
			err = r.(error)
		} else {
			log.Current.Debugf("LeaderServer.processRequest() : Terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("LeaderServer.processRequest()",
			func() {
				listenerState.killch <- true
			})
	}()

	// start the processing loop after I am confirmed as the leader (there
	// is a quorum of followers that have synced with me)
	if !s.waitTillReady() {
		return common.NewError(common.ELECTION_ERROR,
			"LeaderServer.processRequest(): Leader times out waiting for quorum of followers. Terminate")
	}

	// At this point, the leader has gotten a majority of followers to follow, so it
	// can proceed.  It is possible that it may lose its quorum of followers. But in that
	// case, the leader will not be able to process any request.
	log.Current.Debugf("LeaderServer.processRequest(): Leader Server is ready to proces request")

	// Leader is ready at this time.  This implies that a quorum of followers has
	// followed this leader.  Get the change channel to keep track of the number of followers.
	// If the leader no longer has quorum, it needs to let go of its leadership.
	leaderchangech := s.leader.GetEnsembleChangeChannel()
	ensembleSize := s.handler.GetEnsembleSize()

	// notify the request processor to start processing new request
	incomings := s.state.requestMgr.GetRequestChannel()

	var outgoings <-chan common.Packet = nil
	if reqHandler != nil {
		outgoings = reqHandler.GetResponseChannel()
	} else {
		outgoings = make(<-chan common.Packet)
	}

	for {
		select {
		case handle, ok := <-incomings:
			if ok {
				// de-queue the request
				s.state.requestMgr.AddPendingRequest(handle)

				// forward request to the leader
				s.leader.QueueRequest(s.leader.GetFollowerId(), handle.Request)
			} else {
				// server shutdown.
				log.Current.Debugf("LeaderServer.processRequest(): channel for receiving client request is closed. Terminate.")
				return nil
			}
		case msg, ok := <-outgoings:
			if ok {
				// forward msg to the leader
				s.leader.QueueResponse(msg)
			} else {
				log.Current.Infof("LeaderServer.processRequest(): channel for receiving custom response is closed. Ignore.")
			}
		case <-killch:
			// server shutdown
			log.Current.Debugf("LeaderServer.processRequest(): receive kill signal. Stop Client request processing.")
			return nil
		case <-listenerState.donech:
			// listener is down.  Terminate this request processing loop as well.
			log.Current.Infof("LeaderServer.processRequest(): follower listener terminates. Stop client request processing.")
			return nil
		case <-leaderchangech:
			// Listen to any change to the leader's active ensemble, to ensure that the leader maintains a majority.
			// The active ensemble is the set of running followers connected to the leader.
			numFollowers := s.leader.GetActiveEnsembleSize()
			if numFollowers <= int(ensembleSize/2) {
				// leader loses majority of followers.
				log.Current.Infof("LeaderServer.processRequest(): leader loses majority of followers. Stop client request processing.")
				return nil
			}
		}
	}

	return nil
}
Example #18
//
// Listen to new connection request from the follower/peer.
// Start a new LeaderSyncProxy to synchronize the state
// between the leader and the peer.
//
func (l *LeaderServer) listenFollower(listenerState *ListenerState) {

	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in LeaderServer.listenFollower() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		} else {
			log.Current.Debugf("LeaderServer.listenFollower() terminates.")
			log.Current.Tracef(log.Current.StackTrace())
		}

		common.SafeRun("LeaderServer.listenFollower()",
			func() {
				l.terminateAllOutstandingProxies()
			})

		common.SafeRun("LeaderServer.listenFollower()",
			func() {
				listenerState.donech <- true
			})
	}()

	connCh := l.listener.ConnChannel()
	if connCh == nil {
		// It should not happen unless the listener is closed
		return
	}

	// if there is a single server, then we don't need to wait for follower
	// for the server to be ready to process request.
	if l.handler.GetEnsembleSize() == 1 {
		if err := l.incrementEpoch(); err != nil {
			log.Current.Errorf("LeaderServer.listenFollower(): Error when boostraping leader with ensembleSize=1. Error = %s", err)
			return
		}

		l.notifyReady()
	}

	for {
		select {
		case conn, ok := <-connCh:
			{
				if !ok {
					// channel close.  Simply return.
					return
				}

				// There is a new peer connection request from the follower.  Start a proxy to synchronize with the follower.
				// The leader does not proactively connect to follower:
				// 1) The ensemble is stable, but a follower may just reboot and needs to connect to the leader
				// 2) Even if the leader receives votes from the follower, the leader cannot tell for sure that the follower does
				//    not change its vote.  Only when the follower connects can the leader confirm the follower's allegiance.
				//
				log.Current.Debugf("LeaderServer.listenFollower(): Receive connection request from follower %s", conn.RemoteAddr())
				if l.registerOutstandingProxy(conn.RemoteAddr().String()) {
					pipe := common.NewPeerPipe(conn)
					go l.startProxy(pipe)
				} else {
					log.Current.Infof("LeaderServer.listenFollower(): Sync Proxy already running for %s. Ignore new request.", conn.RemoteAddr())
					conn.Close()
				}
			}
		case <-listenerState.killch:
			log.Current.Debugf("LeaderServer.listenFollower(): Receive kill signal. Terminate.")
			return
		}
	}
}
Example #19
//
// Goroutine.  Listen to votes coming from the peers for a
// particular ballot.  This is the only goroutine that
// handles all incoming requests.
//
// Voter -> the peer that replies to the ballot with a vote
// Candidate -> the peer that is voted for by the voter.
// It is the peer (CndId) that is inside the vote.
//
func (w *pollWorker) listen() {

	// If this loop terminates (e.g. due to panic), then make sure
	// there is no outstanding ballot waiting for a result.   Close
	// any channel for outstanding ballot such that the caller
	// won't get blocked forever.
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in pollWorker.listen() : %s\n", r)
		}

		// make sure we close the ElectionSite first such that
		// there is no new ballot coming while we are shutting
		// down the pollWorker. If not, then some goroutine
		// may wait forever for the new ballot to complete.
		common.SafeRun("pollWorker.listen()",
			func() {
				w.site.Close()
			})

		// unlock anyone waiting for existing ballot to complete.
		common.SafeRun("pollWorker.listen()",
			func() {
				if w.ballot != nil {
					close(w.ballot.resultch)
					w.ballot = nil
				}
			})
	}()

	// Get the channel for receiving votes from the peer.
	reqch := w.site.messenger.DefaultReceiveChannel()

	timeout := common.NewBackoffTimer(
		common.BALLOT_TIMEOUT*time.Millisecond,
		common.BALLOT_MAX_TIMEOUT*time.Millisecond,
		2,
	)

	inFinalize := false
	finalizeTimer := common.NewStoppedResettableTimer(common.BALLOT_FINALIZE_WAIT * time.Millisecond)

	for {
		select {
		case w.ballot = <-w.listench: // listench should never close
			{
				// Before listening to any vote, see if we have reached quorum already.
				// This should only happen if there is only one server in the
				// ensemble.  If this election is for soliciting purposes only, then
				// run the election all the time.
				if !w.site.solicitOnly &&
					w.checkQuorum(w.ballot.result.receivedVotes, w.ballot.result.proposed) {
					w.site.master.setWinner(w.ballot.result)
					w.ballot.resultch <- true
					w.ballot = nil
				} else {
					// There is a new ballot.
					timeout.Reset()
					inFinalize = false
					finalizeTimer.Stop()
				}
			}
		// Receiving a vote
		case msg, ok := <-reqch:
			{
				if !ok {
					return
				}

				// Receive a new vote.  The voter is identified by its UDP port,
				// which must remain the same during the election phase.
				vote := msg.Content.(VoteMsg)
				voter := msg.Peer

				// If I am receiving a vote that is just soliciting my response,
				// then respond with my winning vote only after I am confirmed as
				// either a leader or a follower.  This ensures that the watcher will
				// only find a leader from a stable ensemble.  It also ensures
				// that the watcher will only count votes from active participants;
				// it will not count votes from other watchers, nor its own vote
				// (the code path for handling votes from an electing member is
				// never called for a watcher).
				if vote.GetSolicit() {
					status := w.site.handler.GetStatus()
					if status == LEADING || status == FOLLOWING {
						w.respondInquiry(voter, vote)
					}
					continue
				}

				// Check if the voter is in the ensemble
				if !w.site.inEnsemble(voter) {
					continue
				}

				if w.ballot == nil {
					// If there is no ballot or the vote is from a watcher,
					// then I just need to respond if I have a winner.
					w.respondInquiry(voter, vote)
					continue
				}

				timeout.Reset()

				proposed := w.cloneProposedVote()
				if w.handleVote(voter, vote) {
					proposedUpdated :=
						w.compareVote(w.ballot.result.proposed, proposed) != common.EQUAL

					if !inFinalize || proposedUpdated {
						inFinalize = true
						finalizeTimer.Reset()
					}
				} else {
					if inFinalize {
						// we had a quorum but not anymore
						inFinalize = false
						finalizeTimer.Stop()
					}
				}
			}
		case <-finalizeTimer.C:
			{
				// we have achieved quorum; set the winner.
				// Setting the winner and unsetting the ballot
				// should be done together.
				// NOTE: ZK does not notify other peers when this node has
				// selected a leader
				w.site.master.setWinner(w.ballot.result)
				w.ballot.resultch <- true
				w.ballot = nil
				timeout.Stop()
			}
		case <-timeout.GetChannel():
			{
				// If there is a timeout but no response, send vote again.
				if w.ballot != nil {
					w.site.messenger.Multicast(w.cloneProposedVote(), w.site.ensemble)
					timeout.Backoff()
				}
			}
		case <-w.killch:
			{
				return
			}
		}
	}
}
Example #20
//
// Start a new round of balloting.
//
func (b *ballotMaster) castBallot(winnerch chan string) {

	// close the channel to make sure that the caller won't be
	// blocked forever.  If the ballot is successful, a value will
	// have been sent to the channel before it is closed. Otherwise,
	// a closed channel without a value means the ballot is not
	// successful.
	defer func() {
		if r := recover(); r != nil {
			log.Current.Errorf("panic in ballotMaster.castBallot() : %s\n", r)
			common.SafeRun("ballotMaster.castBallot()",
				func() {
					b.site.Close()
				})
		}

		common.SafeRun("ballotMaster.castBallot()",
			func() {
				close(winnerch) // unblock caller

				// balloting complete
				b.setBallotInProg(false)
			})
	}()

	// create a channel to receive the ballot result.
	// It should only be closed by the poll worker.  Make
	// it buffered so the sender won't block.
	resultch := make(chan bool, 1)

	// Create a new ballot
	ballot := b.createInitialBallot(resultch)

	// Tell the worker to observe this ballot.  This forces
	// the worker to start collecting new ballot result.
	b.site.worker.observe(ballot)

	// let the peers know about this ballot.  It is expected
	// that each peer will reply with a vote.
	b.site.messenger.Multicast(ballot.result.proposed, b.site.ensemble)

	success, ok := <-resultch
	if !ok {
		// channel close. Ballot done
		success = false
	}

	// Announce the winner
	if success {
		winner, ok := b.GetWinner()
		if ok {
			common.SafeRun("ballotMaster.castBallot()",
				func() {
					// Remember the last round.
					gElectionRound = b.round
					// Announce the result
					winnerch <- winner
				})
		}
	}
}
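The defer block above encodes a small contract: castBallot either delivers one value on winnerch or simply closes it, so the caller can always tell success from failure via the second return value of the receive and is never left blocked. A self-contained sketch of that contract, with hypothetical names:

package main

import "fmt"

// produce follows the same contract as castBallot: send at most one value,
// then always close the channel so the consumer is unblocked either way.
func produce(resultch chan<- string, succeed bool) {
	defer close(resultch)
	if succeed {
		resultch <- "winner-node"
	}
}

func main() {
	for _, succeed := range []bool{true, false} {
		resultch := make(chan string, 1) // buffered so the send cannot block
		go produce(resultch, succeed)

		if winner, ok := <-resultch; ok {
			fmt.Println("ballot winner:", winner)
		} else {
			fmt.Println("ballot closed without a winner")
		}
	}
}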
Example #21
//
// Bootstrap
//
func (s *EmbeddedServer) bootstrap() (err error) {

	defer func() {
		r := recover()
		if r != nil {
			log.Current.Errorf("panic in EmbeddedServer.bootstrap() : %s\n", r)
			log.Current.Errorf("%s", log.Current.StackTrace())
		}

		if err != nil || r != nil {
			common.SafeRun("EmbeddedServer.bootstrap()",
				func() {
					s.cleanupState()
				})
		}
	}()

	// Initialize server state
	s.state = newServerState()

	// Create and initialize new txn state.
	s.txn = common.NewTxnState()

	// Initialize repository service
	s.repo, err = r.OpenRepositoryWithName(s.repoName, s.quota)
	if err != nil {
		return err
	}

	// Initialize server config
	s.srvConfig = r.NewServerConfig(s.repo)

	// initialize the current transaction id to the lastLoggedTxid.  This
	// is the txid that this node has seen so far.  If this node becomes
	// the leader, a new epoch will be used and new current txid will
	// be generated. So no need to initialize the epoch at this point.
	lastLoggedTxid, err := s.srvConfig.GetLastLoggedTxnId()
	if err != nil {
		return err
	}
	s.txn.InitCurrentTxnid(common.Txnid(lastLoggedTxid))

	// Initialize commit log
	lastCommittedTxid, err := s.srvConfig.GetLastCommittedTxnId()
	if err != nil {
		return err
	}
	s.log, err = r.NewTransientCommitLog(s.repo, lastCommittedTxid)
	if err != nil {
		return err
	}

	// Initialize various callback facility for leader election and
	// voting protocol.
	s.factory = message.NewConcreteMsgFactory()
	s.handler = action.NewServerActionWithNotifier(s.repo, s.log, s.srvConfig, s, s.notifier, s.txn, s.factory, s)
	s.skillch = make(chan bool, 1) // make it buffered to unblock sender

	// Need to start the peer listener before election. A follower may
	// finish its election before a leader finishes its election. Therefore,
	// a follower node can request a connection to the leader node before that
	// node knows it is a leader.  By starting the listener now, it allows the
// follower to establish the connection and lets the leader handle this
	// connection at a later time (when it is ready to be a leader).
	s.listener, err = common.StartPeerListener(s.msgAddr)
	if err != nil {
		err = common.WrapError(common.SERVER_ERROR, "Fail to start PeerListener. err = %v", err)
		return
	}

	return nil
}