Example #1
/*
Shutdown is used by the ServerNode to shut down this raft node.
*/
func (node *Node) Shutdown(notifier *utils.ShutdownNotifier) {
	go func() {
		// End the leadership loop if we are in one.
		node.setState(SHUTDOWN_NODE)

		// Shutdown the election timer goroutine.
		electionShutdown := utils.NewShutdownNotifier(1)
		node.electionTimer.Shutdown(electionShutdown)

		// Wait for the election timer to confirm closing - this also ensures the
		// leadership loop has quit.
		if electionShutdown.WaitForDone(RAFT_NODE_SUBSYSTEM_SHUTDOWN_TIMEOUT) != 1 {
			node.node_log("Election timer did not shutdown - proceeding anyway.\n")
		} else {
			node.node_log("Election timer shutdown completed.\n")
		}

		// Start shutdown on the commit log - this releases clients in GET.
		node.log.Shutdown()

		// Get the list of peers - need the lock for this.
		node.lock.RLock()
		peersToClose := len(node.peers)
		peerList := make([]*Peer, 0, peersToClose)
		peerShutdown := utils.NewShutdownNotifier(peersToClose)
		for _, peer := range node.peers {
			peerList = append(peerList, peer)
		}
		node.lock.RUnlock()

		// Ask each peer to shut down without holding the lock, so that lock contention
		// cannot block the channel used to send commands to the peer goroutine.
		for _, peer := range peerList {
			peer.shutdown(peerShutdown)
		}

		// Wait for the peers to confirm closing - no timeout.
		peerShutdown.WaitForAllDone()
		node.node_log("Peers all shutdown.\n")

		// Shutdown the write aggregator
		writeAggNotifier := utils.NewShutdownNotifier(1)
		node.writeAggregator.Shutdown(writeAggNotifier)

		// Wait for the write aggregator to shutdown
		writeAggNotifier.WaitForAllDone()
		node.node_log("Write aggregator shutdown complete.")

		// Ask our storage to shutdown
		storageNotifier := utils.NewShutdownNotifier(1)
		store := node.log.GetLogStorage()
		store.Shutdown(storageNotifier)

		storageNotifier.WaitForAllDone()
		node.node_log("Storage shutdown completed.")
		notifier.ShutdownDone()
	}()
}
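All of these examples coordinate goroutine shutdown through utils.ShutdownNotifier, whose implementation is not included here. A minimal sketch that is consistent with the calls used above (NewShutdownNotifier, ShutdownDone, WaitForAllDone, and WaitForDone returning the number of confirmations received before the timeout) could look like the following; the channel-based approach and field names are assumptions, not the actual implementation.

package utils

import "time"

// ShutdownNotifier lets a caller wait until a known number of goroutines
// have confirmed that their shutdown is complete. (Assumed sketch.)
type ShutdownNotifier struct {
	expected int
	signal   chan struct{}
}

// NewShutdownNotifier creates a notifier that expects the given number of confirmations.
func NewShutdownNotifier(expected int) *ShutdownNotifier {
	return &ShutdownNotifier{expected: expected, signal: make(chan struct{}, expected)}
}

// ShutdownDone is called once by each goroutine when its shutdown is complete.
func (n *ShutdownNotifier) ShutdownDone() {
	n.signal <- struct{}{}
}

// WaitForAllDone blocks until every expected confirmation has arrived.
func (n *ShutdownNotifier) WaitForAllDone() {
	for i := 0; i < n.expected; i++ {
		<-n.signal
	}
}

// WaitForDone waits up to timeout and returns how many confirmations arrived in time.
func (n *ShutdownNotifier) WaitForDone(timeout time.Duration) int {
	deadline := time.NewTimer(timeout)
	defer deadline.Stop()
	received := 0
	for received < n.expected {
		select {
		case <-n.signal:
			received++
		case <-deadline.C:
			return received
		}
	}
	return received
}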
Example #2
func (peer *ServerPeer) manageOutboundConnections(clusterID string) {
	var notifier *utils.ShutdownNotifier
	for {
		select {
		case notifier = <-peer.shutdown_channel:
			// Shutdown requested - close the connections channel and close any
			// connection still buffered in it (ranging drains a closed channel).
			close(peer.connections)
			for conn := range peer.connections {
				conn.Close()
			}
			if notifier != nil {
				notifier.ShutdownDone()
			}
			return
		case <-peer.broken_connections:
			srv_log("Handling request to establish connection to peer %v\n", peer.name)
			recentConnectionAttempt := true
			connected := false
			for !connected {
				//rpcclient, err := rpc.Dial("tcp", peer.name)
				netconn, err := net.Dial("tcp", peer.name)
				if err != nil {
					if recentConnectionAttempt {
						srv_log("Unable to connect to peer %v (%v) - will keep trying periodically\n", peer.name, err)
						recentConnectionAttempt = false
					}
				} else {
					rpcclient := rpc.NewClient(netconn)

					recentConnectionAttempt = true
					srv_log("Connection to peer %v established!\n", peer.name)
					// Identify ourselves to our peer
					args := &IdentifyNodeArgs{Name: peer.ourName, ClusterID: clusterID}
					reply := &IdentifyNodeResults{}
					srv_log("Sending identify request to peer %v\n", peer.name)
					err = rpcclient.Call("RPCHandler.IdentifyNode", args, reply)
					if err != nil {
						srv_log("Error in RPC call (%v) for identify to peer %v - disconnecting.\n", err, peer.name)
						rpcclient.Close()
					} else if reply.Result.Code != rapi.RI_SUCCESS {
						srv_log("Identify call to peer failed: %v\n", reply.Result.Description)
						rpcclient.Close()
					} else {
						// Now we are connected, service any send messages
						connected = true
						srv_log("Identity sent - serving outbound requests\n")
						peer.connections <- rpcclient
					}
				}
				// Wait before trying again
				if !connected {
					time.Sleep(NODE_CONNECTION_INTERVAL)
				}
			}
		}
	}
}
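Example #2 only shows the dialing side: established connections are pushed into peer.connections and a receive on peer.broken_connections triggers a redial. A hypothetical consumer of that pattern might look like the sketch below; the helper name sendToPeer, the RPC method name, and the assumption that broken_connections carries bool values are illustrative only and are not taken from the original code.

// sendToPeer is a hypothetical illustration of the consumer side of the
// connections / broken_connections pattern above.
func (peer *ServerPeer) sendToPeer(args interface{}, reply interface{}) error {
	// Take the currently established connection; blocks until one is available.
	rpcclient := <-peer.connections
	err := rpcclient.Call("RPCHandler.SomeMethod", args, reply)
	if err != nil {
		// Drop the broken connection and ask manageOutboundConnections to redial.
		rpcclient.Close()
		peer.broken_connections <- true
		return err
	}
	// The connection is still healthy - hand it back for the next caller.
	peer.connections <- rpcclient
	return nil
}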
Example #3
// shutdown is used to request the shutdown of the goroutine that manages sending messages to this peer.
func (peer *Peer) shutdown(notifier *utils.ShutdownNotifier) {
	go func() {
		// Send the shutdown - block if required.
		confirmedShutdown := make(chan interface{})
		cmd := Command{Action: ACTION_SHUTDOWN, ResultChannel: confirmedShutdown}
		peer.sendMessage <- cmd
		<-confirmedShutdown
		if notifier != nil {
			notifier.ShutdownDone()
		}
	}()
}
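The handshake above relies on the goroutine that owns peer.sendMessage acknowledging ACTION_SHUTDOWN. That goroutine is not shown; a hypothetical sketch of its shutdown handling, inferred only from the Command fields used above, is:

// messageLoop is an assumed sketch of the other side of the shutdown handshake;
// the loop shape and the handling of other actions are illustrative only.
func (peer *Peer) messageLoop() {
	for cmd := range peer.sendMessage {
		switch cmd.Action {
		case ACTION_SHUTDOWN:
			// Closing the result channel releases the <-confirmedShutdown wait in shutdown().
			close(cmd.ResultChannel)
			return
		default:
			// ... handle replication / heartbeat commands here ...
		}
	}
}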
func (dlog *DiskLogStorage) Shutdown(notifier *utils.ShutdownNotifier) {
	routinesNotifier := utils.NewShutdownNotifier(2)
	dlog.closeSegmentsShutdownChannel <- routinesNotifier
	dlog.segmentCleanupShutdownChannel <- routinesNotifier
	routinesNotifier.WaitForAllDone()
	// Now close all segments.
	dlog.lock.Lock()
	for _, segment := range dlog.segments {
		segment.Close()
	}
	dlog.lock.Unlock()
	notifier.ShutdownDone()
}
// writer takes messages from the internal queue and decides when to send them to the commit log.
func (agg *QueueWriteAggregator) writer() {
	// List of messages to be aggregated
	messagesToBeAggregated := make([]writeRequest, 0, DEFAULT_MAX_BATCH_AGGREGATION)
	messageProviders := make([]model.MessageProvider, 0, DEFAULT_MAX_BATCH_AGGREGATION)
	totalMessages := 0
	var request writeRequest
	//deadline := time.NewTimer(DEFAULT_AGGREGATION_WINDOW)
	var notifier *utils.ShutdownNotifier
	running := true
	for running {
		// Blocking wait for the first message
		select {
		case notifier = <-agg.shutdownQueue:
			running = false
		case request = <-agg.sendQueue:
			// We have a new request
			messagesToBeAggregated = append(messagesToBeAggregated, request)
			messageProviders = append(messageProviders, &request)
			totalMessages += len(request.messages)
			gathering := true
			// Wait for additional requests to arrive.
			time.Sleep(DEFAULT_AGGREGATION_WINDOW)
			//deadline.Reset(DEFAULT_AGGREGATION_WINDOW)
			//runtime.Gosched()
			// Now pull as many requests as possible.  When there are none left or we have reached our limit, send them.
			for gathering {
				select {
				case request = <-agg.sendQueue:
					// We have additional requests, queue them.
					messagesToBeAggregated = append(messagesToBeAggregated, request)
					messageProviders = append(messageProviders, &request)
					totalMessages += len(request.messages)
					if totalMessages >= DEFAULT_TRIGGER_TOTAL_AGGREGATED_MESSAGES || len(messagesToBeAggregated) >= DEFAULT_MAX_BATCH_AGGREGATION {
						gathering = false
					}
					// case <-deadline.C:
					// 	// We've waited as long as we can - time to send what we have accumulated.
					// 	gathering = false
				default:
					gathering = false
				}
			}
			agg.sendMessages(messagesToBeAggregated, messageProviders, totalMessages)
			messagesToBeAggregated = messagesToBeAggregated[:0]
			messageProviders = messageProviders[:0]
			totalMessages = 0
		}
	}
	notifier.ShutdownDone()
}
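The writer loop receives its notifier from agg.shutdownQueue, which pairs with the node.writeAggregator.Shutdown(writeAggNotifier) call in Example #1. A plausible Shutdown method, assumed rather than taken from the original source, would simply hand the notifier to the goroutine:

// Shutdown (assumed sketch): pass the notifier to the writer goroutine, which
// calls ShutdownDone once its loop has exited.
func (agg *QueueWriteAggregator) Shutdown(notifier *utils.ShutdownNotifier) {
	agg.shutdownQueue <- notifier
}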
func (dlog *DiskLogStorage) cleanupSegmentsLoop() {
	// The timer is created once and re-armed at the end of each pass (see timer.Reset below).
	timer := time.NewTimer(SEGMENT_CLEANUP_SCAN_INTERVAL)
	for {
		var notifier *utils.ShutdownNotifier
		select {
		case notifier = <-dlog.segmentCleanupShutdownChannel:
			notifier.ShutdownDone()
			return
		case <-timer.C:
			// Check whether any clean-up has been configured.
			dlog.lock.RLock()
			cleanAge := dlog.segment_cleanup_age
			segmentsToCheck := len(dlog.segments) - 1
			dlog.lock.RUnlock()
			if cleanAge > 0 && segmentsToCheck > 0 {
				// If we have a clean-up interval we need to do a full lock while we build a list of segments to delete
				dlog.node_log("Scanning for segments that can be cleaned-up (older than %v)\n", cleanAge)
				segmentsToDelete := make([]*Segment, 0, 10)
				dlog.lock.Lock()
				// Holding the lock, find the segments that may need cleaning.  We always exclude the last active segment.
				shrinkIndex := 0
				for _, candidate := range dlog.segments[:len(dlog.segments)-1] {
					candidateAge := time.Since(candidate.GetLastModifiedTime())
					if candidateAge > cleanAge {
						dlog.node_log("Segment %v last modified on %v - will delete\n", candidate.filename, candidate.GetLastModifiedTime())
						segmentsToDelete = append(segmentsToDelete, candidate)
						shrinkIndex += 1
					} else {
						// We do clean up in sequential order - if we hit a segment that is not due for clean up we stop here.
						break
					}
				}
				// Now shrink the segment list.  This could be done slightly more efficiently, but it's not worth the hassle
				dlog.segments = dlog.segments[shrinkIndex:]
				dlog.lock.Unlock()
				// Do the physical deletes now that we have released the lock.
				for _, todelete := range segmentsToDelete {
					err := todelete.Delete()
					if err != nil {
						dlog.node_log("ERROR: Unable to delete old segment %v.  This file should be manually removed as it is no longer part of the segment list.\n", todelete.filename, err)
					}
				}
			}
		}
		timer.Reset(SEGMENT_CLEANUP_SCAN_INTERVAL)
	}
}
/*
RunElectionTimer runs the timer logic.  This method will also call elections and run the leadership loop.
*/
func (t *RaftElectionTimer) RunElectionTimer() {
	loopRunning := true
	timeoutMS := t.randomGenerator.Int31n(MAX_ELECTION_TIMEOUT-MIN_ELECTION_TIMEOUT) + MIN_ELECTION_TIMEOUT
	timeout := time.Duration(timeoutMS) * time.Millisecond
	log.Printf("First election timeout: %v\n", timeout)

	var notifier *utils.ShutdownNotifier

	timer := time.NewTimer(STARTUP_DELAY_TIMEOUT)
	// Delay the running of the timer at startup
	<-timer.C

	timer.Reset(timeout)
	for loopRunning {
		// Block until the timer has passed
		select {
		case notifier = <-t.shutdownChannel:
			loopRunning = false
		case <-timer.C:
			t.lock.RLock()
			lastMessageDuration := time.Since(t.lastMessage)
			//log.Printf("Running %v, lastMessageDuration %v\n", t.running, lastMessageDuration)
			if t.running && lastMessageDuration > timeout {
				t.lock.RUnlock()
				// We may need to start an election
				t.node.holdElection()
			} else {
				t.lock.RUnlock()
			}

			// Set the new timer
			// TODO: Should we subtract lastMessageDuration from the new timeout?
			timeoutMS = t.randomGenerator.Int31n(MAX_ELECTION_TIMEOUT-MIN_ELECTION_TIMEOUT) + MIN_ELECTION_TIMEOUT
			timeout = time.Duration(timeoutMS) * time.Millisecond
			//log.Printf("Setting timer to %v for next election\n", timeout)
			timer.Reset(timeout)
		}
	}
	notifier.ShutdownDone()
}
// closeSegmentsLoop is an internal method used to close segments that are no longer in use.
func (dlog *DiskLogStorage) closeSegmentsLoop() {
	var notifier *utils.ShutdownNotifier
	for {
		select {
		case notifier = <-dlog.closeSegmentsShutdownChannel:
			notifier.ShutdownDone()
			return
		case <-dlog.closeSegementsChannel:
			// Take a copy of the list of segments so that we don't hold the lock for too long.
			var segmentList []*Segment
			dlog.lock.RLock()
			segmentList = append(segmentList, dlog.segments...)
			dlog.lock.RUnlock()

			openCount := 0
			for i := len(segmentList) - 1; i >= 0; i-- {
				if segmentList[i].GetOpenStatus() {
					openCount++
					if openCount > TARGET_OPEN_SEGMENTS {
						dlog.node_log("Found more than %v segments open\n", TARGET_OPEN_SEGMENTS)
						if time.Since(segmentList[i].GetLastAccessTime()) > SEGMENT_LAST_USED_TIMEOUT {
							dlog.node_log("Found segment that has not been used in the last %v, closing\n", SEGMENT_LAST_USED_TIMEOUT)
							err := segmentList[i].Close()
							if err != nil {
								// Bad things happening here
								log.Fatalf("Unable to close segment, error: %v\n", err)
							}
						} else {
							dlog.node_log("Segment not yet timed out, skipping.\n")
						}
					}
				}
			}
			dlog.node_log("Check for segments to close is complete\n")
		}
	}
}
func (mlog *MemoryLogStorage) Shutdown(notifier *utils.ShutdownNotifier) {
	notifier.ShutdownDone()
}
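Example #1 obtains the storage via node.log.GetLogStorage() and shuts it down through the same notifier contract that DiskLogStorage and MemoryLogStorage implement above. The interface they presumably satisfy is not shown; an assumed minimal form is:

// LogStorage is an assumed interface inferred from node.log.GetLogStorage() and
// the two Shutdown implementations above; the real interface will contain
// additional methods for reading and appending log entries.
type LogStorage interface {
	// Shutdown releases the storage's resources and calls notifier.ShutdownDone()
	// once it is safe for the caller to proceed.
	Shutdown(notifier *utils.ShutdownNotifier)
}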