// Sets this instruction value as accepted by the given node ID. // Safe to call more than once with the same node ID. // If the given current proposal number and leader node ID are // higher than our latest proposal number and leader node ID that // this instruction was accepted in, we update our latest proposal number and // leader node ID that this instruction was accepted in. // // Will automatically cause the instruction value to become chosen, // with all other instruction values in the same slot cleared, // once a majority of nodes have accepted it, // and automatically cause it to be applied to state once all // previous instruction slots have a chosen value. func (i *InstructionValue) Accept(id uint16, proposal uint64, leader uint16) { if CompareProposals(proposal, id, i.latestProposal, i.latestLeader) { i.latestProposal = proposal i.latestLeader = leader } // If this value is already chosen, acceptance changes nothing. if i.chosen { return } // If this node ID already accepted this instruction value, // don't add it twice. for _, node := range i.acceptedBy { if node == id { return } } // Add this node ID to the accepting list. i.acceptedBy = append(i.acceptedBy, id) // If a majority of nodes have accepted this instruction value, // make it chosen. if len(i.acceptedBy) > len(config.CoreNodes())/2 { i.Choose() } }
// May only be called by the processing goroutine, in a transaction. func sendProposal(inst *coproto.Instruction) { // We must be leader. proposal, leader := store.Proposal() if leader != config.Id() || !amLeader { panic("tried to send accept messages while not leader") } // Send accept messages to all other core nodes. var accept coproto.Accept accept.Proposal = new(uint64) *accept.Proposal = proposal accept.Instruction = inst for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) != 0 { c := connections[node][0] c.SendProto(4, &accept) } } // Behave as if we received our own accept message. addAccept(config.Id(), &accept) }
// Must be called from the processing goroutine, inside a transaction. // Handle an accept message that we've decided to respond to. // Called directly when we send accepted messages to other nodes. func addAccept(node uint16, msg *coproto.Accept) { // Send an accepted message to all other core nodes reachable. var accepted coproto.Accepted accepted.Proposal = msg.Proposal accepted.Leader = new(uint32) *accepted.Leader = uint32(node) accepted.Instruction = msg.Instruction // We must do this before sending accepted messages. // We behave as if we received one from ourselves. if !addAccepted(config.Id(), &accepted) { // Required too large an instruction slots slice. // We have NOT accepted this accept message, // and must NOT send accepted messages. return } for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) > 0 { connections[node][0].SendProto(5, &accepted) } } }
func process() { for { select { // Try to make any needed outgoing connections. case <-tryOutgoingCh: store.StartTransaction() connectionsLock.Lock() coreNodes := config.CoreNodes() if !store.Degraded() { if config.IsCore() { for _, node := range coreNodes { if node == config.Id() { continue } if connections[node] == nil { go tryOutgoing(node) } } } else { // Settle for less than 2 core nodes, // if there *aren't* 2 core nodes. targetConns := 2 if targetConns > len(coreNodes) { targetConns = len(coreNodes) } newOutgoing := targetConns - len(connections) used := -1 for newOutgoing > 0 { r := processRand.Intn( len(coreNodes)) // Don't do the same node // twice. if r == used { continue } node := coreNodes[r] if connections[node] == nil { used = r go tryOutgoing(node) newOutgoing-- } } } } else { if len(connections) == 0 { r := processRand.Intn(len(coreNodes)) node := coreNodes[r] if connections[node] == nil { go tryOutgoing(node) } } } connectionsLock.Unlock() store.EndTransaction() // After a random time, check again. randDur := time.Duration(processRand.Intn(19)+1) * time.Second tryOutgoingCh = time.NewTimer(randDur).C // New received connection. case conn := <-receivedConnCh: conn.offerTimers = make(map[uint64]*time.Timer) store.StartTransaction() connectionsLock.Lock() // If we are degraded, reject the connection, // unless we have no other connection, // and it is outbound. if store.Degraded() && !(len(connections) == 0 && conn.outgoing) { conn.lock.Lock() conn.Close() conn.lock.Unlock() connectionsLock.Unlock() store.EndTransaction() break } // If we have an existing connection to this node, // close it. if connections[conn.node] != nil { other := connections[conn.node] other.lock.Lock() other.Close() if other.firstUnappliedTimer != nil { other.firstUnappliedTimer.Stop() } other.lock.Unlock() } // Add to our connections. connections[conn.node] = conn // Send initial position and leader messages. posMsg := new(fproto.Position) posMsg.FirstUnapplied = new(uint64) posMsg.Degraded = new(bool) *posMsg.FirstUnapplied = store.InstructionFirstUnapplied() *posMsg.Degraded = store.Degraded() conn.conn.SendProto(2, posMsg) proposal, leader := store.Proposal() leaderMsg := new(fproto.Leader) leaderMsg.Proposal = new(uint64) leaderMsg.Leader = new(uint32) *leaderMsg.Proposal = proposal *leaderMsg.Leader = uint32(leader) conn.conn.SendProto(11, leaderMsg) connectionsLock.Unlock() store.EndTransaction() // Start timer for sending first unapplied // instruction updates. if config.IsCore() && conn.node <= 0x2000 { conn.lock.Lock() conn.firstUnappliedTimer = time.AfterFunc( firstUnappliedTimerDuration, func() { conn.firstUnappliedTimeout() }) conn.lock.Unlock() } // Start handling received messages from the connection. go handleConn(conn) // Terminated connection. case conn := <-terminatedConnCh: connectionsLock.Lock() if connections[conn.node] == conn { delete(connections, conn.node) conn.lock.Lock() conn.closed = true if conn.firstUnappliedTimer != nil { conn.firstUnappliedTimer.Stop() } conn.lock.Unlock() } connectionsLock.Unlock() } } }
// Handle a received change forward. Decides which node is responsible for it. // Must only be called from the processing goroutine. func processForward(forward *chproto.ChangeForward) { // If we are already trying to forward a change forward message with // the same requesting node and request ID, discard this message. if _, exists := getForwardTimeout(uint16(*forward.Request.RequestNode), *forward.Request.RequestId); exists { return } // Everything else in this function runs in a transaction. // We are read-only. store.StartTransaction() defer store.EndTransaction() // If this is a core node and this node stopped being leader less than // a Change Timeout Period ago, always add us to the ignore list. if config.IsCore() && !isIgnored(forward, config.Id()) { diff := time.Now().Sub(store.StoppedLeading()) if diff < config.CHANGE_TIMEOUT_PERIOD { forward.Ignores = append(forward.Ignores, uint32(config.Id())) } } // If all core node IDs are in the forward's ignore list, discard it. if len(forward.Ignores) == len(config.CoreNodes()) { log.Print("shared/chrequest: dropped msg due to full ignores") return } // Otherwise, choose a potential leader node. // This is O(n^2) in the number of core nodes, // but we don't expect to have many. chosenNode := uint16(0) _, leader := store.Proposal() if leader != 0 && !isIgnored(forward, leader) { chosenNode = leader } else { for _, node := range config.CoreNodes() { if !isIgnored(forward, node) { chosenNode = node break } } } if chosenNode == 0 { // Shouldn't happen. log.Print("shared/chrequest: bug, " + "couldn't find candidate leader node") return } // If we are the selected leader, construct an external change request, // and send it on our change request channel. if chosenNode == config.Id() { intRequest := forward.Request chrequest := new(store.ChangeRequest) chrequest.RequestEntity = *intRequest.RequestEntity chrequest.RequestNode = uint16(*intRequest.RequestNode) chrequest.RequestId = *intRequest.RequestId chrequest.Changeset = make([]store.Change, len(intRequest.Changeset)) for i, ch := range intRequest.Changeset { chrequest.Changeset[i].TargetEntity = *ch.TargetEntity chrequest.Changeset[i].Key = *ch.Key chrequest.Changeset[i].Value = *ch.Value } for _, cb := range changeCallbacks { cb(chrequest) } return } // Otherwise, we send it on to the selected leader, // add the selected leader to the ignore list, // and set a timeout to retry. sendForward(chosenNode, forward) forward.Ignores = append(forward.Ignores, uint32(chosenNode)) addForwardTimeout(forward) }
// Sets this instruction value as chosen, clearing all others in the same slot. // Causes it to be automatically applied to state once all previous instruction // slots have a chosen value. func (i *InstructionValue) Choose() { log.Print("shared/store: choosing instruction in slot ", i.slot) relativeSlot := i.slot - start // Drop all other instruction values in the same slot. if len(slots[relativeSlot]) > 1 { slots[relativeSlot] = []*InstructionValue{i} } // Set the value as chosen. i.chosen = true i.chosenTime = time.Now() i.acceptedBy = nil // Call instruction chosen callbacks. for _, cb := range chosenCallbacks { cb(i.slot) } // Apply instructions as far as possible. tryApply() // Discard no longer needed instructions, from the start. newStart := start for { relative := newStart - start // If we've not applied the instruction yet, // we still need it. if firstUnapplied == newStart { break } // If the instruction wasn't chosen long enough ago, // we can't discard it. minDelay := time.Duration(4) * config.CHANGE_TIMEOUT_PERIOD timeChosen := slots[relative][0].TimeChosen() if time.Now().Sub(timeChosen) < minDelay { break } // If this is a core node and the majority of core nodes aren't // past this point, we have to keep it. if config.IsCore() { past := 1 // We are past it. coreNodes := config.CoreNodes() for _, node := range coreNodes { if node == config.Id() { continue } if NodeFirstUnapplied(node) > newStart { past++ } } if past <= len(coreNodes)/2 { break } } newStart++ } // If we can discard, do it by reslicing the slots. // This doesn't result in an immediate free, // but will when we next expand the slots. // This avoids extra performance costs for reallocating them now. if newStart != start { log.Print("Discarding instructions between ", newStart-1, " and ", start) slots = slots[newStart-start:] start = newStart } }
// Must be called from the processing goroutine, inside a transaction. func addPromise(node uint16, msg *coproto.Promise) { receivedPromises[node] = msg // If we have promises from a majority of core nodes, // become leader. if len(receivedPromises) > len(config.CoreNodes())/2 { log.Print("core/consensus: became leader") stopLeaderTimeout() amLeader = true proposal, leader := store.Proposal() // Find a slot number above all those in promise messages, // and above our first unapplied. firstUnapplied := store.InstructionFirstUnapplied() limit := firstUnapplied for _, msg := range receivedPromises { for _, accepted := range msg.Accepted { if *accepted.Slot >= limit { limit = *accepted.Slot + 1 } } } // Start our next slot after the limit. nextProposalSlot = limit // For all slots between this and our first unapplied, // submit a previously accepted instruction unless we // know an instruction was already chosen. // Fills these slots with proposals. // This is O(n^2) in the number of instructions between our // first unapplied and limit. // TODO: Improve worst-case complexity. start := store.InstructionStart() slots := store.InstructionSlots() for i := firstUnapplied; i < limit; i++ { // If we already have a chosen instruction, skip. rel := int(i - start) if len(slots[rel]) == 1 && slots[rel][0].IsChosen() { continue } // Find the previously accepted instruction // accepted with the highest proposal number. var bestInst *coproto.Instruction var bp uint64 // Best proposal var bl uint16 // Best leader for _, msg := range receivedPromises { for _, accepted := range msg.Accepted { if *accepted.Slot != i { continue } if bestInst == nil { bestInst = accepted bp = *accepted.Proposal bl = uint16(*accepted.Leader) continue } // TODO: This indent is just absurd. p := *accepted.Proposal l := uint16(*accepted.Leader) if store.CompareProposals(p, l, bp, bl) { bestInst = accepted bp = *accepted.Proposal bl = uint16(*accepted.Leader) } } } // If we didn't find an instruction, make an empty one. if bestInst == nil { empty := new(coproto.ChangeRequest) empty.RequestEntity = new(uint64) empty.RequestNode = new(uint32) *empty.RequestEntity = uint64(config.Id()) *empty.RequestNode = uint32(config.Id()) bestInst := new(coproto.Instruction) bestInst.Slot = new(uint64) *bestInst.Slot = i bestInst.Request = empty } // Add proposal timeout. req := makeExtChangeRequest(bestInst.Request) addProposalTimeout(i, req) // Send proposal. bestInst.Proposal = new(uint64) bestInst.Leader = new(uint32) *bestInst.Proposal = proposal *bestInst.Leader = uint32(leader) sendProposal(bestInst) } // Discard received promise messages. receivedPromises = nil // Make an instruction proposal for each waiting change. for _, req := range waitingRequests { slot := nextProposalSlot nextProposalSlot++ addProposalTimeout(slot, req) inst := makeInst(slot, proposal, leader, req) sendProposal(inst) } // Clear waiting changes. waitingRequests = nil } }
// Top-level function of the processing goroutine. func process() { connections = make(map[uint16][]*connect.BaseConn) // On startup, make an outgoing connection attempt to all other // core nodes, before continuing. for _, node := range config.CoreNodes() { if node == config.Id() { continue } conn, err := connect.Dial( connect.CONSENSUS_PROTOCOL, node) if err != nil { // Can't reach the other node. continue } log.Print("core/consensus: made outgoing connection to ", node) connections[node] = append(connections[node], conn) go handleConn(node, conn) } // Retry connections once per config.CHANGE_TIMEOUT_PERIOD. // Largely arbitrary. reconnectTicker := time.Tick(config.CHANGE_TIMEOUT_PERIOD) for { select { // Connection retry tick. // We should try to make an outgoing connection to any node // that we do not have at least one connection to. // We need to make these asynchronously, because connections // are slow. case <-reconnectTicker: for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) > 0 { continue } go outgoingConn(node) } // New change request, for us to propose as leader. case req := <-newChangeCh: store.StartTransaction() if !amLeader && receivedPromises != nil { waitingRequests = append(waitingRequests, req) } else if !amLeader { waitingRequests = append(waitingRequests, req) // Start attempting to be leader. m := make(map[uint16]*coproto.Promise) receivedPromises = m proposal, _ := store.Proposal() proposal++ store.SetProposal(proposal, config.Id()) firstUn := store.InstructionFirstUnapplied() // Send prepare messages to all other nodes. var prepare coproto.Prepare prepare.Proposal = new(uint64) prepare.FirstUnapplied = new(uint64) *prepare.Proposal = proposal *prepare.FirstUnapplied = firstUn for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) != 0 { c := connections[node][0] c.SendProto(2, &prepare) } } // Behave as if we got a promise message // from ourselves. var promise coproto.Promise promise.Proposal = prepare.Proposal promise.PrevProposal = promise.Proposal promise.Leader = new(uint32) *promise.Leader = uint32(config.Id()) promise.PrevLeader = promise.Leader addPromise(config.Id(), &promise) } else { newSlot := nextProposalSlot nextProposalSlot++ addProposalTimeout(newSlot, req) proposal, leader := store.Proposal() inst := makeInst(newSlot, proposal, leader, req) sendProposal(inst) } store.EndTransaction() // Leadership attempt timed out. case timedOutProposal := <-leaderTimeoutCh: store.StartTransaction() proposal, leader := store.Proposal() // If the proposal has changed since that // leadership attempt, ignore it. if leader != config.Id() { return } if proposal != timedOutProposal { return } // If we successfully became leader, ignore it. if amLeader { return } // Otherwise, stop our attempt to become leader. stopBeingLeader() store.EndTransaction() // Proposal timed out. case timeout := <-proposalTimeoutCh: store.StartTransaction() // If this timeout was not canceled, a proposal failed. // We stop being leader. if proposalTimeouts[timeout.slot] == timeout { stopBeingLeader() } store.EndTransaction() // New received connection. case receivedConn := <-receivedConnCh: node := receivedConn.node conn := receivedConn.conn connections[node] = append(connections[node], conn) // Received message. case recvMsg := <-receivedMsgCh: node := recvMsg.node conn := recvMsg.conn msg := recvMsg.msg switch *msg.MsgType { case 2: processPrepare(node, conn, msg.Content) case 3: processPromise(node, conn, msg.Content) case 4: processAccept(node, conn, msg.Content) case 5: processAccepted(node, conn, msg.Content) case 6: processNack(node, conn, msg.Content) default: // Unknown message. conn.Close() } // Terminate received connection. case terminatedConn := <-terminatedConnCh: node := terminatedConn.node conn := terminatedConn.conn for i, other := range connections[node] { if other != conn { continue } conns := connections[node] conns = append(conns[:i], conns[i+1:]...) connections[node] = conns break } } } }