func handleProposalChange() { proposal, leader := store.Proposal() var msg fproto.Leader msg.Proposal = new(uint64) msg.Leader = new(uint32) *msg.Proposal = proposal *msg.Leader = uint32(leader) msgBuf, err := proto.Marshal(&msg) if err != nil { panic("generated bad leader update message") } var baseMsg baseproto.Message baseMsg.MsgType = new(uint32) *baseMsg.MsgType = 11 baseMsg.Content = msgBuf connectionsLock.Lock() for _, conn := range connections { conn.conn.Send(&baseMsg) } connectionsLock.Unlock() }
// May only be called by the processing goroutine, in a transaction. func sendProposal(inst *coproto.Instruction) { // We must be leader. proposal, leader := store.Proposal() if leader != config.Id() || !amLeader { panic("tried to send accept messages while not leader") } // Send accept messages to all other core nodes. var accept coproto.Accept accept.Proposal = new(uint64) *accept.Proposal = proposal accept.Instruction = inst for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) != 0 { c := connections[node][0] c.SendProto(4, &accept) } } // Behave as if we received our own accept message. addAccept(config.Id(), &accept) }
// Must be called from the processing goroutine. func processNack(node uint16, conn *connect.BaseConn, content []byte) { var msg coproto.Nack if err := proto.Unmarshal(content, &msg); err != nil { conn.Close() return } store.StartTransaction() defer store.EndTransaction() // If we don't consider ourselves the leader, discard. if !amLeader { return } msgProposal, msgLeader := *msg.Proposal, uint16(*msg.Leader) proposal, leader := store.Proposal() if msgProposal == proposal && msgLeader == leader { return } if store.CompareProposals(msgProposal, msgLeader, proposal, leader) { stopBeingLeader() store.SetProposal(msgProposal, msgLeader) } }
// Must be called from the processing goroutine. func processAccept(node uint16, conn *connect.BaseConn, content []byte) { var msg coproto.Accept if err := proto.Unmarshal(content, &msg); err != nil { conn.Close() return } store.StartTransaction() defer store.EndTransaction() proposal, leader := store.Proposal() msgProposal, msgLeader := *msg.Proposal, node if proposal != msgProposal || leader != msgLeader { // Send a nack message and return, // if this accept relates to an earlier proposal. if store.CompareProposals(proposal, leader, msgProposal, msgLeader) { var nack coproto.Nack nack.Proposal = new(uint64) nack.Leader = new(uint32) *nack.Proposal = proposal *nack.Leader = uint32(leader) conn.SendProto(6, &nack) return } store.SetProposal(msgProposal, msgLeader) } addAccept(node, &msg) }
// Must be called from the processing goroutine. func processPromise(node uint16, conn *connect.BaseConn, content []byte) { var msg coproto.Promise if err := proto.Unmarshal(content, &msg); err != nil { conn.Close() return } store.StartTransaction() defer store.EndTransaction() if receivedPromises == nil { // Not attempting to become leader. log.Print("core/consensus: discarded promise, not becoming "+ "leader, from ", node) return } proposal, leader := store.Proposal() if proposal != *msg.Proposal || leader != uint16(*msg.Leader) { log.Print("core/consensus: rejected promise for wrong "+ "proposal number from ", node) return } if receivedPromises[node] != nil { // Protocol violation; shouldn't get duplicate promises. log.Print("core/consensus: PROTOCOL VIOLATION: received "+ "duplicate promise from node ", node) return } log.Print("core/consensus: received promise from node ", node) addPromise(node, &msg) }
func handleLeader(f *followConn, content []byte) { var msg fproto.Leader if err := proto.Unmarshal(content, &msg); err != nil { f.lock.Lock() f.Close() f.lock.Unlock() return } store.StartTransaction() defer store.EndTransaction() proposal, leader := store.Proposal() msgProposal, msgLeader := *msg.Proposal, uint16(*msg.Leader) if store.CompareProposals(msgProposal, msgLeader, proposal, leader) { store.SetProposal(msgProposal, msgLeader) } }
func process() { for { select { // Try to make any needed outgoing connections. case <-tryOutgoingCh: store.StartTransaction() connectionsLock.Lock() coreNodes := config.CoreNodes() if !store.Degraded() { if config.IsCore() { for _, node := range coreNodes { if node == config.Id() { continue } if connections[node] == nil { go tryOutgoing(node) } } } else { // Settle for less than 2 core nodes, // if there *aren't* 2 core nodes. targetConns := 2 if targetConns > len(coreNodes) { targetConns = len(coreNodes) } newOutgoing := targetConns - len(connections) used := -1 for newOutgoing > 0 { r := processRand.Intn( len(coreNodes)) // Don't do the same node // twice. if r == used { continue } node := coreNodes[r] if connections[node] == nil { used = r go tryOutgoing(node) newOutgoing-- } } } } else { if len(connections) == 0 { r := processRand.Intn(len(coreNodes)) node := coreNodes[r] if connections[node] == nil { go tryOutgoing(node) } } } connectionsLock.Unlock() store.EndTransaction() // After a random time, check again. randDur := time.Duration(processRand.Intn(19)+1) * time.Second tryOutgoingCh = time.NewTimer(randDur).C // New received connection. case conn := <-receivedConnCh: conn.offerTimers = make(map[uint64]*time.Timer) store.StartTransaction() connectionsLock.Lock() // If we are degraded, reject the connection, // unless we have no other connection, // and it is outbound. if store.Degraded() && !(len(connections) == 0 && conn.outgoing) { conn.lock.Lock() conn.Close() conn.lock.Unlock() connectionsLock.Unlock() store.EndTransaction() break } // If we have an existing connection to this node, // close it. if connections[conn.node] != nil { other := connections[conn.node] other.lock.Lock() other.Close() if other.firstUnappliedTimer != nil { other.firstUnappliedTimer.Stop() } other.lock.Unlock() } // Add to our connections. connections[conn.node] = conn // Send initial position and leader messages. posMsg := new(fproto.Position) posMsg.FirstUnapplied = new(uint64) posMsg.Degraded = new(bool) *posMsg.FirstUnapplied = store.InstructionFirstUnapplied() *posMsg.Degraded = store.Degraded() conn.conn.SendProto(2, posMsg) proposal, leader := store.Proposal() leaderMsg := new(fproto.Leader) leaderMsg.Proposal = new(uint64) leaderMsg.Leader = new(uint32) *leaderMsg.Proposal = proposal *leaderMsg.Leader = uint32(leader) conn.conn.SendProto(11, leaderMsg) connectionsLock.Unlock() store.EndTransaction() // Start timer for sending first unapplied // instruction updates. if config.IsCore() && conn.node <= 0x2000 { conn.lock.Lock() conn.firstUnappliedTimer = time.AfterFunc( firstUnappliedTimerDuration, func() { conn.firstUnappliedTimeout() }) conn.lock.Unlock() } // Start handling received messages from the connection. go handleConn(conn) // Terminated connection. case conn := <-terminatedConnCh: connectionsLock.Lock() if connections[conn.node] == conn { delete(connections, conn.node) conn.lock.Lock() conn.closed = true if conn.firstUnappliedTimer != nil { conn.firstUnappliedTimer.Stop() } conn.lock.Unlock() } connectionsLock.Unlock() } } }
// Handle a received change forward. Decides which node is responsible for it. // Must only be called from the processing goroutine. func processForward(forward *chproto.ChangeForward) { // If we are already trying to forward a change forward message with // the same requesting node and request ID, discard this message. if _, exists := getForwardTimeout(uint16(*forward.Request.RequestNode), *forward.Request.RequestId); exists { return } // Everything else in this function runs in a transaction. // We are read-only. store.StartTransaction() defer store.EndTransaction() // If this is a core node and this node stopped being leader less than // a Change Timeout Period ago, always add us to the ignore list. if config.IsCore() && !isIgnored(forward, config.Id()) { diff := time.Now().Sub(store.StoppedLeading()) if diff < config.CHANGE_TIMEOUT_PERIOD { forward.Ignores = append(forward.Ignores, uint32(config.Id())) } } // If all core node IDs are in the forward's ignore list, discard it. if len(forward.Ignores) == len(config.CoreNodes()) { log.Print("shared/chrequest: dropped msg due to full ignores") return } // Otherwise, choose a potential leader node. // This is O(n^2) in the number of core nodes, // but we don't expect to have many. chosenNode := uint16(0) _, leader := store.Proposal() if leader != 0 && !isIgnored(forward, leader) { chosenNode = leader } else { for _, node := range config.CoreNodes() { if !isIgnored(forward, node) { chosenNode = node break } } } if chosenNode == 0 { // Shouldn't happen. log.Print("shared/chrequest: bug, " + "couldn't find candidate leader node") return } // If we are the selected leader, construct an external change request, // and send it on our change request channel. if chosenNode == config.Id() { intRequest := forward.Request chrequest := new(store.ChangeRequest) chrequest.RequestEntity = *intRequest.RequestEntity chrequest.RequestNode = uint16(*intRequest.RequestNode) chrequest.RequestId = *intRequest.RequestId chrequest.Changeset = make([]store.Change, len(intRequest.Changeset)) for i, ch := range intRequest.Changeset { chrequest.Changeset[i].TargetEntity = *ch.TargetEntity chrequest.Changeset[i].Key = *ch.Key chrequest.Changeset[i].Value = *ch.Value } for _, cb := range changeCallbacks { cb(chrequest) } return } // Otherwise, we send it on to the selected leader, // add the selected leader to the ignore list, // and set a timeout to retry. sendForward(chosenNode, forward) forward.Ignores = append(forward.Ignores, uint32(chosenNode)) addForwardTimeout(forward) }
func handleProposalChange() { _, leader := store.Proposal() if amLeader && leader != config.Id() { stopBeingLeader() } }
// Must be called from the processing goroutine, inside a transaction. func addPromise(node uint16, msg *coproto.Promise) { receivedPromises[node] = msg // If we have promises from a majority of core nodes, // become leader. if len(receivedPromises) > len(config.CoreNodes())/2 { log.Print("core/consensus: became leader") stopLeaderTimeout() amLeader = true proposal, leader := store.Proposal() // Find a slot number above all those in promise messages, // and above our first unapplied. firstUnapplied := store.InstructionFirstUnapplied() limit := firstUnapplied for _, msg := range receivedPromises { for _, accepted := range msg.Accepted { if *accepted.Slot >= limit { limit = *accepted.Slot + 1 } } } // Start our next slot after the limit. nextProposalSlot = limit // For all slots between this and our first unapplied, // submit a previously accepted instruction unless we // know an instruction was already chosen. // Fills these slots with proposals. // This is O(n^2) in the number of instructions between our // first unapplied and limit. // TODO: Improve worst-case complexity. start := store.InstructionStart() slots := store.InstructionSlots() for i := firstUnapplied; i < limit; i++ { // If we already have a chosen instruction, skip. rel := int(i - start) if len(slots[rel]) == 1 && slots[rel][0].IsChosen() { continue } // Find the previously accepted instruction // accepted with the highest proposal number. var bestInst *coproto.Instruction var bp uint64 // Best proposal var bl uint16 // Best leader for _, msg := range receivedPromises { for _, accepted := range msg.Accepted { if *accepted.Slot != i { continue } if bestInst == nil { bestInst = accepted bp = *accepted.Proposal bl = uint16(*accepted.Leader) continue } // TODO: This indent is just absurd. p := *accepted.Proposal l := uint16(*accepted.Leader) if store.CompareProposals(p, l, bp, bl) { bestInst = accepted bp = *accepted.Proposal bl = uint16(*accepted.Leader) } } } // If we didn't find an instruction, make an empty one. if bestInst == nil { empty := new(coproto.ChangeRequest) empty.RequestEntity = new(uint64) empty.RequestNode = new(uint32) *empty.RequestEntity = uint64(config.Id()) *empty.RequestNode = uint32(config.Id()) bestInst := new(coproto.Instruction) bestInst.Slot = new(uint64) *bestInst.Slot = i bestInst.Request = empty } // Add proposal timeout. req := makeExtChangeRequest(bestInst.Request) addProposalTimeout(i, req) // Send proposal. bestInst.Proposal = new(uint64) bestInst.Leader = new(uint32) *bestInst.Proposal = proposal *bestInst.Leader = uint32(leader) sendProposal(bestInst) } // Discard received promise messages. receivedPromises = nil // Make an instruction proposal for each waiting change. for _, req := range waitingRequests { slot := nextProposalSlot nextProposalSlot++ addProposalTimeout(slot, req) inst := makeInst(slot, proposal, leader, req) sendProposal(inst) } // Clear waiting changes. waitingRequests = nil } }
// Top-level function of the processing goroutine. func process() { connections = make(map[uint16][]*connect.BaseConn) // On startup, make an outgoing connection attempt to all other // core nodes, before continuing. for _, node := range config.CoreNodes() { if node == config.Id() { continue } conn, err := connect.Dial( connect.CONSENSUS_PROTOCOL, node) if err != nil { // Can't reach the other node. continue } log.Print("core/consensus: made outgoing connection to ", node) connections[node] = append(connections[node], conn) go handleConn(node, conn) } // Retry connections once per config.CHANGE_TIMEOUT_PERIOD. // Largely arbitrary. reconnectTicker := time.Tick(config.CHANGE_TIMEOUT_PERIOD) for { select { // Connection retry tick. // We should try to make an outgoing connection to any node // that we do not have at least one connection to. // We need to make these asynchronously, because connections // are slow. case <-reconnectTicker: for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) > 0 { continue } go outgoingConn(node) } // New change request, for us to propose as leader. case req := <-newChangeCh: store.StartTransaction() if !amLeader && receivedPromises != nil { waitingRequests = append(waitingRequests, req) } else if !amLeader { waitingRequests = append(waitingRequests, req) // Start attempting to be leader. m := make(map[uint16]*coproto.Promise) receivedPromises = m proposal, _ := store.Proposal() proposal++ store.SetProposal(proposal, config.Id()) firstUn := store.InstructionFirstUnapplied() // Send prepare messages to all other nodes. var prepare coproto.Prepare prepare.Proposal = new(uint64) prepare.FirstUnapplied = new(uint64) *prepare.Proposal = proposal *prepare.FirstUnapplied = firstUn for _, node := range config.CoreNodes() { if node == config.Id() { continue } if len(connections[node]) != 0 { c := connections[node][0] c.SendProto(2, &prepare) } } // Behave as if we got a promise message // from ourselves. var promise coproto.Promise promise.Proposal = prepare.Proposal promise.PrevProposal = promise.Proposal promise.Leader = new(uint32) *promise.Leader = uint32(config.Id()) promise.PrevLeader = promise.Leader addPromise(config.Id(), &promise) } else { newSlot := nextProposalSlot nextProposalSlot++ addProposalTimeout(newSlot, req) proposal, leader := store.Proposal() inst := makeInst(newSlot, proposal, leader, req) sendProposal(inst) } store.EndTransaction() // Leadership attempt timed out. case timedOutProposal := <-leaderTimeoutCh: store.StartTransaction() proposal, leader := store.Proposal() // If the proposal has changed since that // leadership attempt, ignore it. if leader != config.Id() { return } if proposal != timedOutProposal { return } // If we successfully became leader, ignore it. if amLeader { return } // Otherwise, stop our attempt to become leader. stopBeingLeader() store.EndTransaction() // Proposal timed out. case timeout := <-proposalTimeoutCh: store.StartTransaction() // If this timeout was not canceled, a proposal failed. // We stop being leader. if proposalTimeouts[timeout.slot] == timeout { stopBeingLeader() } store.EndTransaction() // New received connection. case receivedConn := <-receivedConnCh: node := receivedConn.node conn := receivedConn.conn connections[node] = append(connections[node], conn) // Received message. case recvMsg := <-receivedMsgCh: node := recvMsg.node conn := recvMsg.conn msg := recvMsg.msg switch *msg.MsgType { case 2: processPrepare(node, conn, msg.Content) case 3: processPromise(node, conn, msg.Content) case 4: processAccept(node, conn, msg.Content) case 5: processAccepted(node, conn, msg.Content) case 6: processNack(node, conn, msg.Content) default: // Unknown message. conn.Close() } // Terminate received connection. case terminatedConn := <-terminatedConnCh: node := terminatedConn.node conn := terminatedConn.conn for i, other := range connections[node] { if other != conn { continue } conns := connections[node] conns = append(conns[:i], conns[i+1:]...) connections[node] = conns break } } } }
// Must be called from the processing goroutine. func processPrepare(node uint16, conn *connect.BaseConn, content []byte) { var msg coproto.Prepare if err := proto.Unmarshal(content, &msg); err != nil { conn.Close() return } store.StartTransaction() defer store.EndTransaction() newProposal, newLeader := *msg.Proposal, node proposal, leader := store.Proposal() if store.CompareProposals(newProposal, newLeader, proposal, leader) { log.Print("core/consensus: sending promise to ", newLeader) // Create a promise message to send back. var promise coproto.Promise promise.Proposal = new(uint64) promise.Leader = new(uint32) promise.PrevProposal = new(uint64) promise.PrevLeader = new(uint32) *promise.Proposal = newProposal *promise.Leader = uint32(newLeader) *promise.PrevProposal = proposal *promise.PrevLeader = uint32(leader) // Add all the instructions we've previously accepted or chosen. slots := store.InstructionSlots() theirFirstUnapplied := *msg.FirstUnapplied ourStart := store.InstructionStart() relativeSlot := int(theirFirstUnapplied - ourStart) if relativeSlot < 0 { relativeSlot = 0 } var accepted []*coproto.Instruction for ; relativeSlot < len(slots); relativeSlot++ { slot := slots[relativeSlot] slotNum := ourStart + uint64(relativeSlot) for i, _ := range slot { if slot[i].IsChosen() { appendInst(&accepted, slotNum, slot[i]) break } weAccepted := false for _, node := range slot[i].Accepted() { if node == config.Id() { weAccepted = true break } } if weAccepted { appendInst(&accepted, slotNum, slot[i]) break } } } // Send promise message. conn.SendProto(3, &promise) // Accept the other node as our new leader. store.SetProposal(newProposal, newLeader) } else { var nack coproto.Nack nack.Proposal = new(uint64) nack.Leader = new(uint32) *nack.Proposal = proposal *nack.Leader = uint32(leader) conn.SendProto(6, &nack) } }