// newNotLeaderError returns a NotLeaderError initialized with the
// replica for the holder (if any) of the given lease.
func (r *Range) newNotLeaderError(l *proto.Lease, originNode proto.RaftNodeID) error {
	err := &proto.NotLeaderError{}
	if l != nil && l.RaftNodeID != 0 {
		_, originStoreID := proto.DecodeRaftNodeID(originNode)
		_, err.Replica = r.Desc().FindReplica(originStoreID)
		_, storeID := proto.DecodeRaftNodeID(proto.RaftNodeID(l.RaftNodeID))
		_, err.Leader = r.Desc().FindReplica(storeID)
	}
	return err
}
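// FindReplica is used above to map store IDs back to replicas. A minimal
// sketch of what it presumably looks like, assuming RangeDescriptor keeps
// its replicas in a Replicas slice: it returns the index and a pointer to
// the replica with the given store ID, or -1 and nil when none matches.
// This is an illustration consistent with the call sites above, not the
// actual implementation.
func (d *RangeDescriptor) FindReplica(storeID StoreID) (int, *Replica) {
	for i := range d.Replicas {
		if d.Replicas[i].StoreID == storeID {
			return i, &d.Replicas[i]
		}
	}
	return -1, nil
}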
func TestRaftNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)
	cases := []struct {
		nodeID   proto.NodeID
		storeID  proto.StoreID
		expected proto.RaftNodeID
	}{
		{0, 1, 1},
		{1, 1, 0x100000001},
		{2, 3, 0x200000003},
		{math.MaxInt32, math.MaxInt32, 0x7fffffff7fffffff},
	}
	for _, c := range cases {
		x := proto.MakeRaftNodeID(c.nodeID, c.storeID)
		if x != c.expected {
			t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected %v", c.nodeID, c.storeID, x, c.expected)
		}
		n, s := proto.DecodeRaftNodeID(x)
		if n != c.nodeID || s != c.storeID {
			t.Errorf("decodeRaftNodeID(%v) returned %v, %v; expected %v, %v", x, n, s, c.nodeID, c.storeID)
		}
	}

	panicCases := []struct {
		nodeID  proto.NodeID
		storeID proto.StoreID
	}{
		{1, 0},
		{1, -1},
		{-1, 1},
	}
	for _, c := range panicCases {
		func() {
			defer func() {
				_ = recover()
			}()
			x := proto.MakeRaftNodeID(c.nodeID, c.storeID)
			t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected panic", c.nodeID, c.storeID, x)
		}()
	}
}
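// The vectors above fully determine the encoding: the node ID occupies the
// upper 32 bits of the RaftNodeID and the store ID the lower 32 bits, and
// construction panics for negative node IDs and non-positive store IDs
// (node ID 0 is legal, per the {0, 1, 1} case). A sketch of the two
// functions consistent with those expectations; anything beyond what the
// test pins down, such as the panic message, is an assumption.
func MakeRaftNodeID(n NodeID, s StoreID) RaftNodeID {
	if n < 0 || s <= 0 {
		panic("node ID must be >= 0, store ID must be > 0")
	}
	return RaftNodeID(n)<<32 | RaftNodeID(s)
}

func DecodeRaftNodeID(x RaftNodeID) (NodeID, StoreID) {
	return NodeID(x >> 32), StoreID(x & 0xffffffff)
}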
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let MultiRaft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(raftNodeID proto.RaftNodeID) {
	t.mu.Lock()
	ch, ok := t.queues[raftNodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean-up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.queues, raftNodeID)
		t.mu.Unlock()
	}()

	nodeID, _ := proto.DecodeRaftNodeID(raftNodeID)
	addr, err := t.gossip.GetNodeIDAddress(nodeID)
	if err != nil {
		log.Errorf("could not get address for node %d: %s", nodeID, err)
		return
	}
	client := rpc.NewClient(addr, t.rpcContext)
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return
	case <-client.Closed:
		log.Warningf("raft client for node %d was closed", nodeID)
		return
	case <-time.After(raftIdleTimeout):
		// Should never happen.
		log.Errorf("raft client for node %d stuck connecting", nodeID)
		return
	case <-client.Healthy():
	}

	done := make(chan *gorpc.Call, cap(ch))
	var req *multiraft.RaftMessageRequest
	protoResp := &proto.RaftMessageResponse{}
	for {
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-time.After(raftIdleTimeout):
			if log.V(1) {
				log.Infof("closing Raft transport to %d due to inactivity", nodeID)
			}
			return
		case <-client.Closed:
			log.Warningf("raft client for node %d closed", nodeID)
			return
		case call := <-done:
			if call.Error != nil {
				log.Errorf("raft message to node %d failed: %s", nodeID, call.Error)
			}
			continue
		case req = <-ch:
		}
		if req == nil {
			return
		}

		// Convert to proto format.
		msg, err := req.Message.Marshal()
		if err != nil {
			log.Errorf("could not marshal message: %s", err)
			continue
		}
		client.Go(raftMessageName, &proto.RaftMessageRequest{
			GroupID: req.GroupID,
			Msg:     msg,
		}, protoResp, done)
	}
}
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let MultiRaft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(raftNodeID proto.RaftNodeID) {
	t.mu.Lock()
	ch, ok := t.queues[raftNodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean-up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.queues, raftNodeID)
		t.mu.Unlock()
	}()

	nodeID, _ := proto.DecodeRaftNodeID(raftNodeID)
	addr, err := t.gossip.GetNodeIDAddress(nodeID)
	if err != nil {
		log.Errorf("could not get address for node %d: %s", nodeID, err)
		return
	}
	client := rpc.NewClient(addr, nil, t.rpcContext)
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return
	case <-client.Closed:
		log.Warningf("raft client for node %d failed to connect", nodeID)
		return
	case <-time.After(raftIdleTimeout):
		// Should never happen.
		log.Errorf("raft client for node %d stuck connecting", nodeID)
		return
	case <-client.Ready:
	}

	done := make(chan *gorpc.Call, cap(ch))
	var req *multiraft.RaftMessageRequest
	protoReq := &proto.RaftMessageRequest{}
	protoResp := &proto.RaftMessageResponse{}
	for {
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-time.After(raftIdleTimeout):
			if log.V(1) {
				log.Infof("closing Raft transport to %d due to inactivity", nodeID)
			}
			return
		case <-client.Closed:
			log.Warningf("raft client for node %d closed", nodeID)
			return
		case call := <-done:
			if call.Error != nil {
				log.Errorf("raft message to node %d failed: %s", nodeID, call.Error)
			}
			continue
		case req = <-ch:
		}
		if req == nil {
			return
		}

		// Convert to proto format.
		protoReq.Reset()
		protoReq.GroupID = req.GroupID
		var err error
		if protoReq.Msg, err = req.Message.Marshal(); err != nil {
			log.Errorf("could not marshal message: %s", err)
			continue
		}
		if !client.IsHealthy() {
			log.Warningf("raft client for node %d unhealthy", nodeID)
			return
		}
		client.Go(raftMessageName, protoReq, protoResp, done)
		// TODO(tschottdorf): work around #1176 by wasting just a little
		// bit of time before moving to the next request.
		select {
		case <-done:
		case <-time.After(10 * time.Millisecond):
		}
	}
}
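// The comment on processQueue implies an enqueueing side that lazily
// (re)creates the per-node queue and spawns the goroutine. A hypothetical
// sketch of that Send method follows; the signature, the queue capacity,
// and the full-queue error are assumptions for illustration, not the
// transport's actual code.
func (t *rpcTransport) Send(id proto.RaftNodeID, req *multiraft.RaftMessageRequest) error {
	t.mu.Lock()
	ch, ok := t.queues[id]
	if !ok {
		// Buffered so that transient slowness doesn't block senders;
		// the capacity is an assumed value.
		ch = make(chan *multiraft.RaftMessageRequest, 100)
		t.queues[id] = ch
		go t.processQueue(id)
	}
	t.mu.Unlock()

	select {
	case ch <- req:
		return nil
	default:
		// Drop rather than block when the queue is full; Raft retries.
		return fmt.Errorf("queue for node %d is full", id)
	}
}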