Example No. 1
// newNotLeaderError returns a NotLeaderError initialized with the
// replica for the holder (if any) of the given lease.
func (r *Range) newNotLeaderError(l *proto.Lease, originNode proto.RaftNodeID) error {
	err := &proto.NotLeaderError{}
	if l != nil && l.RaftNodeID != 0 {
		_, originStoreID := proto.DecodeRaftNodeID(originNode)
		_, err.Replica = r.Desc().FindReplica(originStoreID)
		_, storeID := proto.DecodeRaftNodeID(proto.RaftNodeID(l.RaftNodeID))
		_, err.Leader = r.Desc().FindReplica(storeID)
	}
	return err
}
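For context, a hedged sketch of how the returned error might be consumed: the Leader field set above identifies the replica of the lease holder, so a caller would typically re-route the request there. Only the NotLeaderError type and its Replica/Leader fields come from the snippet; the surrounding call site, the args value, and the sendToReplica helper are hypothetical.

// Hypothetical caller-side handling of the error built above; the retry
// plumbing is an assumption, not taken from the repository.
if err != nil {
	if nlErr, ok := err.(*proto.NotLeaderError); ok && nlErr.Leader != nil {
		// Retry against the replica that actually holds the leader lease.
		return sendToReplica(nlErr.Leader, args) // sendToReplica and args are assumptions
	}
	return err
}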
Example No. 2
func TestRaftNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)
	cases := []struct {
		nodeID   proto.NodeID
		storeID  proto.StoreID
		expected proto.RaftNodeID
	}{
		{0, 1, 1},
		{1, 1, 0x100000001},
		{2, 3, 0x200000003},
		{math.MaxInt32, math.MaxInt32, 0x7fffffff7fffffff},
	}
	for _, c := range cases {
		x := proto.MakeRaftNodeID(c.nodeID, c.storeID)
		if x != c.expected {
			t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected %v",
				c.nodeID, c.storeID, x, c.expected)
		}
		n, s := proto.DecodeRaftNodeID(x)
		if n != c.nodeID || s != c.storeID {
			t.Errorf("decodeRaftNodeID(%v) returned %v, %v; expected %v, %v",
				x, n, s, c.nodeID, c.storeID)
		}
	}

	panicCases := []struct {
		nodeID  proto.NodeID
		storeID proto.StoreID
	}{
		{1, 0},
		{1, -1},
		{-1, 1},
	}
	for _, c := range panicCases {
		func() {
			defer func() {
				_ = recover()
			}()
			x := proto.MakeRaftNodeID(c.nodeID, c.storeID)
			t.Errorf("makeRaftNodeID(%v, %v) returned %v; expected panic",
				c.nodeID, c.storeID, x)
		}()
	}
}
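The expected values above pin down the bit layout: the node ID occupies the high 32 bits and the store ID the low 32 bits of a RaftNodeID, and the panic cases cover negative node IDs and non-positive store IDs. Below is a minimal, self-contained sketch of that encoding; it is derived from the test expectations rather than copied from the proto package, so details such as the exact validation and panic message are assumptions.

package main

import "fmt"

// Local stand-ins for the proto types so the sketch compiles on its own;
// the underlying integer widths are assumptions consistent with the test.
type NodeID int32
type StoreID int32
type RaftNodeID uint64

// makeRaftNodeID packs the node ID into the high 32 bits and the store ID
// into the low 32 bits, panicking on inputs the test marks as invalid.
func makeRaftNodeID(n NodeID, s StoreID) RaftNodeID {
	if n < 0 || s <= 0 {
		panic("invalid node or store ID")
	}
	return RaftNodeID(uint64(n)<<32 | uint64(s))
}

// decodeRaftNodeID reverses makeRaftNodeID.
func decodeRaftNodeID(x RaftNodeID) (NodeID, StoreID) {
	return NodeID(x >> 32), StoreID(x & 0xffffffff)
}

func main() {
	x := makeRaftNodeID(2, 3)
	fmt.Printf("%#x\n", x) // 0x200000003, matching the test case above
	n, s := decodeRaftNodeID(x)
	fmt.Println(n, s) // 2 3
}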
Example No. 3
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let MultiRaft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(raftNodeID proto.RaftNodeID) {
	t.mu.Lock()
	ch, ok := t.queues[raftNodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.queues, raftNodeID)
		t.mu.Unlock()
	}()

	nodeID, _ := proto.DecodeRaftNodeID(raftNodeID)
	addr, err := t.gossip.GetNodeIDAddress(nodeID)
	if err != nil {
		log.Errorf("could not get address for node %d: %s", nodeID, err)
		return
	}
	client := rpc.NewClient(addr, t.rpcContext)
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return
	case <-client.Closed:
		log.Warningf("raft client for node %d was closed", nodeID)
		return
	case <-time.After(raftIdleTimeout):
		// Should never happen.
		log.Errorf("raft client for node %d stuck connecting", nodeID)
		return
	case <-client.Healthy():
	}

	done := make(chan *gorpc.Call, cap(ch))
	var req *multiraft.RaftMessageRequest
	protoResp := &proto.RaftMessageResponse{}
	for {
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-time.After(raftIdleTimeout):
			if log.V(1) {
				log.Infof("closing Raft transport to %d due to inactivity", nodeID)
			}
			return
		case <-client.Closed:
			log.Warningf("raft client for node %d closed", nodeID)
			return
		case call := <-done:
			if call.Error != nil {
				log.Errorf("raft message to node %d failed: %s", nodeID, call.Error)
			}
			continue
		case req = <-ch:
		}
		if req == nil {
			return
		}

		// Convert to proto format.
		msg, err := req.Message.Marshal()
		if err != nil {
			log.Errorf("could not marshal message: %s", err)
			continue
		}

		client.Go(raftMessageName, &proto.RaftMessageRequest{
			GroupID: req.GroupID,
			Msg:     msg,
		}, protoResp, done)
	}
}
Example No. 4
// processQueue creates a client and sends messages from its designated queue
// via that client, exiting when the client fails or when it idles out. All
// messages remaining in the queue at that point are lost and a new instance of
// processQueue should be started by the next message to be sent.
// TODO(tschottdorf) should let MultiRaft know if the node is down;
// need a feedback mechanism for that. Potentially easiest is to arrange for
// the next call to Send() to fail appropriately.
func (t *rpcTransport) processQueue(raftNodeID proto.RaftNodeID) {
	t.mu.Lock()
	ch, ok := t.queues[raftNodeID]
	t.mu.Unlock()
	if !ok {
		return
	}
	// Clean up when the loop below shuts down.
	defer func() {
		t.mu.Lock()
		delete(t.queues, raftNodeID)
		t.mu.Unlock()
	}()

	nodeID, _ := proto.DecodeRaftNodeID(raftNodeID)
	addr, err := t.gossip.GetNodeIDAddress(nodeID)
	if err != nil {
		log.Errorf("could not get address for node %d: %s", nodeID, err)
		return
	}
	client := rpc.NewClient(addr, nil, t.rpcContext)
	select {
	case <-t.rpcContext.Stopper.ShouldStop():
		return
	case <-client.Closed:
		log.Warningf("raft client for node %d failed to connect", nodeID)
		return
	case <-time.After(raftIdleTimeout):
		// Should never happen.
		log.Errorf("raft client for node %d stuck connecting", nodeID)
		return
	case <-client.Ready:
	}

	done := make(chan *gorpc.Call, cap(ch))
	var req *multiraft.RaftMessageRequest
	protoReq := &proto.RaftMessageRequest{}
	protoResp := &proto.RaftMessageResponse{}
	for {
		select {
		case <-t.rpcContext.Stopper.ShouldStop():
			return
		case <-time.After(raftIdleTimeout):
			if log.V(1) {
				log.Infof("closing Raft transport to %d due to inactivity", nodeID)
			}
			return
		case <-client.Closed:
			log.Warningf("raft client for node %d closed", nodeID)
			return
		case call := <-done:
			if call.Error != nil {
				log.Errorf("raft message to node %d failed: %s", nodeID, call.Error)
			}
			continue
		case req = <-ch:
		}
		if req == nil {
			return
		}

		// Convert to proto format.
		protoReq.Reset()
		protoReq.GroupID = req.GroupID
		var err error
		if protoReq.Msg, err = req.Message.Marshal(); err != nil {
			log.Errorf("could not marshal message: %s", err)
			continue
		}

		if !client.IsHealthy() {
			log.Warningf("raft client for node %d unhealthy", nodeID)
			return
		}
		client.Go(raftMessageName, protoReq, protoResp, done)

		// TODO(tschottdorf): work around #1176 by wasting just a little
		// bit of time before moving to the next request.
		select {
		case <-done:
		case <-time.After(10 * time.Millisecond):
		}
	}
}
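The comment on processQueue says a new instance should be started by the next message to be sent, which implies an enqueue path that lazily creates the per-node channel and spawns the goroutine. A hedged sketch of that Send side follows: the rpcTransport fields (mu, queues) and processQueue are taken from the snippet, while the Send signature, the destination lookup via the raft message's To field, and the buffer size constant are assumptions.

// raftSendBufferSize is an assumed constant; the real transport may size
// and drain the queue differently.
const raftSendBufferSize = 64

// Send is a hypothetical enqueue counterpart to processQueue: the first
// message for a raft node creates its queue and starts a processQueue
// goroutine; later messages just enqueue.
func (t *rpcTransport) Send(req *multiraft.RaftMessageRequest) error {
	// Assumption: the destination node/store is recoverable from the raft
	// message's To field.
	raftNodeID := proto.RaftNodeID(req.Message.To)

	t.mu.Lock()
	ch, ok := t.queues[raftNodeID]
	if !ok {
		ch = make(chan *multiraft.RaftMessageRequest, raftSendBufferSize)
		t.queues[raftNodeID] = ch
		go t.processQueue(raftNodeID)
	}
	t.mu.Unlock()

	select {
	case ch <- req:
		return nil
	default:
		// Queue full: drop rather than block the caller; per the comment
		// above, a later message restarts processQueue if it has exited.
		return fmt.Errorf("queue for node %d is full", raftNodeID)
	}
}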