func (enc *messageEncoder) encode(m raftpb.Message) error { if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil { return err } _, err := enc.w.Write(pbutil.MustMarshal(&m)) return err }
// send persists state to stable storage and then sends to its mailbox. func (r *raft) send(m pb.Message) { m.From = r.id // do not attach term to MsgProp // proposals are a way to forward to the leader and // should be treated as local message. if m.Type != pb.MsgProp { m.Term = r.Term } r.msgs = append(r.msgs, m) }
func (dec *messageDecoder) decode() (raftpb.Message, error) { var m raftpb.Message var l uint64 if err := binary.Read(dec.r, binary.BigEndian, &l); err != nil { return m, err } buf := make([]byte, int(l)) if _, err := io.ReadFull(dec.r, buf); err != nil { return m, err } return m, m.Unmarshal(buf) }
func stepFollower(r *raft, m pb.Message) { switch m.Type { case pb.MsgProp: if r.lead == None { r.logger.Infof("%x no leader at term %d; dropping proposal", r.id, r.Term) return } m.To = r.lead r.send(m) case pb.MsgApp: r.elapsed = 0 r.lead = m.From r.handleAppendEntries(m) case pb.MsgHeartbeat: r.elapsed = 0 r.lead = m.From r.handleHeartbeat(m) case pb.MsgSnap: r.elapsed = 0 r.handleSnapshot(m) case pb.MsgVote: if (r.Vote == None || r.Vote == m.From) && r.raftLog.isUpToDate(m.Index, m.LogTerm) { r.elapsed = 0 r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] voted for %x [logterm: %d, index: %d] at term %d", r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.From, m.LogTerm, m.Index, r.Term) r.Vote = m.From r.send(pb.Message{To: m.From, Type: pb.MsgVoteResp}) } else { r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected vote from %x [logterm: %d, index: %d] at term %d", r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.From, m.LogTerm, m.Index, r.Term) r.send(pb.Message{To: m.From, Type: pb.MsgVoteResp, Reject: true}) } } }
// sendAppend sends RRPC, with entries to the given peer. func (r *raft) sendAppend(to uint64) { pr := r.prs[to] if pr.isPaused() { return } m := pb.Message{} m.To = to term, errt := r.raftLog.term(pr.Next - 1) ents, erre := r.raftLog.entries(pr.Next, r.maxMsgSize) if errt != nil || erre != nil { // send snapshot if we failed to get term or entries m.Type = pb.MsgSnap snapshot, err := r.raftLog.snapshot() if err != nil { if err == ErrSnapshotTemporarilyUnavailable { r.logger.Debugf("%x failed to send snapshot to %x because snapshot is temporarily unavailable", r.id, to) return } panic(err) // TODO(bdarnell) } if IsEmptySnap(snapshot) { panic("need non-empty snapshot") } m.Snapshot = snapshot sindex, sterm := snapshot.Metadata.Index, snapshot.Metadata.Term r.logger.Debugf("%x [firstindex: %d, commit: %d] sent snapshot[index: %d, term: %d] to %x [%s]", r.id, r.raftLog.firstIndex(), r.Commit, sindex, sterm, to, pr) pr.becomeSnapshot(sindex) r.logger.Debugf("%x paused sending replication messages to %x [%s]", r.id, to, pr) } else { m.Type = pb.MsgApp m.Index = pr.Next - 1 m.LogTerm = term m.Entries = ents m.Commit = r.raftLog.committed if n := len(m.Entries); n != 0 { switch pr.State { // optimistically increase the next when in ProgressStateReplicate case ProgressStateReplicate: last := m.Entries[n-1].Index pr.optimisticUpdate(last) pr.ins.add(last) case ProgressStateProbe: pr.pause() default: r.logger.Panicf("%x is sending append in unhandled state %s", r.id, pr.State) } } } r.send(m) }
func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if r.Method != "POST" { w.Header().Set("Allow", "POST") http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed) return } w.Header().Set("X-Etcd-Cluster-ID", h.cid.String()) if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil { http.Error(w, err.Error(), http.StatusPreconditionFailed) return } // Limit the data size that could be read from the request body, which ensures that read from // connection will not time out accidentally due to possible block in underlying implementation. limitedr := pioutil.NewLimitedBufferReader(r.Body, ConnReadLimitByte) b, err := ioutil.ReadAll(limitedr) if err != nil { plog.Errorf("failed to read raft message (%v)", err) http.Error(w, "error reading raft message", http.StatusBadRequest) return } var m raftpb.Message if err := m.Unmarshal(b); err != nil { plog.Errorf("failed to unmarshal raft message (%v)", err) http.Error(w, "error unmarshaling raft message", http.StatusBadRequest) return } if err := h.r.Process(context.TODO(), m); err != nil { switch v := err.(type) { case writerToResponse: v.WriteTo(w) default: plog.Warningf("failed to process raft message (%v)", err) http.Error(w, "error processing raft message", http.StatusInternalServerError) } return } // Write StatusNoContet header after the message has been processed by // raft, which facilitates the client to report MsgSnap status. w.WriteHeader(http.StatusNoContent) }
func (enc *msgAppV2Encoder) encode(m raftpb.Message) error { start := time.Now() switch { case isLinkHeartbeatMessage(m): enc.uint8buf[0] = byte(msgTypeLinkHeartbeat) if _, err := enc.w.Write(enc.uint8buf); err != nil { return err } case enc.index == m.Index && enc.term == m.LogTerm && m.LogTerm == m.Term: enc.uint8buf[0] = byte(msgTypeAppEntries) if _, err := enc.w.Write(enc.uint8buf); err != nil { return err } // write length of entries binary.BigEndian.PutUint64(enc.uint64buf, uint64(len(m.Entries))) if _, err := enc.w.Write(enc.uint64buf); err != nil { return err } for i := 0; i < len(m.Entries); i++ { // write length of entry binary.BigEndian.PutUint64(enc.uint64buf, uint64(m.Entries[i].Size())) if _, err := enc.w.Write(enc.uint64buf); err != nil { return err } if n := m.Entries[i].Size(); n < msgAppV2BufSize { if _, err := m.Entries[i].MarshalTo(enc.buf); err != nil { return err } if _, err := enc.w.Write(enc.buf[:n]); err != nil { return err } } else { if _, err := enc.w.Write(pbutil.MustMarshal(&m.Entries[i])); err != nil { return err } } enc.index++ } // write commit index binary.BigEndian.PutUint64(enc.uint64buf, m.Commit) if _, err := enc.w.Write(enc.uint64buf); err != nil { return err } enc.fs.Succ(time.Since(start)) default: if err := binary.Write(enc.w, binary.BigEndian, msgTypeApp); err != nil { return err } // write size of message if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil { return err } // write message if _, err := enc.w.Write(pbutil.MustMarshal(&m)); err != nil { return err } enc.term = m.Term enc.index = m.Index if l := len(m.Entries); l > 0 { enc.index = m.Entries[l-1].Index } enc.fs.Succ(time.Since(start)) } return nil }
func (dec *msgAppV2Decoder) decode() (raftpb.Message, error) { var ( m raftpb.Message typ uint8 ) if _, err := io.ReadFull(dec.r, dec.uint8buf); err != nil { return m, err } typ = uint8(dec.uint8buf[0]) switch typ { case msgTypeLinkHeartbeat: return linkHeartbeatMessage, nil case msgTypeAppEntries: m = raftpb.Message{ Type: raftpb.MsgApp, From: uint64(dec.remote), To: uint64(dec.local), Term: dec.term, LogTerm: dec.term, Index: dec.index, } // decode entries if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil { return m, err } l := binary.BigEndian.Uint64(dec.uint64buf) m.Entries = make([]raftpb.Entry, int(l)) for i := 0; i < int(l); i++ { if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil { return m, err } size := binary.BigEndian.Uint64(dec.uint64buf) var buf []byte if size < msgAppV2BufSize { buf = dec.buf[:size] if _, err := io.ReadFull(dec.r, buf); err != nil { return m, err } } else { buf = make([]byte, int(size)) if _, err := io.ReadFull(dec.r, buf); err != nil { return m, err } } dec.index++ // 1 alloc pbutil.MustUnmarshal(&m.Entries[i], buf) } // decode commit index if _, err := io.ReadFull(dec.r, dec.uint64buf); err != nil { return m, err } m.Commit = binary.BigEndian.Uint64(dec.uint64buf) case msgTypeApp: var size uint64 if err := binary.Read(dec.r, binary.BigEndian, &size); err != nil { return m, err } buf := make([]byte, int(size)) if _, err := io.ReadFull(dec.r, buf); err != nil { return m, err } pbutil.MustUnmarshal(&m, buf) dec.term = m.Term dec.index = m.Index if l := len(m.Entries); l > 0 { dec.index = m.Entries[l-1].Index } default: return m, fmt.Errorf("failed to parse type %d in msgappv2 stream", typ) } return m, nil }