func startNode(cfg *ServerConfig, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) { var err error member := cfg.Cluster.MemberByName(cfg.Name) metadata := pbutil.MustMarshal( &pb.Metadata{ NodeID: uint64(member.ID), ClusterID: uint64(cfg.Cluster.ID()), }, ) if err := os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil { log.Fatalf("etcdserver create snapshot directory error: %v", err) } if w, err = wal.Create(cfg.WALDir(), metadata); err != nil { log.Fatalf("etcdserver: create wal error: %v", err) } peers := make([]raft.Peer, len(ids)) for i, id := range ids { ctx, err := json.Marshal((*cfg.Cluster).Member(id)) if err != nil { log.Panicf("marshal member should never fail: %v", err) } peers[i] = raft.Peer{ID: uint64(id), Context: ctx} } id = member.ID log.Printf("etcdserver: start member %s in cluster %s", id, cfg.Cluster.ID()) s = raft.NewMemoryStorage() n = raft.StartNode(uint64(id), peers, cfg.ElectionTicks, 1, s) return }
func (p *pipeline) handle() { defer p.wg.Done() for m := range p.msgc { start := time.Now() err := p.post(pbutil.MustMarshal(&m)) if err == errStopped { return } end := time.Now() if err != nil { reportSentFailure(pipelineMsg, m) p.status.deactivate(failureType{source: pipelineMsg, action: "write"}, err.Error()) if m.Type == raftpb.MsgApp && p.fs != nil { p.fs.Fail() } p.r.ReportUnreachable(m.To) if isMsgSnap(m) { p.r.ReportSnapshot(m.To, raft.SnapshotFailure) } } else { p.status.activate() if m.Type == raftpb.MsgApp && p.fs != nil { p.fs.Succ(end.Sub(start)) } if isMsgSnap(m) { p.r.ReportSnapshot(m.To, raft.SnapshotFinish) } reportSentDuration(pipelineMsg, m, time.Since(start)) } } }
func (p *peer) handle() { defer p.wg.Done() for m := range p.q { start := time.Now() err := p.post(pbutil.MustMarshal(m)) end := time.Now() p.Lock() if err != nil { if p.errored == nil || p.errored.Error() != err.Error() { log.Printf("sender: error posting to %s: %v", p.id, err) p.errored = err } if p.active { log.Printf("sender: the connection with %s became inactive", p.id) p.active = false } if m.Type == raftpb.MsgApp && p.fs != nil { p.fs.Fail() } } else { if !p.active { log.Printf("sender: the connection with %s became active", p.id) p.active = true p.errored = nil } if m.Type == raftpb.MsgApp && p.fs != nil { p.fs.Succ(end.Sub(start)) } } p.Unlock() } }
func (enc *messageEncoder) encode(m raftpb.Message) error { if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil { return err } _, err := enc.w.Write(pbutil.MustMarshal(&m)) return err }
// createConfigChangeEnts creates a series of Raft entries (i.e. // EntryConfChange) to remove the set of given IDs from the cluster. The ID // `self` is _not_ removed, even if present in the set. // If `self` is not inside the given ids, it creates a Raft entry to add a // default member with the given `self`. func createConfigChangeEnts(ids []uint64, self uint64, term, index uint64) []raftpb.Entry { ents := make([]raftpb.Entry, 0) next := index + 1 found := false for _, id := range ids { if id == self { found = true continue } cc := &raftpb.ConfChange{ Type: raftpb.ConfChangeRemoveNode, NodeID: id, } e := raftpb.Entry{ Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(cc), Term: term, Index: next, } ents = append(ents, e) next++ } if !found { m := Member{ ID: types.ID(self), RaftAttributes: RaftAttributes{PeerURLs: []string{"http://localhost:7001", "http://localhost:2380"}}, } ctx, err := json.Marshal(m) if err != nil { log.Panicf("marshal member should never fail: %v", err) } cc := &raftpb.ConfChange{ Type: raftpb.ConfChangeAddNode, NodeID: self, Context: ctx, } e := raftpb.Entry{ Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(cc), Term: term, Index: next, } ents = append(ents, e) } return ents }
func (w *WAL) saveState(s *raftpb.HardState) error { if raft.IsEmptyHardState(*s) { return nil } w.state = *s b := pbutil.MustMarshal(s) rec := &walpb.Record{Type: stateType, Data: b} return w.encoder.encode(rec) }
func (w *WAL) saveEntry(e *raftpb.Entry) error { b := pbutil.MustMarshal(e) rec := &walpb.Record{Type: entryType, Data: b} if err := w.encoder.encode(rec); err != nil { return err } w.enti = e.Index return nil }
func (w *WAL) saveEntry(e *raftpb.Entry) error { // TODO: add MustMarshalTo to reduce one allocation. b := pbutil.MustMarshal(e) rec := &walpb.Record{Type: entryType, Data: b} if err := w.encoder.encode(rec); err != nil { return err } w.enti = e.Index lastIndexSaved.Set(float64(w.enti)) return nil }
func (w *WAL) SaveSnapshot(e walpb.Snapshot) error { b := pbutil.MustMarshal(&e) rec := &walpb.Record{Type: snapshotType, Data: b} if err := w.encoder.encode(rec); err != nil { return err } // update enti only when snapshot is ahead of last index if w.enti < e.Index { w.enti = e.Index } return w.sync() }
func TestGetIDs(t *testing.T) { addcc := &raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 2} addEntry := raftpb.Entry{Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(addcc)} removecc := &raftpb.ConfChange{Type: raftpb.ConfChangeRemoveNode, NodeID: 2} removeEntry := raftpb.Entry{Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc)} normalEntry := raftpb.Entry{Type: raftpb.EntryNormal} tests := []struct { confState *raftpb.ConfState ents []raftpb.Entry widSet []uint64 }{ {nil, []raftpb.Entry{}, []uint64{}}, {&raftpb.ConfState{Nodes: []uint64{1}}, []raftpb.Entry{}, []uint64{1}}, {&raftpb.ConfState{Nodes: []uint64{1}}, []raftpb.Entry{addEntry}, []uint64{1, 2}}, {&raftpb.ConfState{Nodes: []uint64{1}}, []raftpb.Entry{addEntry, removeEntry}, []uint64{1}}, {&raftpb.ConfState{Nodes: []uint64{1}}, []raftpb.Entry{addEntry, normalEntry}, []uint64{1, 2}}, {&raftpb.ConfState{Nodes: []uint64{1}}, []raftpb.Entry{addEntry, removeEntry, normalEntry}, []uint64{1}}, } for i, tt := range tests { var snap raftpb.Snapshot if tt.confState != nil { snap.Metadata.ConfState = *tt.confState } idSet := getIDs(&snap, tt.ents) if !reflect.DeepEqual(idSet, tt.widSet) { t.Errorf("#%d: idset = %#v, want %#v", i, idSet, tt.widSet) } } }
// sync proposes a SYNC request and is non-blocking. // This makes no guarantee that the request will be proposed or performed. // The request will be cancelled after the given timeout. func (s *EtcdServer) sync(timeout time.Duration) { ctx, cancel := context.WithTimeout(context.Background(), timeout) req := pb.Request{ Method: "SYNC", ID: s.reqIDGen.Next(), Time: time.Now().UnixNano(), } data := pbutil.MustMarshal(&req) // There is no promise that node has leader when do SYNC request, // so it uses goroutine to propose. go func() { s.r.Propose(ctx, data) cancel() }() }
func (w *WAL) SaveSnapshot(e walpb.Snapshot) error { w.mu.Lock() defer w.mu.Unlock() b := pbutil.MustMarshal(&e) rec := &walpb.Record{Type: snapshotType, Data: b} if err := w.encoder.encode(rec); err != nil { return err } // update enti only when snapshot is ahead of last index if w.enti < e.Index { w.enti = e.Index } lastIndexSaved.Set(float64(w.enti)) return w.sync() }
func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error { start := time.Now() fname := fmt.Sprintf("%016x-%016x%s", snapshot.Metadata.Term, snapshot.Metadata.Index, snapSuffix) b := pbutil.MustMarshal(snapshot) crc := crc32.Update(0, crcTable, b) snap := snappb.Snapshot{Crc: crc, Data: b} d, err := snap.Marshal() if err != nil { return err } err = ioutil.WriteFile(path.Join(s.dir, fname), d, 0666) if err == nil { saveDurations.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Microsecond))) } return err }
func TestNew(t *testing.T) { p, err := ioutil.TempDir(os.TempDir(), "waltest") if err != nil { t.Fatal(err) } defer os.RemoveAll(p) w, err := Create(p, []byte("somedata")) if err != nil { t.Fatalf("err = %v, want nil", err) } if g := path.Base(w.f.Name()); g != walName(0, 0) { t.Errorf("name = %+v, want %+v", g, walName(0, 0)) } defer w.Close() gd, err := ioutil.ReadFile(w.f.Name()) if err != nil { t.Fatalf("err = %v, want nil", err) } var wb bytes.Buffer e := newEncoder(&wb, 0) err = e.encode(&walpb.Record{Type: crcType, Crc: 0}) if err != nil { t.Fatalf("err = %v, want nil", err) } err = e.encode(&walpb.Record{Type: metadataType, Data: []byte("somedata")}) if err != nil { t.Fatalf("err = %v, want nil", err) } r := &walpb.Record{ Type: snapshotType, Data: pbutil.MustMarshal(&walpb.Snapshot{}), } if err = e.encode(r); err != nil { t.Fatalf("err = %v, want nil", err) } e.flush() if !reflect.DeepEqual(gd, wb.Bytes()) { t.Errorf("data = %v, want %v", gd, wb.Bytes()) } }
func startNode(cfg *ServerConfig, cl *cluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) { var err error member := cl.MemberByName(cfg.Name) metadata := pbutil.MustMarshal( &pb.Metadata{ NodeID: uint64(member.ID), ClusterID: uint64(cl.ID()), }, ) if err := os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil { plog.Fatalf("create snapshot directory error: %v", err) } if w, err = wal.Create(cfg.WALDir(), metadata); err != nil { plog.Fatalf("create wal error: %v", err) } peers := make([]raft.Peer, len(ids)) for i, id := range ids { ctx, err := json.Marshal((*cl).Member(id)) if err != nil { plog.Panicf("marshal member should never fail: %v", err) } peers[i] = raft.Peer{ID: uint64(id), Context: ctx} } id = member.ID plog.Infof("starting member %s in cluster %s", id, cl.ID()) s = raft.NewMemoryStorage() c := &raft.Config{ ID: uint64(id), ElectionTick: cfg.ElectionTicks, HeartbeatTick: 1, Storage: s, MaxSizePerMsg: maxSizePerMsg, MaxInflightMsgs: maxInflightMsgs, } n = raft.StartNode(c, peers) raftStatus = n.Status advanceTicksForElection(n, c.ElectionTick) return }
// Migrate4To2 converts an etcd v0.4 data directory (snapshot, config, and
// log) in dataDir into the v2 on-disk format: a v2 snapshot directory plus
// a WAL. `name` is the local member name, used to guess the node ID when
// it cannot be read from the log. Returns an error (without cleanup) if
// any stage of the conversion fails.
func Migrate4To2(dataDir string, name string) error {
	// prep new directories
	sd2 := snapDir2(dataDir)
	if err := os.MkdirAll(sd2, 0700); err != nil {
		return fmt.Errorf("failed creating snapshot directory %s: %v", sd2, err)
	}

	// read v0.4 data: latest snapshot, node config, and the raft log
	snap4, err := DecodeLatestSnapshot4FromDir(snapDir4(dataDir))
	if err != nil {
		return err
	}
	cfg4, err := DecodeConfig4FromFile(cfgFile4(dataDir))
	if err != nil {
		return err
	}
	ents4, err := DecodeLog4FromFile(logFile4(dataDir))
	if err != nil {
		return err
	}

	// Determine the local node's ID from whatever sources are available.
	nodeIDs := ents4.NodeIDs()
	nodeID := GuessNodeID(nodeIDs, snap4, cfg4, name)
	if nodeID == 0 {
		return fmt.Errorf("Couldn't figure out the node ID from the log or flags, cannot convert")
	}

	// Create the v2 WAL tagged with the node ID and a fixed migration
	// cluster ID (0x04add5).
	metadata := pbutil.MustMarshal(&pb.Metadata{NodeID: nodeID, ClusterID: 0x04add5})
	wd2 := walDir2(dataDir)
	w, err := wal.Create(wd2, metadata)
	if err != nil {
		return fmt.Errorf("failed initializing wal at %s: %v", wd2, err)
	}
	defer w.Close()

	// transform v0.4 data into v2 equivalents
	var snap2 *raftpb.Snapshot
	if snap4 == nil {
		log.Printf("No snapshot found")
	} else {
		log.Printf("Found snapshot: lastIndex=%d", snap4.LastIndex)
		snap2 = snap4.Snapshot2()
	}

	st2 := cfg4.HardState2()

	// If we've got the most recent snapshot, we can use its committed index.
	// Still likely less than the current actual index, but worth it for the replay.
	if snap2 != nil && st2.Commit < snap2.Metadata.Index {
		st2.Commit = snap2.Metadata.Index
	}

	ents2, err := Entries4To2(ents4)
	if err != nil {
		return err
	}

	// NOTE(review): if the converted log were empty, ents2[0] below would
	// panic — presumably a v0.4 log always has at least one entry; confirm.
	ents2Len := len(ents2)
	log.Printf("Found %d log entries: firstIndex=%d lastIndex=%d", ents2Len, ents2[0].Index, ents2[ents2Len-1].Index)

	// set the state term to the biggest term we have ever seen,
	// so term of future entries will not be the same with term of old ones.
	st2.Term = ents2[ents2Len-1].Term

	// explicitly prepend an empty entry as the WAL code expects it
	ents2 = append(make([]raftpb.Entry, 1), ents2...)

	if err = w.Save(st2, ents2); err != nil {
		return err
	}
	log.Printf("Log migration successful")

	// migrate snapshot (if necessary) and logs
	var walsnap walpb.Snapshot
	if snap2 != nil {
		walsnap.Index, walsnap.Term = snap2.Metadata.Index, snap2.Metadata.Term
		ss := snap.New(sd2)
		if err := ss.SaveSnap(*snap2); err != nil {
			return err
		}
		log.Printf("Snapshot migration successful")
	}
	// Record the snapshot position in the WAL (zero value when no snapshot).
	if err = w.SaveSnapshot(walsnap); err != nil {
		return err
	}

	return nil
}
func TestServeRaft(t *testing.T) { testCases := []struct { method string body io.Reader p Raft clusterID string wcode int }{ { // bad method "GET", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &nopProcessor{}, "0", http.StatusMethodNotAllowed, }, { // bad method "PUT", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &nopProcessor{}, "0", http.StatusMethodNotAllowed, }, { // bad method "DELETE", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &nopProcessor{}, "0", http.StatusMethodNotAllowed, }, { // bad request body "POST", &errReader{}, &nopProcessor{}, "0", http.StatusBadRequest, }, { // bad request protobuf "POST", strings.NewReader("malformed garbage"), &nopProcessor{}, "0", http.StatusBadRequest, }, { // good request, wrong cluster ID "POST", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &nopProcessor{}, "1", http.StatusPreconditionFailed, }, { // good request, Processor failure "POST", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &errProcessor{ err: &resWriterToError{code: http.StatusForbidden}, }, "0", http.StatusForbidden, }, { // good request, Processor failure "POST", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &errProcessor{ err: &resWriterToError{code: http.StatusInternalServerError}, }, "0", http.StatusInternalServerError, }, { // good request, Processor failure "POST", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &errProcessor{err: errors.New("blah")}, "0", http.StatusInternalServerError, }, { // good request "POST", bytes.NewReader( pbutil.MustMarshal(&raftpb.Message{}), ), &nopProcessor{}, "0", http.StatusNoContent, }, } for i, tt := range testCases { req, err := http.NewRequest(tt.method, "foo", tt.body) if err != nil { t.Fatalf("#%d: could not create request: %#v", i, err) } req.Header.Set("X-Etcd-Cluster-ID", tt.clusterID) rw := httptest.NewRecorder() h := NewHandler(tt.p, types.ID(0)) h.ServeHTTP(rw, req) if rw.Code != tt.wcode { t.Errorf("#%d: got 
code=%d, want %d", i, rw.Code, tt.wcode) } } }
func TestCreateConfigChangeEnts(t *testing.T) { m := Member{ ID: types.ID(1), RaftAttributes: RaftAttributes{PeerURLs: []string{"http://localhost:7001", "http://localhost:2380"}}, } ctx, err := json.Marshal(m) if err != nil { t.Fatal(err) } addcc1 := &raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1, Context: ctx} removecc2 := &raftpb.ConfChange{Type: raftpb.ConfChangeRemoveNode, NodeID: 2} removecc3 := &raftpb.ConfChange{Type: raftpb.ConfChangeRemoveNode, NodeID: 3} tests := []struct { ids []uint64 self uint64 term, index uint64 wents []raftpb.Entry }{ { []uint64{1}, 1, 1, 1, []raftpb.Entry{}, }, { []uint64{1, 2}, 1, 1, 1, []raftpb.Entry{{Term: 1, Index: 2, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc2)}}, }, { []uint64{1, 2}, 1, 2, 2, []raftpb.Entry{{Term: 2, Index: 3, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc2)}}, }, { []uint64{1, 2, 3}, 1, 2, 2, []raftpb.Entry{ {Term: 2, Index: 3, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc2)}, {Term: 2, Index: 4, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc3)}, }, }, { []uint64{2, 3}, 2, 2, 2, []raftpb.Entry{ {Term: 2, Index: 3, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc3)}, }, }, { []uint64{2, 3}, 1, 2, 2, []raftpb.Entry{ {Term: 2, Index: 3, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc2)}, {Term: 2, Index: 4, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(removecc3)}, {Term: 2, Index: 5, Type: raftpb.EntryConfChange, Data: pbutil.MustMarshal(addcc1)}, }, }, } for i, tt := range tests { gents := createConfigChangeEnts(tt.ids, tt.self, tt.term, tt.index) if !reflect.DeepEqual(gents, tt.wents) { t.Errorf("#%d: ents = %v, want %v", i, gents, tt.wents) } } }
// encode serializes m onto enc.w using the msgApp v2 wire format. It picks
// one of three frame shapes: a bare heartbeat byte, a compact entries-only
// frame when m continues exactly where the previous message left off
// (tracked via enc.index/enc.term), or a full message frame otherwise.
// The enc.index/enc.term bookkeeping must stay in lockstep with the
// decoder for the compact case to be reconstructable.
func (enc *msgAppV2Encoder) encode(m raftpb.Message) error {
	start := time.Now()
	switch {
	case isLinkHeartbeatMessage(m):
		// Heartbeat: a single type byte, no payload.
		enc.uint8buf[0] = byte(msgTypeLinkHeartbeat)
		if _, err := enc.w.Write(enc.uint8buf); err != nil {
			return err
		}
	case enc.index == m.Index && enc.term == m.LogTerm && m.LogTerm == m.Term:
		// Contiguous append in the same term: send only the entries,
		// since the decoder can infer index/term from its own state.
		enc.uint8buf[0] = byte(msgTypeAppEntries)
		if _, err := enc.w.Write(enc.uint8buf); err != nil {
			return err
		}
		// write length of entries
		binary.BigEndian.PutUint64(enc.uint64buf, uint64(len(m.Entries)))
		if _, err := enc.w.Write(enc.uint64buf); err != nil {
			return err
		}
		for i := 0; i < len(m.Entries); i++ {
			// write length of entry
			binary.BigEndian.PutUint64(enc.uint64buf, uint64(m.Entries[i].Size()))
			if _, err := enc.w.Write(enc.uint64buf); err != nil {
				return err
			}
			if n := m.Entries[i].Size(); n < msgAppV2BufSize {
				// Small entry: marshal into the reusable scratch buffer
				// to avoid a per-entry allocation.
				if _, err := m.Entries[i].MarshalTo(enc.buf); err != nil {
					return err
				}
				if _, err := enc.w.Write(enc.buf[:n]); err != nil {
					return err
				}
			} else {
				// Entry too large for the scratch buffer: allocate.
				if _, err := enc.w.Write(pbutil.MustMarshal(&m.Entries[i])); err != nil {
					return err
				}
			}
			enc.index++
		}
		// write commit index
		binary.BigEndian.PutUint64(enc.uint64buf, m.Commit)
		if _, err := enc.w.Write(enc.uint64buf); err != nil {
			return err
		}
		enc.fs.Succ(time.Since(start))
	default:
		// Non-contiguous message: fall back to a full frame with the
		// whole raftpb.Message, then resynchronize the tracked state.
		if err := binary.Write(enc.w, binary.BigEndian, msgTypeApp); err != nil {
			return err
		}
		// write size of message
		if err := binary.Write(enc.w, binary.BigEndian, uint64(m.Size())); err != nil {
			return err
		}
		// write message
		if _, err := enc.w.Write(pbutil.MustMarshal(&m)); err != nil {
			return err
		}

		enc.term = m.Term
		enc.index = m.Index
		// Advance past the entries just sent, if any.
		if l := len(m.Entries); l > 0 {
			enc.index = m.Entries[l-1].Index
		}
		enc.fs.Succ(time.Since(start))
	}
	return nil
}