// TestMultiNodeProposeConfig ensures that multiNode.ProposeConfChange
// sends the given configuration proposal to the underlying raft.
func TestMultiNodeProposeConfig(t *testing.T) {
	mn := newMultiNode(1)
	go mn.run()
	s := NewMemoryStorage()
	mn.CreateGroup(1, newTestConfig(1, nil, 10, 1, s), []Peer{{ID: 1}})
	mn.Campaign(context.TODO(), 1)
	proposed := false
	var lastIndex uint64
	var ccdata []byte
	for {
		rds := <-mn.Ready()
		rd := rds[1]
		s.Append(rd.Entries)
		// Once we are the leader, propose a ConfChange.
		if !proposed && rd.SoftState.Lead == mn.id {
			cc := raftpb.ConfChange{Type: raftpb.ConfChangeAddNode, NodeID: 1}
			var err error
			ccdata, err = cc.Marshal()
			if err != nil {
				t.Fatal(err)
			}
			mn.ProposeConfChange(context.TODO(), 1, cc)
			proposed = true
		}
		mn.Advance(rds)

		var err error
		lastIndex, err = s.LastIndex()
		if err != nil {
			t.Fatal(err)
		}
		if lastIndex >= 3 {
			break
		}
	}
	mn.Stop()

	entries, err := s.Entries(lastIndex, lastIndex+1, noLimit)
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != 1 {
		t.Fatalf("len(entries) = %d, want %d", len(entries), 1)
	}
	if entries[0].Type != raftpb.EntryConfChange {
		t.Fatalf("type = %v, want %v", entries[0].Type, raftpb.EntryConfChange)
	}
	if !bytes.Equal(entries[0].Data, ccdata) {
		t.Errorf("data = %v, want %v", entries[0].Data, ccdata)
	}
}
func TestBasicProgress(t *testing.T) {
	peers := []raft.Peer{{1, nil}, {2, nil}, {3, nil}, {4, nil}, {5, nil}}
	nt := newRaftNetwork(1, 2, 3, 4, 5)

	nodes := make([]*node, 0)
	for i := 1; i <= 5; i++ {
		n := startNode(uint64(i), peers, nt.nodeNetwork(uint64(i)))
		nodes = append(nodes, n)
	}

	time.Sleep(10 * time.Millisecond)

	for i := 0; i < 10000; i++ {
		nodes[0].Propose(context.TODO(), []byte("somedata"))
	}

	time.Sleep(500 * time.Millisecond)
	for _, n := range nodes {
		n.stop()
		// 10006 = 10000 proposals + 5 bootstrap ConfChange entries +
		// 1 empty entry appended by the leader on election.
		if n.state.Commit != 10006 {
			t.Errorf("commit = %d, want = 10006", n.state.Commit)
		}
	}
}
func BenchmarkProposal3Nodes(b *testing.B) {
	peers := []raft.Peer{{1, nil}, {2, nil}, {3, nil}}
	nt := newRaftNetwork(1, 2, 3)

	nodes := make([]*node, 0)
	for i := 1; i <= 3; i++ {
		n := startNode(uint64(i), peers, nt.nodeNetwork(uint64(i)))
		nodes = append(nodes, n)
	}
	// get ready and warm up
	time.Sleep(50 * time.Millisecond)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		nodes[0].Propose(context.TODO(), []byte("somedata"))
	}
	// Wait for every node to reach the expected commit index:
	// b.N proposals + 3 bootstrap ConfChange entries + 1 election no-op.
	for _, n := range nodes {
		for n.state.Commit != uint64(b.N+4) {
			time.Sleep(time.Millisecond)
		}
	}
	b.StopTimer()

	for _, n := range nodes {
		n.stop()
	}
}
// TestMultiNodeStep ensures that multiNode.Step sends MsgProp to propc
// chan and other kinds of messages to recvc chan.
func TestMultiNodeStep(t *testing.T) {
	for i, msgn := range raftpb.MessageType_name {
		mn := &multiNode{
			propc: make(chan multiMessage, 1),
			recvc: make(chan multiMessage, 1),
		}
		msgt := raftpb.MessageType(i)
		mn.Step(context.TODO(), 1, raftpb.Message{Type: msgt})
		// Proposals go to the propc chan. Others go to the recvc chan.
		if msgt == raftpb.MsgProp {
			select {
			case <-mn.propc:
			default:
				t.Errorf("%d: cannot receive %s on propc chan", msgt, msgn)
			}
		} else {
			if msgt == raftpb.MsgBeat || msgt == raftpb.MsgHup || msgt == raftpb.MsgUnreachable || msgt == raftpb.MsgSnapStatus {
				select {
				case <-mn.recvc:
					t.Errorf("%d: step should ignore %s", msgt, msgn)
				default:
				}
			} else {
				select {
				case <-mn.recvc:
				default:
					t.Errorf("%d: cannot receive %s on recvc chan", msgt, msgn)
				}
			}
		}
	}
}
func (n *node) start() {
	n.stopc = make(chan struct{})
	ticker := time.Tick(5 * time.Millisecond)

	go func() {
		for {
			select {
			case <-ticker:
				n.Tick()
			case rd := <-n.Ready():
				if !raft.IsEmptyHardState(rd.HardState) {
					n.state = rd.HardState
					n.storage.SetHardState(n.state)
				}
				n.storage.Append(rd.Entries)
				time.Sleep(time.Millisecond)
				// TODO: make send async, more like real world...
				for _, m := range rd.Messages {
					n.iface.send(m)
				}
				n.Advance()
			case m := <-n.iface.recv():
				n.Step(context.TODO(), m)
			case <-n.stopc:
				n.Stop()
				log.Printf("raft.%d: stop", n.id)
				n.Node = nil
				close(n.stopc)
				return
			case p := <-n.pausec:
				recvms := make([]raftpb.Message, 0)
				for p {
					select {
					case m := <-n.iface.recv():
						recvms = append(recvms, m)
					case p = <-n.pausec:
					}
				}
				// step all pending messages
				for _, m := range recvms {
					n.Step(context.TODO(), m)
				}
			}
		}
	}()
}
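// The pausec case above buffers incoming messages while paused and only
// steps them once a false value arrives. A minimal sketch of the helpers
// the tests below call, assuming that bool protocol (the names match the
// pause()/resume() calls in TestPause):
func (n *node) pause() { n.pausec <- true }

func (n *node) resume() { n.pausec <- false }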
// TestMultiNodePropose ensures that node.Propose sends the given proposal to the underlying raft.
func TestMultiNodePropose(t *testing.T) {
	mn := newMultiNode(1)
	go mn.run()
	s := NewMemoryStorage()
	mn.CreateGroup(1, newTestConfig(1, nil, 10, 1, s), []Peer{{ID: 1}})
	mn.Campaign(context.TODO(), 1)
	proposed := false
	for {
		rds := <-mn.Ready()
		rd := rds[1]
		s.Append(rd.Entries)
		// Once we are the leader, propose a command.
		if !proposed && rd.SoftState.Lead == mn.id {
			mn.Propose(context.TODO(), 1, []byte("somedata"))
			proposed = true
		}
		mn.Advance(rds)

		// Exit when we have three entries: one ConfChange, one no-op for the election,
		// and our proposed command.
		lastIndex, err := s.LastIndex()
		if err != nil {
			t.Fatal(err)
		}
		if lastIndex >= 3 {
			break
		}
	}
	mn.Stop()

	lastIndex, err := s.LastIndex()
	if err != nil {
		t.Fatal(err)
	}
	entries, err := s.Entries(lastIndex, lastIndex+1, noLimit)
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != 1 {
		t.Fatalf("len(entries) = %d, want %d", len(entries), 1)
	}
	if !bytes.Equal(entries[0].Data, []byte("somedata")) {
		t.Errorf("entries[0].Data = %v, want %v", entries[0].Data, []byte("somedata"))
	}
}
// ServeHTTP serves HTTP requests to receive and process snapshot messages.
//
// If the request sender dies without closing the underlying TCP connection,
// the handler will keep waiting for the request body until TCP keepalive
// finds out that the connection is broken after several minutes.
// This is acceptable because
// 1. snapshot messages sent through other TCP connections could still be
// received and processed.
// 2. this case should happen rarely, so no further optimization is done.
func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		w.Header().Set("Allow", "POST")
		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())

	if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil {
		http.Error(w, err.Error(), http.StatusPreconditionFailed)
		return
	}

	dec := &messageDecoder{r: r.Body}
	m, err := dec.decode()
	if err != nil {
		msg := fmt.Sprintf("failed to decode raft message (%v)", err)
		plog.Errorf(msg)
		http.Error(w, msg, http.StatusBadRequest)
		return
	}

	if m.Type != raftpb.MsgSnap {
		plog.Errorf("unexpected raft message type %s on snapshot path", m.Type)
		http.Error(w, "wrong raft message type", http.StatusBadRequest)
		return
	}

	// save snapshot
	if err := h.snapSaver.SaveFrom(r.Body, m.Snapshot.Metadata.Index); err != nil {
		msg := fmt.Sprintf("failed to save KV snapshot (%v)", err)
		plog.Error(msg)
		http.Error(w, msg, http.StatusInternalServerError)
		return
	}
	plog.Infof("received and saved snapshot [index: %d, from: %s] successfully", m.Snapshot.Metadata.Index, types.ID(m.From))

	if err := h.r.Process(context.TODO(), m); err != nil {
		switch v := err.(type) {
		// Process may return writerToResponse error when doing some
		// additional checks before calling raft.Node.Step.
		case writerToResponse:
			v.WriteTo(w)
		default:
			msg := fmt.Sprintf("failed to process raft message (%v)", err)
			plog.Warningf(msg)
			http.Error(w, msg, http.StatusInternalServerError)
		}
		return
	}

	// Write the StatusNoContent header after the message has been processed
	// by raft, so that the client can report the MsgSnap status.
	w.WriteHeader(http.StatusNoContent)
}
// TestProposeUnknownGroup ensures that we gracefully handle proposals
// for groups we don't know about (which can happen on a former leader
// that has been removed from the group).
//
// It is analogous to TestBlockProposal from node_test.go but in
// MultiNode we cannot block proposals based on individual group
// leader status.
func TestProposeUnknownGroup(t *testing.T) {
	mn := newMultiNode(1)
	go mn.run()
	defer mn.Stop()

	// A nil error from Propose() doesn't mean much. In this case the
	// proposal will be dropped on the floor because we don't know
	// anything about group 42. This is a very crude test that mainly
	// guarantees that we don't panic in this case.
	if err := mn.Propose(context.TODO(), 42, []byte("somedata")); err != nil {
		t.Errorf("err = %v, want nil", err)
	}
}
func TestPause(t *testing.T) {
	peers := []raft.Peer{{1, nil}, {2, nil}, {3, nil}, {4, nil}, {5, nil}}
	nt := newRaftNetwork(1, 2, 3, 4, 5)

	nodes := make([]*node, 0)
	for i := 1; i <= 5; i++ {
		n := startNode(uint64(i), peers, nt.nodeNetwork(uint64(i)))
		nodes = append(nodes, n)
	}

	time.Sleep(50 * time.Millisecond)

	for i := 0; i < 300; i++ {
		nodes[0].Propose(context.TODO(), []byte("somedata"))
	}
	nodes[1].pause()
	for i := 0; i < 300; i++ {
		nodes[0].Propose(context.TODO(), []byte("somedata"))
	}
	nodes[2].pause()
	for i := 0; i < 300; i++ {
		nodes[0].Propose(context.TODO(), []byte("somedata"))
	}
	nodes[2].resume()
	for i := 0; i < 300; i++ {
		nodes[0].Propose(context.TODO(), []byte("somedata"))
	}
	nodes[1].resume()

	// give some time for nodes to catch up with the raft leader
	time.Sleep(300 * time.Millisecond)
	for _, n := range nodes {
		n.stop()
		// 1206 = 1200 proposals + 5 bootstrap ConfChange entries +
		// 1 empty entry appended by the leader on election.
		if n.state.Commit != 1206 {
			t.Errorf("commit = %d, want = 1206", n.state.Commit)
		}
	}
}
func runSet(ki client.KeysAPI, setc chan set, wg *sync.WaitGroup) {
	for s := range setc {
		log.Println("copying key:", s.key)
		if s.ttl != 0 && s.ttl < 300 {
			log.Printf("extending key %s's ttl to 300 seconds", s.key)
			s.ttl = 5 * 60
		}
		_, err := ki.Set(context.TODO(), s.key, s.value, &client.SetOptions{TTL: time.Duration(s.ttl) * time.Second})
		if err != nil {
			log.Fatalf("failed to copy key: %v\n", err)
		}
	}
	wg.Done()
}
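// runSet drains setc until the channel is closed and calls wg.Done on
// exit, so the caller owns the Add/close bookkeeping. A sketch of a
// hypothetical caller fanning the work out over several workers; the
// copyKeys name, the worker count, and the sets slice are assumptions
// for illustration.
func copyKeys(ki client.KeysAPI, sets []set) {
	setc := make(chan set)
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ { // worker count chosen arbitrarily
		wg.Add(1)
		go runSet(ki, setc, &wg)
	}
	for _, s := range sets {
		setc <- s
	}
	close(setc) // ends each runSet range loop, which then calls wg.Done
	wg.Wait()
}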
// Cancel and Stop should unblock Step()
func TestMultiNodeStepUnblock(t *testing.T) {
	// a node without buffer to block step
	mn := &multiNode{
		propc: make(chan multiMessage),
		done:  make(chan struct{}),
	}

	ctx, cancel := context.WithCancel(context.Background())
	stopFunc := func() { close(mn.done) }

	tests := []struct {
		unblock func()
		werr    error
	}{
		{stopFunc, ErrStopped},
		{cancel, context.Canceled},
	}

	for i, tt := range tests {
		errc := make(chan error, 1)
		go func() {
			err := mn.Step(ctx, 1, raftpb.Message{Type: raftpb.MsgProp})
			errc <- err
		}()
		tt.unblock()
		select {
		case err := <-errc:
			if err != tt.werr {
				t.Errorf("#%d: err = %v, want %v", i, err, tt.werr)
			}
			// clean up side effect
			if ctx.Err() != nil {
				ctx = context.TODO()
			}
			select {
			case <-mn.done:
				mn.done = make(chan struct{})
			default:
			}
		case <-time.After(time.Millisecond * 100):
			t.Errorf("#%d: failed to unblock step", i)
		}
	}
}
func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if r.Method != "POST" {
		w.Header().Set("Allow", "POST")
		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", h.cid.String())

	if err := checkClusterCompatibilityFromHeader(r.Header, h.cid); err != nil {
		http.Error(w, err.Error(), http.StatusPreconditionFailed)
		return
	}

	// Limit the data size that could be read from the request body, which ensures
	// that reads from the connection will not time out accidentally due to possible
	// blocking in the underlying implementation.
	limitedr := pioutil.NewLimitedBufferReader(r.Body, ConnReadLimitByte)
	b, err := ioutil.ReadAll(limitedr)
	if err != nil {
		plog.Errorf("failed to read raft message (%v)", err)
		http.Error(w, "error reading raft message", http.StatusBadRequest)
		return
	}

	var m raftpb.Message
	if err := m.Unmarshal(b); err != nil {
		plog.Errorf("failed to unmarshal raft message (%v)", err)
		http.Error(w, "error unmarshaling raft message", http.StatusBadRequest)
		return
	}

	if err := h.r.Process(context.TODO(), m); err != nil {
		switch v := err.(type) {
		case writerToResponse:
			v.WriteTo(w)
		default:
			plog.Warningf("failed to process raft message (%v)", err)
			http.Error(w, "error processing raft message", http.StatusInternalServerError)
		}
		return
	}

	// Write the StatusNoContent header after the message has been processed
	// by raft, so that the client can report the MsgSnap status.
	w.WriteHeader(http.StatusNoContent)
}
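// Both handlers above special-case errors implementing writerToResponse so
// that Process can shape its own HTTP reply instead of taking the generic
// 500 path. A minimal sketch of that pattern; the interface shape is
// inferred from the type switches above, and movedError is a hypothetical
// example type.
type writerToResponse interface {
	WriteTo(w http.ResponseWriter)
}

type movedError struct{ url string }

func (e *movedError) Error() string { return "cluster moved to " + e.url }

func (e *movedError) WriteTo(w http.ResponseWriter) {
	w.Header().Set("Location", e.url)
	w.WriteHeader(http.StatusMovedPermanently)
}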
// operateHeaders takes action on the decoded headers. It returns the current
// stream if there are remaining headers on the wire (in the following
// Continuation frame).
func (t *http2Server) operateHeaders(hDec *hpackDecoder, s *Stream, frame headerFrame, endStream bool, handle func(*Stream), wg *sync.WaitGroup) (pendingStream *Stream) {
	defer func() {
		if pendingStream == nil {
			hDec.state = decodeState{}
		}
	}()
	endHeaders, err := hDec.decodeServerHTTP2Headers(frame)
	if s == nil {
		// s has been closed.
		return nil
	}
	if err != nil {
		grpclog.Printf("transport: http2Server.operateHeader found %v", err)
		if se, ok := err.(StreamError); ok {
			t.controlBuf.put(&resetStream{s.id, statusCodeConvTab[se.Code]})
		}
		return nil
	}
	if endStream {
		// s is just created by the caller. No lock needed.
		s.state = streamReadDone
	}
	if !endHeaders {
		return s
	}
	t.mu.Lock()
	if t.state != reachable {
		t.mu.Unlock()
		return nil
	}
	if uint32(len(t.activeStreams)) >= t.maxStreams {
		t.mu.Unlock()
		t.controlBuf.put(&resetStream{s.id, http2.ErrCodeRefusedStream})
		return nil
	}
	s.sendQuotaPool = newQuotaPool(int(t.streamSendQuota))
	t.activeStreams[s.id] = s
	t.mu.Unlock()
	s.windowHandler = func(n int) {
		t.updateWindow(s, uint32(n))
	}
	if hDec.state.timeoutSet {
		s.ctx, s.cancel = context.WithTimeout(context.TODO(), hDec.state.timeout)
	} else {
		s.ctx, s.cancel = context.WithCancel(context.TODO())
	}
	// Cache the current stream to the context so that the server application
	// can find out. Required when the server wants to send some metadata
	// back to the client (unary call only).
	s.ctx = newContextWithStream(s.ctx, s)
	// Attach the received metadata to the context.
	if len(hDec.state.mdata) > 0 {
		s.ctx = metadata.NewContext(s.ctx, hDec.state.mdata)
	}
	s.dec = &recvBufferReader{
		ctx:  s.ctx,
		recv: s.buf,
	}
	s.method = hDec.state.method
	wg.Add(1)
	go func() {
		handle(s)
		wg.Done()
	}()
	return nil
}
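// newContextWithStream above stashes the stream in the request context so
// server-side code can retrieve it later (for example, to send metadata
// back to the client). A minimal sketch of that plumbing under an assumed
// private key type; the streamFromContext helper name is an assumption.
type streamKey struct{}

func newContextWithStream(ctx context.Context, s *Stream) context.Context {
	return context.WithValue(ctx, streamKey{}, s)
}

func streamFromContext(ctx context.Context) (*Stream, bool) {
	s, ok := ctx.Value(streamKey{}).(*Stream)
	return s, ok
}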
func startPeer(streamRt, pipelineRt http.RoundTripper, urls types.URLs, local, to, cid types.ID, snapst *snapshotStore, r Raft, fs *stats.FollowerStats, errorc chan error, term uint64, v3demo bool) *peer {
	picker := newURLPicker(urls)
	status := newPeerStatus(to)
	p := &peer{
		id:           to,
		r:            r,
		v3demo:       v3demo,
		status:       status,
		msgAppWriter: startStreamWriter(to, status, fs, r),
		writer:       startStreamWriter(to, status, fs, r),
		pipeline:     newPipeline(pipelineRt, picker, local, to, cid, status, fs, r, errorc),
		snapSender:   newSnapshotSender(pipelineRt, picker, local, to, cid, status, snapst, r, errorc),
		sendc:        make(chan raftpb.Message),
		recvc:        make(chan raftpb.Message, recvBufSize),
		propc:        make(chan raftpb.Message, maxPendingProposals),
		newURLsC:     make(chan types.URLs),
		termc:        make(chan uint64),
		pausec:       make(chan struct{}),
		resumec:      make(chan struct{}),
		stopc:        make(chan struct{}),
		done:         make(chan struct{}),
	}

	// Use a separate goroutine to process MsgProp, because Process
	// blocks when there is no leader.
	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		for {
			select {
			case mm := <-p.propc:
				if err := r.Process(ctx, mm); err != nil {
					plog.Warningf("failed to process raft message (%v)", err)
				}
			case <-p.stopc:
				return
			}
		}
	}()

	p.msgAppReader = startStreamReader(streamRt, picker, streamTypeMsgAppV2, local, to, cid, status, p.recvc, p.propc, errorc, term)
	reader := startStreamReader(streamRt, picker, streamTypeMessage, local, to, cid, status, p.recvc, p.propc, errorc, term)
	go func() {
		var paused bool
		for {
			select {
			case m := <-p.sendc:
				if paused {
					continue
				}
				if p.v3demo && isMsgSnap(m) {
					go p.snapSender.send(m)
					continue
				}
				writec, name := p.pick(m)
				select {
				case writec <- m:
				default:
					p.r.ReportUnreachable(m.To)
					if isMsgSnap(m) {
						p.r.ReportSnapshot(m.To, raft.SnapshotFailure)
					}
					if status.isActive() {
						plog.Warningf("dropped %s to %s since %s's sending buffer is full", m.Type, p.id, name)
					} else {
						plog.Debugf("dropped %s to %s since %s's sending buffer is full", m.Type, p.id, name)
					}
				}
			case mm := <-p.recvc:
				if err := r.Process(context.TODO(), mm); err != nil {
					plog.Warningf("failed to process raft message (%v)", err)
				}
			case urls := <-p.newURLsC:
				picker.update(urls)
			case <-p.pausec:
				paused = true
			case <-p.resumec:
				paused = false
			case <-p.stopc:
				cancel()
				p.msgAppWriter.stop()
				p.writer.stop()
				p.pipeline.stop()
				p.snapSender.stop()
				p.msgAppReader.stop()
				reader.stop()
				close(p.done)
				return
			}
		}
	}()

	return p
}
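// The pausec/resumec cases above flip a flag that makes the send path drop
// messages, which tests use to simulate an unreachable peer. A plausible
// sketch of the hooks that drive them; the method names are assumptions,
// and selecting on p.done keeps callers from blocking on a stopped peer.
func (p *peer) Pause() {
	select {
	case p.pausec <- struct{}{}:
	case <-p.done:
	}
}

func (p *peer) Resume() {
	select {
	case p.resumec <- struct{}{}:
	case <-p.done:
	}
}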
// execWatchCommandFunc executes the "exec-watch" command.
func execWatchCommandFunc(c *cli.Context, ki client.KeysAPI) {
	args := c.Args()
	argslen := len(args)

	if argslen < 2 {
		handleError(ExitBadArgs, errors.New("key and command to exec required"))
	}

	var (
		key     string
		cmdArgs []string
	)

	foundSep := false
	for i := range args {
		if args[i] == "--" && i != 0 {
			foundSep = true
			break
		}
	}

	if foundSep {
		key = args[0]
		cmdArgs = args[2:]
	} else {
		// If no `--` separator is given, the legacy argument order applies:
		// the key comes last and everything before it is the command.
		key = args[argslen-1]
		cmdArgs = args[:argslen-1]
	}

	index := 0
	if c.Int("after-index") != 0 {
		index = c.Int("after-index") + 1
	}

	recursive := c.Bool("recursive")

	sigch := make(chan os.Signal, 1)
	signal.Notify(sigch, os.Interrupt)

	go func() {
		<-sigch
		os.Exit(0)
	}()

	w := ki.Watcher(key, &client.WatcherOptions{AfterIndex: uint64(index), Recursive: recursive})

	for {
		resp, err := w.Next(context.TODO())
		if err != nil {
			handleError(ExitServerError, err)
		}
		if resp.Node.Dir {
			fmt.Fprintf(os.Stderr, "Ignored dir %s change", resp.Node.Key)
			continue
		}

		cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...)
		cmd.Env = environResponse(resp, os.Environ())

		stdout, err := cmd.StdoutPipe()
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
		stderr, err := cmd.StderrPipe()
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}

		go func() {
			err := cmd.Start()
			if err != nil {
				fmt.Fprintln(os.Stderr, err.Error())
				os.Exit(1)
			}
			go io.Copy(os.Stdout, stdout)
			go io.Copy(os.Stderr, stderr)
			cmd.Wait()
		}()
	}
}
"io" "io/ioutil" "mime" "net/http" "net/url" "strconv" "strings" "sync" "time" "github.com/algoadv/etcd/Godeps/_workspace/src/golang.org/x/net/context" ) // NoContext is the default context you should supply if not using // your own context.Context (see https://golang.org/x/net/context). var NoContext = context.TODO() // Config describes a typical 3-legged OAuth2 flow, with both the // client application information and the server's endpoint URLs. type Config struct { // ClientID is the application's ID. ClientID string // ClientSecret is the application's secret. ClientSecret string // Endpoint contains the resource server's token endpoint // URLs. These are constants specific to each server and are // often available via site-specific packages, such as // google.Endpoint or github.Endpoint. Endpoint Endpoint