// TestDiscoverySecondPeerUp ensures that a second peer joining a discovery
// cluster works.
func TestDiscoverySecondPeerUp(t *testing.T) {
	etcdtest.RunServer(func(s *server.Server) {
		v := url.Values{}
		v.Set("value", "started")
		resp, err := etcdtest.PutForm(fmt.Sprintf("%s%s", s.URL(), "/v2/keys/_etcd/registry/3/_state"), v)
		assert.Equal(t, resp.StatusCode, http.StatusCreated)

		u, ok := s.PeerURL("ETCDTEST")
		if !ok {
			t.Fatalf("Couldn't find the URL")
		}

		wc := goetcd.NewClient([]string{s.URL()})
		testResp, err := wc.Set("test", "0", 0)
		if err != nil {
			t.Fatalf("Couldn't set a test key on the leader %v", err)
		}

		v = url.Values{}
		v.Set("value", u)
		resp, err = etcdtest.PutForm(fmt.Sprintf("%s%s", s.URL(), "/v2/keys/_etcd/registry/3/ETCDTEST"), v)
		assert.Equal(t, resp.StatusCode, http.StatusCreated)

		proc, err := startServer([]string{"-discovery", s.URL() + "/v2/keys/_etcd/registry/3"})
		if err != nil {
			t.Fatal(err.Error())
		}
		defer stopServer(proc)

		watch := fmt.Sprintf("%s%s%d", s.URL(), "/v2/keys/_etcd/registry/3/node1?wait=true&waitIndex=", testResp.EtcdIndex)
		resp, err = http.Get(watch)
		if err != nil {
			t.Fatal(err.Error())
		}

		// TODO(bp): need to have a better way of knowing a machine is up
		for i := 0; i < 10; i++ {
			time.Sleep(1 * time.Second)

			etcdc := goetcd.NewClient(nil)
			_, err = etcdc.Set("foobar", "baz", 0)
			if err == nil {
				break
			}
		}

		if err != nil {
			t.Fatal(err.Error())
		}
	})
}
// Set keeps sending set commands to the cluster until it is told to stop,
// then acknowledges on the stop channel.
func Set(stop chan bool) {
	stopSet := false
	i := 0
	c := etcd.NewClient(nil)

	for {
		key := fmt.Sprintf("%s_%v", "foo", i)

		result, err := c.Set(key, "bar", 0)

		if err != nil || result.Node.Key != "/"+key || result.Node.Value != "bar" {
			select {
			case <-stop:
				stopSet = true
			default:
			}
		}

		select {
		case <-stop:
			stopSet = true
		default:
		}

		if stopSet {
			break
		}

		i++
	}

	stop <- true
}
// Create a full cluster, disconnect a peer, wait for autodemotion, wait for autopromotion.
func TestStandbyAutoPromote(t *testing.T) {
	t.Skip("functionality unimplemented")

	clusterSize := 10 // DefaultActiveSize + 1
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer func() {
		// Wrap this in a closure so that it picks up the updated version of
		// the "etcds" variable.
		DestroyCluster(etcds)
	}()

	c := etcd.NewClient(nil)
	c.SyncCluster()

	time.Sleep(1 * time.Second)

	// Verify that we have one standby.
	result, err := c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Reconfigure with a short promote delay (2 seconds).
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":9, "promoteDelay":2}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Remove peer.
	etcd := etcds[1]
	etcds = append(etcds[:1], etcds[2:]...)
	if err := etcd.Kill(); err != nil {
		panic(err.Error())
	}
	etcd.Release()

	// Wait for it to get dropped.
	time.Sleep(server.PeerActivityMonitorTimeout + (2 * time.Second))

	// Wait for the standby to be promoted.
	time.Sleep(server.ActiveMonitorTimeout + (2 * time.Second))

	// Verify that we have 9 peers.
	result, err = c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 9)

	// Verify that node10 is one of those peers.
	result, err = c.Get("_etcd/machines/node10", false, false)
	assert.NoError(t, err)

	// Verify that the removed peer is now the only standby.
	result, err = c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	if assert.Equal(t, len(result.Node.Nodes), 1) {
		assert.Equal(t, result.Node.Nodes[0].Key, "/_etcd/standbys/node2")
	}
}
// Create five nodes, kill all of them, and restart them.
func TestMultiNodeKillAllAndRecovery(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)

	c.SyncCluster()

	time.Sleep(time.Second)

	// send 10 commands
	for i := 0; i < 10; i++ {
		// Test Set
		_, err := c.Set("foo", "bar", 0)
		if err != nil {
			panic(err)
		}
	}

	time.Sleep(time.Second)

	// kill all
	DestroyCluster(etcds)

	time.Sleep(time.Second)

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	time.Sleep(time.Second)

	for i := 0; i < clusterSize; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
	}

	go Monitor(clusterSize, 1, leaderChan, all, stop)

	<-all
	<-leaderChan

	result, err := c.Set("foo", "bar", 0)
	if err != nil {
		t.Fatalf("Recovery error: %s", err)
	}

	if result.Node.ModifiedIndex != 16 {
		t.Fatalf("recovery failed! [%d/16]", result.Node.ModifiedIndex)
	}
}
// Create a full cluster, disconnect a peer, wait for removal, wait for standby join.
func TestStandbyAutoJoin(t *testing.T) {
	clusterSize := 5
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer func() {
		// Wrap this in a closure so that it picks up the updated version of
		// the "etcds" variable.
		DestroyCluster(etcds)
	}()

	c := etcd.NewClient(nil)
	c.SyncCluster()

	time.Sleep(1 * time.Second)

	// Verify that we have five machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 5)

	// Reconfigure with a short remove delay (2 seconds).
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for a monitor cycle before checking for removal.
	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that we now have four peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 4)

	// Remove peer.
	etcd := etcds[1]
	etcds = append(etcds[:1], etcds[2:]...)
	if err := etcd.Kill(); err != nil {
		panic(err.Error())
	}
	etcd.Release()

	// Wait for it to get dropped.
	time.Sleep(server.PeerActivityMonitorTimeout + (1 * time.Second))

	// Wait for the standby to join.
	time.Sleep((1 * time.Second) + (1 * time.Second))

	// Verify that we have 4 peers.
	result, err = c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 4)

	// Verify that node2 is not one of those peers.
	_, err = c.Get("_etcd/machines/node2", false, false)
	assert.Error(t, err)
}
// TestStandbyJoinMiss removes a machine from a two-node cluster, replays its
// join command, and verifies that the cluster still ends up with a single
// machine.
func TestStandbyJoinMiss(t *testing.T) {
	clusterSize := 2
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	time.Sleep(1 * time.Second)

	// Verify that we have two machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), clusterSize)

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"removeDelay":4, "syncInterval":4}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}
	time.Sleep(time.Second)

	resp, _ = tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for a monitor cycle before checking for removal.
	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that we now have one peer.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Simulate the join failure
	_, err = server.NewClient(nil).AddMachine("http://localhost:7001", &server.JoinCommand{
		MinVersion: store.MinVersion(),
		MaxVersion: store.MaxVersion(),
		Name:       "node2",
		RaftURL:    "http://127.0.0.1:7002",
		EtcdURL:    "http://127.0.0.1:4002",
	})
	assert.NoError(t, err)

	time.Sleep(6 * time.Second)

	go tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)

	time.Sleep(time.Second)

	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)
}
// Do registers this machine with the discovery service at discoveryURL and
// returns the addresses of any peers that are already registered.
func (d *Discoverer) Do(discoveryURL string, name string, peer string) (peers []string, err error) {
	d.name = name
	d.peer = peer
	d.discoveryURL = discoveryURL

	u, err := url.Parse(discoveryURL)
	if err != nil {
		return
	}

	// prefix is prepended to all keys for this discovery
	d.prefix = strings.TrimPrefix(u.Path, "/v2/keys/")

	// keep the old path in case we need to set the KeyPrefix below
	oldPath := u.Path
	u.Path = ""

	// Connect to a scheme://host not a full URL with path
	log.Infof("Discovery via %s using prefix %s.", u.String(), d.prefix)
	d.client = etcd.NewClient([]string{u.String()})

	if !strings.HasPrefix(oldPath, "/v2/keys") {
		d.client.SetKeyPrefix("")
	}

	// Register this machine first and announce that we are a member of
	// this cluster
	err = d.heartbeat()
	if err != nil {
		return
	}

	// Start the very slow heartbeat to the cluster now in anticipation
	// that everything is going to go alright now
	go d.startHeartbeat()

	// Attempt to take the leadership role; if there is no error, we are it!
	resp, err := d.client.Create(path.Join(d.prefix, stateKey), startedState, 0)

	// Bail out on unexpected errors
	if err != nil {
		if clientErr, ok := err.(*etcd.EtcdError); !ok || clientErr.ErrorCode != etcdErr.EcodeNodeExist {
			return nil, err
		}
	}

	// If we got a response then the CAS was successful; we are the leader
	if resp != nil && resp.Node.Value == startedState {
		// We are the leader, we have no peers
		log.Infof("Discovery _state was empty, so this machine is the initial leader.")
		return nil, nil
	}

	// Fall through to finding the other discovery peers
	return d.findPeers()
}
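// The following is an illustrative sketch, not part of the discovery code
// above: it shows how a caller might bootstrap through Discoverer.Do. The
// discovery URL, machine name, and peer address are assumptions made for the
// example, and constructing the Discoverer by its zero value is assumed to be
// sufficient here because Do fills in every field it uses.
func exampleDiscovery() ([]string, error) {
	d := &Discoverer{}
	peers, err := d.Do("http://127.0.0.1:4001/v2/keys/_etcd/registry/example", "node2", "http://127.0.0.1:7002")
	if err != nil {
		return nil, err
	}
	// An empty peer list means this machine won the _state race and should
	// bootstrap the cluster as the initial leader.
	return peers, nil
}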
// This test creates a single node and sets a value on it.
// It then kills the node, restarts it, and tries to get the value again.
func TestSingleNodeRecovery(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	args := []string{"etcd", "-name=node1", "-data-dir=/tmp/node1"}

	process, err := os.StartProcess(EtcdBinPath, append(args, "-f"), procAttr)
	if err != nil {
		t.Fatal("start process failed:" + err.Error())
		return
	}

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()
	// Test Set
	result, err := c.Set("foo", "bar", 100)
	node := result.Node

	if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
		if err != nil {
			t.Fatal(err)
		}
		t.Fatalf("Set 1 failed with %s %s %v", node.Key, node.Value, node.TTL)
	}

	time.Sleep(time.Second)

	process.Kill()

	process, err = os.StartProcess(EtcdBinPath, args, procAttr)
	defer process.Kill()
	if err != nil {
		t.Fatal("start process failed:" + err.Error())
		return
	}

	time.Sleep(time.Second)

	result, err = c.Get("foo", false, false)
	node = result.Node

	if err != nil {
		t.Fatal("get fail: " + err.Error())
		return
	}

	if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL > 99 {
		if err != nil {
			t.Fatal(err)
		}
		t.Fatalf("Recovery Get failed with %s %s %v", node.Key, node.Value, node.TTL)
	}
}
// write issues `requests` sequential set commands against the given endpoint
// and signals completion on the end channel.
func write(endpoint string, requests int, end chan int) {
	client := etcd.NewClient([]string{endpoint})

	for i := 0; i < requests; i++ {
		key := strconv.Itoa(i)
		_, err := client.Set(key, key, 0)
		if err != nil {
			println(err.Error())
		}
	}
	end <- 1
}
// NewHandler creates an HTTP handler that can be registered on a router.
func NewHandler(addr string) http.Handler {
	h := &handler{
		Router: mux.NewRouter(),
		client: etcd.NewClient([]string{addr}),
	}

	h.StrictSlash(false)
	h.handleFunc("/{key:.*}", h.getIndexHandler).Methods("GET")
	h.handleFunc("/{key:.*}", h.acquireHandler).Methods("POST")
	h.handleFunc("/{key:.*}", h.renewLockHandler).Methods("PUT")
	h.handleFunc("/{key:.*}", h.releaseLockHandler).Methods("DELETE")

	return h
}
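// Illustrative usage sketch (not from the source): NewHandler returns a plain
// http.Handler, so it can be served directly with the standard library. The
// listen address and etcd endpoint below are assumptions for the example.
func exampleServeLockHandler() error {
	return http.ListenAndServe(":8080", NewHandler("http://127.0.0.1:4001"))
}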
// TestSnapshotRestart tests etcd restarts with snapshot file
func TestSnapshotRestart(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
	args := []string{"etcd", "-name=node1", "-data-dir=/tmp/node1", "-snapshot=true", "-snapshot-count=500"}

	process, err := os.StartProcess(EtcdBinPath, append(args, "-f"), procAttr)
	if err != nil {
		t.Fatal("start process failed:" + err.Error())
	}

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()
	// issue first 501 commands
	for i := 0; i < 501; i++ {
		result, err := c.Set("foo", "bar", 100)
		node := result.Node

		if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
			if err != nil {
				t.Fatal(err)
			}

			t.Fatalf("Set failed with %s %s %v", node.Key, node.Value, node.TTL)
		}
	}

	// wait for a snapshot interval
	time.Sleep(3 * time.Second)

	_, err = ioutil.ReadDir("/tmp/node1/snapshot")
	if err != nil {
		t.Fatal("list snapshot failed:" + err.Error())
	}

	process.Kill()

	process, err = os.StartProcess(EtcdBinPath, args, procAttr)
	if err != nil {
		t.Fatal("start process failed:" + err.Error())
	}
	defer process.Kill()

	time.Sleep(1 * time.Second)

	_, err = c.Set("foo", "bar", 100)
	if err != nil {
		t.Fatal(err)
	}
}
// Create a five-node cluster.
// Replace one of the nodes with a different peer address.
func TestReplaceWithDifferentPeerAddress(t *testing.T) {
	// TODO(yichengq): find some way to avoid the error caused when a node
	// joins the cluster with a colliding name.
	// Possible solutions:
	// 1. Have the node remove itself when it executes a join command with
	//    the same name but a different peer address. However, something needs
	//    to trigger that execution, because the leader may update its address
	//    and stop heartbeating.
	// 2. Remove the node with the same name before each join.
	//    But that could be rather heavy-handed.
	t.Skip("Unimplemented functionality")

	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	time.Sleep(2 * time.Second)

	rand.Int()
	for i := 0; i < 10; i++ {
		num := rand.Int() % clusterSize
		fmt.Println("replace node", num+1)

		argGroup[num] = increasePeerAddressPort(argGroup[num], clusterSize)
		argGroup[num] = increaseAddressPort(argGroup[num], clusterSize)
		argGroup[num] = increaseDataDir(argGroup[num], clusterSize)

		// restart
		newEtcd, err := os.StartProcess(EtcdBinPath, append(argGroup[num], "-f"), procAttr)
		if err != nil {
			panic(err)
		}

		etcds[num].Wait()
		etcds[num] = newEtcd
	}

	c := etcd.NewClient(nil)
	c.SyncCluster()

	result, err := c.Set("foo", "bar", 0)
	if err != nil || result.Node.Key != "/foo" || result.Node.Value != "bar" {
		t.Fatal("Failed to set value in etcd cluster")
	}
}
// watch starts a recursive watch on key against the given endpoint and counts
// the responses it receives.
func watch(endpoint string, key string) {
	client := etcd.NewClient([]string{endpoint})

	receiver := make(chan *etcd.Response)
	go client.Watch(key, 0, true, receiver, nil)

	log.Printf("watching: %s", key)

	received := 0
	for {
		<-receiver
		received++
	}
}
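// Illustrative driver (assumed, not from the source): start the watcher and
// fan out several write workers against a local etcd endpoint, then wait for
// every writer to finish. The endpoint, worker count, and request count are
// arbitrary values chosen for the example.
func exampleRunBenchmark() {
	endpoint := "http://127.0.0.1:4001"
	workers, requests := 4, 1000

	// write() stores plain integer keys ("0", "1", ...), so watch one of them.
	go watch(endpoint, "0")

	end := make(chan int)
	for w := 0; w < workers; w++ {
		go write(endpoint, requests, end)
	}
	for w := 0; w < workers; w++ {
		<-end
	}
}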
// Create three nodes and try to set a value.
func templateTestSimpleMultiNode(t *testing.T, tls bool) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 3

	_, etcds, err := CreateCluster(clusterSize, procAttr, tls)
	if err != nil {
		t.Fatalf("cannot create cluster: %v", err)
	}
	defer DestroyCluster(etcds)

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	if !c.SyncCluster() {
		t.Fatal("Cannot sync cluster!")
	}

	// Test Set
	result, err := c.Set("foo", "bar", 100)
	if err != nil {
		t.Fatal(err)
	}
	node := result.Node

	if node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
		t.Fatalf("Set 1 failed with %s %s %v", node.Key, node.Value, node.TTL)
	}

	time.Sleep(time.Second)

	result, err = c.Set("foo", "bar", 100)
	if err != nil {
		t.Fatal(err)
	}
	node = result.Node

	if node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
		t.Fatalf("Set 2 failed with %s %s %v", node.Key, node.Value, node.TTL)
	}
}
// Create five nodes.
// Randomly kill one of the nodes while continuously sending set commands to the cluster.
func TestMultiNodeKillOne(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	time.Sleep(2 * time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()

	stop := make(chan bool)
	// Test Set
	go Set(stop)

	for i := 0; i < 10; i++ {
		num := rand.Int() % clusterSize
		fmt.Println("kill node", num+1)

		// kill
		etcds[num].Kill()
		etcds[num].Release()
		time.Sleep(time.Second)

		// restart
		etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
		if err != nil {
			panic(err)
		}
		time.Sleep(time.Second)
	}
	fmt.Println("stop")
	stop <- true

	<-stop
}
// Create a five-node cluster.
// Randomly kill one of the nodes and restart it with a different peer address.
func TestRejoinWithDifferentPeerAddress(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	time.Sleep(2 * time.Second)

	for i := 0; i < 10; i++ {
		num := rand.Int() % clusterSize
		fmt.Println("kill node", num+1)

		etcds[num].Kill()
		etcds[num].Release()
		time.Sleep(time.Second)

		argGroup[num] = increasePeerAddressPort(argGroup[num], clusterSize)

		// restart
		etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
		if err != nil {
			panic(err)
		}
		time.Sleep(time.Second)
	}

	c := etcd.NewClient(nil)
	c.SyncCluster()

	result, err := c.Set("foo", "bar", 0)
	if err != nil || result.Node.Key != "/foo" || result.Node.Value != "bar" {
		t.Fatal("Failed to set value in etcd cluster")
	}
}
// This test creates a single node and then sets enough values on it to trigger snapshots.
func TestSnapshot(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
	args := []string{"etcd", "-name=node1", "-data-dir=/tmp/node1", "-snapshot=true", "-snapshot-count=500"}

	process, err := os.StartProcess(EtcdBinPath, append(args, "-f"), procAttr)
	if err != nil {
		t.Fatal("start process failed:" + err.Error())
	}
	defer process.Kill()

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()
	// issue first 501 commands
	for i := 0; i < 501; i++ {
		result, err := c.Set("foo", "bar", 100)
		node := result.Node

		if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
			if err != nil {
				t.Fatal(err)
			}

			t.Fatalf("Set failed with %s %s %v", node.Key, node.Value, node.TTL)
		}
	}

	// wait for a snapshot interval
	time.Sleep(3 * time.Second)

	snapshots, err := ioutil.ReadDir("/tmp/node1/snapshot")
	if err != nil {
		t.Fatal("list snapshot failed:" + err.Error())
	}

	if len(snapshots) != 1 {
		t.Fatal("wrong number of snapshot :[1/", len(snapshots), "]")
	}

	index, _ := strconv.Atoi(snapshots[0].Name()[2:5])

	if index < 507 || index > 515 {
		t.Fatal("wrong name of snapshot :", snapshots[0].Name())
	}

	// issue second 501 commands
	for i := 0; i < 501; i++ {
		result, err := c.Set("foo", "bar", 100)
		node := result.Node

		if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
			if err != nil {
				t.Fatal(err)
			}

			t.Fatalf("Set failed with %s %s %v", node.Key, node.Value, node.TTL)
		}
	}

	// wait for a snapshot interval
	time.Sleep(3 * time.Second)

	snapshots, err = ioutil.ReadDir("/tmp/node1/snapshot")
	if err != nil {
		t.Fatal("list snapshot failed:" + err.Error())
	}

	if len(snapshots) != 1 {
		t.Fatal("wrong number of snapshot :[1/", len(snapshots), "]")
	}

	index, _ = strconv.Atoi(snapshots[0].Name()[2:6])

	if index < 1014 || index > 1025 {
		t.Fatal("wrong name of snapshot :", snapshots[0].Name())
	}
}
// Create a full cluster and then add an extra standby node.
func TestStandby(t *testing.T) {
	t.Skip("functionality unimplemented")

	clusterSize := 10 // DefaultActiveSize + 1
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Set key.
	time.Sleep(time.Second)
	if _, err := c.Set("foo", "bar", 0); err != nil {
		panic(err)
	}
	time.Sleep(time.Second)

	// Check that all peers and standbys have the value.
	for i := range etcds {
		resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
		if assert.NoError(t, err) {
			body := tests.ReadBodyJSON(resp)
			if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
				assert.Equal(t, node["value"], "bar")
			}
		}
	}

	// Verify that we have one standby.
	result, err := c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Reconfigure with larger active size (10 nodes) and wait for promotion.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":10, "promoteDelay":1800}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that the standby node is now a peer.
	result, err = c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 0)

	// Reconfigure with a smaller active size (8 nodes).
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "promoteDelay":1800}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (1 * time.Second))

	// Verify that we now have eight peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 8)

	// Verify that we now have two standbys.
	result, err = c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 2)
}
// TestTLSMultiNodeKillAllAndRecovery creates five TLS-enabled nodes,
// then kills all of them and restarts them.
func TestTLSMultiNodeKillAllAndRecovery(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, true)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
	<-all
	<-leaderChan
	stop <- true

	c.SyncCluster()

	// send 10 commands
	for i := 0; i < 10; i++ {
		// Test Set
		_, err := c.Set("foo", "bar", 0)
		if err != nil {
			panic(err)
		}
	}

	time.Sleep(time.Second)

	// kill all
	DestroyCluster(etcds)

	time.Sleep(time.Second)

	stop = make(chan bool)
	leaderChan = make(chan string, 1)
	all = make(chan bool, 1)

	time.Sleep(time.Second)

	for i := 0; i < clusterSize; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)

		// See util.go for the reason to wait for server
		client := buildClient()
		err = WaitForServer("127.0.0.1:400"+strconv.Itoa(i+1), client, "http")
		if err != nil {
			t.Fatalf("node start error: %s", err)
		}
	}

	go Monitor(clusterSize, 1, leaderChan, all, stop)

	<-all
	<-leaderChan

	result, err := c.Set("foo", "bar", 0)
	if err != nil {
		t.Fatalf("Recovery error: %s", err)
	}

	if result.Node.ModifiedIndex != 17 {
		t.Fatalf("recovery failed! [%d/17]", result.Node.ModifiedIndex)
	}
}
// Create five nodes.
// Kill all the nodes and restart them, then remove the leader.
func TestMultiNodeKillAllAndRecoveryAndRemoveLeader(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)

	go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
	<-all
	<-leaderChan
	stop <- true

	// Give it some time to sync the current commits and write them to disk.
	// Otherwise an instance may restart as a fresh peer, and we do not yet
	// support rejoining an old cluster that has lost its majority without a log.
	time.Sleep(time.Second)
	c.SyncCluster()

	// kill all
	DestroyCluster(etcds)

	time.Sleep(time.Second)

	stop = make(chan bool)
	leaderChan = make(chan string, 1)
	all = make(chan bool, 1)

	time.Sleep(time.Second)

	for i := 0; i < clusterSize; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
	}

	go Monitor(clusterSize, 1, leaderChan, all, stop)

	<-all
	leader := <-leaderChan

	_, err = c.Set("foo", "bar", 0)
	if err != nil {
		t.Fatalf("Recovery error: %s", err)
	}

	port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
	num := port - 7000
	resp, _ := tests.Delete(leader+"/v2/admin/machines/node"+strconv.Itoa(num), "application/json", nil)
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Check that the old leader is in standby mode now.
	time.Sleep(time.Second)
	resp, _ = tests.Get(leader + "/name")
	assert.Equal(t, resp.StatusCode, 404)
}
// Create a fifteen-node cluster (active peers plus standbys).
// Kill all the nodes and restart them.
func TestMultiNodeKillAllAndRecoveryWithStandbys(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	clusterSize := 15
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)

	go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
	<-all
	<-leaderChan
	stop <- true

	c.SyncCluster()

	// Reconfigure with smaller active size (7 nodes) and wait for remove.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}
	time.Sleep(2*server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that there are seven machines in peer mode.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)

	// send set commands
	for i := 0; i < 2*clusterSize; i++ {
		// Test Set
		_, err := c.Set("foo", "bar", 0)
		if err != nil {
			panic(err)
		}
	}

	time.Sleep(time.Second)

	// kill all
	DestroyCluster(etcds)

	time.Sleep(time.Second)

	stop = make(chan bool)
	leaderChan = make(chan string, 1)
	all = make(chan bool, 1)

	time.Sleep(time.Second)

	for i := 0; i < clusterSize; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, append(argGroup[i], "-peers="), procAttr)
	}

	time.Sleep(2 * time.Second)

	// send set commands
	for i := 0; i < 2*clusterSize; i++ {
		// Test Set
		_, err := c.Set("foo", "bar", 0)
		if err != nil {
			t.Fatalf("Recovery error: %s", err)
		}
	}

	// Verify that we have seven machines.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)
}
// This test repeatedly kills the current leader and waits for the etcd cluster
// to elect a new leader, once per node in the cluster.
// It prints out the election time and the average election time.
// It runs in a cluster with standby nodes.
func TestKillLeaderWithStandbys(t *testing.T) {
	// https://github.com/goraft/raft/issues/222
	t.Skip("stuck on raft issue")

	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	time.Sleep(time.Second)

	go Monitor(clusterSize, 1, leaderChan, all, stop)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Reconfigure with a small active size.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we have 3 peers.
	result, err := c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 3)

	var totalTime time.Duration

	leader := "http://127.0.0.1:7001"

	for i := 0; i < clusterSize; i++ {
		t.Log("leader is ", leader)
		port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
		num := port - 7001
		t.Log("kill server ", num)
		etcds[num].Kill()
		etcds[num].Release()

		start := time.Now()
		for {
			newLeader := <-leaderChan
			if newLeader != leader {
				leader = newLeader
				break
			}
		}
		take := time.Now().Sub(start)

		totalTime += take
		avgTime := totalTime / (time.Duration)(i+1)
		fmt.Println("Total time:", totalTime, "; Avg time:", avgTime)

		time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
		time.Sleep(2 * time.Second)

		// Verify that we have 3 peers.
		result, err = c.Get("_etcd/machines", true, true)
		assert.NoError(t, err)
		assert.Equal(t, len(result.Node.Nodes), 3)

		// Verify that killed node is not one of those peers.
		_, err = c.Get(fmt.Sprintf("_etcd/machines/node%d", num+1), false, false)
		assert.Error(t, err)

		etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
	}

	stop <- true
}
// TestRemovePausedNode pauses a random peer, lets a standby take its place,
// resumes it, and verifies that the active set stays at three machines
// without the paused node.
func TestRemovePausedNode(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 4
	_, etcds, _ := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	r, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":1, "syncInterval":1}`))
	if !assert.Equal(t, r.StatusCode, 200) {
		t.FailNow()
	}
	// Wait for standby instances to update their cluster config.
	time.Sleep(6 * time.Second)

	resp, err := c.Get("_etcd/machines", false, false)
	if err != nil {
		panic(err)
	}
	if len(resp.Node.Nodes) != 3 {
		t.Fatal("cannot remove peer")
	}

	for i := 0; i < clusterSize; i++ {
		// first pause the node, then remove it, then resume it
		idx := rand.Int() % clusterSize

		etcds[idx].Signal(syscall.SIGSTOP)
		fmt.Printf("pause node%d and let standby node take its place\n", idx+1)
		time.Sleep(4 * time.Second)

		etcds[idx].Signal(syscall.SIGCONT)
		// let it change its state to candidate at least
		time.Sleep(time.Second)

		stop := make(chan bool)
		leaderChan := make(chan string, 1)
		all := make(chan bool, 1)
		go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
		<-all
		<-leaderChan
		stop <- true

		resp, err = c.Get("_etcd/machines", false, false)
		if err != nil {
			panic(err)
		}
		if len(resp.Node.Nodes) != 3 {
			t.Fatalf("add peer fails (%d != 3)", len(resp.Node.Nodes))
		}
		for i := 0; i < 3; i++ {
			// Compare against the full key path of the machine entry.
			if resp.Node.Nodes[i].Key == fmt.Sprintf("/_etcd/machines/node%d", idx+1) {
				t.Fatal("node should be removed")
			}
		}
	}
}
// Create a full cluster and then change the active size dramatically.
func TestStandbyDramaticChange(t *testing.T) {
	clusterSize := 9
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	num := clusterSize
	for i := 0; i < 3; i++ {
		for inc := 0; inc < 2; inc++ {
			// Verify that we currently have `num` machines.
			result, err := c.Get("_etcd/machines", false, true)
			assert.NoError(t, err)
			assert.Equal(t, len(result.Node.Nodes), num)

			if inc == 0 {
				num -= 6
			} else {
				num += 6
			}

			t.Log("Reconfigure with active size", num)
			resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
			if !assert.Equal(t, resp.StatusCode, 200) {
				t.FailNow()
			}

			if inc == 0 {
				// Wait for monitor cycles before checking for demotion.
				time.Sleep(6*server.ActiveMonitorTimeout + (1 * time.Second))
			} else {
				time.Sleep(time.Second + (1 * time.Second))
			}

			// Verify that we now have `num` peers.
			result, err = c.Get("_etcd/machines", false, true)
			assert.NoError(t, err)
			assert.Equal(t, len(result.Node.Nodes), num)

			t.Log("Test the functionality of all servers")
			// Set key.
			if _, err := c.Set("foo", "bar", 0); err != nil {
				panic(err)
			}
			time.Sleep(100 * time.Millisecond)

			// Check that all peers and standbys have the value.
			for i := range etcds {
				resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
				if assert.NoError(t, err) {
					body := tests.ReadBodyJSON(resp)
					if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
						assert.Equal(t, node["value"], "bar")
					}
				}
			}
		}
	}
}
// Create a full cluster and then change the active size.
func TestStandby(t *testing.T) {
	clusterSize := 15
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if !assert.NoError(t, err) {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Verify that we start with the default number of active machines (9).
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 9)

	t.Log("Reconfigure with a smaller active size")
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we now have seven peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)

	t.Log("Test the functionality of all servers")
	// Set key.
	time.Sleep(time.Second)
	if _, err := c.Set("foo", "bar", 0); err != nil {
		panic(err)
	}
	time.Sleep(time.Second)

	// Check that all peers and standbys have the value.
	for i := range etcds {
		resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
		if assert.NoError(t, err) {
			body := tests.ReadBodyJSON(resp)
			if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
				assert.Equal(t, node["value"], "bar")
			}
		}
	}

	t.Log("Reconfigure with larger active size and wait for join")
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep((1 * time.Second) + (1 * time.Second))

	// Verify that exactly eight machines are in the cluster.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 8)
}
// Remove a node, then have it rejoin with and without its previous log.
func TestRemoveNode(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 4
	argGroup, etcds, _ := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "syncInterval":5}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	rmReq, _ := http.NewRequest("DELETE", "http://127.0.0.1:7001/remove/node3", nil)

	client := &http.Client{}
	for i := 0; i < 2; i++ {
		for i := 0; i < 2; i++ {
			client.Do(rmReq)

			fmt.Println("send remove to node3 and wait for its exiting")
			time.Sleep(100 * time.Millisecond)

			resp, err := c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 3 {
				t.Fatal("cannot remove peer")
			}

			etcds[2].Kill()
			etcds[2].Wait()

			if i == 1 {
				// rejoin with log
				etcds[2], err = os.StartProcess(EtcdBinPath, argGroup[2], procAttr)
			} else {
				// rejoin without log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
			}

			if err != nil {
				panic(err)
			}

			time.Sleep(time.Second + 5*time.Second)

			resp, err = c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 4 {
				t.Fatalf("add peer fails #1 (%d != 4)", len(resp.Node.Nodes))
			}
		}

		// first kill the node, then remove it, then add it back
		for i := 0; i < 2; i++ {
			etcds[2].Kill()
			fmt.Println("kill node3 and wait for its exiting")
			etcds[2].Wait()

			client.Do(rmReq)
			time.Sleep(100 * time.Millisecond)

			resp, err := c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 3 {
				t.Fatal("cannot remove peer")
			}

			if i == 1 {
				// rejoin with log
				etcds[2], err = os.StartProcess(EtcdBinPath, argGroup[2], procAttr)
			} else {
				// rejoin without log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
			}

			if err != nil {
				panic(err)
			}

			time.Sleep(time.Second + time.Second)

			resp, err = c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 4 {
				t.Fatalf("add peer fails #2 (%d != 4)", len(resp.Node.Nodes))
			}
		}
	}
}
// Remove a node, then have it rejoin with and without its previous log.
func TestRemoveNode(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 3
	argGroup, etcds, _ := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()

	rmReq, _ := http.NewRequest("DELETE", "http://127.0.0.1:7001/v2/admin/machines/node3", nil)

	client := &http.Client{}
	for i := 0; i < 2; i++ {
		for i := 0; i < 2; i++ {
			client.Do(rmReq)

			fmt.Println("send remove to node3 and wait for its exiting")
			etcds[2].Wait()

			resp, err := c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 2 {
				t.Fatal("cannot remove peer")
			}

			if i == 1 {
				// rejoin with log
				etcds[2], err = os.StartProcess(EtcdBinPath, argGroup[2], procAttr)
			} else {
				// rejoin without log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
			}

			if err != nil {
				panic(err)
			}

			time.Sleep(time.Second)

			resp, err = c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 3 {
				t.Fatalf("add peer fails #1 (%d != 3)", len(resp.Node.Nodes))
			}
		}

		// first kill the node, then remove it, then add it back
		for i := 0; i < 2; i++ {
			etcds[2].Kill()
			fmt.Println("kill node3 and wait for its exiting")
			etcds[2].Wait()

			client.Do(rmReq)

			resp, err := c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 2 {
				t.Fatal("cannot remove peer")
			}

			if i == 1 {
				// rejoin with log
				etcds[2], err = os.StartProcess(EtcdBinPath, argGroup[2], procAttr)
			} else {
				// rejoin without log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
			}

			if err != nil {
				panic(err)
			}

			time.Sleep(time.Second)

			resp, err = c.Get("_etcd/machines", false, false)
			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 3 {
				t.Fatalf("add peer fails #2 (%d != 3)", len(resp.Node.Nodes))
			}
		}
	}
}
// Create a single node and try to set a value.
func TestSingleNode(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
	args := []string{"etcd", "-name=node1", "-f", "-data-dir=/tmp/node1"}

	process, err := os.StartProcess(EtcdBinPath, args, procAttr)
	if err != nil {
		t.Fatal("start process failed:" + err.Error())
		return
	}
	defer process.Kill()

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()
	// Test Set
	result, err := c.Set("foo", "bar", 100)
	node := result.Node

	if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL < 95 {
		if err != nil {
			t.Fatal("Set 1: ", err)
		}
		t.Fatalf("Set 1 failed with %s %s %v", node.Key, node.Value, node.TTL)
	}

	time.Sleep(time.Second)

	result, err = c.Set("foo", "bar", 100)
	node = result.Node

	if err != nil || node.Key != "/foo" || node.Value != "bar" || node.TTL != 100 {
		if err != nil {
			t.Fatal("Set 2: ", err)
		}
		t.Fatalf("Set 2 failed with %s %s %v", node.Key, node.Value, node.TTL)
	}

	// Add a test-and-set test

	// First, we'll test we can change the value if we get it right
	result, err = c.CompareAndSwap("foo", "foobar", 100, "bar", 0)
	node = result.Node

	if err != nil || node.Key != "/foo" || node.Value != "foobar" || node.TTL != 100 {
		if err != nil {
			t.Fatal(err)
		}
		t.Fatalf("Set 3 failed with %s %s %v", node.Key, node.Value, node.TTL)
	}

	// Next, we'll make sure we can't set it without the correct prior value
	_, err = c.CompareAndSwap("foo", "foofoo", 100, "bar", 0)
	if err == nil {
		t.Fatalf("Set 4 expecting error when setting key with incorrect previous value")
	}
}