Пример #1
0
// Ensure that the cluster configuration can be reloaded.
func TestClusterConfigReload(t *testing.T) {
	procAttr := &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}
	argGroup, etcds, err := CreateCluster(3, procAttr, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":60}`))
	assert.Equal(t, resp.StatusCode, 200)

	time.Sleep(1 * time.Second)

	resp, _ = tests.Get("http://localhost:7002/v2/admin/config")
	body := tests.ReadBodyJSON(resp)
	assert.Equal(t, resp.StatusCode, 200)
	assert.Equal(t, body["activeSize"], 3)
	assert.Equal(t, body["removeDelay"], 60)

	// kill all
	DestroyCluster(etcds)

	for i := 0; i < 3; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
	}

	time.Sleep(1 * time.Second)

	resp, _ = tests.Get("http://localhost:7002/v2/admin/config")
	body = tests.ReadBodyJSON(resp)
	assert.Equal(t, resp.StatusCode, 200)
	assert.Equal(t, body["activeSize"], 3)
	assert.Equal(t, body["removeDelay"], 60)
}
Пример #2
0
// Create a full cluster, disconnect a peer, wait for autodemotion, wait for autopromotion.
func TestStandbyAutoPromote(t *testing.T) {
	t.Skip("functionality unimplemented")

	clusterSize := 10 // DefaultActiveSize + 1
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer func() {
		// Wrap this in a closure so that it picks up the updated version of
		// the "etcds" variable.
		DestroyCluster(etcds)
	}()

	c := etcd.NewClient(nil)
	c.SyncCluster()

	time.Sleep(1 * time.Second)

	// Verify that we have one standby.
	result, err := c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Reconfigure with a short promote delay (2 second).
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":9, "promoteDelay":2}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Remove peer.
	etcd := etcds[1]
	etcds = append(etcds[:1], etcds[2:]...)
	if err := etcd.Kill(); err != nil {
		panic(err.Error())
	}
	etcd.Release()

	// Wait for it to get dropped.
	time.Sleep(server.PeerActivityMonitorTimeout + (2 * time.Second))

	// Wait for the standby to be promoted.
	time.Sleep(server.ActiveMonitorTimeout + (2 * time.Second))

	// Verify that we have 9 peers.
	result, err = c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 9)

	// Verify that node10 is one of those peers.
	result, err = c.Get("_etcd/machines/node10", false, false)
	assert.NoError(t, err)

	// Verify that there are no more standbys.
	result, err = c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	if assert.Equal(t, len(result.Node.Nodes), 1) {
		assert.Equal(t, result.Node.Nodes[0].Key, "/_etcd/standbys/node2")
	}
}
Пример #3
0
// Create a full cluster, disconnect a peer, wait for removal, wait for standby join.
func TestStandbyAutoJoin(t *testing.T) {
	clusterSize := 5
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer func() {
		// Wrap this in a closure so that it picks up the updated version of
		// the "etcds" variable.
		DestroyCluster(etcds)
	}()

	c := etcd.NewClient(nil)
	c.SyncCluster()

	time.Sleep(1 * time.Second)

	// Verify that we have five machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 5)

	// Reconfigure with a short remove delay (2 second).
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for a monitor cycle before checking for removal.
	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that we now have four peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 4)

	// Remove peer.
	etcd := etcds[1]
	etcds = append(etcds[:1], etcds[2:]...)
	if err := etcd.Kill(); err != nil {
		panic(err.Error())
	}
	etcd.Release()

	// Wait for it to get dropped.
	time.Sleep(server.PeerActivityMonitorTimeout + (1 * time.Second))

	// Wait for the standby to join.
	time.Sleep((1 * time.Second) + (1 * time.Second))

	// Verify that we have 4 peers.
	result, err = c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 4)

	// Verify that node2 is not one of those peers.
	_, err = c.Get("_etcd/machines/node2", false, false)
	assert.Error(t, err)
}
Пример #4
0
func TestStandbyJoinMiss(t *testing.T) {
	clusterSize := 2
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	time.Sleep(1 * time.Second)

	// Verify that we have two machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), clusterSize)

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"removeDelay":4, "syncInterval":4}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}
	time.Sleep(time.Second)

	resp, _ = tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for a monitor cycle before checking for removal.
	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that we now have one peer.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Simulate the join failure
	_, err = server.NewClient(nil).AddMachine("http://localhost:7001",
		&server.JoinCommand{
			MinVersion: store.MinVersion(),
			MaxVersion: store.MaxVersion(),
			Name:       "node2",
			RaftURL:    "http://127.0.0.1:7002",
			EtcdURL:    "http://127.0.0.1:4002",
		})
	assert.NoError(t, err)

	time.Sleep(6 * time.Second)

	go tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)

	time.Sleep(time.Second)
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)
}
Пример #5
0
// Ensure that the cluster configuration can be updated.
func TestClusterConfigSet(t *testing.T) {
	_, etcds, err := CreateCluster(3, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":60}`))
	assert.Equal(t, resp.StatusCode, 200)

	time.Sleep(1 * time.Second)

	resp, _ = tests.Get("http://localhost:7002/v2/admin/config")
	body := tests.ReadBodyJSON(resp)
	assert.Equal(t, resp.StatusCode, 200)
	assert.Equal(t, body["activeSize"], 3)
	assert.Equal(t, body["removeDelay"], 60)
}
Пример #6
0
// Create a full cluster and then add extra an extra standby node.
func TestStandby(t *testing.T) {
	t.Skip("functionality unimplemented")

	clusterSize := 10 // DefaultActiveSize + 1
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Set key.
	time.Sleep(time.Second)
	if _, err := c.Set("foo", "bar", 0); err != nil {
		panic(err)
	}
	time.Sleep(time.Second)

	// Check that all peers and standbys have the value.
	for i := range etcds {
		resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
		if assert.NoError(t, err) {
			body := tests.ReadBodyJSON(resp)
			if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
				assert.Equal(t, node["value"], "bar")
			}
		}
	}

	// Verify that we have one standby.
	result, err := c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Reconfigure with larger active size (10 nodes) and wait for promotion.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":10, "promoteDelay":1800}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that the standby node is now a peer.
	result, err = c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 0)

	// Reconfigure with a smaller active size (8 nodes).
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "promoteDelay":1800}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (1 * time.Second))

	// Verify that we now have eight peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 8)

	// Verify that we now have two standbys.
	result, err = c.Get("_etcd/standbys", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 2)
}
// Create a five-node cluster
// Kill all the nodes and restart
func TestMultiNodeKillAllAndRecoveryWithStandbys(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	clusterSize := 15
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)

	go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
	<-all
	<-leaderChan
	stop <- true

	c.SyncCluster()

	// Reconfigure with smaller active size (7 nodes) and wait for remove.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep(2*server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that there is three machines in peer mode.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)

	// send set commands
	for i := 0; i < 2*clusterSize; i++ {
		// Test Set
		_, err := c.Set("foo", "bar", 0)
		if err != nil {
			panic(err)
		}
	}

	time.Sleep(time.Second)

	// kill all
	DestroyCluster(etcds)

	time.Sleep(time.Second)

	stop = make(chan bool)
	leaderChan = make(chan string, 1)
	all = make(chan bool, 1)

	time.Sleep(time.Second)

	for i := 0; i < clusterSize; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, append(argGroup[i], "-peers="), procAttr)
	}

	time.Sleep(2 * time.Second)

	// send set commands
	for i := 0; i < 2*clusterSize; i++ {
		// Test Set
		_, err := c.Set("foo", "bar", 0)
		if err != nil {
			t.Fatalf("Recovery error: %s", err)
		}
	}

	// Verify that we have seven machines.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)
}
Пример #8
0
// Create a full cluster and then change the active size dramatically.
func TestStandbyDramaticChange(t *testing.T) {
	clusterSize := 9
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	assert.NoError(t, err)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	time.Sleep(time.Second)
	c := etcd.NewClient(nil)
	c.SyncCluster()

	num := clusterSize
	for i := 0; i < 3; i++ {
		for inc := 0; inc < 2; inc++ {
			// Verify that we just have i machines.
			result, err := c.Get("_etcd/machines", false, true)
			assert.NoError(t, err)
			assert.Equal(t, len(result.Node.Nodes), num)

			if inc == 0 {
				num -= 6
			} else {
				num += 6
			}

			t.Log("Reconfigure with active size", num)
			resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(fmt.Sprintf(`{"activeSize":%d, "syncInterval":1}`, num)))
			if !assert.Equal(t, resp.StatusCode, 200) {
				t.FailNow()
			}

			if inc == 0 {
				// Wait for monitor cycles before checking for demotion.
				time.Sleep(6*server.ActiveMonitorTimeout + (1 * time.Second))
			} else {
				time.Sleep(time.Second + (1 * time.Second))
			}

			// Verify that we now have peers.
			result, err = c.Get("_etcd/machines", false, true)
			assert.NoError(t, err)
			assert.Equal(t, len(result.Node.Nodes), num)

			t.Log("Test the functionality of all servers")
			// Set key.
			if _, err := c.Set("foo", "bar", 0); err != nil {
				panic(err)
			}
			time.Sleep(100 * time.Millisecond)

			// Check that all peers and standbys have the value.
			for i := range etcds {
				resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
				if assert.NoError(t, err) {
					body := tests.ReadBodyJSON(resp)
					if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
						assert.Equal(t, node["value"], "bar")
					}
				}
			}
		}
	}
}
Пример #9
0
// Create a full cluster and then change the active size.
func TestStandby(t *testing.T) {
	clusterSize := 15
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if !assert.NoError(t, err) {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep(time.Second)
	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Verify that we just have default machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 9)

	t.Log("Reconfigure with a smaller active size")
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we now have seven peers.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 7)

	t.Log("Test the functionality of all servers")
	// Set key.
	time.Sleep(time.Second)
	if _, err := c.Set("foo", "bar", 0); err != nil {
		panic(err)
	}
	time.Sleep(time.Second)

	// Check that all peers and standbys have the value.
	for i := range etcds {
		resp, err := tests.Get(fmt.Sprintf("http://localhost:%d/v2/keys/foo", 4000+(i+1)))
		if assert.NoError(t, err) {
			body := tests.ReadBodyJSON(resp)
			if node, _ := body["node"].(map[string]interface{}); assert.NotNil(t, node) {
				assert.Equal(t, node["value"], "bar")
			}
		}
	}

	t.Log("Reconfigure with larger active size and wait for join")
	resp, _ = tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":8, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	time.Sleep((1 * time.Second) + (1 * time.Second))

	// Verify that exactly eight machines are in the cluster.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 8)
}
Пример #10
0
// This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times.
// It will print out the election time and the average election time.
// It runs in a cluster with standby nodes.
func TestKillLeaderWithStandbys(t *testing.T) {
	// https://github.com/goraft/raft/issues/222
	t.Skip("stuck on raft issue")

	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	time.Sleep(time.Second)

	go Monitor(clusterSize, 1, leaderChan, all, stop)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Reconfigure with a small active size.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we have 3 peers.
	result, err := c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 3)

	var totalTime time.Duration

	leader := "http://127.0.0.1:7001"

	for i := 0; i < clusterSize; i++ {
		t.Log("leader is ", leader)
		port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
		num := port - 7001
		t.Log("kill server ", num)
		etcds[num].Kill()
		etcds[num].Release()

		start := time.Now()
		for {
			newLeader := <-leaderChan
			if newLeader != leader {
				leader = newLeader
				break
			}
		}
		take := time.Now().Sub(start)

		totalTime += take
		avgTime := totalTime / (time.Duration)(i+1)
		fmt.Println("Total time:", totalTime, "; Avg time:", avgTime)

		time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
		time.Sleep(2 * time.Second)

		// Verify that we have 3 peers.
		result, err = c.Get("_etcd/machines", true, true)
		assert.NoError(t, err)
		assert.Equal(t, len(result.Node.Nodes), 3)

		// Verify that killed node is not one of those peers.
		_, err = c.Get(fmt.Sprintf("_etcd/machines/node%d", num+1), false, false)
		assert.Error(t, err)

		etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
	}
	stop <- true
}
// remove the node and node rejoin with previous log
func TestRemoveNode(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 4
	argGroup, etcds, _ := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()

	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "syncInterval":5}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	rmReq, _ := http.NewRequest("DELETE", "http://127.0.0.1:7001/remove/node3", nil)

	client := &http.Client{}
	for i := 0; i < 2; i++ {
		for i := 0; i < 2; i++ {
			client.Do(rmReq)

			fmt.Println("send remove to node3 and wait for its exiting")
			time.Sleep(100 * time.Millisecond)

			resp, err := c.Get("_etcd/machines", false, false)

			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 3 {
				t.Fatal("cannot remove peer")
			}

			etcds[2].Kill()
			etcds[2].Wait()

			if i == 1 {
				// rejoin with log
				etcds[2], err = os.StartProcess(EtcdBinPath, argGroup[2], procAttr)
			} else {
				// rejoin without log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
			}

			if err != nil {
				panic(err)
			}

			time.Sleep(time.Second + 5*time.Second)

			resp, err = c.Get("_etcd/machines", false, false)

			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 4 {
				t.Fatalf("add peer fails #1 (%d != 4)", len(resp.Node.Nodes))
			}
		}

		// first kill the node, then remove it, then add it back
		for i := 0; i < 2; i++ {
			etcds[2].Kill()
			fmt.Println("kill node3 and wait for its exiting")
			etcds[2].Wait()

			client.Do(rmReq)

			time.Sleep(100 * time.Millisecond)

			resp, err := c.Get("_etcd/machines", false, false)

			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 3 {
				t.Fatal("cannot remove peer")
			}

			if i == 1 {
				// rejoin with log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2]), procAttr)
			} else {
				// rejoin without log
				etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
			}

			if err != nil {
				panic(err)
			}

			time.Sleep(time.Second + time.Second)

			resp, err = c.Get("_etcd/machines", false, false)

			if err != nil {
				panic(err)
			}

			if len(resp.Node.Nodes) != 4 {
				t.Fatalf("add peer fails #2 (%d != 4)", len(resp.Node.Nodes))
			}
		}
	}
}
func TestRemovePausedNode(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 4
	_, etcds, _ := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	time.Sleep(time.Second)

	c := etcd.NewClient(nil)

	c.SyncCluster()

	r, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":1, "syncInterval":1}`))
	if !assert.Equal(t, r.StatusCode, 200) {
		t.FailNow()
	}
	// Wait for standby instances to update its cluster config
	time.Sleep(6 * time.Second)

	resp, err := c.Get("_etcd/machines", false, false)
	if err != nil {
		panic(err)
	}
	if len(resp.Node.Nodes) != 3 {
		t.Fatal("cannot remove peer")
	}

	for i := 0; i < clusterSize; i++ {
		// first pause the node, then remove it, then resume it
		idx := rand.Int() % clusterSize

		etcds[idx].Signal(syscall.SIGSTOP)
		fmt.Printf("pause node%d and let standby node take its place\n", idx+1)

		time.Sleep(4 * time.Second)

		etcds[idx].Signal(syscall.SIGCONT)
		// let it change its state to candidate at least
		time.Sleep(time.Second)

		stop := make(chan bool)
		leaderChan := make(chan string, 1)
		all := make(chan bool, 1)

		go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
		<-all
		<-leaderChan
		stop <- true

		resp, err = c.Get("_etcd/machines", false, false)
		if err != nil {
			panic(err)
		}
		if len(resp.Node.Nodes) != 3 {
			t.Fatalf("add peer fails (%d != 3)", len(resp.Node.Nodes))
		}
		for i := 0; i < 3; i++ {
			if resp.Node.Nodes[i].Key == fmt.Sprintf("node%d", idx+1) {
				t.Fatal("node should be removed")
			}
		}
	}
}