Example #1
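// NOTE: The package clause and imports below are assumptions added so the
// examples read as a complete file; exact paths may differ in your etcd
// checkout (older trees vendor go-etcd and testify under third_party).
// The helpers CreateCluster, DestroyCluster, Monitor, and EtcdBinPath are
// defined elsewhere in etcd's functional test utilities.
package test

import (
	"bytes"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/coreos/etcd/server"
	"github.com/coreos/etcd/store"
	"github.com/coreos/etcd/tests"
	"github.com/coreos/go-etcd/etcd"
	"github.com/stretchr/testify/assert"
)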
func TestStandbyJoinMiss(t *testing.T) {
	clusterSize := 2
	_, etcds, err := CreateCluster(clusterSize, &os.ProcAttr{Files: []*os.File{nil, os.Stdout, os.Stderr}}, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Give the cluster a moment to register both machines.
	time.Sleep(1 * time.Second)

	// Verify that we have two machines.
	result, err := c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), clusterSize)

	// Shorten the remove delay and standby sync interval to 4s so the test
	// can observe removal quickly.
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"removeDelay":4, "syncInterval":4}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}
	time.Sleep(time.Second)

	// Drop node2 from the cluster through the admin API.
	resp, _ = tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for a monitor cycle before checking for removal.
	time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))

	// Verify that we now have one peer.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)

	// Simulate the join failure by sending a join command for node2
	// directly to the leader.
	_, err = server.NewClient(nil).AddMachine("http://localhost:7001",
		&server.JoinCommand{
			MinVersion: store.MinVersion(),
			MaxVersion: store.MaxVersion(),
			Name:       "node2",
			RaftURL:    "http://127.0.0.1:7002",
			EtcdURL:    "http://127.0.0.1:4002",
		})
	assert.NoError(t, err)

	// Wait longer than the 4s remove delay so a peer that joined but never
	// came up can be cleaned up again.
	time.Sleep(6 * time.Second)

	// Issue another remove for node2 in the background.
	go tests.Delete("http://localhost:7001/v2/admin/machines/node2", "application/json", nil)

	time.Sleep(time.Second)
	// Verify that the cluster still has exactly one machine.
	result, err = c.Get("_etcd/machines", false, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 1)
}
Example #2
// Create a five-node cluster, kill all the nodes and restart them,
// then remove the leader.
func TestMultiNodeKillAllAndRecoveryAndRemoveLeader(t *testing.T) {
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	defer DestroyCluster(etcds)

	if err != nil {
		t.Fatal("cannot create cluster")
	}

	c := etcd.NewClient(nil)

	// Wait until all five nodes are up and a leader has been elected.
	go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
	<-all
	<-leaderChan
	stop <- true

	// The cluster needs some time to sync the current commits and write
	// them to disk. Otherwise an instance may restart as a brand-new peer,
	// and without the log we do not support reconnecting to an old cluster
	// that no longer has a majority alive.
	time.Sleep(time.Second)

	c.SyncCluster()

	// Kill all the nodes.
	DestroyCluster(etcds)

	time.Sleep(time.Second)

	stop = make(chan bool)
	leaderChan = make(chan string, 1)
	all = make(chan bool, 1)

	time.Sleep(time.Second)

	// Restart every node, failing the test if any process cannot start.
	for i := 0; i < clusterSize; i++ {
		etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
		if err != nil {
			t.Fatalf("cannot restart node%d: %s", i+1, err)
		}
	}

	// Wait until the restarted cluster has elected a leader again.
	go Monitor(clusterSize, 1, leaderChan, all, stop)

	<-all
	leader := <-leaderChan

	_, err = c.Set("foo", "bar", 0)
	if err != nil {
		t.Fatalf("Recovery error: %s", err)
	}

	// Derive the leader's node name from its peer port (7001 -> node1, ...).
	port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
	num := port - 7000
	resp, _ := tests.Delete(leader+"/v2/admin/machines/node"+strconv.Itoa(num), "application/json", nil)
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Check that the old leader is in standby mode now.
	time.Sleep(time.Second)
	resp, _ = tests.Get(leader + "/name")
	assert.Equal(t, resp.StatusCode, 404)
}
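
// The tests.Put and tests.Delete helpers used above wrap net/http for
// verbs that http.Get and friends do not cover. etcd's real helpers live
// in its tests package; the sketch below is only a rough, hypothetical
// illustration of the contract these examples rely on (the name `do` and
// its body are assumptions, not etcd's implementation).
func do(method, url, bodyType string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequest(method, url, body)
	if err != nil {
		return nil, err
	}
	if bodyType != "" {
		req.Header.Set("Content-Type", bodyType)
	}
	return http.DefaultClient.Do(req)
}

// Example usage, mirroring the calls in the tests above:
//   resp, err := do("DELETE", "http://localhost:7001/v2/admin/machines/node2", "application/json", nil)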