// This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times. // It will print out the election time and the average election time. func TestKillLeader(t *testing.T) { procAttr := new(os.ProcAttr) procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr} clusterSize := 5 argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false) if err != nil { t.Fatal("cannot create cluster") } defer test.DestroyCluster(etcds) stop := make(chan bool) leaderChan := make(chan string, 1) all := make(chan bool, 1) time.Sleep(time.Second) go test.Monitor(clusterSize, 1, leaderChan, all, stop) var totalTime time.Duration leader := "http://127.0.0.1:7001" for i := 0; i < clusterSize; i++ { fmt.Println("leader is ", leader) port, _ := strconv.Atoi(strings.Split(leader, ":")[2]) num := port - 7001 fmt.Println("kill server ", num) etcds[num].Kill() etcds[num].Release() start := time.Now() for { newLeader := <-leaderChan if newLeader != leader { leader = newLeader break } } take := time.Now().Sub(start) totalTime += take avgTime := totalTime / (time.Duration)(i+1) fmt.Println("Leader election time is ", take, "with election timeout", ElectionTimeout) fmt.Println("Leader election time average is", avgTime, "with election timeout", ElectionTimeout) etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr) } stop <- true }
// TestKillRandom kills random machines in the cluster and // restart them after all other machines agree on the same leader func TestKillRandom(t *testing.T) { procAttr := new(os.ProcAttr) procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr} clusterSize := 9 argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false) if err != nil { t.Fatal("cannot create cluster") } defer test.DestroyCluster(etcds) stop := make(chan bool) leaderChan := make(chan string, 1) all := make(chan bool, 1) time.Sleep(3 * time.Second) go test.Monitor(clusterSize, 4, leaderChan, all, stop) toKill := make(map[int]bool) for i := 0; i < 20; i++ { fmt.Printf("TestKillRandom Round[%d/20]\n", i) j := 0 for { r := rand.Int31n(9) if _, ok := toKill[int(r)]; !ok { j++ toKill[int(r)] = true } if j > 3 { break } } for num, _ := range toKill { err := etcds[num].Kill() if err != nil { panic(err) } etcds[num].Wait() } time.Sleep(ElectionTimeout) <-leaderChan for num, _ := range toKill { etcds[num], err = os.StartProcess("etcd", argGroup[num], procAttr) } toKill = make(map[int]bool) <-all } stop <- true }
// Create a five nodes // Kill all the nodes and restart func TestMultiNodeKillAllAndRecovery(t *testing.T) { procAttr := new(os.ProcAttr) procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr} clusterSize := 5 argGroup, etcds, err := test.CreateCluster(clusterSize, procAttr, false) if err != nil { t.Fatal("cannot create cluster") } c := etcd.NewClient() c.SyncCluster() time.Sleep(time.Second) // send 10 commands for i := 0; i < 10; i++ { // Test Set _, err := c.Set("foo", "bar", 0) if err != nil { panic(err) } } time.Sleep(time.Second) // kill all test.DestroyCluster(etcds) time.Sleep(time.Second) stop := make(chan bool) leaderChan := make(chan string, 1) all := make(chan bool, 1) time.Sleep(time.Second) for i := 0; i < clusterSize; i++ { etcds[i], err = os.StartProcess("etcd", argGroup[i], procAttr) } go test.Monitor(clusterSize, 1, leaderChan, all, stop) <-all <-leaderChan result, err := c.Set("foo", "bar", 0) if err != nil { panic(err) } if result.Index != 18 { t.Fatalf("recovery failed! [%d/18]", result.Index) } // kill all test.DestroyCluster(etcds) }