// TestSubmitCommand tests that client.SubmitCommand(...) adds a command // to the proper node path in etcd, and that it can be read back. func TestSubmitCommand(t *testing.T) { eclient, hosts := testutil.NewEtcdClient(t) mclient := NewClient(Namespace, hosts) if err := mclient.SubmitCommand(Node1, metafora.CommandFreeze()); err != nil { t.Fatalf("Unable to submit command. error:%v", err) } if res, err := eclient.Get(NodesDir, false, false); err != nil { t.Fatalf("Get on path %v returned error: %v", NodesDir, err) } else if res.Node == nil || res.Node.Nodes == nil { t.Fatalf("Get on path %v returned nil for child nodes", NodesDir) } }
// TestNodes tests that client.Nodes() returns the metafora nodes // registered in etcd. func TestNodes(t *testing.T) { eclient, hosts := testutil.NewEtcdClient(t) const recursive = true eclient.Delete(Node1Path, recursive) mclient := NewClient(Namespace, hosts) if _, err := eclient.CreateDir(Node1Path, 0); err != nil { t.Fatalf("AddChild %v returned error: %v", NodesDir, err) } if nodes, err := mclient.Nodes(); err != nil { t.Fatalf("Nodes returned error: %v", err) } else { for i, n := range nodes { t.Logf("%v -> %v", i, n) } } }
// TestSubmitTask tests that client.SubmitTask(...) adds a task to // the proper path in etcd, and that the same task id cannot be // submitted more than once. func TestSubmitTask(t *testing.T) { _, hosts := testutil.NewEtcdClient(t) mclient := NewClient(Namespace, hosts) task := DefaultTaskFunc("testid1", "") if err := mclient.DeleteTask(task.ID()); err != nil { t.Logf("DeleteTask returned an error, which maybe ok. Error:%v", err) } if err := mclient.SubmitTask(task); err != nil { t.Fatalf("Submit task failed on initial submission, error: %v", err) } if err := mclient.SubmitTask(task); err == nil { t.Fatalf("Submit task did not fail, but should of, when using existing tast id") } }
func TestCoordinatorFirstNodeJoiner(t *testing.T) { t.Parallel() coordinator1, conf := setupEtcd(t) defer coordinator1.Close() if err := coordinator1.Init(newCtx(t, "coordinator1")); err != nil { t.Fatalf("Unexpected error initialzing coordinator: %v", err) } client, _ := testutil.NewEtcdClient(t) tpath := path.Join(conf.Namespace, TasksPath) _, err := client.Get(tpath, unsorted, notrecursive) if err != nil && strings.Contains(err.Error(), "Key not found") { t.Fatalf("The tasks path wasn't created when the first node joined: %s", tpath) } else if err != nil { t.Fatalf("Unknown error trying to test: err: %s", err.Error()) } //TODO test for node path too... }
// TestTaskResurrectionInt ensures that a Claim won't recreate a task that had // been deleted (marked as done). taskmgr has a non-integration version of this // test. func TestTaskResurrectionInt(t *testing.T) { etcdc, hosts := testutil.NewEtcdClient(t) t.Parallel() etcdc.Delete("test-resurrect", recursive) task := m_etcd.DefaultTaskFunc("xyz", "") conf := m_etcd.NewConfig("testclient", "test-resurrect", hosts) coord, err := m_etcd.NewEtcdCoordinator(conf) if err != nil { t.Fatalf("Error creating coordinator: %v", err) } if err := coord.Init(nil); err != nil { t.Fatalf("Error initializing coordinator: %v", err) } // Try to claim a nonexistent if claimed := coord.Claim(task); claimed { t.Fatal("Claiming a nonexistent task should not work but did!") } // Create a task, mark it as done, and try to claim it again client := m_etcd.NewClient("test-resurrect", hosts) if err := client.SubmitTask(m_etcd.DefaultTaskFunc("xyz", "")); err != nil { t.Fatalf("Error submitting task xyz: %v", err) } if claimed := coord.Claim(task); !claimed { t.Fatal("Failed to claim task xyz") } coord.Done(task) if claimed := coord.Claim(task); claimed { t.Fatal("Reclaimed task that was marked as done.") } }
// Ensure that Watch() picks up new tasks and returns them. func TestCoordinatorTC1(t *testing.T) { t.Parallel() coordinator1, conf := setupEtcd(t) if err := coordinator1.Init(newCtx(t, "coordinator1")); err != nil { t.Fatalf("Unexpected error initialzing coordinator: %v", err) } defer coordinator1.Close() client, _ := testutil.NewEtcdClient(t) tasks := make(chan metafora.Task) task001 := &task{id: "test-task"} taskPath := path.Join(conf.Namespace, TasksPath, task001.ID()) errc := make(chan error) go func() { //Watch blocks, so we need to test it in its own go routine. errc <- coordinator1.Watch(tasks) }() client.CreateDir(taskPath, 5) select { case task := <-tasks: if task.ID() != task001.ID() { t.Fatalf("coordinator1.Watch() test failed: We received the incorrect taskId. Got [%s] Expected[%s]", task, task001) } case <-time.After(time.Second * 5): t.Fatalf("coordinator1.Watch() test failed: The testcase timed out after 5 seconds.") } coordinator1.Close() err := <-errc if err != nil { t.Fatalf("coordinator1.Watch() returned an err: %v", err) } }
// TestExpiration ensures that expired claims get reclaimed properly. func TestExpiration(t *testing.T) { t.Parallel() coord, conf := setupEtcd(t) claims := make(chan int, 10) hf := metafora.HandlerFunc(metafora.SimpleHandler(func(_ metafora.Task, stop <-chan bool) bool { claims <- 1 <-stop return true })) consumer, err := metafora.NewConsumer(coord, hf, metafora.DumbBalancer) if err != nil { t.Fatalf("Error creating consumer: %+v", err) } client, _ := testutil.NewEtcdClient(t) _, err = client.Create(path.Join(conf.Namespace, TasksPath, "abc", OwnerMarker), `{"node":"--"}`, 1) if err != nil { t.Fatalf("Error creating fake claim: %v", err) } defer consumer.Shutdown() go consumer.Run() // Wait for claim to expire and coordinator to pick up task select { case <-claims: // Task claimed! case <-time.After(5 * time.Second): t.Fatal("Task not claimed long after it should have been.") } tasks := consumer.Tasks() if len(tasks) != 1 { t.Fatalf("Expected 1 task to be claimed but found: %v", tasks) } }
// TestSleepTest is an integration test for all of m_etcd's components. // func TestSleepTest(t *testing.T) { etcdc, hosts := testutil.NewEtcdClient(t) t.Parallel() const namespace = "sleeptest-metafora" const sleepingtasks = "sleeping-task1" etcdc.Delete(namespace, recursive) holdtask := make(chan bool) h := func(task metafora.Task, cmds <-chan *statemachine.Message) *statemachine.Message { if task.ID() == sleepingtasks { sleeptil := 5 * time.Second nextstarttime := (time.Now().Add(sleeptil)) t.Logf("sleeping task:%v sleepfor:%v", task, nextstarttime) <-holdtask return statemachine.SleepMessage(nextstarttime) } cmd := <-cmds t.Logf("non sleeping task:%v", task) return cmd } newC := func(name, ns string) *metafora.Consumer { conf := m_etcd.NewConfig(name, ns, hosts) coord, hf, bal, err := m_etcd.New(conf, h) if err != nil { t.Fatalf("Error creating new etcd stack: %v", err) } cons, err := metafora.NewConsumer(coord, hf, bal) if err != nil { t.Fatalf("Error creating consumer %s:%s: %v", ns, name, err) } go func() { cons.Run() t.Logf("Consumer:%s exited.", name) }() return cons } assertRunning := func(tid string, cons ...*metafora.Consumer) { found := false for _, c := range cons { tasks := c.Tasks() if len(tasks) > 0 && found { t.Fatal("Task already found running but another task is running on a different consumer") } if len(tasks) > 1 { t.Fatalf("Expected at most 1 task, but found: %d", len(tasks)) } if len(tasks) == 1 && tasks[0].Task().ID() == tid { found = true } } if !found { t.Fatalf("Could not find task=%q", tid) } } // Start 2 consumers cons1 := newC("node1", namespace) cons2 := newC("node2", namespace) // Create clients and start some tests cliA := m_etcd.NewClient(namespace, hosts) if err := cliA.SubmitTask(m_etcd.DefaultTaskFunc(sleepingtasks, "")); err != nil { t.Fatalf("Error submitting task1 to a: %v", err) } // Give consumers a bit to pick up tasks time.Sleep(500 * time.Millisecond) assertRunning(sleepingtasks, cons1, cons2) holdtask <- true // Give consumers a bit to pick up tasks time.Sleep(500 * time.Millisecond) assertRunning(sleepingtasks, cons1, cons2) // not sure if this should be true or false. wait1 := make(chan bool) go func() { defer close(wait1) // Shutdown cons1.Shutdown() cons2.Shutdown() }() timeout := time.NewTimer(1 * time.Second) select { case <-wait1: case <-timeout.C: t.Fatalf("failed waiting for shutdown") } // make sure all tasks are released for _, c := range []*metafora.Consumer{cons1, cons2} { tasks := c.Tasks() for _, work := range tasks { t.Fatalf("work id %v is still running", work) } } }
// TestAll is an integration test for all of m_etcd's components. // // While huge integration tests like this are rarely desirable as they can be // overly fragile and complex, I found myself manually repeating the tests I've // automated here over and over. This is far more reliable than expecting // developers to do adhoc testing of all of the m_etcd package's features. func TestAll(t *testing.T) { etcdc, hosts := testutil.NewEtcdClient(t) t.Parallel() etcdc.Delete("test-a", recursive) etcdc.Delete("test-b", recursive) h := func(task metafora.Task, cmds <-chan *statemachine.Message) *statemachine.Message { cmd := <-cmds if task.ID() == "error-test" { return statemachine.ErrorMessage(errors.New("error-test")) } return cmd } newC := func(name, ns string) *metafora.Consumer { conf := m_etcd.NewConfig(name, ns, hosts) conf.Name = name coord, hf, bal, err := m_etcd.New(conf, h) if err != nil { t.Fatalf("Error creating new etcd stack: %v", err) } cons, err := metafora.NewConsumer(coord, hf, bal) if err != nil { t.Fatalf("Error creating consumer %s:%s: %v", ns, name, err) } go cons.Run() return cons } // Start 4 consumers, 2 per namespace cons1a := newC("node1", "test-a") cons2a := newC("node2", "test-a") cons1b := newC("node1", "test-b") cons2b := newC("node2", "test-b") // Create clients and start some tests cliA := m_etcd.NewClient("test-a", hosts) cliB := m_etcd.NewClient("test-b", hosts) if err := cliA.SubmitTask(m_etcd.DefaultTaskFunc("task1", "")); err != nil { t.Fatalf("Error submitting task1 to a: %v", err) } if err := cliB.SubmitTask(m_etcd.DefaultTaskFunc("task1", "")); err != nil { t.Fatalf("Error submitting task1 to b: %v", err) } // Give consumers a bit to pick up tasks time.Sleep(250 * time.Millisecond) assertRunning := func(tid string, cons ...*metafora.Consumer) { found := false for _, c := range cons { tasks := c.Tasks() if len(tasks) > 0 && found { t.Fatal("Task already found running but another task is running on a different consumer") } if len(tasks) > 1 { t.Fatalf("Expected at most 1 task, but found: %d", len(tasks)) } if len(tasks) == 1 && tasks[0].Task().ID() == tid { found = true } } if !found { t.Fatalf("Could not find task=%q", tid) } } assertRunning("task1", cons1a, cons2a) assertRunning("task1", cons1b, cons2b) // Kill task1 in A { cmdr := m_etcd.NewCommander("test-a", etcdc) if err := cmdr.Send("task1", statemachine.KillMessage()); err != nil { t.Fatalf("Error sending kill to task1: %v", err) } time.Sleep(250 * time.Millisecond) for _, c := range []*metafora.Consumer{cons1a, cons2a} { tasks := c.Tasks() if len(tasks) != 0 { t.Fatalf("Expected no tasks but found: %d", len(tasks)) } } } // Submit a bunch of tasks to A { tasks := []string{"task2", "task3", "task4", "task5", "task6", "task7"} for _, tid := range tasks { if err := cliA.SubmitTask(m_etcd.DefaultTaskFunc(tid, "")); err != nil { t.Fatalf("Error submitting task=%q to A: %v", tid, err) } } // Give them time to start time.Sleep(800 * time.Millisecond) // Ensure they're balanced if err := cliA.SubmitCommand("node1", metafora.CommandBalance()); err != nil { t.Fatalf("Error submitting balance command to cons1a: %v", err) } time.Sleep(800 * time.Millisecond) if err := cliA.SubmitCommand("node2", metafora.CommandBalance()); err != nil { t.Fatalf("Error submitting balance command to cons1a: %v", err) } a1tasks := cons1a.Tasks() a2tasks := cons2a.Tasks() for _, task := range a1tasks { metafora.Debug("A1: ", task.Task(), " - ", task.Stopped().IsZero()) } for _, task := range a2tasks { metafora.Debug("A2: ", task.Task(), " - ", task.Stopped().IsZero()) } time.Sleep(800 * time.Millisecond) a1tasks = cons1a.Tasks() a2tasks = cons2a.Tasks() if len(a1tasks) < 2 || len(a1tasks) > 4 || len(a2tasks) < 2 || len(a2tasks) > 4 { t.Fatalf("Namespace A isn't fairly balanced: node1: %d; node2: %d", len(a1tasks), len(a2tasks)) } // Shutting down a consumer should migrate all tasks to the other cons1a.Shutdown() time.Sleep(800 * time.Millisecond) a2tasks = cons2a.Tasks() if len(a2tasks) != len(tasks) { t.Fatalf("Consumer 2a should have received all %d tasks but only has %d.", len(tasks), len(a2tasks)) } } // Use Namespace B to check Error state handling { tasks := []string{"task8", "error-test"} for _, tid := range tasks { if err := cliB.SubmitTask(m_etcd.DefaultTaskFunc(tid, "")); err != nil { t.Fatalf("Error submitting task=%q to B: %v", tid, err) } } // Give them time to start time.Sleep(time.Second) n := len(cons1b.Tasks()) + len(cons2b.Tasks()) if n != 3 { t.Fatalf("Expected B to be running 3 tasks but found %d", n) } // Resuming error-test 8*2 times should cause it to be failed cmdr := m_etcd.NewCommander("test-b", etcdc) for i := 0; i < statemachine.DefaultErrMax*2; i++ { if err := cmdr.Send("error-test", statemachine.RunMessage()); err != nil { t.Fatalf("Unexpected error resuming error-test in B: %v", err) } time.Sleep(500 * time.Millisecond) } n = len(cons1b.Tasks()) + len(cons2b.Tasks()) if n != 2 { t.Fatalf("Expected B to be running 2 tasks but found %d", n) } // Resubmitting a failed task shouldn't error but also shouldn't run. if err := cliB.SubmitTask(m_etcd.DefaultTaskFunc("error-test", "")); err != nil { t.Fatalf("Error resubmitting error-test task to B: %v", err) } // Give the statemachine a moment to load the initial state and exit time.Sleep(time.Second) n = len(cons1b.Tasks()) + len(cons2b.Tasks()) if n != 2 { t.Fatalf("Expected B to be running 2 tasks but found %d", n) } } // Shutdown cons2a.Shutdown() cons1b.Shutdown() cons2b.Shutdown() // Make sure everything is cleaned up respA, err := etcdc.Get("/test-a/tasks", true, true) if err != nil { t.Fatalf("Error getting tasks from etcd: %v", err) } respB, err := etcdc.Get("/test-b/tasks", true, true) if err != nil { t.Fatalf("Error getting tasks from etcd: %v", err) } nodes := []*etcd.Node{} nodes = append(nodes, respA.Node.Nodes...) nodes = append(nodes, respB.Node.Nodes...) for _, node := range nodes { if len(node.Nodes) > 1 { t.Fatalf("%s has %d (>1) nodes. First 2: %s, %s", node.Key, len(node.Nodes), node.Nodes[0].Key, node.Nodes[1].Key) } } }
// TestNodeRefresher ensures the node refresher properly updates the TTL on the // node directory in etcd and shuts down the entire consumer on error. func TestNodeRefresher(t *testing.T) { t.Parallel() _, conf := setupEtcd(t) // Use a custom node path ttl conf.NodeTTL = 3 coord, err := NewEtcdCoordinator(conf) if err != nil { t.Fatalf("Error creating coordinator: %v", err) } hf := metafora.HandlerFunc(nil) // we won't be handling any tasks consumer, err := metafora.NewConsumer(coord, hf, metafora.DumbBalancer) if err != nil { t.Fatalf("Error creating consumer: %+v", err) } client, _ := testutil.NewEtcdClient(t) defer consumer.Shutdown() runDone := make(chan struct{}) go func() { consumer.Run() close(runDone) }() nodePath := path.Join(conf.Namespace, NodesPath, conf.Name) ttl := int64(-1) deadline := time.Now().Add(3 * time.Second) for time.Now().Before(deadline) { resp, _ := client.Get(nodePath, false, false) if resp != nil && resp.Node.Dir { ttl = resp.Node.TTL break } time.Sleep(250 * time.Millisecond) } if ttl == -1 { t.Fatalf("Node path %s not found.", nodePath) } if ttl < 1 || ttl > 3 { t.Fatalf("Expected TTL to be between 1 and 3, found: %d", ttl) } // Let it refresh once to make sure that works time.Sleep(time.Duration(ttl) * time.Second) ttl = -1 deadline = time.Now().Add(3 * time.Second) for time.Now().Before(deadline) { resp, _ := client.Get(nodePath, false, false) if resp != nil && resp.Node.Dir { ttl = resp.Node.TTL break } time.Sleep(250 * time.Millisecond) } if ttl == -1 { t.Fatalf("Node path %s not found.", nodePath) } // Now remove the node out from underneath the refresher to cause it to fail if _, err := client.Delete(nodePath, true); err != nil { t.Fatalf("Unexpected error deleting %s: %+v", nodePath, err) } select { case <-runDone: // success! run exited case <-time.After(5 * time.Second): t.Fatal("Consumer didn't exit even though node directory disappeared!") } }
// TestNodeCleanup ensures the coordinator properly cleans up its node entry // upon exit. func TestNodeCleanup(t *testing.T) { t.Parallel() c1, conf1 := setupEtcd(t) if err := c1.Init(newCtx(t, "coordinator1")); err != nil { t.Fatalf("Unexpected error initialzing coordinator: %v", err) } conf2 := conf1.Copy() conf2.Name = "node2" c2, _ := NewEtcdCoordinator(conf2) if err := c2.Init(newCtx(t, "coordinator2")); err != nil { t.Fatalf("Unexpected error initialzing coordinator: %v", err) } defer c1.Close() defer c2.Close() // Make sure node directories were created client, _ := testutil.NewEtcdClient(t) c1nodep := path.Join(conf1.Namespace, NodesPath, conf1.Name) resp, err := client.Get(c1nodep, false, false) if err != nil { t.Fatalf("Error retrieving node key from etcd: %v", err) } if !resp.Node.Dir { t.Error(resp.Node.Key + " isn't a directory!") } c2nodep := path.Join(conf2.Namespace, NodesPath, conf2.Name) resp, err = client.Get(c2nodep, false, false) if err != nil { t.Fatalf("Error retrieving node key from etcd: %v", err) } if !resp.Node.Dir { t.Error(resp.Node.Key + " isn't a directory!") } // Shutdown one and make sure its node directory is gone c1.Close() resp, err = client.Get(c1nodep, false, false) if err != nil { if eerr, ok := err.(*etcd.EtcdError); !ok { t.Errorf("Unexpected error %T retrieving node key from etcd: %v", err, err) } else { if eerr.ErrorCode != EcodeKeyNotFound { t.Errorf("Expected error code %d but found %v", eerr.ErrorCode, err) } // error code was ok! (100) } } else { t.Errorf("Expected Not Found error, but directory still exists!") } // Make sure c2 is untouched resp, err = client.Get(c2nodep, false, false) if err != nil { t.Fatalf("Error retrieving node key from etcd: %v", err) } if !resp.Node.Dir { t.Error(resp.Node.Key + " isn't a directory!") } }