// Fair balancer shouldn't consider a shutting-down node
// See https://github.com/lytics/metafora/issues/92
func TestFairBalancerShutdown(t *testing.T) {
	coord1, conf1 := setupEtcd(t)
	conf2 := conf1.Copy()
	conf2.Name = "node2"
	coord2, _ := NewEtcdCoordinator(conf2)

	cli := NewClient(conf1.Namespace, conf1.Hosts)

	// This handler returns immediately once stopped
	h1 := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("H1 Starting %s", task.ID())
		<-stop
		metafora.Debugf("H1 Stopping %s", task.ID())
		return false // never done
	})

	// Block forever on a single task
	stop2 := make(chan struct{})
	stopr := make(chan chan struct{}, 1)
	stopr <- stop2
	h2 := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("H2 Starting %s", task.ID())
		blockchan, ok := <-stopr
		if ok {
			<-blockchan
		}
		<-stop
		metafora.Debugf("H2 Stopping %s", task.ID())
		return false // never done
	})

	// Create two consumers
	b1 := NewFairBalancer(conf1)
	con1, err := metafora.NewConsumer(coord1, h1, b1)
	if err != nil {
		t.Fatal(err)
	}

	b2 := NewFairBalancer(conf2)
	con2, err := metafora.NewConsumer(coord2, h2, b2)
	if err != nil {
		t.Fatal(err)
	}

	// Start the first and let it claim a bunch of tasks
	go con1.Run()
	defer con1.Shutdown()
	cli.SubmitTask(DefaultTaskFunc("t1", ""))
	cli.SubmitTask(DefaultTaskFunc("t2", ""))
	cli.SubmitTask(DefaultTaskFunc("t3", ""))
	cli.SubmitTask(DefaultTaskFunc("t4", ""))
	cli.SubmitTask(DefaultTaskFunc("t5", ""))
	cli.SubmitTask(DefaultTaskFunc("t6", ""))

	time.Sleep(500 * time.Millisecond)

	if len(con1.Tasks()) != 6 {
		t.Fatalf("con1 should have claimed 6 tasks: %d", len(con1.Tasks()))
	}

	// Start the second consumer and force the 1st to rebalance
	go con2.Run()

	close(stopr)

	// Wait for the node to start up and register
	time.Sleep(500 * time.Millisecond)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks := con1.Tasks()
	c2Tasks := con2.Tasks()
	if len(c1Tasks) != 4 || len(c2Tasks) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks), len(c2Tasks))
	}

	// Make sure that balancing the other node does nothing
	cli.SubmitCommand("node2", metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks2 := con1.Tasks()
	c2Tasks2 := con2.Tasks()
	if len(c1Tasks2) != 4 || len(c2Tasks2) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks2), len(c2Tasks2))
	}
	for i := 0; i < 4; i++ {
		if c1Tasks[i] != c1Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c1Tasks[i], c1Tasks2[i])
		}
	}
	for i := 0; i < 2; i++ {
		if c2Tasks[i] != c2Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c2Tasks[i], c2Tasks2[i])
		}
	}

	// Second consumer should block on a single task forever.
	// Rebalancing the first node should then cause it to pick up all but
	// one task.
	c2stop := make(chan struct{})
	go func() {
		con2.Shutdown()
		close(c2stop)
	}()

	time.Sleep(500 * time.Millisecond)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks3 := con1.Tasks()
	c2Tasks3 := con2.Tasks()
	if len(c1Tasks3) != 5 || len(c2Tasks3) != 1 {
		t.Fatalf("Expected consumers to have 5|1 tasks: %d|%d", len(c1Tasks3), len(c2Tasks3))
	}

	// Now stop the blocking task, rebalance, and make sure the first node
	// picked up the remaining task.
	close(stop2)
	time.Sleep(500 * time.Millisecond)

	// Consumer 2 should stop now
	<-c2stop

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	// con2 is out of the picture. con1 has all the tasks.
	c1Tasks4 := con1.Tasks()
	c2Tasks4 := con2.Tasks()
	if len(c1Tasks4) != 6 || len(c2Tasks4) != 0 {
		t.Fatalf("Expected consumers to have 6|0 tasks: %d|%d", len(c1Tasks4), len(c2Tasks4))
	}
}
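// The fixed time.Sleep calls in these tests give etcd watches, claims, and
// rebalances time to propagate before asserting on task counts. A minimal
// polling sketch like the one below could replace those worst-case sleeps.
// waitForTaskCount is a hypothetical helper, not part of the metafora API;
// it only uses Consumer.Tasks(), which the assertions above already rely on.
func waitForTaskCount(c *metafora.Consumer, want int, timeout time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if len(c.Tasks()) == want {
			return true
		}
		// Poll briefly instead of sleeping for a fixed duration.
		time.Sleep(50 * time.Millisecond)
	}
	return len(c.Tasks()) == want
}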
// TestAll is an integration test for all of m_etcd's components.
//
// While huge integration tests like this are rarely desirable as they can be
// overly fragile and complex, I found myself manually repeating the tests I've
// automated here over and over. This is far more reliable than expecting
// developers to do ad hoc testing of all of the m_etcd package's features.
func TestAll(t *testing.T) {
	etcdc, hosts := testutil.NewEtcdClient(t)
	t.Parallel()

	etcdc.Delete("test-a", recursive)
	etcdc.Delete("test-b", recursive)

	h := func(task metafora.Task, cmds <-chan *statemachine.Message) *statemachine.Message {
		cmd := <-cmds
		if task.ID() == "error-test" {
			return statemachine.ErrorMessage(errors.New("error-test"))
		}
		return cmd
	}

	newC := func(name, ns string) *metafora.Consumer {
		conf := m_etcd.NewConfig(name, ns, hosts)
		conf.Name = name
		coord, hf, bal, err := m_etcd.New(conf, h)
		if err != nil {
			t.Fatalf("Error creating new etcd stack: %v", err)
		}
		cons, err := metafora.NewConsumer(coord, hf, bal)
		if err != nil {
			t.Fatalf("Error creating consumer %s:%s: %v", ns, name, err)
		}
		go cons.Run()
		return cons
	}

	// Start 4 consumers, 2 per namespace
	cons1a := newC("node1", "test-a")
	cons2a := newC("node2", "test-a")
	cons1b := newC("node1", "test-b")
	cons2b := newC("node2", "test-b")

	// Create clients and start some tests
	cliA := m_etcd.NewClient("test-a", hosts)
	cliB := m_etcd.NewClient("test-b", hosts)

	if err := cliA.SubmitTask(m_etcd.DefaultTaskFunc("task1", "")); err != nil {
		t.Fatalf("Error submitting task1 to a: %v", err)
	}
	if err := cliB.SubmitTask(m_etcd.DefaultTaskFunc("task1", "")); err != nil {
		t.Fatalf("Error submitting task1 to b: %v", err)
	}

	// Give consumers a bit to pick up tasks
	time.Sleep(250 * time.Millisecond)

	assertRunning := func(tid string, cons ...*metafora.Consumer) {
		found := false
		for _, c := range cons {
			tasks := c.Tasks()
			if len(tasks) > 0 && found {
				t.Fatal("Task already found running but another task is running on a different consumer")
			}
			if len(tasks) > 1 {
				t.Fatalf("Expected at most 1 task, but found: %d", len(tasks))
			}
			if len(tasks) == 1 && tasks[0].Task().ID() == tid {
				found = true
			}
		}
		if !found {
			t.Fatalf("Could not find task=%q", tid)
		}
	}

	assertRunning("task1", cons1a, cons2a)
	assertRunning("task1", cons1b, cons2b)

	// Kill task1 in A
	{
		cmdr := m_etcd.NewCommander("test-a", etcdc)
		if err := cmdr.Send("task1", statemachine.KillMessage()); err != nil {
			t.Fatalf("Error sending kill to task1: %v", err)
		}
		time.Sleep(250 * time.Millisecond)

		for _, c := range []*metafora.Consumer{cons1a, cons2a} {
			tasks := c.Tasks()
			if len(tasks) != 0 {
				t.Fatalf("Expected no tasks but found: %d", len(tasks))
			}
		}
	}

	// Submit a bunch of tasks to A
	{
		tasks := []string{"task2", "task3", "task4", "task5", "task6", "task7"}
		for _, tid := range tasks {
			if err := cliA.SubmitTask(m_etcd.DefaultTaskFunc(tid, "")); err != nil {
				t.Fatalf("Error submitting task=%q to A: %v", tid, err)
			}
		}

		// Give them time to start
		time.Sleep(800 * time.Millisecond)

		// Ensure they're balanced
		if err := cliA.SubmitCommand("node1", metafora.CommandBalance()); err != nil {
			t.Fatalf("Error submitting balance command to cons1a: %v", err)
		}
		time.Sleep(800 * time.Millisecond)
		if err := cliA.SubmitCommand("node2", metafora.CommandBalance()); err != nil {
			t.Fatalf("Error submitting balance command to cons2a: %v", err)
		}

		a1tasks := cons1a.Tasks()
		a2tasks := cons2a.Tasks()
		for _, task := range a1tasks {
			metafora.Debug("A1: ", task.Task(), " - ", task.Stopped().IsZero())
		}
		for _, task := range a2tasks {
			metafora.Debug("A2: ", task.Task(), " - ", task.Stopped().IsZero())
		}
		time.Sleep(800 * time.Millisecond)

		a1tasks = cons1a.Tasks()
		a2tasks = cons2a.Tasks()
		if len(a1tasks) < 2 || len(a1tasks) > 4 || len(a2tasks) < 2 || len(a2tasks) > 4 {
			t.Fatalf("Namespace A isn't fairly balanced: node1: %d; node2: %d", len(a1tasks), len(a2tasks))
		}

		// Shutting down a consumer should migrate all tasks to the other
		cons1a.Shutdown()
		time.Sleep(800 * time.Millisecond)

		a2tasks = cons2a.Tasks()
		if len(a2tasks) != len(tasks) {
			t.Fatalf("Consumer 2a should have received all %d tasks but only has %d.", len(tasks), len(a2tasks))
		}
	}

	// Use Namespace B to check Error state handling
	{
		tasks := []string{"task8", "error-test"}
		for _, tid := range tasks {
			if err := cliB.SubmitTask(m_etcd.DefaultTaskFunc(tid, "")); err != nil {
				t.Fatalf("Error submitting task=%q to B: %v", tid, err)
			}
		}

		// Give them time to start
		time.Sleep(time.Second)

		n := len(cons1b.Tasks()) + len(cons2b.Tasks())
		if n != 3 {
			t.Fatalf("Expected B to be running 3 tasks but found %d", n)
		}

		// Resuming error-test DefaultErrMax*2 times should cause it to fail
		cmdr := m_etcd.NewCommander("test-b", etcdc)
		for i := 0; i < statemachine.DefaultErrMax*2; i++ {
			if err := cmdr.Send("error-test", statemachine.RunMessage()); err != nil {
				t.Fatalf("Unexpected error resuming error-test in B: %v", err)
			}
			time.Sleep(500 * time.Millisecond)
		}

		n = len(cons1b.Tasks()) + len(cons2b.Tasks())
		if n != 2 {
			t.Fatalf("Expected B to be running 2 tasks but found %d", n)
		}

		// Resubmitting a failed task shouldn't error but also shouldn't run.
		if err := cliB.SubmitTask(m_etcd.DefaultTaskFunc("error-test", "")); err != nil {
			t.Fatalf("Error resubmitting error-test task to B: %v", err)
		}

		// Give the statemachine a moment to load the initial state and exit
		time.Sleep(time.Second)

		n = len(cons1b.Tasks()) + len(cons2b.Tasks())
		if n != 2 {
			t.Fatalf("Expected B to be running 2 tasks but found %d", n)
		}
	}

	// Shutdown
	cons2a.Shutdown()
	cons1b.Shutdown()
	cons2b.Shutdown()

	// Make sure everything is cleaned up
	respA, err := etcdc.Get("/test-a/tasks", true, true)
	if err != nil {
		t.Fatalf("Error getting tasks from etcd: %v", err)
	}
	respB, err := etcdc.Get("/test-b/tasks", true, true)
	if err != nil {
		t.Fatalf("Error getting tasks from etcd: %v", err)
	}
	nodes := []*etcd.Node{}
	nodes = append(nodes, respA.Node.Nodes...)
	nodes = append(nodes, respB.Node.Nodes...)
	for _, node := range nodes {
		if len(node.Nodes) > 1 {
			t.Fatalf("%s has %d (>1) nodes. First 2: %s, %s", node.Key, len(node.Nodes), node.Nodes[0].Key, node.Nodes[1].Key)
		}
	}
}
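// The statemachine handler wired into TestAll above is close to the smallest
// useful one: it blocks until a command arrives and returns it, which tells
// the state machine to apply that transition (or returns an ErrorMessage for
// the error-test task). The sketch below assumes the same handler signature
// TestAll's h uses and is illustrative only; workingHandler is a hypothetical
// name showing how a handler that does work between commands might look.
func workingHandler(task metafora.Task, cmds <-chan *statemachine.Message) *statemachine.Message {
	for {
		select {
		case cmd := <-cmds:
			// Return the received command so the state machine applies it
			// (pause, kill, etc.), mirroring TestAll's handler.
			return cmd
		case <-time.After(time.Second):
			// A real handler would perform a unit of work here before
			// checking for commands again.
		}
	}
}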
func TestFairBalancer(t *testing.T) {
	t.Parallel()
	coord1, conf1 := setupEtcd(t)
	conf2 := conf1.Copy()
	conf2.Name = "coord2"
	coord2, _ := NewEtcdCoordinator(conf2)

	cli := NewClient(conf1.Namespace, conf1.Hosts)

	h := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("Starting %s", task.ID())
		<-stop
		metafora.Debugf("Stopping %s", task.ID())
		return false // never done
	})

	// Create two consumers
	b1 := NewFairBalancer(conf1)
	con1, err := metafora.NewConsumer(coord1, h, b1)
	if err != nil {
		t.Fatal(err)
	}

	b2 := NewFairBalancer(conf2)
	con2, err := metafora.NewConsumer(coord2, h, b2)
	if err != nil {
		t.Fatal(err)
	}

	// Start the first and let it claim a bunch of tasks
	go con1.Run()
	defer con1.Shutdown()
	cli.SubmitTask(DefaultTaskFunc("t1", ""))
	cli.SubmitTask(DefaultTaskFunc("t2", ""))
	cli.SubmitTask(DefaultTaskFunc("t3", ""))
	cli.SubmitTask(DefaultTaskFunc("t4", ""))
	cli.SubmitTask(DefaultTaskFunc("t5", ""))
	cli.SubmitTask(DefaultTaskFunc("t6", ""))

	time.Sleep(1 * time.Second)

	if len(con1.Tasks()) != 6 {
		t.Fatalf("con1 should have claimed 6 tasks: %d", len(con1.Tasks()))
	}

	// Start the second consumer and force the 1st to rebalance
	go con2.Run()
	defer con2.Shutdown()

	// Wait for the node to start up and register
	time.Sleep(1 * time.Second)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks := con1.Tasks()
	c2Tasks := con2.Tasks()
	if len(c1Tasks) != 4 || len(c2Tasks) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks), len(c2Tasks))
	}

	// Finally make sure that balancing the other node does nothing
	cli.SubmitCommand(conf2.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks2 := con1.Tasks()
	c2Tasks2 := con2.Tasks()
	if len(c1Tasks2) != 4 || len(c2Tasks2) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks2), len(c2Tasks2))
	}
	for i := 0; i < 4; i++ {
		if c1Tasks[i] != c1Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c1Tasks[i], c1Tasks2[i])
		}
	}
	for i := 0; i < 2; i++ {
		if c2Tasks[i] != c2Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c2Tasks[i], c2Tasks2[i])
		}
	}
}
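// Both fair balancer tests submit tasks t1 through t6 one call at a time and
// ignore the error SubmitTask can return. A small helper along these lines
// would keep the task list in one place and surface submission failures.
// submitTasks and taskSubmitter are hypothetical names, not metafora API;
// the interface only captures the SubmitTask method the tests above call.
type taskSubmitter interface {
	SubmitTask(metafora.Task) error
}

func submitTasks(t *testing.T, cli taskSubmitter, ids ...string) {
	for _, id := range ids {
		if err := cli.SubmitTask(DefaultTaskFunc(id, "")); err != nil {
			t.Fatalf("Error submitting task=%q: %v", id, err)
		}
	}
}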