// TestManagerRestart starts a job within a manager, stops the // manager, restarts a manager, and stops the job. func TestManagerRestart(t *testing.T) { ts := topo.Server{Impl: memorytopo.NewMemoryTopo([]string{"cell1"})} m := NewManager(ts) // Run the manager in the background. wg, cancel := startManager(t, m) // Create a Sleep job. uuid, err := m.Create(context.Background(), sleepFactoryName, []string{"-duration", "60"}) if err != nil { t.Fatalf("cannot create sleep workflow: %v", err) } // Start the job. if err := m.Start(context.Background(), uuid); err != nil { t.Fatalf("cannot start sleep workflow: %v", err) } // Stop the manager. cancel() wg.Wait() // Make sure the workflow is still in the topo server. This // validates that interrupting the Manager leaves the jobs in // the right state in the topo server. wi, err := ts.GetWorkflow(context.Background(), uuid) if err != nil { t.Fatalf("cannot read workflow %v: %v", uuid, err) } if wi.State != workflowpb.WorkflowState_Running { t.Fatalf("unexpected workflow state %v was expecting %v", wi.State, workflowpb.WorkflowState_Running) } // Restart the manager. wg, cancel = startManager(t, m) // Make sure the job is in there shortly. timeout := 0 for { tree, err := m.NodeManager().GetFullTree() if err != nil { t.Fatalf("cannot get full node tree: %v", err) } if strings.Contains(string(tree), uuid) { break } timeout++ if timeout == 1000 { t.Fatalf("failed to wait for full node tree to appear: %v", string(tree)) } time.Sleep(time.Millisecond) } // Stop the job. Note Stop() waits until the background go // routine that saves the job is done, so when we return from // this call, the job is saved with the right updated State // inside the topo server. if err := m.Stop(context.Background(), uuid); err != nil { t.Fatalf("cannot stop sleep workflow: %v", err) } // And stop the manager. cancel() wg.Wait() // Make sure the workflow is stopped in the topo server. wi, err = ts.GetWorkflow(context.Background(), uuid) if err != nil { t.Fatalf("cannot read workflow %v: %v", uuid, err) } if wi.State != workflowpb.WorkflowState_Done { t.Fatalf("unexpected workflow state %v was expecting %v", wi.State, workflowpb.WorkflowState_Running) } if !strings.Contains(wi.Error, "canceled") { t.Errorf("invalid workflow error: %v", wi.Error) } }