示例#1
0
// TestManagerRestart verifies that a running workflow survives a manager
// restart. It starts a job within a manager, stops the manager, checks
// that the job is still recorded as Running in the topo server, restarts
// the manager, waits for the job to show up in the node tree again, stops
// the job, and finally checks that the job is recorded as Done with a
// "canceled" error.
func TestManagerRestart(t *testing.T) {
	// The restarted manager polls the node tree at most maxAttempts
	// times, sleeping pollInterval between attempts, before giving up.
	const (
		maxAttempts  = 1000
		pollInterval = time.Millisecond
	)

	ctx := context.Background()
	ts := topo.Server{Impl: memorytopo.NewMemoryTopo([]string{"cell1"})}
	m := NewManager(ts)

	// Run the manager in the background.
	wg, cancel := startManager(t, m)

	// Create a Sleep job.
	uuid, err := m.Create(ctx, sleepFactoryName, []string{"-duration", "60"})
	if err != nil {
		t.Fatalf("cannot create sleep workflow: %v", err)
	}

	// Start the job.
	if err := m.Start(ctx, uuid); err != nil {
		t.Fatalf("cannot start sleep workflow: %v", err)
	}

	// Stop the manager.
	cancel()
	wg.Wait()

	// Make sure the workflow is still in the topo server.  This
	// validates that interrupting the Manager leaves the jobs in
	// the right state in the topo server.
	wi, err := ts.GetWorkflow(ctx, uuid)
	if err != nil {
		t.Fatalf("cannot read workflow %v: %v", uuid, err)
	}
	if wi.State != workflowpb.WorkflowState_Running {
		t.Fatalf("unexpected workflow state %v was expecting %v", wi.State, workflowpb.WorkflowState_Running)
	}

	// Restart the manager.
	wg, cancel = startManager(t, m)

	// Make sure the job is in there shortly: poll the node tree until
	// it mentions the workflow's uuid, or fail once the attempt budget
	// is exhausted.
	for attempt := 1; ; attempt++ {
		tree, err := m.NodeManager().GetFullTree()
		if err != nil {
			t.Fatalf("cannot get full node tree: %v", err)
		}
		if strings.Contains(string(tree), uuid) {
			break
		}
		if attempt == maxAttempts {
			t.Fatalf("failed to wait for full node tree to appear: %v", string(tree))
		}
		time.Sleep(pollInterval)
	}

	// Stop the job. Note Stop() waits until the background go
	// routine that saves the job is done, so when we return from
	// this call, the job is saved with the right updated State
	// inside the topo server.
	if err := m.Stop(ctx, uuid); err != nil {
		t.Fatalf("cannot stop sleep workflow: %v", err)
	}

	// And stop the manager.
	cancel()
	wg.Wait()

	// Make sure the workflow is stopped in the topo server.
	wi, err = ts.GetWorkflow(ctx, uuid)
	if err != nil {
		t.Fatalf("cannot read workflow %v: %v", uuid, err)
	}
	if wi.State != workflowpb.WorkflowState_Done {
		// Report the state actually being checked for (Done).
		t.Fatalf("unexpected workflow state %v was expecting %v", wi.State, workflowpb.WorkflowState_Done)
	}
	// Stopping the job is expected to surface as a cancellation error.
	if !strings.Contains(wi.Error, "canceled") {
		t.Errorf("invalid workflow error: %v", wi.Error)
	}
}