func TestHealthcheck(t *testing.T) { c := containerConfig{ task: &api.Task{ Spec: api.TaskSpec{Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Healthcheck: &api.HealthConfig{ Test: []string{"a", "b", "c"}, Interval: gogotypes.DurationProto(time.Second), Timeout: gogotypes.DurationProto(time.Minute), Retries: 10, }, }, }}, }, } config := c.config() expected := &enginecontainer.HealthConfig{ Test: []string{"a", "b", "c"}, Interval: time.Second, Timeout: time.Minute, Retries: 10, } if !reflect.DeepEqual(config.Healthcheck, expected) { t.Fatalf("expected %#v, got %#v", expected, config.Healthcheck) } }
func TestValidateClusterSpec(t *testing.T) { type BadClusterSpec struct { spec *api.ClusterSpec c codes.Code } for _, bad := range []BadClusterSpec{ { spec: nil, c: codes.InvalidArgument, }, { spec: &api.ClusterSpec{ Annotations: api.Annotations{ Name: "name", }, CAConfig: api.CAConfig{ NodeCertExpiry: gogotypes.DurationProto(29 * time.Minute), }, }, c: codes.InvalidArgument, }, { spec: &api.ClusterSpec{ Annotations: api.Annotations{ Name: "name", }, Dispatcher: api.DispatcherConfig{ HeartbeatPeriod: gogotypes.DurationProto(-29 * time.Minute), }, }, c: codes.InvalidArgument, }, } { err := validateClusterSpec(bad.spec) assert.Error(t, err) assert.Equal(t, bad.c, grpc.Code(err)) } for _, good := range []*api.ClusterSpec{ createClusterSpec("name"), } { err := validateClusterSpec(good) assert.NoError(t, err) } }
// createClusterSpec builds a minimal valid cluster spec with the given name
// and the default node certificate expiration.
func createClusterSpec(name string) *api.ClusterSpec {
	spec := &api.ClusterSpec{
		Annotations: api.Annotations{Name: name},
	}
	spec.CAConfig = api.CAConfig{
		NodeCertExpiry: gogotypes.DurationProto(ca.DefaultNodeCertExpiration),
	}
	return spec
}
func TestValidateRestartPolicy(t *testing.T) { bad := []*api.RestartPolicy{ { Delay: gogotypes.DurationProto(time.Duration(-1 * time.Second)), Window: gogotypes.DurationProto(time.Duration(-1 * time.Second)), }, { Delay: gogotypes.DurationProto(time.Duration(20 * time.Second)), Window: gogotypes.DurationProto(time.Duration(-4 * time.Second)), }, } good := []*api.RestartPolicy{ { Delay: gogotypes.DurationProto(time.Duration(10 * time.Second)), Window: gogotypes.DurationProto(time.Duration(1 * time.Second)), }, } for _, b := range bad { err := validateRestartPolicy(b) assert.Error(t, err) assert.Equal(t, codes.InvalidArgument, grpc.Code(err)) } for _, g := range good { assert.NoError(t, validateRestartPolicy(g)) } }
func genTask(t *testing.T) *api.Task { const ( nodeID = "dockerexec-test-node-id" serviceID = "dockerexec-test-service" reference = "stevvooe/foo:latest" ) return &api.Task{ ID: identity.NewID(), ServiceID: serviceID, NodeID: nodeID, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: reference, StopGracePeriod: gogotypes.DurationProto(10 * time.Second), }, }, }, } }
// defaultClusterObject creates a default cluster. func defaultClusterObject( clusterID string, initialCAConfig api.CAConfig, raftCfg api.RaftConfig, encryptionConfig api.EncryptionConfig, initialUnlockKeys []*api.EncryptionKey, rootCA *ca.RootCA) *api.Cluster { return &api.Cluster{ ID: clusterID, Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: store.DefaultClusterName, }, Orchestration: api.OrchestrationConfig{ TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit, }, Dispatcher: api.DispatcherConfig{ HeartbeatPeriod: gogotypes.DurationProto(dispatcher.DefaultHeartBeatPeriod), }, Raft: raftCfg, CAConfig: initialCAConfig, EncryptionConfig: encryptionConfig, }, RootCA: api.RootCA{ CAKey: rootCA.Key, CACert: rootCA.Cert, CACertHash: rootCA.Digest.String(), JoinTokens: api.JoinTokens{ Worker: ca.GenerateJoinToken(rootCA), Manager: ca.GenerateJoinToken(rootCA), }, }, UnlockKeys: initialUnlockKeys, } }
// DefaultCAConfig returns the default CA Config, with a default expiration. func DefaultCAConfig() api.CAConfig { return api.CAConfig{ NodeCertExpiry: gogotypes.DurationProto(DefaultNodeCertExpiration), } }
// TestOrchestratorRestartWindow exercises the restart policy's
// MaxAttempts/Window bookkeeping: a failed task may be restarted at most
// MaxAttempts times within Window, and becomes restartable again once the
// window has elapsed.
func TestOrchestratorRestartWindow(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
				Task: api.TaskSpec{
					Restart: &api.RestartPolicy{
						// Allow one restart per 500ms window, with a
						// 100ms restart delay.
						Condition:   api.RestartOnAny,
						Delay:       gogotypes.DurationProto(100 * time.Millisecond),
						MaxAttempts: 1,
						Window:      gogotypes.DurationProto(500 * time.Millisecond),
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail the first task. Confirm that it gets restarted.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	before := time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)

	// The replacement task is created in READY state first.
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	observedTask3 := testutils.WatchTaskCreate(t, watch)
	testutils.Expect(t, watch, state.EventCommit{})
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	observedTask4 := testutils.WatchTaskUpdate(t, watch)
	after := time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatal("restart delay should have elapsed")
	}

	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")

	// Fail the second task. Confirm that it gets restarted.
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	observedTask5 := testutils.WatchTaskCreate(t, watch)
	testutils.Expect(t, watch, state.EventCommit{})
	assert.Equal(t, observedTask5.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask5.DesiredState, api.TaskStateReady)
	assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1")

	observedTask6 := testutils.WatchTaskUpdate(t, watch) // task gets started after a delay
	testutils.Expect(t, watch, state.EventCommit{})
	assert.Equal(t, observedTask6.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1")

	// Fail the first instance again. It should not be restarted.
	updatedTask1 = observedTask3.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})

	// No further events should arrive: MaxAttempts is exhausted inside
	// the window, so no replacement task may be created.
	select {
	case <-watch:
		t.Fatal("got unexpected event")
	case <-time.After(200 * time.Millisecond):
	}

	// Sleep past the 500ms restart window so attempts are allowed again.
	time.Sleep(time.Second)

	// Fail the second instance again. It should get restarted because
	// enough time has elapsed since the last restarts.
	updatedTask2 = observedTask5.Copy()
	updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed}
	before = time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	observedTask7 := testutils.WatchTaskCreate(t, watch)
	testutils.Expect(t, watch, state.EventCommit{})
	assert.Equal(t, observedTask7.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask7.DesiredState, api.TaskStateReady)

	observedTask8 := testutils.WatchTaskUpdate(t, watch)
	after = time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatal("restart delay should have elapsed")
	}

	assert.Equal(t, observedTask8.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask8.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask8.ServiceAnnotations.Name, "name1")
}
// TestOrchestratorRestartDelay checks that a task failed under a restart
// policy with a 100ms delay is first recreated in READY desired state, and
// is only moved to RUNNING after the delay has elapsed.
func TestOrchestratorRestartDelay(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
					Restart: &api.RestartPolicy{
						// Restart on any exit, after a 100ms delay.
						Condition: api.RestartOnAny,
						Delay:     gogotypes.DurationProto(100 * time.Millisecond),
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail the first task. Confirm that it gets restarted.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	before := time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)

	// The replacement task is created in READY desired state first.
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	observedTask3 := testutils.WatchTaskCreate(t, watch)
	testutils.Expect(t, watch, state.EventCommit{})
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	observedTask4 := testutils.WatchTaskUpdate(t, watch)
	after := time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatalf("restart delay should have elapsed. Got: %v", after.Sub(before))
	}

	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
}
// TestOrchestratorRestartOnAny checks that with Condition RestartOnAny and
// no restart delay, both a FAILED task and a COMPLETED task are replaced
// with new tasks that are moved to RUNNING.
func TestOrchestratorRestartOnAny(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
					Restart: &api.RestartPolicy{
						// Restart immediately on any exit.
						Condition: api.RestartOnAny,
						Delay:     gogotypes.DurationProto(0),
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask1 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail the first task. Confirm that it gets restarted.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	observedTask3 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	// With zero delay the replacement is moved to RUNNING right away.
	testutils.Expect(t, watch, state.EventCommit{})
	observedTask4 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")

	// Mark the second task as completed. Confirm that it gets restarted.
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	testutils.Expect(t, watch, state.EventCommit{})
	testutils.Expect(t, watch, state.EventUpdateTask{})
	observedTask5 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask5.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1")

	testutils.Expect(t, watch, state.EventCommit{})
	observedTask6 := testutils.WatchTaskUpdate(t, watch)
	assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1")
}
// TestUpdaterRollback simulates a rolling update whose new tasks fail,
// verifying that with FailureAction ROLLBACK the service is rolled back to
// the previous spec (ROLLBACK_STARTED then ROLLBACK_COMPLETED), and that a
// rollback whose own recreated tasks also fail ends in ROLLBACK_PAUSED.
// Failure injection is toggled via the failImage1/failImage2 atomics.
func TestUpdaterRollback(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	// Flags read by the failure-injection goroutine below; set with
	// atomic.StoreUint32 to start failing tasks of the matching image.
	var (
		failImage1 uint32
		failImage2 uint32
	)

	watchCreate, cancelCreate := state.Watch(s.WatchQueue(), state.EventCreateTask{})
	defer cancelCreate()

	watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), state.EventUpdateService{})
	defer cancelServiceUpdate()

	// Fail new tasks the updater tries to run
	watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancelUpdate()
	go func() {
		failedLast := false
		for {
			select {
			case e := <-watchUpdate:
				task := e.(state.EventUpdateTask).Task
				if task.DesiredState == task.Status.State {
					continue
				}
				if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed && task.Status.State != api.TaskStateRunning {
					err := s.Update(func(tx store.Tx) error {
						task = store.GetTask(tx, task.ID)
						// Never fail two image2 tasks in a row, so there's a mix of
						// failed and successful tasks for the rollback.
						if task.Spec.GetContainer().Image == "image1" && atomic.LoadUint32(&failImage1) == 1 {
							task.Status.State = api.TaskStateFailed
							failedLast = true
						} else if task.Spec.GetContainer().Image == "image2" && atomic.LoadUint32(&failImage2) == 1 && !failedLast {
							task.Status.State = api.TaskStateFailed
							failedLast = true
						} else {
							task.Status.State = task.DesiredState
							failedLast = false
						}
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				} else if task.DesiredState > api.TaskStateRunning {
					// Acknowledge shutdown requests so the updater can
					// make progress.
					err := s.Update(func(tx store.Tx) error {
						task = store.GetTask(tx, task.ID)
						task.Status.State = task.DesiredState
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				}
			}
		}
	}()

	// Create a service with four replicas specified before the orchestrator
	// is started. This should result in four tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		s1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{
							Image: "image1",
						},
					},
					Restart: &api.RestartPolicy{
						Condition: api.RestartOnNone,
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 4,
					},
				},
				Update: &api.UpdateConfig{
					// Roll back once more than 40% of the updated tasks
					// have failed within the monitoring period.
					FailureAction:   api.UpdateConfig_ROLLBACK,
					Parallelism:     1,
					Delay:           10 * time.Millisecond,
					Monitor:         gogotypes.DurationProto(500 * time.Millisecond),
					MaxFailureRatio: 0.4,
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	// Wait for the four initial tasks to be created with the old image.
	observedTask := testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Begin failing (some) image2 tasks before the update starts.
	atomic.StoreUint32(&failImage2, 1)

	// Start a rolling update
	err = s.Update(func(tx store.Tx) error {
		s1 := store.GetService(tx, "id1")
		require.NotNil(t, s1)
		s1.PreviousSpec = s1.Spec.Copy()
		s1.UpdateStatus = nil
		s1.Spec.Task.GetContainer().Image = "image2"
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Should see three tasks started, then a rollback

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	// Should get to the ROLLBACK_STARTED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus == nil {
			continue
		}
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			break
		}
	}

	// The rollback recreates the replaced tasks with the old image.
	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Should end up in ROLLBACK_COMPLETED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED {
			break
		}
	}

	// Now also fail the image1 tasks the rollback will create.
	atomic.StoreUint32(&failImage1, 1)

	// Repeat the rolling update but this time fail the tasks that the
	// rollback creates. It should end up in ROLLBACK_PAUSED.
	err = s.Update(func(tx store.Tx) error {
		s1 := store.GetService(tx, "id1")
		require.NotNil(t, s1)
		s1.PreviousSpec = s1.Spec.Copy()
		s1.UpdateStatus = nil
		s1.Spec.Task.GetContainer().Image = "image2"
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Should see three tasks started, then a rollback

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	// Should get to the ROLLBACK_STARTED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus == nil {
			continue
		}
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			break
		}
	}

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Should end up in ROLLBACK_PAUSED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED {
			break
		}
	}
}
func TestTaskHistory(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() assert.NoError(t, s.Update(func(tx store.Tx) error { store.CreateCluster(tx, &api.Cluster{ ID: identity.NewID(), Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: store.DefaultClusterName, }, Orchestration: api.OrchestrationConfig{ TaskHistoryRetentionLimit: 2, }, }, }) return nil })) taskReaper := taskreaper.New(s) defer taskReaper.Stop() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/) defer cancel() // Create a service with two instances specified before the orchestrator is // started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { j1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, }, Task: api.TaskSpec{ Restart: &api.RestartPolicy{ Condition: api.RestartOnAny, Delay: gogotypes.DurationProto(0), }, }, }, } assert.NoError(t, store.CreateService(tx, j1)) return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() go taskReaper.Run() observedTask1 := testutils.WatchTaskCreate(t, watch) assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") observedTask2 := testutils.WatchTaskCreate(t, watch) assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") // Fail both tasks. They should both get restarted. 
updatedTask1 := observedTask1.Copy() updatedTask1.Status.State = api.TaskStateFailed updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"} updatedTask2 := observedTask2.Copy() updatedTask2.Status.State = api.TaskStateFailed updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"} err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask1)) assert.NoError(t, store.UpdateTask(tx, updatedTask2)) return nil }) testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, state.EventUpdateTask{}) testutils.Expect(t, watch, state.EventUpdateTask{}) testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, state.EventUpdateTask{}) observedTask3 := testutils.WatchTaskCreate(t, watch) assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") testutils.Expect(t, watch, state.EventUpdateTask{}) observedTask4 := testutils.WatchTaskCreate(t, watch) assert.Equal(t, observedTask4.Status.State, api.TaskStateNew) assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") // Fail these replacement tasks. Since TaskHistory is set to 2, this // should cause the oldest tasks for each instance to get deleted. 
updatedTask3 := observedTask3.Copy() updatedTask3.Status.State = api.TaskStateFailed updatedTask4 := observedTask4.Copy() updatedTask4.Status.State = api.TaskStateFailed err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask3)) assert.NoError(t, store.UpdateTask(tx, updatedTask4)) return nil }) deletedTask1 := testutils.WatchTaskDelete(t, watch) deletedTask2 := testutils.WatchTaskDelete(t, watch) assert.Equal(t, api.TaskStateFailed, deletedTask1.Status.State) assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name) assert.Equal(t, api.TaskStateFailed, deletedTask2.Status.State) assert.Equal(t, "original", deletedTask2.ServiceAnnotations.Name) var foundTasks []*api.Task s.View(func(tx store.ReadTx) { foundTasks, err = store.FindTasks(tx, store.All) }) assert.NoError(t, err) assert.Len(t, foundTasks, 4) }
// TestUpdaterTaskTimeout verifies that the updater does not wait forever
// for an old task to reach a terminal state: the helper goroutine below
// deliberately never marks tasks dead, so replacement must happen once the
// restart supervisor's TaskTimeout (overridden to 100ms) expires.
func TestUpdaterTaskTimeout(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	// Move tasks to their desired state.
	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()
	go func() {
		for {
			select {
			case e := <-watch:
				task := e.(state.EventUpdateTask).Task
				err := s.Update(func(tx store.Tx) error {
					task = store.GetTask(tx, task.ID)
					// Explicitly do not set task state to
					// DEAD to trigger TaskTimeout
					if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateRunning {
						task.Status.State = api.TaskStateRunning
						return store.UpdateTask(tx, task)
					}
					return nil
				})
				assert.NoError(t, err)
			}
		}
	}()

	var instances uint64 = 3

	service := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Task: api.TaskSpec{
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{
						Image: "v:1",
					},
				},
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: instances,
				},
			},
			Update: &api.UpdateConfig{
				// avoid having Run block for a long time to watch for failures
				Monitor: gogotypes.DurationProto(50 * time.Millisecond),
			},
		},
	}

	// Create the service and its tasks, already in RUNNING state.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service))
		for i := uint64(0); i < instances; i++ {
			task := orchestrator.NewTask(nil, service, uint64(i), "")
			task.Status.State = api.TaskStateRunning
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	originalTasks := getRunnableSlotSlice(t, s, service)
	for _, slot := range originalTasks {
		for _, task := range slot {
			assert.Equal(t, "v:1", task.Spec.GetContainer().Image)
		}
	}

	before := time.Now()

	service.Spec.Task.GetContainer().Image = "v:2"
	updater := NewUpdater(s, restart.NewSupervisor(s), nil, service)
	// Override the default (1 minute) to speed up the test.
	updater.restarts.TaskTimeout = 100 * time.Millisecond
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks := getRunnableSlotSlice(t, s, service)
	for _, slot := range updatedTasks {
		for _, task := range slot {
			assert.Equal(t, "v:2", task.Spec.GetContainer().Image)
		}
	}

	after := time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatal("stop timeout should have elapsed")
	}
}
// TestUpdater walks a service through several rolling updates, checking
// that each updater run rewrites every task to the new image, and that the
// task LogDriver comes from the service spec when set, falling back to the
// cluster default otherwise.
func TestUpdater(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	// Move tasks to their desired state.
	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()
	go func() {
		for {
			select {
			case e := <-watch:
				task := e.(state.EventUpdateTask).Task
				if task.Status.State == task.DesiredState {
					continue
				}
				err := s.Update(func(tx store.Tx) error {
					task = store.GetTask(tx, task.ID)
					task.Status.State = task.DesiredState
					return store.UpdateTask(tx, task)
				})
				assert.NoError(t, err)
			}
		}
	}()

	instances := 3
	cluster := &api.Cluster{
		// test cluster configuration propagation to task creation.
		Spec: api.ClusterSpec{
			Annotations: api.Annotations{
				Name: "default",
			},
		},
	}

	service := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: uint64(instances),
				},
			},
			Task: api.TaskSpec{
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{
						Image: "v:1",
					},
				},
			},
			Update: &api.UpdateConfig{
				// avoid having Run block for a long time to watch for failures
				Monitor: gogotypes.DurationProto(50 * time.Millisecond),
			},
		},
	}

	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateCluster(tx, cluster))
		assert.NoError(t, store.CreateService(tx, service))
		for i := 0; i < instances; i++ {
			assert.NoError(t, store.CreateTask(tx, orchestrator.NewTask(cluster, service, uint64(i), "")))
		}
		return nil
	})
	assert.NoError(t, err)

	originalTasks := getRunnableSlotSlice(t, s, service)
	for _, slot := range originalTasks {
		for _, task := range slot {
			assert.Equal(t, "v:1", task.Spec.GetContainer().Image)
			assert.Nil(t, task.LogDriver) // should be left alone
		}
	}

	// Update 1: new image plus a task-level log driver.
	service.Spec.Task.GetContainer().Image = "v:2"
	service.Spec.Task.LogDriver = &api.Driver{Name: "tasklogdriver"}
	updater := NewUpdater(s, restart.NewSupervisor(s), cluster, service)
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks := getRunnableSlotSlice(t, s, service)
	for _, slot := range updatedTasks {
		for _, task := range slot {
			assert.Equal(t, "v:2", task.Spec.GetContainer().Image)
			assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // pick up from task
		}
	}

	// Update 2: the task-level log driver must win over the new cluster
	// default.
	service.Spec.Task.GetContainer().Image = "v:3"
	cluster.Spec.TaskDefaults.LogDriver = &api.Driver{Name: "clusterlogdriver"} // make cluster default logdriver.
	service.Spec.Update = &api.UpdateConfig{
		Parallelism: 1,
		Monitor:     gogotypes.DurationProto(50 * time.Millisecond),
	}
	updater = NewUpdater(s, restart.NewSupervisor(s), cluster, service)
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks = getRunnableSlotSlice(t, s, service)
	for _, slot := range updatedTasks {
		for _, task := range slot {
			assert.Equal(t, "v:3", task.Spec.GetContainer().Image)
			assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // still pick up from task
		}
	}

	// Update 3: with the task-level log driver cleared, tasks inherit the
	// cluster default.
	service.Spec.Task.GetContainer().Image = "v:4"
	service.Spec.Task.LogDriver = nil // use cluster default now.
	service.Spec.Update = &api.UpdateConfig{
		Parallelism: 1,
		Delay:       10 * time.Millisecond,
		Monitor:     gogotypes.DurationProto(50 * time.Millisecond),
	}
	updater = NewUpdater(s, restart.NewSupervisor(s), cluster, service)
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks = getRunnableSlotSlice(t, s, service)
	for _, slot := range updatedTasks {
		for _, task := range slot {
			assert.Equal(t, "v:4", task.Spec.GetContainer().Image)
			assert.Equal(t, cluster.Spec.TaskDefaults.LogDriver, task.LogDriver) // pick up from cluster
		}
	}
}
// TestUpdaterFailureAction verifies the updater's failure handling. With
// FailureAction PAUSE, an update whose new tasks all fail stops after the
// first replacement (Parallelism 1) and marks the service's update status
// PAUSED; re-running the updater while paused changes nothing. With
// FailureAction CONTINUE, the update pushes through the failures and
// replaces every task.
func TestUpdaterFailureAction(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	// Fail new tasks the updater tries to run
	// This goroutine plays the agents: any task the updater tries to bring
	// to RUNNING is reported FAILED, while tasks being shut down (desired
	// state past RUNNING) are acknowledged normally so old tasks can drain.
	// NOTE(review): no shutdown signal; relies on the deferred cancel()/
	// process exit — acceptable in a test.
	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()
	go func() {
		for {
			select {
			case e := <-watch:
				task := e.(state.EventUpdateTask).Task
				if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed {
					err := s.Update(func(tx store.Tx) error {
						// Re-read inside the transaction before mutating.
						task = store.GetTask(tx, task.ID)
						task.Status.State = api.TaskStateFailed
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				} else if task.DesiredState > api.TaskStateRunning {
					err := s.Update(func(tx store.Tx) error {
						task = store.GetTask(tx, task.ID)
						task.Status.State = task.DesiredState
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				}
			}
		}
	}()

	instances := 3

	cluster := &api.Cluster{
		Spec: api.ClusterSpec{
			Annotations: api.Annotations{
				Name: "default",
			},
		},
	}

	// FailureAction PAUSE with Parallelism 1: the first failed replacement
	// should halt the rollout.
	service := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: uint64(instances),
				},
			},
			Task: api.TaskSpec{
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{
						Image: "v:1",
					},
				},
			},
			Update: &api.UpdateConfig{
				FailureAction: api.UpdateConfig_PAUSE,
				Parallelism:   1,
				Delay:         500 * time.Millisecond,
				Monitor:       gogotypes.DurationProto(500 * time.Millisecond),
			},
		},
	}

	// Seed the store with the cluster, the service, and one task per replica.
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateCluster(tx, cluster))
		assert.NoError(t, store.CreateService(tx, service))
		for i := 0; i < instances; i++ {
			assert.NoError(t, store.CreateTask(tx, orchestrator.NewTask(cluster, service, uint64(i), "")))
		}
		return nil
	})
	assert.NoError(t, err)

	// Baseline: all tasks run v:1.
	originalTasks := getRunnableSlotSlice(t, s, service)
	for _, slot := range originalTasks {
		for _, task := range slot {
			assert.Equal(t, "v:1", task.Spec.GetContainer().Image)
		}
	}

	// Attempt the v:1 → v:2 update; every new task fails, so with PAUSE the
	// update should stop after replacing exactly one task.
	service.Spec.Task.GetContainer().Image = "v:2"
	updater := NewUpdater(s, restart.NewSupervisor(s), cluster, service)
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks := getRunnableSlotSlice(t, s, service)
	v1Counter := 0
	v2Counter := 0
	for _, slot := range updatedTasks {
		for _, task := range slot {
			if task.Spec.GetContainer().Image == "v:1" {
				v1Counter++
			} else if task.Spec.GetContainer().Image == "v:2" {
				v2Counter++
			}
		}
	}
	assert.Equal(t, instances-1, v1Counter)
	assert.Equal(t, 1, v2Counter)

	// The updater should have recorded the pause on the service itself.
	s.View(func(tx store.ReadTx) {
		service = store.GetService(tx, service.ID)
	})
	assert.Equal(t, api.UpdateStatus_PAUSED, service.UpdateStatus.State)

	// Updating again should do nothing while the update is PAUSED
	updater = NewUpdater(s, restart.NewSupervisor(s), cluster, service)
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks = getRunnableSlotSlice(t, s, service)
	v1Counter = 0
	v2Counter = 0
	for _, slot := range updatedTasks {
		for _, task := range slot {
			if task.Spec.GetContainer().Image == "v:1" {
				v1Counter++
			} else if task.Spec.GetContainer().Image == "v:2" {
				v2Counter++
			}
		}
	}
	assert.Equal(t, instances-1, v1Counter)
	assert.Equal(t, 1, v2Counter)

	// Switch to a service with FailureAction: CONTINUE
	err = s.Update(func(tx store.Tx) error {
		service = store.GetService(tx, service.ID)
		service.Spec.Update.FailureAction = api.UpdateConfig_CONTINUE
		// Clear the PAUSED status so a fresh update can start.
		service.UpdateStatus = nil
		assert.NoError(t, store.UpdateService(tx, service))
		return nil
	})
	assert.NoError(t, err)

	// With CONTINUE, the v:3 rollout should replace all tasks despite
	// every one of them failing.
	service.Spec.Task.GetContainer().Image = "v:3"
	updater = NewUpdater(s, restart.NewSupervisor(s), cluster, service)
	updater.Run(ctx, getRunnableSlotSlice(t, s, service))
	updatedTasks = getRunnableSlotSlice(t, s, service)
	v2Counter = 0
	v3Counter := 0
	for _, slot := range updatedTasks {
		for _, task := range slot {
			if task.Spec.GetContainer().Image == "v:2" {
				v2Counter++
			} else if task.Spec.GetContainer().Image == "v:3" {
				v3Counter++
			}
		}
	}
	assert.Equal(t, 0, v2Counter)
	assert.Equal(t, instances, v3Counter)
}
cluster, err := getCluster(common.Context(cmd), c, args[0]) if err != nil { return err } flags := cmd.Flags() spec := &cluster.Spec var rotation api.KeyRotation if flags.Changed("certexpiry") { cePeriod, err := flags.GetDuration("certexpiry") if err != nil { return err } ceProtoPeriod := gogotypes.DurationProto(cePeriod) spec.CAConfig.NodeCertExpiry = ceProtoPeriod } if flags.Changed("external-ca") { spec.CAConfig.ExternalCAs = externalCAOpt.Value() } if flags.Changed("taskhistory") { taskHistory, err := flags.GetInt64("taskhistory") if err != nil { return err } spec.Orchestration.TaskHistoryRetentionLimit = taskHistory } if flags.Changed("heartbeatperiod") { hbPeriod, err := flags.GetDuration("heartbeatperiod") if err != nil {
// marshalValue writes the value to the Writer.
//
// prop carries the field's proto metadata (enum name, custom type) and
// indent is the accumulated indentation for pretty-printed output. The
// dispatch order below is significant: repeated fields, well-known types,
// time.Time / time.Duration, enums, nested messages (structs), maps, and
// finally plain scalars via encoding/json. Errors from out are sticky in
// out.err; write failures surface through the returned out.err.
func (m *Marshaler) marshalValue(out *errWriter, prop *proto.Properties, v reflect.Value, indent string) error {
	v = reflect.Indirect(v)

	// Handle repeated elements.
	// []byte (Uint8 element) is excluded: it is base64-encoded by the
	// default json.Marshal path at the bottom, not emitted as an array.
	if v.Kind() == reflect.Slice && v.Type().Elem().Kind() != reflect.Uint8 {
		out.write("[")
		comma := ""
		for i := 0; i < v.Len(); i++ {
			sliceVal := v.Index(i)
			out.write(comma)
			if m.Indent != "" {
				out.write("\n")
				out.write(indent)
				out.write(m.Indent)
				out.write(m.Indent)
			}
			if err := m.marshalValue(out, prop, sliceVal, indent+m.Indent); err != nil {
				return err
			}
			comma = ","
		}
		if m.Indent != "" {
			out.write("\n")
			out.write(indent)
			out.write(m.Indent)
		}
		out.write("]")
		return out.err
	}

	// Handle well-known types.
	// Most are handled up in marshalObject (because 99% are messages).
	if wkt, ok := v.Interface().(isWkt); ok {
		switch wkt.XXX_WellKnownType() {
		case "NullValue":
			out.write("null")
			return out.err
		}
	}

	// Native time.Time values are converted to Timestamp protos and
	// re-marshaled recursively (TimestampProto can fail on out-of-range
	// times, hence the error check).
	if t, ok := v.Interface().(time.Time); ok {
		ts, err := types.TimestampProto(t)
		if err != nil {
			return err
		}
		return m.marshalValue(out, prop, reflect.ValueOf(ts), indent)
	}

	// Likewise, native time.Duration values become Duration protos.
	if d, ok := v.Interface().(time.Duration); ok {
		dur := types.DurationProto(d)
		return m.marshalValue(out, prop, reflect.ValueOf(dur), indent)
	}

	// Handle enumerations.
	if !m.EnumsAsInts && prop.Enum != "" {
		// Unknown enum values are stringified by the proto library as their
		// numeric value. Such values should _not_ be quoted or they will be
		// interpreted as an enum string instead of their value.
		enumStr := v.Interface().(fmt.Stringer).String()
		var valStr string
		if v.Kind() == reflect.Ptr {
			valStr = strconv.Itoa(int(v.Elem().Int()))
		} else {
			valStr = strconv.Itoa(int(v.Int()))
		}

		// NOTE: this m deliberately shadows the Marshaler receiver for the
		// rest of the branch. A custom MarshalJSON overrides the Stringer
		// representation; Unquote strips the JSON quoting it produced.
		if m, ok := v.Interface().(interface {
			MarshalJSON() ([]byte, error)
		}); ok {
			data, err := m.MarshalJSON()
			if err != nil {
				return err
			}
			enumStr = string(data)
			enumStr, err = strconv.Unquote(enumStr)
			if err != nil {
				return err
			}
		}

		// Quote only known enum names; a string equal to its numeric value
		// means the proto library didn't recognize the enum value.
		isKnownEnum := enumStr != valStr
		if isKnownEnum {
			out.write(`"`)
		}
		out.write(enumStr)
		if isKnownEnum {
			out.write(`"`)
		}
		return out.err
	}

	// Handle nested messages.
	if v.Kind() == reflect.Struct {
		// Take the address (copying to a fresh allocation when v is not
		// addressable) so pointer-receiver proto.Message methods are visible.
		i := v
		if v.CanAddr() {
			i = v.Addr()
		} else {
			i = reflect.New(v.Type())
			i.Elem().Set(v)
		}
		iface := i.Interface()
		if iface == nil {
			out.write(`null`)
			return out.err
		}
		pm, ok := iface.(proto.Message)
		if !ok {
			// Gogo custom types don't implement proto.Message directly;
			// convert through the registered custom type when declared.
			if prop.CustomType == "" {
				return fmt.Errorf("%v does not implement proto.Message", v.Type())
			}
			t := proto.MessageType(prop.CustomType)
			if t == nil || !i.Type().ConvertibleTo(t) {
				return fmt.Errorf("%v declared custom type %s but it is not convertible to %v", v.Type(), prop.CustomType, t)
			}
			pm = i.Convert(t).Interface().(proto.Message)
		}
		return m.marshalObject(out, pm, indent+m.Indent, "")
	}

	// Handle maps.
	// Since Go randomizes map iteration, we sort keys for stable output.
	if v.Kind() == reflect.Map {
		out.write(`{`)
		keys := v.MapKeys()
		sort.Sort(mapKeys(keys))
		for i, k := range keys {
			if i > 0 {
				out.write(`,`)
			}
			if m.Indent != "" {
				out.write("\n")
				out.write(indent)
				out.write(m.Indent)
				out.write(m.Indent)
			}

			b, err := json.Marshal(k.Interface())
			if err != nil {
				return err
			}
			s := string(b)

			// If the JSON is not a string value, encode it again to make it one.
			// JSON object keys must be strings, but map keys may be numeric.
			if !strings.HasPrefix(s, `"`) {
				b, err := json.Marshal(s)
				if err != nil {
					return err
				}
				s = string(b)
			}

			out.write(s)
			out.write(`:`)
			if m.Indent != "" {
				out.write(` `)
			}

			if err := m.marshalValue(out, prop, v.MapIndex(k), indent+m.Indent); err != nil {
				return err
			}
		}
		if m.Indent != "" {
			out.write("\n")
			out.write(indent)
			out.write(m.Indent)
		}
		out.write(`}`)
		return out.err
	}

	// Default handling defers to the encoding/json library.
	b, err := json.Marshal(v.Interface())
	if err != nil {
		return err
	}
	// 64-bit integers are emitted as quoted strings, unless json.Marshal
	// already produced a string for the value.
	needToQuote := string(b[0]) != `"` && (v.Kind() == reflect.Int64 || v.Kind() == reflect.Uint64)
	if needToQuote {
		out.write(`"`)
	}
	out.write(string(b))
	if needToQuote {
		out.write(`"`)
	}
	return out.err
}
func parseRestart(flags *pflag.FlagSet, spec *api.ServiceSpec) error { if spec.Task.Restart == nil { // set new service's restart policy as RestartOnAny spec.Task.Restart = &api.RestartPolicy{ Condition: api.RestartOnAny, } } if flags.Changed("restart-condition") { condition, err := flags.GetString("restart-condition") if err != nil { return err } switch condition { case "none": spec.Task.Restart.Condition = api.RestartOnNone case "failure": spec.Task.Restart.Condition = api.RestartOnFailure case "any": spec.Task.Restart.Condition = api.RestartOnAny default: return fmt.Errorf("invalid restart condition: %s", condition) } } if flags.Changed("restart-delay") { delay, err := flags.GetString("restart-delay") if err != nil { return err } delayDuration, err := time.ParseDuration(delay) if err != nil { return err } spec.Task.Restart.Delay = gogotypes.DurationProto(delayDuration) } if flags.Changed("restart-max-attempts") { attempts, err := flags.GetUint64("restart-max-attempts") if err != nil { return err } spec.Task.Restart.MaxAttempts = attempts } if flags.Changed("restart-window") { window, err := flags.GetString("restart-window") if err != nil { return err } windowDelay, err := time.ParseDuration(window) if err != nil { return err } spec.Task.Restart.Window = gogotypes.DurationProto(windowDelay) } return nil }