func (u *Updater) completeUpdate(ctx context.Context, serviceID string) { log.G(ctx).Debugf("update of service %s complete", serviceID) err := u.store.Update(func(tx store.Tx) error { service := store.GetService(tx, serviceID) if service == nil { return nil } if service.UpdateStatus == nil { // The service was changed since we started this update return nil } if service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_COMPLETED service.UpdateStatus.Message = "rollback completed" } else { service.UpdateStatus.State = api.UpdateStatus_COMPLETED service.UpdateStatus.Message = "update completed" } service.UpdateStatus.CompletedAt = ptypes.MustTimestampProto(time.Now()) return store.UpdateService(tx, service) }) if err != nil { log.G(ctx).WithError(err).Errorf("failed to mark update of service %s complete", serviceID) } }
func (u *Updater) pauseUpdate(ctx context.Context, serviceID, message string) { log.G(ctx).Debugf("pausing update of service %s", serviceID) err := u.store.Update(func(tx store.Tx) error { service := store.GetService(tx, serviceID) if service == nil { return nil } if service.UpdateStatus == nil { // The service was updated since we started this update return nil } if service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_PAUSED } else { service.UpdateStatus.State = api.UpdateStatus_PAUSED } service.UpdateStatus.Message = message return store.UpdateService(tx, service) }) if err != nil { log.G(ctx).WithError(err).Errorf("failed to pause update of service %s", serviceID) } }
func (u *Updater) rollbackUpdate(ctx context.Context, serviceID, message string) { log.G(ctx).Debugf("starting rollback of service %s", serviceID) var service *api.Service err := u.store.Update(func(tx store.Tx) error { service = store.GetService(tx, serviceID) if service == nil { return nil } if service.UpdateStatus == nil { // The service was updated since we started this update return nil } service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_STARTED service.UpdateStatus.Message = message if service.PreviousSpec == nil { return errors.New("cannot roll back service because no previous spec is available") } service.Spec = *service.PreviousSpec service.PreviousSpec = nil return store.UpdateService(tx, service) }) if err != nil { log.G(ctx).WithError(err).Errorf("failed to start rollback of service %s", serviceID) return } }
// UpdateService updates a Service referenced by ServiceID with the given ServiceSpec. // - Returns `NotFound` if the Service is not found. // - Returns `InvalidArgument` if the ServiceSpec is malformed. // - Returns `Unimplemented` if the ServiceSpec references unimplemented features. // - Returns an error if the update fails. func (s *Server) UpdateService(ctx context.Context, request *api.UpdateServiceRequest) (*api.UpdateServiceResponse, error) { if request.ServiceID == "" || request.ServiceVersion == nil { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } if err := validateServiceSpec(request.Spec); err != nil { return nil, err } var service *api.Service err := s.store.Update(func(tx store.Tx) error { service = store.GetService(tx, request.ServiceID) if service == nil { return nil } // temporary disable network update if request.Spec != nil && !reflect.DeepEqual(request.Spec.Networks, service.Spec.Networks) { return errNetworkUpdateNotSupported } service.Meta.Version = *request.ServiceVersion service.Spec = *request.Spec.Copy() return store.UpdateService(tx, service) }) if err != nil { return nil, err } if service == nil { return nil, grpc.Errorf(codes.NotFound, "service %s not found", request.ServiceID) } return &api.UpdateServiceResponse{ Service: service, }, nil }
func (a *Allocator) allocateService(ctx context.Context, nc *networkContext, s *api.Service) error { if s.Spec.Endpoint != nil { if s.Endpoint == nil { s.Endpoint = &api.Endpoint{ Spec: s.Spec.Endpoint.Copy(), } } // The service is trying to expose ports to the external // world. Automatically attach the service to the ingress // network only if it is not already done. if len(s.Spec.Endpoint.Ports) != 0 { var found bool for _, vip := range s.Endpoint.VirtualIPs { if vip.NetworkID == ingressNetwork.ID { found = true break } } if !found { s.Endpoint.VirtualIPs = append(s.Endpoint.VirtualIPs, &api.Endpoint_VirtualIP{NetworkID: ingressNetwork.ID}) } } } if err := nc.nwkAllocator.ServiceAllocate(s); err != nil { nc.unallocatedServices[s.ID] = s return err } if err := a.store.Update(func(tx store.Tx) error { for { err := store.UpdateService(tx, s) if err != nil && err != store.ErrSequenceConflict { return fmt.Errorf("failed updating state in store transaction for service %s: %v", s.ID, err) } if err == store.ErrSequenceConflict { storeService := store.GetService(tx, s.ID) storeService.Endpoint = s.Endpoint s = storeService continue } break } return nil }); err != nil { if err := nc.nwkAllocator.ServiceDeallocate(s); err != nil { log.G(ctx).WithError(err).Errorf("failed rolling back allocation of service %s: %v", s.ID, err) } return err } return nil }
// UpdateService updates a Service referenced by ServiceID with the given ServiceSpec. // - Returns `NotFound` if the Service is not found. // - Returns `InvalidArgument` if the ServiceSpec is malformed. // - Returns `Unimplemented` if the ServiceSpec references unimplemented features. // - Returns an error if the update fails. func (s *Server) UpdateService(ctx context.Context, request *api.UpdateServiceRequest) (*api.UpdateServiceResponse, error) { if request.ServiceID == "" || request.ServiceVersion == nil { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } if err := validateServiceSpec(request.Spec); err != nil { return nil, err } var service *api.Service err := s.store.Update(func(tx store.Tx) error { service = store.GetService(tx, request.ServiceID) if service == nil { return nil } service.Meta.Version = *request.ServiceVersion service.Spec = *request.Spec.Copy() return store.UpdateService(tx, service) }) if err != nil { return nil, err } if service == nil { return nil, grpc.Errorf(codes.NotFound, "service %s not found", request.ServiceID) } return &api.UpdateServiceResponse{ Service: service, }, nil }
// UpdateService updates a Service referenced by ServiceID with the given ServiceSpec. // - Returns `NotFound` if the Service is not found. // - Returns `InvalidArgument` if the ServiceSpec is malformed. // - Returns `Unimplemented` if the ServiceSpec references unimplemented features. // - Returns an error if the update fails. func (s *Server) UpdateService(ctx context.Context, request *api.UpdateServiceRequest) (*api.UpdateServiceResponse, error) { if request.ServiceID == "" || request.ServiceVersion == nil { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } if err := validateServiceSpec(request.Spec); err != nil { return nil, err } var service *api.Service s.store.View(func(tx store.ReadTx) { service = store.GetService(tx, request.ServiceID) }) if service == nil { return nil, grpc.Errorf(codes.NotFound, "service %s not found", request.ServiceID) } if request.Spec.Endpoint != nil && !reflect.DeepEqual(request.Spec.Endpoint, service.Spec.Endpoint) { if err := s.checkPortConflicts(request.Spec, request.ServiceID); err != nil { return nil, err } } err := s.store.Update(func(tx store.Tx) error { service = store.GetService(tx, request.ServiceID) if service == nil { return nil } // temporary disable network update if request.Spec != nil && !reflect.DeepEqual(request.Spec.Networks, service.Spec.Networks) { return errNetworkUpdateNotSupported } // orchestrator is designed to be stateless, so it should not deal // with service mode change (comparing current config with previous config). // proper way to change service mode is to delete and re-add. if request.Spec != nil && reflect.TypeOf(service.Spec.Mode) != reflect.TypeOf(request.Spec.Mode) { return errModeChangeNotAllowed } service.Meta.Version = *request.ServiceVersion service.Spec = *request.Spec.Copy() // Reset update status service.UpdateStatus = nil return store.UpdateService(tx, service) }) if err != nil { return nil, err } if service == nil { return nil, grpc.Errorf(codes.NotFound, "service %s not found", request.ServiceID) } return &api.UpdateServiceResponse{ Service: service, }, nil }
func (a *Allocator) commitAllocatedService(ctx context.Context, batch *store.Batch, s *api.Service) error { if err := batch.Update(func(tx store.Tx) error { err := store.UpdateService(tx, s) if err == store.ErrSequenceConflict { storeService := store.GetService(tx, s.ID) storeService.Endpoint = s.Endpoint err = store.UpdateService(tx, storeService) } return errors.Wrapf(err, "failed updating state in store transaction for service %s", s.ID) }); err != nil { if err := a.netCtx.nwkAllocator.ServiceDeallocate(s); err != nil { log.G(ctx).WithError(err).Errorf("failed rolling back allocation of service %s", s.ID) } return err } return nil }
func (u *Updater) startUpdate(ctx context.Context, serviceID string) { err := u.store.Update(func(tx store.Tx) error { service := store.GetService(tx, serviceID) if service == nil { return nil } if service.UpdateStatus != nil { return nil } service.UpdateStatus = &api.UpdateStatus{ State: api.UpdateStatus_UPDATING, Message: "update in progress", StartedAt: ptypes.MustTimestampProto(time.Now()), } return store.UpdateService(tx, service) }) if err != nil { log.G(ctx).WithError(err).Errorf("failed to mark update of service %s in progress", serviceID) } }
func TestUpdaterRollback(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() var ( failImage1 uint32 failImage2 uint32 ) watchCreate, cancelCreate := state.Watch(s.WatchQueue(), state.EventCreateTask{}) defer cancelCreate() watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), state.EventUpdateService{}) defer cancelServiceUpdate() // Fail new tasks the updater tries to run watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancelUpdate() go func() { failedLast := false for { select { case e := <-watchUpdate: task := e.(state.EventUpdateTask).Task if task.DesiredState == task.Status.State { continue } if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed && task.Status.State != api.TaskStateRunning { err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) // Never fail two image2 tasks in a row, so there's a mix of // failed and successful tasks for the rollback. if task.Spec.GetContainer().Image == "image1" && atomic.LoadUint32(&failImage1) == 1 { task.Status.State = api.TaskStateFailed failedLast = true } else if task.Spec.GetContainer().Image == "image2" && atomic.LoadUint32(&failImage2) == 1 && !failedLast { task.Status.State = api.TaskStateFailed failedLast = true } else { task.Status.State = task.DesiredState failedLast = false } return store.UpdateTask(tx, task) }) assert.NoError(t, err) } else if task.DesiredState > api.TaskStateRunning { err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = task.DesiredState return store.UpdateTask(tx, task) }) assert.NoError(t, err) } } } }() // Create a service with four replicas specified before the orchestrator // is started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { s1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "image1", }, }, Restart: &api.RestartPolicy{ Condition: api.RestartOnNone, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 4, }, }, Update: &api.UpdateConfig{ FailureAction: api.UpdateConfig_ROLLBACK, Parallelism: 1, Delay: *ptypes.DurationProto(10 * time.Millisecond), Monitor: ptypes.DurationProto(500 * time.Millisecond), MaxFailureRatio: 0.4, }, }, } assert.NoError(t, store.CreateService(tx, s1)) return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() observedTask := testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") atomic.StoreUint32(&failImage2, 1) // Start a rolling update err = s.Update(func(tx store.Tx) error { s1 := store.GetService(tx, "id1") require.NotNil(t, s1) s1.PreviousSpec = s1.Spec.Copy() s1.UpdateStatus = nil s1.Spec.Task.GetContainer().Image = "image2" assert.NoError(t, store.UpdateService(tx, s1)) return nil }) assert.NoError(t, err) // Should see three tasks started, then a rollback observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") // Should get to the ROLLBACK_STARTED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus == nil { continue } if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { break } } observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") // Should end up in ROLLBACK_COMPLETED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED { break } } atomic.StoreUint32(&failImage1, 1) // Repeat the rolling update but this time fail the tasks that the // rollback creates. It should end up in ROLLBACK_PAUSED. err = s.Update(func(tx store.Tx) error { s1 := store.GetService(tx, "id1") require.NotNil(t, s1) s1.PreviousSpec = s1.Spec.Copy() s1.UpdateStatus = nil s1.Spec.Task.GetContainer().Image = "image2" assert.NoError(t, store.UpdateService(tx, s1)) return nil }) assert.NoError(t, err) // Should see three tasks started, then a rollback observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") // Should get to the ROLLBACK_STARTED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus == nil { continue } if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { break } } observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") // Should end up in ROLLBACK_PAUSED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED { break } } }
func (a *Allocator) allocateService(ctx context.Context, s *api.Service) error { nc := a.netCtx if s.Spec.Endpoint != nil { // service has user-defined endpoint if s.Endpoint == nil { // service currently has no allocated endpoint, need allocated. s.Endpoint = &api.Endpoint{ Spec: s.Spec.Endpoint.Copy(), } } // The service is trying to expose ports to the external // world. Automatically attach the service to the ingress // network only if it is not already done. if len(s.Spec.Endpoint.Ports) != 0 { var found bool for _, vip := range s.Endpoint.VirtualIPs { if vip.NetworkID == nc.ingressNetwork.ID { found = true break } } if !found { s.Endpoint.VirtualIPs = append(s.Endpoint.VirtualIPs, &api.Endpoint_VirtualIP{NetworkID: nc.ingressNetwork.ID}) } } } else if s.Endpoint != nil { // service has no user-defined endpoints while has already allocated network resources, // need deallocated. if err := nc.nwkAllocator.ServiceDeallocate(s); err != nil { return err } } if err := nc.nwkAllocator.ServiceAllocate(s); err != nil { nc.unallocatedServices[s.ID] = s return err } // If the service doesn't expose ports any more and if we have // any lingering virtual IP references for ingress network // clean them up here. if s.Spec.Endpoint == nil || len(s.Spec.Endpoint.Ports) == 0 { if s.Endpoint != nil { for i, vip := range s.Endpoint.VirtualIPs { if vip.NetworkID == nc.ingressNetwork.ID { n := len(s.Endpoint.VirtualIPs) s.Endpoint.VirtualIPs[i], s.Endpoint.VirtualIPs[n-1] = s.Endpoint.VirtualIPs[n-1], nil s.Endpoint.VirtualIPs = s.Endpoint.VirtualIPs[:n-1] break } } } } if err := a.store.Update(func(tx store.Tx) error { for { err := store.UpdateService(tx, s) if err != nil && err != store.ErrSequenceConflict { return fmt.Errorf("failed updating state in store transaction for service %s: %v", s.ID, err) } if err == store.ErrSequenceConflict { storeService := store.GetService(tx, s.ID) storeService.Endpoint = s.Endpoint s = storeService continue } break } return nil }); err != nil { if err := nc.nwkAllocator.ServiceDeallocate(s); err != nil { log.G(ctx).WithError(err).Errorf("failed rolling back allocation of service %s: %v", s.ID, err) } return err } return nil }
func TestUpdaterFailureAction(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() // Fail new tasks the updater tries to run watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed { err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = api.TaskStateFailed return store.UpdateTask(tx, task) }) assert.NoError(t, err) } else if task.DesiredState > api.TaskStateRunning { err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = task.DesiredState return store.UpdateTask(tx, task) }) assert.NoError(t, err) } } } }() instances := 3 cluster := &api.Cluster{ Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: "default", }, }, } service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: uint64(instances), }, }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", // This won't apply in this test because we set the old tasks to DEAD. StopGracePeriod: ptypes.DurationProto(time.Hour), }, }, }, Update: &api.UpdateConfig{ FailureAction: api.UpdateConfig_PAUSE, Parallelism: 1, Delay: *ptypes.DurationProto(500 * time.Millisecond), }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateCluster(tx, cluster)) assert.NoError(t, store.CreateService(tx, service)) for i := 0; i < instances; i++ { assert.NoError(t, store.CreateTask(tx, newTask(cluster, service, uint64(i)))) } return nil }) assert.NoError(t, err) originalTasks := getRunnableSlotSlice(t, s, service) for _, slot := range originalTasks { for _, task := range slot { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) } } service.Spec.Task.GetContainer().Image = "v:2" updater := NewUpdater(s, NewRestartSupervisor(s), cluster, service) updater.Run(ctx, getRunnableSlotSlice(t, s, service)) updatedTasks := getRunnableSlotSlice(t, s, service) v1Counter := 0 v2Counter := 0 for _, slot := range updatedTasks { for _, task := range slot { if task.Spec.GetContainer().Image == "v:1" { v1Counter++ } else if task.Spec.GetContainer().Image == "v:2" { v2Counter++ } } } assert.Equal(t, instances-1, v1Counter) assert.Equal(t, 1, v2Counter) s.View(func(tx store.ReadTx) { service = store.GetService(tx, service.ID) }) assert.Equal(t, api.UpdateStatus_PAUSED, service.UpdateStatus.State) // Updating again should do nothing while the update is PAUSED updater = NewUpdater(s, NewRestartSupervisor(s), cluster, service) updater.Run(ctx, getRunnableSlotSlice(t, s, service)) updatedTasks = getRunnableSlotSlice(t, s, service) v1Counter = 0 v2Counter = 0 for _, slot := range updatedTasks { for _, task := range slot { if task.Spec.GetContainer().Image == "v:1" { v1Counter++ } else if task.Spec.GetContainer().Image == "v:2" { v2Counter++ } } } assert.Equal(t, instances-1, v1Counter) assert.Equal(t, 1, v2Counter) // Switch to a service with FailureAction: CONTINUE err = s.Update(func(tx store.Tx) error { service = store.GetService(tx, service.ID) service.Spec.Update.FailureAction = api.UpdateConfig_CONTINUE service.UpdateStatus = nil assert.NoError(t, store.UpdateService(tx, service)) return nil }) assert.NoError(t, err) service.Spec.Task.GetContainer().Image = "v:3" updater = NewUpdater(s, NewRestartSupervisor(s), cluster, service) updater.Run(ctx, getRunnableSlotSlice(t, s, service)) updatedTasks = getRunnableSlotSlice(t, s, service) v2Counter = 0 v3Counter := 0 for _, slot := range updatedTasks { for _, task := range slot { if task.Spec.GetContainer().Image == "v:2" { v2Counter++ } else if task.Spec.GetContainer().Image == "v:3" { v3Counter++ } } } assert.Equal(t, 0, v2Counter) assert.Equal(t, instances, v3Counter) }
func TestReplicatedScaleDown(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() s1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 6, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s1)) nodes := []*api.Node{ { ID: "node1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "node2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "node3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name3", }, Availability: api.NodeAvailabilityActive, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } for _, node := range nodes { assert.NoError(t, store.CreateNode(tx, node)) } // task1 is assigned to node1 // task2 - task3 are assigned to node2 // task4 - task6 are assigned to node3 // task7 is unassigned tasks := []*api.Task{ { ID: "task1", Slot: 1, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateStarting, }, ServiceAnnotations: api.Annotations{ Name: "task1", }, ServiceID: "id1", NodeID: "node1", }, { ID: "task2", Slot: 2, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task2", }, ServiceID: "id1", NodeID: "node2", }, { ID: "task3", Slot: 3, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task3", }, ServiceID: "id1", NodeID: "node2", }, { ID: "task4", Slot: 4, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task4", }, ServiceID: "id1", NodeID: "node3", }, { ID: "task5", Slot: 5, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task5", }, ServiceID: "id1", NodeID: "node3", }, { ID: "task6", Slot: 6, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateRunning, }, ServiceAnnotations: api.Annotations{ Name: "task6", }, ServiceID: "id1", NodeID: "node3", }, { ID: "task7", Slot: 7, DesiredState: api.TaskStateRunning, Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceAnnotations: api.Annotations{ Name: "task7", }, ServiceID: "id1", }, } for _, task := range tasks { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() // Replicas was set to 6, but we started with 7 tasks. task7 should // be the one the orchestrator chose to shut down because it was not // assigned yet. observedShutdown := watchShutdownTask(t, watch) assert.Equal(t, "task7", observedShutdown.ID) // Now scale down to 2 instances. err = s.Update(func(tx store.Tx) error { s1.Spec.Mode = &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, } assert.NoError(t, store.UpdateService(tx, s1)) return nil }) assert.NoError(t, err) // Tasks should be shut down in a way that balances the remaining tasks. // node2 and node3 should be preferred over node1 because node1's task // is not running yet. shutdowns := make(map[string]int) for i := 0; i != 4; i++ { observedShutdown := watchShutdownTask(t, watch) shutdowns[observedShutdown.NodeID]++ } assert.Equal(t, 1, shutdowns["node1"]) assert.Equal(t, 1, shutdowns["node2"]) assert.Equal(t, 2, shutdowns["node3"]) // There should be remaining tasks on node2 and node3. s.View(func(readTx store.ReadTx) { tasks, err := store.FindTasks(readTx, store.ByDesiredState(api.TaskStateRunning)) require.NoError(t, err) require.Len(t, tasks, 2) if tasks[0].NodeID == "node2" { assert.Equal(t, "node3", tasks[1].NodeID) } else { assert.Equal(t, "node3", tasks[0].NodeID) assert.Equal(t, "node2", tasks[1].NodeID) } }) }
func TestReplicatedOrchestrator(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/) defer cancel() // Create a service with two instances specified before the orchestrator is // started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { s1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, }, }, } assert.NoError(t, store.CreateService(tx, s1)) return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() observedTask1 := watchTaskCreate(t, watch) assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") observedTask2 := watchTaskCreate(t, watch) assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") // Create a second service. err = s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "id2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name2", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 1, }, }, }, } assert.NoError(t, store.CreateService(tx, s2)) return nil }) assert.NoError(t, err) observedTask3 := watchTaskCreate(t, watch) assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name2") // Update a service to scale it out to 3 instances err = s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "id2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name2", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 3, }, }, }, } assert.NoError(t, store.UpdateService(tx, s2)) return nil }) assert.NoError(t, err) observedTask4 := watchTaskCreate(t, watch) assert.Equal(t, observedTask4.Status.State, api.TaskStateNew) assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name2") observedTask5 := watchTaskCreate(t, watch) assert.Equal(t, observedTask5.Status.State, api.TaskStateNew) assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name2") // Now scale it back down to 1 instance err = s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "id2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name2", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 1, }, }, }, } assert.NoError(t, store.UpdateService(tx, s2)) return nil }) assert.NoError(t, err) observedDeletion1 := watchShutdownTask(t, watch) assert.Equal(t, observedDeletion1.Status.State, api.TaskStateNew) assert.Equal(t, observedDeletion1.ServiceAnnotations.Name, "name2") observedDeletion2 := watchShutdownTask(t, watch) assert.Equal(t, observedDeletion2.Status.State, api.TaskStateNew) assert.Equal(t, observedDeletion2.ServiceAnnotations.Name, "name2") // There should be one remaining task attached to service id2/name2. var liveTasks []*api.Task s.View(func(readTx store.ReadTx) { var tasks []*api.Task tasks, err = store.FindTasks(readTx, store.ByServiceID("id2")) for _, t := range tasks { if t.DesiredState == api.TaskStateRunning { liveTasks = append(liveTasks, t) } } }) assert.NoError(t, err) assert.Len(t, liveTasks, 1) // Delete the remaining task directly. It should be recreated by the // orchestrator. err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteTask(tx, liveTasks[0].ID)) return nil }) assert.NoError(t, err) observedTask6 := watchTaskCreate(t, watch) assert.Equal(t, observedTask6.Status.State, api.TaskStateNew) assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name2") // Delete the service. Its remaining task should go away. err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, "id2")) return nil }) assert.NoError(t, err) deletedTask := watchTaskDelete(t, watch) assert.Equal(t, deletedTask.Status.State, api.TaskStateNew) assert.Equal(t, deletedTask.ServiceAnnotations.Name, "name2") }
// UpdateService updates a Service referenced by ServiceID with the given ServiceSpec. // - Returns `NotFound` if the Service is not found. // - Returns `InvalidArgument` if the ServiceSpec is malformed. // - Returns `Unimplemented` if the ServiceSpec references unimplemented features. // - Returns an error if the update fails. func (s *Server) UpdateService(ctx context.Context, request *api.UpdateServiceRequest) (*api.UpdateServiceResponse, error) { if request.ServiceID == "" || request.ServiceVersion == nil { return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error()) } if err := validateServiceSpec(request.Spec); err != nil { return nil, err } var service *api.Service s.store.View(func(tx store.ReadTx) { service = store.GetService(tx, request.ServiceID) }) if service == nil { return nil, grpc.Errorf(codes.NotFound, "service %s not found", request.ServiceID) } if request.Spec.Endpoint != nil && !reflect.DeepEqual(request.Spec.Endpoint, service.Spec.Endpoint) { if err := s.checkPortConflicts(request.Spec, request.ServiceID); err != nil { return nil, err } } err := s.store.Update(func(tx store.Tx) error { service = store.GetService(tx, request.ServiceID) if service == nil { return grpc.Errorf(codes.NotFound, "service %s not found", request.ServiceID) } // temporary disable network update requestSpecNetworks := request.Spec.Task.Networks if len(requestSpecNetworks) == 0 { requestSpecNetworks = request.Spec.Networks } specNetworks := service.Spec.Task.Networks if len(specNetworks) == 0 { specNetworks = service.Spec.Networks } if !reflect.DeepEqual(requestSpecNetworks, specNetworks) { return grpc.Errorf(codes.Unimplemented, errNetworkUpdateNotSupported.Error()) } // Check to see if all the secrets being added exist as objects // in our datastore err := s.checkSecretExistence(tx, request.Spec) if err != nil { return err } // orchestrator is designed to be stateless, so it should not deal // with service mode change (comparing current config with previous config). // proper way to change service mode is to delete and re-add. if reflect.TypeOf(service.Spec.Mode) != reflect.TypeOf(request.Spec.Mode) { return grpc.Errorf(codes.Unimplemented, errModeChangeNotAllowed.Error()) } if service.Spec.Annotations.Name != request.Spec.Annotations.Name { return grpc.Errorf(codes.Unimplemented, errRenameNotSupported.Error()) } service.Meta.Version = *request.ServiceVersion service.PreviousSpec = service.Spec.Copy() service.Spec = *request.Spec.Copy() // Reset update status service.UpdateStatus = nil return store.UpdateService(tx, service) }) if err != nil { return nil, err } return &api.UpdateServiceResponse{ Service: service, }, nil }
func TestAllocator(t *testing.T) { s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() a, err := New(s, nil) assert.NoError(t, err) assert.NotNil(t, a) // Try adding some objects to store before allocator is started assert.NoError(t, s.Update(func(tx store.Tx) error { n1 := &api.Network{ ID: "testID1", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test1", }, }, } assert.NoError(t, store.CreateNetwork(tx, n1)) s1 := &api.Service{ ID: "testServiceID1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service1", }, Task: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID1", }, }, }, Endpoint: &api.EndpointSpec{ Mode: api.ResolutionModeVirtualIP, Ports: []*api.PortConfig{ { Name: "portName", Protocol: api.ProtocolTCP, TargetPort: 8000, PublishedPort: 8001, }, }, }, }, } assert.NoError(t, store.CreateService(tx, s1)) t1 := &api.Task{ ID: "testTaskID1", Status: api.TaskStatus{ State: api.TaskStateNew, }, Networks: []*api.NetworkAttachment{ { Network: n1, }, }, } assert.NoError(t, store.CreateTask(tx, t1)) t2 := &api.Task{ ID: "testTaskIDPreInit", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID1", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t2)) return nil })) netWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateNetwork{}, state.EventDeleteNetwork{}) defer cancel() taskWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}, state.EventDeleteTask{}) defer cancel() serviceWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateService{}, state.EventDeleteService{}) defer cancel() // Start allocator go func() { assert.NoError(t, a.Run(context.Background())) }() // Now verify if we get network and tasks updated properly watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) // t1 watchTask(t, s, taskWatch, false, isValidTask) // t2 watchService(t, serviceWatch, false, nil) // Add new networks/tasks/services after allocator is started. assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := &api.Network{ ID: "testID2", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test2", }, }, } assert.NoError(t, store.CreateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "testServiceID2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service2", }, Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s2)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := &api.Task{ ID: "testTaskID2", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID2", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) // Now try adding a task which depends on a network before adding the network. n3 := &api.Network{ ID: "testID3", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test3", }, }, } assert.NoError(t, s.Update(func(tx store.Tx) error { t3 := &api.Task{ ID: "testTaskID3", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n3, }, }, } assert.NoError(t, store.CreateTask(tx, t3)) return nil })) // Wait for a little bit of time before adding network just to // test network is not available while task allocation is // going through time.Sleep(10 * time.Millisecond) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n3)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteTask(tx, "testTaskID3")) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { t5 := &api.Task{ ID: "testTaskID5", Spec: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, }, Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, ServiceID: "testServiceID2", } assert.NoError(t, store.CreateTask(tx, t5)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, "testID3")) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, "testServiceID2")) return nil })) watchService(t, serviceWatch, false, nil) // Try to create a task with no network attachments and test // that it moves to ALLOCATED state. assert.NoError(t, s.Update(func(tx store.Tx) error { t4 := &api.Task{ ID: "testTaskID4", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t4)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := store.GetNetwork(tx, "testID2") require.NotEqual(t, nil, n2) assert.NoError(t, store.UpdateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchNetwork(t, netWatch, true, nil) // Try updating service which is already allocated with no endpointSpec assert.NoError(t, s.Update(func(tx store.Tx) error { s := store.GetService(tx, "testServiceID1") s.Spec.Endpoint = nil assert.NoError(t, store.UpdateService(tx, s)) return nil })) watchService(t, serviceWatch, false, nil) // Try updating task which is already allocated assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := store.GetTask(tx, "testTaskID2") require.NotEqual(t, nil, t2) assert.NoError(t, store.UpdateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) watchTask(t, s, taskWatch, true, nil) // Try adding networks with conflicting network resources and // add task which attaches to a network which gets allocated // later and verify if task reconciles and moves to ALLOCATED. n4 := &api.Network{ ID: "testID4", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test4", }, DriverConfig: &api.Driver{ Name: "overlay", Options: map[string]string{ "com.docker.network.driver.overlay.vxlanid_list": "328", }, }, }, } n5 := n4.Copy() n5.ID = "testID5" n5.Spec.Annotations.Name = "test5" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n4)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n5)) return nil })) watchNetwork(t, netWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t6 := &api.Task{ ID: "testTaskID6", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n5, }, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting network. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, n4.ID)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) // Try adding services with conflicting port configs and add // task which is part of the service whose allocation hasn't // happened and when that happens later and verify if task // reconciles and moves to ALLOCATED. s3 := &api.Service{ ID: "testServiceID3", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service3", }, Endpoint: &api.EndpointSpec{ Ports: []*api.PortConfig{ { Name: "http", TargetPort: 80, PublishedPort: 8080, }, { PublishMode: api.PublishModeHost, Name: "http", TargetPort: 80, }, }, }, }, } s4 := s3.Copy() s4.ID = "testServiceID4" s4.Spec.Annotations.Name = "service4" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s3)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s4)) return nil })) watchService(t, serviceWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t7 := &api.Task{ ID: "testTaskID7", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID4", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t7)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting service. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, s3.ID)) return nil })) watchService(t, serviceWatch, false, nil) watchTask(t, s, taskWatch, false, isValidTask) a.Stop() }