func (u *Updater) useExistingTask(ctx context.Context, slot slot, existing *api.Task) { var removeTasks []*api.Task for _, t := range slot { if t != existing { removeTasks = append(removeTasks, t) } } if len(removeTasks) != 0 || existing.DesiredState != api.TaskStateRunning { _, err := u.store.Batch(func(batch *store.Batch) error { u.removeOldTasks(ctx, batch, removeTasks) if existing.DesiredState != api.TaskStateRunning { err := batch.Update(func(tx store.Tx) error { t := store.GetTask(tx, existing.ID) if t == nil { return fmt.Errorf("task %s not found while trying to start it", existing.ID) } if t.DesiredState >= api.TaskStateRunning { return fmt.Errorf("task %s was already started when reached by updater", existing.ID) } t.DesiredState = api.TaskStateRunning return store.UpdateTask(tx, t) }) if err != nil { log.G(ctx).WithError(err).Errorf("starting task %s failed", existing.ID) } } return nil }) if err != nil { log.G(ctx).WithError(err).Error("updater batch transaction failed") } } }
// removeOldTasks shuts down the given tasks and returns one of the tasks that // was shut down, or an error. func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, removeTasks []*api.Task) (*api.Task, error) { var ( lastErr error removedTask *api.Task ) for _, original := range removeTasks { err := batch.Update(func(tx store.Tx) error { t := store.GetTask(tx, original.ID) if t == nil { return fmt.Errorf("task %s not found while trying to shut it down", original.ID) } if t.DesiredState > api.TaskStateRunning { return fmt.Errorf("task %s was already shut down when reached by updater", original.ID) } t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) }) if err != nil { lastErr = err } else { removedTask = original } } if removedTask == nil { return nil, lastErr } return removedTask, nil }
// StartNow moves the task into the RUNNING state so it will proceed to start // up. func (r *Supervisor) StartNow(tx store.Tx, taskID string) error { t := store.GetTask(tx, taskID) if t == nil || t.DesiredState >= api.TaskStateRunning { return nil } t.DesiredState = api.TaskStateRunning return store.UpdateTask(tx, t) }
func (a *Allocator) commitAllocatedTask(ctx context.Context, batch *store.Batch, t *api.Task) error { return batch.Update(func(tx store.Tx) error { err := store.UpdateTask(tx, t) if err == store.ErrSequenceConflict { storeTask := store.GetTask(tx, t.ID) taskUpdateNetworks(storeTask, t.Networks) taskUpdateEndpoint(storeTask, t.Endpoint) if storeTask.Status.State < api.TaskStatePending { storeTask.Status = t.Status } err = store.UpdateTask(tx, storeTask) } return errors.Wrapf(err, "failed updating state in store transaction for task %s", t.ID) }) }
func (u *Updater) updateTask(ctx context.Context, original, updated *api.Task) error { log.G(ctx).Debugf("replacing %s with %s", original.ID, updated.ID) // Kick off the watch before even creating the updated task. This is in order to avoid missing any event. taskUpdates, cancel := state.Watch(u.watchQueue, state.EventUpdateTask{ Task: &api.Task{ID: updated.ID}, Checks: []state.TaskCheckFunc{state.TaskCheckID}, }) defer cancel() var delayStartCh <-chan struct{} // Atomically create the updated task and bring down the old one. err := u.store.Update(func(tx store.Tx) error { t := store.GetTask(tx, original.ID) if t == nil { return fmt.Errorf("task %s not found while trying to update it", original.ID) } if t.DesiredState > api.TaskStateRunning { return fmt.Errorf("task %s was already shut down when reached by updater", original.ID) } t.DesiredState = api.TaskStateShutdown if err := store.UpdateTask(tx, t); err != nil { return err } if err := store.CreateTask(tx, updated); err != nil { return err } // Wait for the old task to stop or time out, and then set the new one // to RUNNING. delayStartCh = u.restarts.DelayStart(ctx, tx, original, updated.ID, 0, true) return nil }) if err != nil { return err } <-delayStartCh // Wait for the new task to come up. // TODO(aluzzardi): Consider adding a timeout here. for { select { case e := <-taskUpdates: updated = e.(state.EventUpdateTask).Task if updated.Status.State >= api.TaskStateRunning { return nil } case <-u.stopChan: return nil } } }
func (d *Dispatcher) processTaskUpdates() { d.taskUpdatesLock.Lock() if len(d.taskUpdates) == 0 { d.taskUpdatesLock.Unlock() return } taskUpdates := d.taskUpdates d.taskUpdates = make(map[string]*api.TaskStatus) d.taskUpdatesLock.Unlock() log := log.G(d.ctx).WithFields(logrus.Fields{ "method": "(*Dispatcher).processTaskUpdates", }) _, err := d.store.Batch(func(batch *store.Batch) error { for taskID, status := range taskUpdates { err := batch.Update(func(tx store.Tx) error { logger := log.WithField("task.id", taskID) task := store.GetTask(tx, taskID) if task == nil { logger.Errorf("task unavailable") return nil } logger = logger.WithField("state.transition", fmt.Sprintf("%v->%v", task.Status.State, status.State)) if task.Status == *status { logger.Debug("task status identical, ignoring") return nil } if task.Status.State > status.State { logger.Debug("task status invalid transition") return nil } task.Status = *status if err := store.UpdateTask(tx, task); err != nil { logger.WithError(err).Error("failed to update task status") return nil } logger.Debug("task status updated") return nil }) if err != nil { log.WithError(err).Error("dispatcher transaction failed") } } return nil }) if err != nil { log.WithError(err).Error("dispatcher batch failed") } }
func (g *GlobalOrchestrator) removeTask(ctx context.Context, batch *store.Batch, t *api.Task) { // set existing task DesiredState to TaskStateShutdown // TODO(aaronl): optimistic update? err := batch.Update(func(tx store.Tx) error { t = store.GetTask(tx, t.ID) if t != nil { t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) } return nil }) if err != nil { log.G(ctx).WithError(err).Errorf("global orchestrator: removeTask failed to remove %s", t.ID) } }
func (r *ReplicatedOrchestrator) removeTasks(ctx context.Context, batch *store.Batch, service *api.Service, tasks []*api.Task) { for _, t := range tasks { err := batch.Update(func(tx store.Tx) error { // TODO(aaronl): optimistic update? t = store.GetTask(tx, t.ID) if t != nil { t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) } return nil }) if err != nil { log.G(ctx).WithError(err).Errorf("removing task %s failed", t.ID) } } }
// applySchedulingDecisions writes the given scheduling decisions to the store
// and reports which were applied (successful) and which were not (failed).
//
// schedulingDecisions is keyed by task ID and is consumed by this call: every
// entry is deleted from the map as it is visited, so the map is empty once the
// batch callback has run to completion.
//
// A decision lands in failed when its task is no longer in the store or when
// store.UpdateTask rejects decision.new. If the batch as a whole returns an
// error, every decision in successful beyond index `applied` (the count
// returned by store.Batch) is moved to failed as well.
//
// Both results are nil when schedulingDecisions is empty.
func (s *Scheduler) applySchedulingDecisions(ctx context.Context, schedulingDecisions map[string]schedulingDecision) (successful, failed []schedulingDecision) {
	if len(schedulingDecisions) == 0 {
		return
	}

	successful = make([]schedulingDecision, 0, len(schedulingDecisions))

	// Apply changes to master store
	applied, err := s.store.Batch(func(batch *store.Batch) error {
		// Each pass issues one batch.Update; the map shrinks inside the
		// callback, which is what terminates this loop.
		for len(schedulingDecisions) > 0 {
			err := batch.Update(func(tx store.Tx) error {
				// Update exactly one task inside this Update
				// callback.
				for taskID, decision := range schedulingDecisions {
					// Remove the entry first so it is never retried, whatever
					// the outcome below.
					delete(schedulingDecisions, taskID)

					t := store.GetTask(tx, taskID)
					if t == nil {
						// Task no longer exists. Do nothing.
						failed = append(failed, decision)
						continue
					}

					if err := store.UpdateTask(tx, decision.new); err != nil {
						log.G(ctx).Debugf("scheduler failed to update task %s; will retry", taskID)
						failed = append(failed, decision)
						continue
					}
					// One successful write per callback: stop here and let the
					// outer loop start the next Update.
					successful = append(successful, decision)
					return nil
				}
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})

	if err != nil {
		log.G(ctx).WithError(err).Error("scheduler tick transaction failed")
		// NOTE(review): this treats `applied` as an index into successful,
		// i.e. it assumes the first `applied` successful entries are the ones
		// that were actually committed — confirm against store.Batch.
		failed = append(failed, successful[applied:]...)
		successful = successful[:applied]
	}
	return
}
func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, removeTasks []*api.Task) { for _, original := range removeTasks { err := batch.Update(func(tx store.Tx) error { t := store.GetTask(tx, original.ID) if t == nil { return fmt.Errorf("task %s not found while trying to shut it down", original.ID) } if t.DesiredState > api.TaskStateRunning { return fmt.Errorf("task %s was already shut down when reached by updater", original.ID) } t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) }) if err != nil { log.G(ctx).WithError(err).Errorf("shutting down stale task %s failed", original.ID) } } }
func (d *Dispatcher) moveTasksToOrphaned(nodeID string) error { _, err := d.store.Batch(func(batch *store.Batch) error { var ( tasks []*api.Task err error ) d.store.View(func(tx store.ReadTx) { tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID)) }) if err != nil { return err } for _, task := range tasks { if task.Status.State < api.TaskStateOrphaned { task.Status.State = api.TaskStateOrphaned } if err := batch.Update(func(tx store.Tx) error { err := store.UpdateTask(tx, task) if err != nil { return err } return nil }); err != nil { return err } } return nil }) return err }
// TestUpdaterRollback exercises the ROLLBACK failure action of a service's
// update config.
//
// A background goroutine reacts to task update events by writing a status for
// each task: either FAILED (controlled by the failImage1/failImage2 flags) or
// the task's desired state. The test then:
//
//  1. creates a 4-replica service on "image1" and waits for its four tasks;
//  2. updates the service to "image2" while image2 tasks are being failed, and
//     expects the update to reach ROLLBACK_STARTED and then ROLLBACK_COMPLETED;
//  3. repeats the update while image1 tasks are failed too, and expects the
//     update to end in ROLLBACK_PAUSED.
func TestUpdaterRollback(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	// Flags read by the goroutine below; 1 means "fail tasks using that image".
	var (
		failImage1 uint32
		failImage2 uint32
	)

	watchCreate, cancelCreate := state.Watch(s.WatchQueue(), state.EventCreateTask{})
	defer cancelCreate()

	watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), state.EventUpdateService{})
	defer cancelServiceUpdate()

	// Fail new tasks the updater tries to run
	watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancelUpdate()
	// NOTE(review): this goroutine has no exit condition visible here; it
	// keeps receiving from watchUpdate for as long as the channel delivers.
	go func() {
		// failedLast is only touched from this goroutine.
		failedLast := false
		for {
			select {
			case e := <-watchUpdate:
				task := e.(state.EventUpdateTask).Task
				// Nothing to do once the observed state matches the desired one.
				if task.DesiredState == task.Status.State {
					continue
				}
				if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed && task.Status.State != api.TaskStateRunning {
					err := s.Update(func(tx store.Tx) error {
						// Re-read the task inside the transaction before
						// modifying it.
						task = store.GetTask(tx, task.ID)
						// Never fail two image2 tasks in a row, so there's a mix of
						// failed and successful tasks for the rollback.
						if task.Spec.GetContainer().Image == "image1" && atomic.LoadUint32(&failImage1) == 1 {
							task.Status.State = api.TaskStateFailed
							failedLast = true
						} else if task.Spec.GetContainer().Image == "image2" && atomic.LoadUint32(&failImage2) == 1 && !failedLast {
							task.Status.State = api.TaskStateFailed
							failedLast = true
						} else {
							task.Status.State = task.DesiredState
							failedLast = false
						}
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				} else if task.DesiredState > api.TaskStateRunning {
					// Task is being taken down: report the desired state as
					// reached.
					err := s.Update(func(tx store.Tx) error {
						task = store.GetTask(tx, task.ID)
						task.Status.State = task.DesiredState
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				}
			}
		}
	}()

	// Create a service with four replicas specified before the orchestrator
	// is started. This should result in four tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		s1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{
							Image: "image1",
						},
					},
					Restart: &api.RestartPolicy{
						Condition: api.RestartOnNone,
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 4,
					},
				},
				Update: &api.UpdateConfig{
					FailureAction:   api.UpdateConfig_ROLLBACK,
					Parallelism:     1,
					Delay:           *ptypes.DurationProto(10 * time.Millisecond),
					Monitor:         ptypes.DurationProto(500 * time.Millisecond),
					MaxFailureRatio: 0.4,
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	// The four initial replicas, all on image1.
	observedTask := testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// From here on the goroutine above fails image2 tasks.
	atomic.StoreUint32(&failImage2, 1)

	// Start a rolling update
	err = s.Update(func(tx store.Tx) error {
		s1 := store.GetService(tx, "id1")
		require.NotNil(t, s1)
		s1.PreviousSpec = s1.Spec.Copy()
		s1.UpdateStatus = nil
		s1.Spec.Task.GetContainer().Image = "image2"
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Should see three tasks started, then a rollback

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	// Should get to the ROLLBACK_STARTED state
	for {
		e := <-watchServiceUpdate
		// Skip service updates that carry no update status.
		if e.(state.EventUpdateService).Service.UpdateStatus == nil {
			continue
		}
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			break
		}
	}

	// The rollback recreates three tasks on image1.
	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Should end up in ROLLBACK_COMPLETED state
	// NOTE(review): unlike the ROLLBACK_STARTED loop above, this loop does not
	// guard against a nil UpdateStatus before dereferencing it.
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED {
			break
		}
	}

	// From here on image1 tasks are failed as well.
	atomic.StoreUint32(&failImage1, 1)

	// Repeat the rolling update but this time fail the tasks that the
	// rollback creates. It should end up in ROLLBACK_PAUSED.
	err = s.Update(func(tx store.Tx) error {
		s1 := store.GetService(tx, "id1")
		require.NotNil(t, s1)
		s1.PreviousSpec = s1.Spec.Copy()
		s1.UpdateStatus = nil
		s1.Spec.Task.GetContainer().Image = "image2"
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Should see three tasks started, then a rollback

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	// Should get to the ROLLBACK_STARTED state
	for {
		e := <-watchServiceUpdate
		// Skip service updates that carry no update status.
		if e.(state.EventUpdateService).Service.UpdateStatus == nil {
			continue
		}
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			break
		}
	}

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Should end up in ROLLBACK_PAUSED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED {
			break
		}
	}
}
// TestOrchestratorRestartDelay checks that a failed task of a service with a
// 100ms restart delay is replaced by a new task that is first created with
// desired state READY and only moved to RUNNING once at least 100ms have
// passed since the failure was written.
func TestOrchestratorRestartDelay(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	// No event filter is passed (the filters are commented out), so the
	// expect*/watch* calls below have to account for every event in order.
	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
					Restart: &api.RestartPolicy{
						Condition: api.RestartOnAny,
						Delay:     ptypes.DurationProto(100 * time.Millisecond),
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask1 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail the first task. Confirm that it gets restarted.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	// Reference point for the restart-delay measurement below.
	before := time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)

	// The replacement task is created with desired state READY, not RUNNING.
	observedTask3 := watchTaskCreate(t, watch)
	expectCommit(t, watch)
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	// The next task update is the one that moves it to RUNNING.
	observedTask4 := watchTaskUpdate(t, watch)
	after := time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatalf("restart delay should have elapsed. Got: %v", after.Sub(before))
	}

	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")
}
// procUnallocatedTasksNetwork tries to allocate every task currently recorded
// in nc.unallocatedTasks.
//
// Phase 1 runs allocateTask for each pending task inside one store batch and
// collects the tasks for which that succeeded. Phase 2 then loops: the first
// `committed` collected tasks are removed from nc.unallocatedTasks, and the
// remainder is written to the store again in a fresh batch. The loop ends when
// nothing remains, when ctx is done, or after 3 retry rounds.
//
// All failures are logged; nothing is returned.
func (a *Allocator) procUnallocatedTasksNetwork(ctx context.Context, nc *networkContext) {
	tasks := make([]*api.Task, 0, len(nc.unallocatedTasks))

	committed, err := a.store.Batch(func(batch *store.Batch) error {
		for _, t := range nc.unallocatedTasks {
			var allocatedT *api.Task
			err := batch.Update(func(tx store.Tx) error {
				var err error
				allocatedT, err = a.allocateTask(ctx, nc, tx, t)
				return err
			})

			if err != nil {
				// Leave the task in nc.unallocatedTasks and move on.
				log.G(ctx).WithError(err).Error("task allocation failure")
				continue
			}

			tasks = append(tasks, allocatedT)
		}

		return nil
	})

	if err != nil {
		log.G(ctx).WithError(err).Error("failed a store batch operation while processing unallocated tasks")
	}

	var retryCnt int
	for len(tasks) != 0 {
		var err error

		// NOTE(review): this assumes the first `committed` entries of tasks
		// are the ones the previous batch actually committed — confirm
		// against store.Batch.
		for _, t := range tasks[:committed] {
			delete(nc.unallocatedTasks, t.ID)
		}

		tasks = tasks[committed:]
		if len(tasks) == 0 {
			break
		}

		// Re-issue the store update for everything not yet committed.
		updatedTasks := make([]*api.Task, 0, len(tasks))
		committed, err = a.store.Batch(func(batch *store.Batch) error {
			for _, t := range tasks {
				err := batch.Update(func(tx store.Tx) error {
					return store.UpdateTask(tx, t)
				})

				if err != nil {
					log.G(ctx).WithError(err).Error("allocated task store update failure")
					continue
				}

				updatedTasks = append(updatedTasks, t)
			}

			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Error("failed a store batch operation while processing unallocated tasks")
		}

		tasks = updatedTasks

		// Stop retrying as soon as the context is cancelled.
		select {
		case <-ctx.Done():
			return
		default:
		}

		retryCnt++
		if retryCnt >= 3 {
			// Breaking here skips the delete at the top of the loop, so tasks
			// committed in this last round stay in nc.unallocatedTasks.
			log.G(ctx).Errorf("failed to complete batch update of allocated tasks after 3 retries")
			break
		}
	}
}
func (a *Allocator) doNetworkInit(ctx context.Context) error { na, err := networkallocator.New() if err != nil { return err } nc := &networkContext{ nwkAllocator: na, unallocatedTasks: make(map[string]*api.Task), unallocatedServices: make(map[string]*api.Service), unallocatedNetworks: make(map[string]*api.Network), ingressNetwork: newIngressNetwork(), } // Check if we have the ingress network. If not found create // it before reading all network objects for allocation. var networks []*api.Network a.store.View(func(tx store.ReadTx) { networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName)) if len(networks) > 0 { nc.ingressNetwork = networks[0] } }) if err != nil { return fmt.Errorf("failed to find ingress network during init: %v", err) } // If ingress network is not found, create one right away // using the predefined template. if len(networks) == 0 { if err := a.store.Update(func(tx store.Tx) error { nc.ingressNetwork.ID = identity.NewID() if err := store.CreateNetwork(tx, nc.ingressNetwork); err != nil { return err } return nil }); err != nil { return fmt.Errorf("failed to create ingress network: %v", err) } a.store.View(func(tx store.ReadTx) { networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName)) if len(networks) > 0 { nc.ingressNetwork = networks[0] } }) if err != nil { return fmt.Errorf("failed to find ingress network after creating it: %v", err) } } // Try to complete ingress network allocation before anything else so // that the we can get the preferred subnet for ingress // network. 
if !na.IsAllocated(nc.ingressNetwork) { if err := a.allocateNetwork(ctx, nc, nc.ingressNetwork); err != nil { log.G(ctx).Errorf("failed allocating ingress network during init: %v", err) } // Update store after allocation if err := a.store.Update(func(tx store.Tx) error { if err := store.UpdateNetwork(tx, nc.ingressNetwork); err != nil { return err } return nil }); err != nil { return fmt.Errorf("failed to create ingress network: %v", err) } } // Allocate networks in the store so far before we started // watching. a.store.View(func(tx store.ReadTx) { networks, err = store.FindNetworks(tx, store.All) }) if err != nil { return fmt.Errorf("error listing all networks in store while trying to allocate during init: %v", err) } for _, n := range networks { if na.IsAllocated(n) { continue } if err := a.allocateNetwork(ctx, nc, n); err != nil { log.G(ctx).Errorf("failed allocating network %s during init: %v", n.ID, err) } } // Allocate nodes in the store so far before we process watched events. var nodes []*api.Node a.store.View(func(tx store.ReadTx) { nodes, err = store.FindNodes(tx, store.All) }) if err != nil { return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err) } for _, node := range nodes { if na.IsNodeAllocated(node) { continue } if node.Attachment == nil { node.Attachment = &api.NetworkAttachment{} } node.Attachment.Network = nc.ingressNetwork.Copy() if err := a.allocateNode(ctx, nc, node); err != nil { log.G(ctx).Errorf("Failed to allocate network resources for node %s during init: %v", node.ID, err) } } // Allocate services in the store so far before we process watched events. 
var services []*api.Service a.store.View(func(tx store.ReadTx) { services, err = store.FindServices(tx, store.All) }) if err != nil { return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err) } for _, s := range services { if nc.nwkAllocator.IsServiceAllocated(s) { continue } if err := a.allocateService(ctx, nc, s); err != nil { log.G(ctx).Errorf("failed allocating service %s during init: %v", s.ID, err) } } // Allocate tasks in the store so far before we started watching. var tasks []*api.Task a.store.View(func(tx store.ReadTx) { tasks, err = store.FindTasks(tx, store.All) }) if err != nil { return fmt.Errorf("error listing all tasks in store while trying to allocate during init: %v", err) } if _, err := a.store.Batch(func(batch *store.Batch) error { for _, t := range tasks { if taskDead(t) { continue } var s *api.Service if t.ServiceID != "" { a.store.View(func(tx store.ReadTx) { s = store.GetService(tx, t.ServiceID) }) } // Populate network attachments in the task // based on service spec. a.taskCreateNetworkAttachments(t, s) if taskReadyForNetworkVote(t, s, nc) { if t.Status.State >= api.TaskStateAllocated { continue } if a.taskAllocateVote(networkVoter, t.ID) { // If the task is not attached to any network, network // allocators job is done. Immediately cast a vote so // that the task can be moved to ALLOCATED state as // soon as possible. 
if err := batch.Update(func(tx store.Tx) error { storeT := store.GetTask(tx, t.ID) if storeT == nil { return fmt.Errorf("task %s not found while trying to update state", t.ID) } updateTaskStatus(storeT, api.TaskStateAllocated, "allocated") if err := store.UpdateTask(tx, storeT); err != nil { return fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err) } return nil }); err != nil { log.G(ctx).WithError(err).Error("error updating task network") } } continue } err := batch.Update(func(tx store.Tx) error { _, err := a.allocateTask(ctx, nc, tx, t) return err }) if err != nil { log.G(ctx).Errorf("failed allocating task %s during init: %v", t.ID, err) nc.unallocatedTasks[t.ID] = t } } return nil }); err != nil { return err } a.netCtx = nc return nil }
// allocateTask allocates network resources for task t inside the store
// transaction tx and returns the store's copy of the task.
//
// If t is not yet allocated, its service (when it has one) and all of its
// attached networks must already be allocated; otherwise an error is returned
// and nothing is written. After a successful allocation, t's networks and
// endpoint are copied onto the store copy. Independently of that, if
// taskAllocateVote for the network voter returns true and the store copy is
// still below ALLOCATED, its status is raised to ALLOCATED. The store copy is
// written back only if one of those two steps changed it.
//
// The returned task is nil whenever the error is non-nil.
func (a *Allocator) allocateTask(ctx context.Context, nc *networkContext, tx store.Tx, t *api.Task) (*api.Task, error) {
	taskUpdated := false

	// Get the latest task state from the store before updating.
	storeT := store.GetTask(tx, t.ID)
	if storeT == nil {
		return nil, fmt.Errorf("could not find task %s while trying to update network allocation", t.ID)
	}

	// We might be here even if a task allocation has already
	// happened but wasn't successfully committed to store. In such
	// cases skip allocation and go straight ahead to updating the
	// store.
	if !nc.nwkAllocator.IsTaskAllocated(t) {
		if t.ServiceID != "" {
			s := store.GetService(tx, t.ServiceID)
			if s == nil {
				return nil, fmt.Errorf("could not find service %s", t.ServiceID)
			}

			if !nc.nwkAllocator.IsServiceAllocated(s) {
				return nil, fmt.Errorf("service %s to which this task %s belongs has pending allocations", s.ID, t.ID)
			}

			taskUpdateEndpoint(t, s.Endpoint)
		}

		for _, na := range t.Networks {
			n := store.GetNetwork(tx, na.Network.ID)
			if n == nil {
				return nil, fmt.Errorf("failed to retrieve network %s while allocating task %s", na.Network.ID, t.ID)
			}

			if !nc.nwkAllocator.IsAllocated(n) {
				return nil, fmt.Errorf("network %s attached to task %s not allocated yet", n.ID, t.ID)
			}

			// Point the attachment at the store's current copy of the network.
			na.Network = n
		}

		if err := nc.nwkAllocator.AllocateTask(t); err != nil {
			return nil, fmt.Errorf("failed during networktask allocation for task %s: %v", t.ID, err)
		}
		if nc.nwkAllocator.IsTaskAllocated(t) {
			// Carry the allocation results over to the store copy.
			taskUpdateNetworks(storeT, t.Networks)
			taskUpdateEndpoint(storeT, t.Endpoint)
			taskUpdated = true
		}
	}

	// Update the network allocations and moving to
	// ALLOCATED state on top of the latest store state.
	if a.taskAllocateVote(networkVoter, t.ID) {
		if storeT.Status.State < api.TaskStateAllocated {
			updateTaskStatus(storeT, api.TaskStateAllocated, "allocated")
			taskUpdated = true
		}
	}

	// Skip the store write entirely when nothing changed.
	if taskUpdated {
		if err := store.UpdateTask(tx, storeT); err != nil {
			return nil, fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err)
		}
	}

	return storeT, nil
}
// TestOrchestratorRestartWindow checks the MaxAttempts/Window part of the
// restart policy. The service allows 1 restart attempt per 500ms window with a
// 100ms restart delay. The test:
//
//  1. fails each of the two initial tasks once and expects both to be
//     restarted (replacement created at READY, then moved to RUNNING);
//  2. fails the first instance's replacement right away and expects no further
//     events for 200ms, i.e. no restart;
//  3. sleeps for one second, fails the second instance's replacement, and
//     expects it to be restarted again after the delay.
func TestOrchestratorRestartWindow(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	// No event filter is passed (the filters are commented out), so the
	// expect*/watch* calls below have to account for every event in order.
	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
				Task: api.TaskSpec{
					Restart: &api.RestartPolicy{
						Condition:   api.RestartOnAny,
						Delay:       ptypes.DurationProto(100 * time.Millisecond),
						MaxAttempts: 1,
						Window:      ptypes.DurationProto(500 * time.Millisecond),
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask1 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail the first task. Confirm that it gets restarted.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	before := time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)

	observedTask3 := watchTaskCreate(t, watch)
	expectCommit(t, watch)
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	observedTask4 := watchTaskUpdate(t, watch)
	after := time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatal("restart delay should have elapsed")
	}

	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")

	// Fail the second task. Confirm that it gets restarted.
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed}
	// NOTE(review): this value of `before` is overwritten before it is read.
	before = time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)

	observedTask5 := watchTaskCreate(t, watch)
	expectCommit(t, watch)
	assert.Equal(t, observedTask5.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask5.DesiredState, api.TaskStateReady)
	assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1")

	observedTask6 := watchTaskUpdate(t, watch) // task gets started after a delay
	expectCommit(t, watch)
	assert.Equal(t, observedTask6.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1")

	// Fail the first instance again. It should not be restarted.
	updatedTask1 = observedTask3.Copy()
	updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed}
	// NOTE(review): this value of `before` is overwritten before it is read.
	before = time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		return nil
	})
	assert.NoError(t, err)
	expectTaskUpdate(t, watch)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)
	expectCommit(t, watch)

	// No restart is expected: any further event within 200ms is a failure.
	select {
	case <-watch:
		t.Fatal("got unexpected event")
	case <-time.After(200 * time.Millisecond):
	}

	// Wait longer than the 500ms restart window configured above.
	time.Sleep(time.Second)

	// Fail the second instance again. It should get restarted because
	// enough time has elapsed since the last restarts.
	updatedTask2 = observedTask5.Copy()
	updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed}
	before = time.Now()
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	expectTaskUpdate(t, watch)
	expectCommit(t, watch)
	expectTaskUpdate(t, watch)

	observedTask7 := watchTaskCreate(t, watch)
	expectCommit(t, watch)
	assert.Equal(t, observedTask7.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask7.DesiredState, api.TaskStateReady)

	observedTask8 := watchTaskUpdate(t, watch)
	after = time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatal("restart delay should have elapsed")
	}

	assert.Equal(t, observedTask8.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask8.DesiredState, api.TaskStateRunning)
	assert.Equal(t, observedTask8.ServiceAnnotations.Name, "name1")
}
func TestTaskHistory(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) assert.NoError(t, s.Update(func(tx store.Tx) error { store.CreateCluster(tx, &api.Cluster{ ID: identity.NewID(), Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: store.DefaultClusterName, }, Orchestration: api.OrchestrationConfig{ TaskHistoryRetentionLimit: 2, }, }, }) return nil })) taskReaper := NewTaskReaper(s) defer taskReaper.Stop() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/) defer cancel() // Create a service with two instances specified before the orchestrator is // started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { j1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, }, Task: api.TaskSpec{ Restart: &api.RestartPolicy{ Condition: api.RestartOnAny, Delay: ptypes.DurationProto(0), }, }, }, } assert.NoError(t, store.CreateService(tx, j1)) return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() go taskReaper.Run() observedTask1 := watchTaskCreate(t, watch) assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") observedTask2 := watchTaskCreate(t, watch) assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") // Fail both tasks. They should both get restarted. 
updatedTask1 := observedTask1.Copy() updatedTask1.Status.State = api.TaskStateFailed updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"} updatedTask2 := observedTask2.Copy() updatedTask2.Status.State = api.TaskStateFailed updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"} err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask1)) assert.NoError(t, store.UpdateTask(tx, updatedTask2)) return nil }) expectCommit(t, watch) expectTaskUpdate(t, watch) expectTaskUpdate(t, watch) expectCommit(t, watch) expectTaskUpdate(t, watch) observedTask3 := watchTaskCreate(t, watch) assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") expectTaskUpdate(t, watch) observedTask4 := watchTaskCreate(t, watch) assert.Equal(t, observedTask4.Status.State, api.TaskStateNew) assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") // Fail these replacement tasks. Since TaskHistory is set to 2, this // should cause the oldest tasks for each instance to get deleted. updatedTask3 := observedTask3.Copy() updatedTask3.Status.State = api.TaskStateFailed updatedTask4 := observedTask4.Copy() updatedTask4.Status.State = api.TaskStateFailed err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask3)) assert.NoError(t, store.UpdateTask(tx, updatedTask4)) return nil }) deletedTask1 := watchTaskDelete(t, watch) deletedTask2 := watchTaskDelete(t, watch) assert.Equal(t, api.TaskStateFailed, deletedTask1.Status.State) assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name) assert.Equal(t, api.TaskStateFailed, deletedTask2.Status.State) assert.Equal(t, "original", deletedTask2.ServiceAnnotations.Name) var foundTasks []*api.Task s.View(func(tx store.ReadTx) { foundTasks, err = store.FindTasks(tx, store.All) }) assert.NoError(t, err) assert.Len(t, foundTasks, 4) }
func TestAllocator(t *testing.T) { s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() a, err := New(s) assert.NoError(t, err) assert.NotNil(t, a) // Try adding some objects to store before allocator is started assert.NoError(t, s.Update(func(tx store.Tx) error { n1 := &api.Network{ ID: "testID1", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test1", }, }, } assert.NoError(t, store.CreateNetwork(tx, n1)) s1 := &api.Service{ ID: "testServiceID1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service1", }, Task: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID1", }, }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s1)) t1 := &api.Task{ ID: "testTaskID1", Status: api.TaskStatus{ State: api.TaskStateNew, }, Networks: []*api.NetworkAttachment{ { Network: n1, }, }, } assert.NoError(t, store.CreateTask(tx, t1)) return nil })) netWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateNetwork{}, state.EventDeleteNetwork{}) defer cancel() taskWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}, state.EventDeleteTask{}) defer cancel() serviceWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateService{}, state.EventDeleteService{}) defer cancel() // Start allocator go func() { assert.NoError(t, a.Run(context.Background())) }() // Now verify if we get network and tasks updated properly watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) watchService(t, serviceWatch, false, nil) // Add new networks/tasks/services after allocator is started. 
assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := &api.Network{ ID: "testID2", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test2", }, }, } assert.NoError(t, store.CreateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "testServiceID2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service2", }, Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s2)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := &api.Task{ ID: "testTaskID2", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID2", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) // Now try adding a task which depends on a network before adding the network. 
n3 := &api.Network{ ID: "testID3", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test3", }, }, } assert.NoError(t, s.Update(func(tx store.Tx) error { t3 := &api.Task{ ID: "testTaskID3", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n3, }, }, } assert.NoError(t, store.CreateTask(tx, t3)) return nil })) // Wait for a little bit of time before adding network just to // test network is not available while task allocation is // going through time.Sleep(10 * time.Millisecond) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n3)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteTask(tx, "testTaskID3")) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { t5 := &api.Task{ ID: "testTaskID5", Spec: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, }, Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, ServiceID: "testServiceID2", } assert.NoError(t, store.CreateTask(tx, t5)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, "testID3")) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, "testServiceID2")) return nil })) watchService(t, serviceWatch, false, nil) // Try to create a task with no network attachments and test // that it moves to ALLOCATED state. 
assert.NoError(t, s.Update(func(tx store.Tx) error { t4 := &api.Task{ ID: "testTaskID4", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t4)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := store.GetNetwork(tx, "testID2") require.NotEqual(t, nil, n2) assert.NoError(t, store.UpdateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchNetwork(t, netWatch, true, nil) // Try updating task which is already allocated assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := store.GetTask(tx, "testTaskID2") require.NotEqual(t, nil, t2) assert.NoError(t, store.UpdateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) watchTask(t, s, taskWatch, true, nil) // Try adding networks with conflicting network resources and // add task which attaches to a network which gets allocated // later and verify if task reconciles and moves to ALLOCATED. 
n4 := &api.Network{ ID: "testID4", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test4", }, DriverConfig: &api.Driver{ Name: "overlay", Options: map[string]string{ "com.docker.network.driver.overlay.vxlanid_list": "328", }, }, }, } n5 := n4.Copy() n5.ID = "testID5" n5.Spec.Annotations.Name = "test5" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n4)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n5)) return nil })) watchNetwork(t, netWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t6 := &api.Task{ ID: "testTaskID6", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n5, }, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting network. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, n4.ID)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) // Try adding services with conflicting port configs and add // task which is part of the service whose allocation hasn't // happened and when that happens later and verify if task // reconciles and moves to ALLOCATED. 
s3 := &api.Service{ ID: "testServiceID3", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service3", }, Endpoint: &api.EndpointSpec{ Ports: []*api.PortConfig{ { Name: "http", TargetPort: 80, PublishedPort: 8080, }, }, }, }, } s4 := s3.Copy() s4.ID = "testServiceID4" s4.Spec.Annotations.Name = "service4" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s3)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s4)) return nil })) watchService(t, serviceWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t7 := &api.Task{ ID: "testTaskID7", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID4", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t7)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting service. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, s3.ID)) return nil })) watchService(t, serviceWatch, false, nil) watchTask(t, s, taskWatch, false, isValidTask) a.Stop() }
func (ce *ConstraintEnforcer) shutdownNoncompliantTasks(node *api.Node) { // If the availability is "drain", the orchestrator will // shut down all tasks. // If the availability is "pause", we shouldn't touch // the tasks on this node. if node.Spec.Availability != api.NodeAvailabilityActive { return } var ( tasks []*api.Task err error ) ce.store.View(func(tx store.ReadTx) { tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID)) }) if err != nil { log.L.WithError(err).Errorf("failed to list tasks for node ID %s", node.ID) } var availableMemoryBytes, availableNanoCPUs int64 if node.Description != nil && node.Description.Resources != nil { availableMemoryBytes = node.Description.Resources.MemoryBytes availableNanoCPUs = node.Description.Resources.NanoCPUs } removeTasks := make(map[string]*api.Task) // TODO(aaronl): The set of tasks removed will be // nondeterministic because it depends on the order of // the slice returned from FindTasks. We could do // a separate pass over the tasks for each type of // resource, and sort by the size of the reservation // to remove the most resource-intensive tasks. for _, t := range tasks { if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateRunning { continue } // Ensure that the task still meets scheduling // constraints. if t.Spec.Placement != nil && len(t.Spec.Placement.Constraints) != 0 { constraints, _ := constraint.Parse(t.Spec.Placement.Constraints) if !constraint.NodeMatches(constraints, node) { removeTasks[t.ID] = t continue } } // Ensure that the task assigned to the node // still satisfies the resource limits. 
if t.Spec.Resources != nil && t.Spec.Resources.Reservations != nil { if t.Spec.Resources.Reservations.MemoryBytes > availableMemoryBytes { removeTasks[t.ID] = t continue } if t.Spec.Resources.Reservations.NanoCPUs > availableNanoCPUs { removeTasks[t.ID] = t continue } availableMemoryBytes -= t.Spec.Resources.Reservations.MemoryBytes availableNanoCPUs -= t.Spec.Resources.Reservations.NanoCPUs } } if len(removeTasks) != 0 { _, err := ce.store.Batch(func(batch *store.Batch) error { for _, t := range removeTasks { err := batch.Update(func(tx store.Tx) error { t = store.GetTask(tx, t.ID) if t == nil || t.DesiredState > api.TaskStateRunning { return nil } t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) }) if err != nil { log.L.WithError(err).Errorf("failed to shut down task %s", t.ID) } } return nil }) if err != nil { log.L.WithError(err).Errorf("failed to shut down tasks") } } }
func TestOrchestratorRestartOnNone(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/) defer cancel() // Create a service with two instances specified before the orchestrator is // started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { j1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Restart: &api.RestartPolicy{ Condition: api.RestartOnNone, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, }, }, } assert.NoError(t, store.CreateService(tx, j1)) return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() observedTask1 := watchTaskCreate(t, watch) assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") observedTask2 := watchTaskCreate(t, watch) assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") // Fail the first task. Confirm that it does not get restarted. updatedTask1 := observedTask1.Copy() updatedTask1.Status.State = api.TaskStateFailed err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask1)) return nil }) assert.NoError(t, err) expectCommit(t, watch) expectTaskUpdate(t, watch) expectCommit(t, watch) expectTaskUpdate(t, watch) expectCommit(t, watch) select { case <-watch: t.Fatal("got unexpected event") case <-time.After(100 * time.Millisecond): } // Mark the second task as completed. Confirm that it does not get restarted. 
updatedTask2 := observedTask2.Copy() updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted} err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask2)) return nil }) assert.NoError(t, err) expectTaskUpdate(t, watch) expectCommit(t, watch) expectTaskUpdate(t, watch) expectCommit(t, watch) select { case <-watch: t.Fatal("got unexpected event") case <-time.After(100 * time.Millisecond): } }
func TestSchedulerFaultyNode(t *testing.T) { ctx := context.Background() taskTemplate := &api.Task{ ServiceID: "service1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } node1 := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } node2 := &api.Node{ ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial nodes, and one task assigned to node id1 assert.NoError(t, store.CreateNode(tx, node1)) assert.NoError(t, store.CreateNode(tx, node2)) task1 := taskTemplate.Copy() task1.ID = "id1" task1.NodeID = "id1" task1.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() for i := 0; i != 8; i++ { // Simulate a task failure cycle newTask := taskTemplate.Copy() newTask.ID = identity.NewID() err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, newTask)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, newTask.ID, assignment.ID) if i < 5 { // The first 5 attempts should be assigned to node id2 because // it has no replicas of the service. assert.Equal(t, "id2", assignment.NodeID) } else { // The next ones should be assigned to id1, since we'll // flag id2 as potentially faulty. 
assert.Equal(t, "id1", assignment.NodeID) } err = s.Update(func(tx store.Tx) error { newTask := store.GetTask(tx, newTask.ID) require.NotNil(t, newTask) newTask.Status.State = api.TaskStateFailed assert.NoError(t, store.UpdateTask(tx, newTask)) return nil }) assert.NoError(t, err) } }
func TestScheduler(t *testing.T) { ctx := context.Background() initialNodeSet := []*api.Node{ { ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "id3", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ { ID: "id1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStateAssigned, }, NodeID: initialNodeSet[0].ID, }, { ID: "id2", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, { ID: "id3", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() assignment1 := watchAssignment(t, watch) // must assign to id2 or id3 since id1 already has a task assert.Regexp(t, assignment1.NodeID, "(id2|id3)") assignment2 := watchAssignment(t, watch) // must assign to id2 or id3 since id1 already has a task if assignment1.NodeID == "id2" { assert.Equal(t, "id3", assignment2.NodeID) } else { assert.Equal(t, "id2", assignment2.NodeID) } err = 
s.Update(func(tx store.Tx) error { // Update each node to make sure this doesn't mess up the // scheduler's state. for _, n := range initialNodeSet { assert.NoError(t, store.UpdateNode(tx, n)) } return nil }) assert.NoError(t, err) err = s.Update(func(tx store.Tx) error { // Delete the task associated with node 1 so it's now the most lightly // loaded node. assert.NoError(t, store.DeleteTask(tx, "id1")) // Create a new task. It should get assigned to id1. t4 := &api.Task{ ID: "id4", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name4", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t4)) return nil }) assert.NoError(t, err) assignment3 := watchAssignment(t, watch) assert.Equal(t, "id1", assignment3.NodeID) // Update a task to make it unassigned. It should get assigned by the // scheduler. err = s.Update(func(tx store.Tx) error { // Remove assignment from task id4. It should get assigned // to node id1. t4 := &api.Task{ ID: "id4", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name4", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.UpdateTask(tx, t4)) return nil }) assert.NoError(t, err) assignment4 := watchAssignment(t, watch) assert.Equal(t, "id1", assignment4.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node, then remove it. No tasks should ever // be assigned to it. node := &api.Node{ ID: "removednode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "removednode", }, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, } assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.DeleteNode(tx, node.ID)) // Create an unassigned task. 
task := &api.Task{ ID: "removednode", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "removednode", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, task)) return nil }) assert.NoError(t, err) assignmentRemovedNode := watchAssignment(t, watch) assert.NotEqual(t, "removednode", assignmentRemovedNode.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node. It should be used for the next // assignment. n4 := &api.Node{ ID: "id4", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name4", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, n4)) // Create an unassigned task. t5 := &api.Task{ ID: "id5", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name5", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t5)) return nil }) assert.NoError(t, err) assignment5 := watchAssignment(t, watch) assert.Equal(t, "id4", assignment5.NodeID) err = s.Update(func(tx store.Tx) error { // Create a non-ready node. It should NOT be used for the next // assignment. n5 := &api.Node{ ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, }, Status: api.NodeStatus{ State: api.NodeStatus_DOWN, }, } assert.NoError(t, store.CreateNode(tx, n5)) // Create an unassigned task. t6 := &api.Task{ ID: "id6", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name6", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil }) assert.NoError(t, err) assignment6 := watchAssignment(t, watch) assert.NotEqual(t, "id5", assignment6.NodeID) err = s.Update(func(tx store.Tx) error { // Update node id5 to put it in the READY state. 
n5 := &api.Node{ ID: "id5", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name5", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.UpdateNode(tx, n5)) // Create an unassigned task. Should be assigned to the // now-ready node. t7 := &api.Task{ ID: "id7", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name7", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t7)) return nil }) assert.NoError(t, err) assignment7 := watchAssignment(t, watch) assert.Equal(t, "id5", assignment7.NodeID) err = s.Update(func(tx store.Tx) error { // Create a ready node, then immediately take it down. The next // unassigned task should NOT be assigned to it. n6 := &api.Node{ ID: "id6", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name6", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, n6)) n6.Status.State = api.NodeStatus_DOWN assert.NoError(t, store.UpdateNode(tx, n6)) // Create an unassigned task. t8 := &api.Task{ ID: "id8", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name8", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } assert.NoError(t, store.CreateTask(tx, t8)) return nil }) assert.NoError(t, err) assignment8 := watchAssignment(t, watch) assert.NotEqual(t, "id6", assignment8.NodeID) }
func TestSchedulerResourceConstraintDeadTask(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. node := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } bigTask1 := &api.Task{ DesiredState: api.TaskStateRunning, ID: "id1", Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 8e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "big", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } bigTask2 := bigTask1.Copy() bigTask2.ID = "id2" s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.CreateTask(tx, bigTask1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() // The task fits, so it should get assigned assignment := watchAssignment(t, watch) assert.Equal(t, "id1", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) err = s.Update(func(tx store.Tx) error { // Add a second task. It shouldn't get assigned because of // resource constraints. 
return store.CreateTask(tx, bigTask2) }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { tasks, err := store.FindTasks(tx, store.ByNodeID(node.ID)) assert.NoError(t, err) assert.Len(t, tasks, 1) }) err = s.Update(func(tx store.Tx) error { // The task becomes dead updatedTask := store.GetTask(tx, bigTask1.ID) updatedTask.Status.State = api.TaskStateShutdown return store.UpdateTask(tx, updatedTask) }) assert.NoError(t, err) // With the first task no longer consuming resources, the second // one can be scheduled. assignment = watchAssignment(t, watch) assert.Equal(t, "id2", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) }
func TestLogBrokerSelector(t *testing.T) { ctx, ca, _, serverAddr, brokerAddr, done := testLogBrokerEnv(t) defer done() client, clientDone := testLogClient(t, serverAddr) defer clientDone() agent1, agent1Security, agent1Done := testBrokerClient(t, ca, brokerAddr) defer agent1Done() agent1subscriptions := listenSubscriptions(ctx, t, agent1) agent2, agent2Security, agent2Done := testBrokerClient(t, ca, brokerAddr) defer agent2Done() agent2subscriptions := listenSubscriptions(ctx, t, agent2) // Subscribe to a task. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "task", }) })) _, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: true, }, Selector: &api.LogSelector{ TaskIDs: []string{"task"}, }, }) require.NoError(t, err) // Since it's not assigned to any agent, nobody should receive it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Assign the task to agent-1. Make sure it's received by agent-1 but *not* // agent-2. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { task := store.GetTask(tx, "task") require.NotNil(t, task) task.NodeID = agent1Security.ServerTLSCreds.NodeID() return store.UpdateTask(tx, task) })) ensureSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Subscribe to a service. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateService(tx, &api.Service{ ID: "service", }) })) _, err = client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: true, }, Selector: &api.LogSelector{ ServiceIDs: []string{"service"}, }, }) require.NoError(t, err) // Since there are no corresponding tasks, nobody should receive it. 
ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Create a task that does *NOT* belong to our service and assign it to node-1. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "wrong-task", ServiceID: "wrong-service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // Ensure agent-1 doesn't receive it. ensureNoSubscription(t, agent1subscriptions) // Now create another task that does belong to our service and assign it to node-1. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-1", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // Make sure agent-1 receives it... ensureSubscription(t, agent1subscriptions) // ...and agent-2 does not. ensureNoSubscription(t, agent2subscriptions) // Create another task, same as above. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-2", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // agent-1 should *not* receive it anymore since the subscription was already delivered. // agent-2 should still not get it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Now, create another one and assign it to agent-2. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-3", ServiceID: "service", NodeID: agent2Security.ServerTLSCreds.NodeID(), }) })) // Make sure it's delivered to agent-2. ensureSubscription(t, agent2subscriptions) // it shouldn't do anything for agent-1. ensureNoSubscription(t, agent1subscriptions) }
func (d *Dispatcher) processUpdates(ctx context.Context) { var ( taskUpdates map[string]*api.TaskStatus nodeUpdates map[string]nodeUpdate ) d.taskUpdatesLock.Lock() if len(d.taskUpdates) != 0 { taskUpdates = d.taskUpdates d.taskUpdates = make(map[string]*api.TaskStatus) } d.taskUpdatesLock.Unlock() d.nodeUpdatesLock.Lock() if len(d.nodeUpdates) != 0 { nodeUpdates = d.nodeUpdates d.nodeUpdates = make(map[string]nodeUpdate) } d.nodeUpdatesLock.Unlock() if len(taskUpdates) == 0 && len(nodeUpdates) == 0 { return } log := log.G(ctx).WithFields(logrus.Fields{ "method": "(*Dispatcher).processUpdates", }) _, err := d.store.Batch(func(batch *store.Batch) error { for taskID, status := range taskUpdates { err := batch.Update(func(tx store.Tx) error { logger := log.WithField("task.id", taskID) task := store.GetTask(tx, taskID) if task == nil { logger.Errorf("task unavailable") return nil } logger = logger.WithField("state.transition", fmt.Sprintf("%v->%v", task.Status.State, status.State)) if task.Status == *status { logger.Debug("task status identical, ignoring") return nil } if task.Status.State > status.State { logger.Debug("task status invalid transition") return nil } task.Status = *status if err := store.UpdateTask(tx, task); err != nil { logger.WithError(err).Error("failed to update task status") return nil } logger.Debug("task status updated") return nil }) if err != nil { log.WithError(err).Error("dispatcher task update transaction failed") } } for nodeID, nodeUpdate := range nodeUpdates { err := batch.Update(func(tx store.Tx) error { logger := log.WithField("node.id", nodeID) node := store.GetNode(tx, nodeID) if node == nil { logger.Errorf("node unavailable") return nil } if nodeUpdate.status != nil { node.Status.State = nodeUpdate.status.State node.Status.Message = nodeUpdate.status.Message if nodeUpdate.status.Addr != "" { node.Status.Addr = nodeUpdate.status.Addr } } if nodeUpdate.description != nil { node.Description = nodeUpdate.description } if err := 
store.UpdateNode(tx, node); err != nil { logger.WithError(err).Error("failed to update node status") return nil } logger.Debug("node status updated") return nil }) if err != nil { log.WithError(err).Error("dispatcher node update transaction failed") } } return nil }) if err != nil { log.WithError(err).Error("dispatcher batch failed") } d.processUpdatesCond.Broadcast() }
func TestUpdater(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) // Move tasks to their desired state. watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task if task.Status.State == task.DesiredState { continue } err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = task.DesiredState return store.UpdateTask(tx, task) }) assert.NoError(t, err) } } }() instances := 3 cluster := &api.Cluster{ // test cluster configuration propagation to task creation. Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: "default", }, }, } service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: uint64(instances), }, }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", // This won't apply in this test because we set the old tasks to DEAD. 
StopGracePeriod: ptypes.DurationProto(time.Hour), }, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateCluster(tx, cluster)) assert.NoError(t, store.CreateService(tx, service)) for i := 0; i < instances; i++ { assert.NoError(t, store.CreateTask(tx, newTask(cluster, service, uint64(i)))) } return nil }) assert.NoError(t, err) originalTasks := getRunnableServiceTasks(t, s, service) for _, task := range originalTasks { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) assert.Nil(t, task.LogDriver) // should be left alone } service.Spec.Task.GetContainer().Image = "v:2" service.Spec.Task.LogDriver = &api.Driver{Name: "tasklogdriver"} updater := NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks := getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:2", task.Spec.GetContainer().Image) assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // pick up from task } service.Spec.Task.GetContainer().Image = "v:3" cluster.Spec.DefaultLogDriver = &api.Driver{Name: "clusterlogdriver"} // make cluster default logdriver. service.Spec.Update = &api.UpdateConfig{ Parallelism: 1, } updater = NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks = getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:3", task.Spec.GetContainer().Image) assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // still pick up from task } service.Spec.Task.GetContainer().Image = "v:4" service.Spec.Task.LogDriver = nil // use cluster default now. 
service.Spec.Update = &api.UpdateConfig{ Parallelism: 1, Delay: *ptypes.DurationProto(10 * time.Millisecond), } updater = NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks = getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:4", task.Spec.GetContainer().Image) assert.Equal(t, cluster.Spec.DefaultLogDriver, task.LogDriver) // pick up from cluster } }
func (s *Scheduler) applySchedulingDecisions(ctx context.Context, schedulingDecisions map[string]schedulingDecision) (successful, failed []schedulingDecision) { if len(schedulingDecisions) == 0 { return } successful = make([]schedulingDecision, 0, len(schedulingDecisions)) // Apply changes to master store applied, err := s.store.Batch(func(batch *store.Batch) error { for len(schedulingDecisions) > 0 { err := batch.Update(func(tx store.Tx) error { // Update exactly one task inside this Update // callback. for taskID, decision := range schedulingDecisions { delete(schedulingDecisions, taskID) t := store.GetTask(tx, taskID) if t == nil { // Task no longer exists nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID) if err == nil && nodeInfo.removeTask(decision.new) { s.nodeSet.updateNode(nodeInfo) } delete(s.allTasks, decision.old.ID) continue } if t.Status.State == decision.new.Status.State && t.Status.Message == decision.new.Status.Message { // No changes, ignore continue } if t.Status.State >= api.TaskStateAssigned { nodeInfo, err := s.nodeSet.nodeInfo(decision.new.NodeID) if err != nil { failed = append(failed, decision) continue } node := store.GetNode(tx, decision.new.NodeID) if node == nil || node.Meta.Version != nodeInfo.Meta.Version { // node is out of date failed = append(failed, decision) continue } } if err := store.UpdateTask(tx, decision.new); err != nil { log.G(ctx).Debugf("scheduler failed to update task %s; will retry", taskID) failed = append(failed, decision) continue } successful = append(successful, decision) return nil } return nil }) if err != nil { return err } } return nil }) if err != nil { log.G(ctx).WithError(err).Error("scheduler tick transaction failed") failed = append(failed, successful[applied:]...) successful = successful[:applied] } return }
func TestUpdaterStopGracePeriod(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) // Move tasks to their desired state. watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) // Explicitly do not set task state to // DEAD to trigger StopGracePeriod if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateRunning { task.Status.State = api.TaskStateRunning return store.UpdateTask(tx, task) } return nil }) assert.NoError(t, err) } } }() var instances uint64 = 3 service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", StopGracePeriod: ptypes.DurationProto(100 * time.Millisecond), }, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: instances, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, service)) for i := uint64(0); i < instances; i++ { task := newTask(nil, service, uint64(i)) task.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) originalTasks := getRunnableServiceTasks(t, s, service) for _, task := range originalTasks { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) } before := time.Now() service.Spec.Task.GetContainer().Image = "v:2" updater := NewUpdater(s, NewRestartSupervisor(s)) // Override the default (1 minute) to speed up the test. 
updater.restarts.taskTimeout = 100 * time.Millisecond updater.Run(ctx, nil, service, getRunnableServiceTasks(t, s, service)) updatedTasks := getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:2", task.Spec.GetContainer().Image) } after := time.Now() // At least 100 ms should have elapsed. Only check the lower bound, // because the system may be slow and it could have taken longer. if after.Sub(before) < 100*time.Millisecond { t.Fatal("stop timeout should have elapsed") } }
func TestOrchestratorRestartOnAny(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/) defer cancel() // Create a service with two instances specified before the orchestrator is // started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { j1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Restart: &api.RestartPolicy{ Condition: api.RestartOnAny, Delay: ptypes.DurationProto(0), }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 2, }, }, }, } assert.NoError(t, store.CreateService(tx, j1)) return nil }) assert.NoError(t, err) // Start the orchestrator. go func() { assert.NoError(t, orchestrator.Run(ctx)) }() observedTask1 := watchTaskCreate(t, watch) assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") observedTask2 := watchTaskCreate(t, watch) assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") // Fail the first task. Confirm that it gets restarted. 
updatedTask1 := observedTask1.Copy() updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed} err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask1)) return nil }) assert.NoError(t, err) expectCommit(t, watch) expectTaskUpdate(t, watch) expectCommit(t, watch) expectTaskUpdate(t, watch) observedTask3 := watchTaskCreate(t, watch) assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") expectCommit(t, watch) observedTask4 := watchTaskUpdate(t, watch) assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") // Mark the second task as completed. Confirm that it gets restarted. updatedTask2 := observedTask2.Copy() updatedTask2.Status = api.TaskStatus{State: api.TaskStateCompleted} err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.UpdateTask(tx, updatedTask2)) return nil }) assert.NoError(t, err) expectCommit(t, watch) expectTaskUpdate(t, watch) expectCommit(t, watch) expectTaskUpdate(t, watch) observedTask5 := watchTaskCreate(t, watch) assert.Equal(t, observedTask5.Status.State, api.TaskStateNew) assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name1") expectCommit(t, watch) observedTask6 := watchTaskUpdate(t, watch) assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning) assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1") }