func (u *Updater) useExistingTask(ctx context.Context, slot slot, existing *api.Task) {
	var removeTasks []*api.Task
	for _, t := range slot {
		if t != existing {
			removeTasks = append(removeTasks, t)
		}
	}
	if len(removeTasks) != 0 || existing.DesiredState != api.TaskStateRunning {
		_, err := u.store.Batch(func(batch *store.Batch) error {
			u.removeOldTasks(ctx, batch, removeTasks)

			if existing.DesiredState != api.TaskStateRunning {
				err := batch.Update(func(tx store.Tx) error {
					t := store.GetTask(tx, existing.ID)
					if t == nil {
						return fmt.Errorf("task %s not found while trying to start it", existing.ID)
					}
					if t.DesiredState >= api.TaskStateRunning {
						return fmt.Errorf("task %s was already started when reached by updater", existing.ID)
					}
					t.DesiredState = api.TaskStateRunning
					return store.UpdateTask(tx, t)
				})
				if err != nil {
					log.G(ctx).WithError(err).Errorf("starting task %s failed", existing.ID)
				}
			}

			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Error("updater batch transaction failed")
		}
	}
}
func (r *Supervisor) waitRestart(ctx context.Context, oldDelay *delayedStart, cluster *api.Cluster, taskID string) {
	// Wait for the last restart delay to elapse.
	select {
	case <-oldDelay.doneCh:
	case <-ctx.Done():
		return
	}

	// Start the next restart
	err := r.store.Update(func(tx store.Tx) error {
		t := store.GetTask(tx, taskID)
		if t == nil {
			return nil
		}
		if t.DesiredState > api.TaskStateRunning {
			return nil
		}
		service := store.GetService(tx, t.ServiceID)
		if service == nil {
			return nil
		}
		return r.Restart(ctx, tx, cluster, service, *t)
	})

	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to restart task after waiting for previous restart")
	}
}
// DetachNetwork allows the node to request the release of
// the resources associated with the network attachment.
// - Returns `InvalidArgument` if attachment ID is not provided.
// - Returns `NotFound` if the attachment is not found.
// - Returns an error if the deletion fails.
func (ra *ResourceAllocator) DetachNetwork(ctx context.Context, request *api.DetachNetworkRequest) (*api.DetachNetworkResponse, error) {
	if request.AttachmentID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}

	nodeInfo, err := ca.RemoteNode(ctx)
	if err != nil {
		return nil, err
	}

	if err := ra.store.Update(func(tx store.Tx) error {
		t := store.GetTask(tx, request.AttachmentID)
		if t == nil {
			return grpc.Errorf(codes.NotFound, "attachment %s not found", request.AttachmentID)
		}
		if t.NodeID != nodeInfo.NodeID {
			return grpc.Errorf(codes.PermissionDenied, "attachment %s doesn't belong to this node", request.AttachmentID)
		}

		return store.DeleteTask(tx, request.AttachmentID)
	}); err != nil {
		return nil, err
	}

	return &api.DetachNetworkResponse{}, nil
}
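// detachAttachment is a minimal, hypothetical node-side sketch of calling the
// handler above. The request shape comes from the handler; the client type name
// api.ResourceAllocatorClient is an assumption, as is the attachmentID value
// (the ID previously returned by AttachNetwork).
func detachAttachment(ctx context.Context, client api.ResourceAllocatorClient, attachmentID string) error {
	_, err := client.DetachNetwork(ctx, &api.DetachNetworkRequest{
		AttachmentID: attachmentID,
	})
	// err carries codes.NotFound or codes.PermissionDenied as described above.
	return err
}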
// removeOldTasks shuts down the given tasks and returns one of the tasks that
// was shut down, or an error.
func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, removeTasks []*api.Task) (*api.Task, error) {
	var (
		lastErr     error
		removedTask *api.Task
	)
	for _, original := range removeTasks {
		err := batch.Update(func(tx store.Tx) error {
			t := store.GetTask(tx, original.ID)
			if t == nil {
				return fmt.Errorf("task %s not found while trying to shut it down", original.ID)
			}
			if t.DesiredState > api.TaskStateRunning {
				return fmt.Errorf("task %s was already shut down when reached by updater", original.ID)
			}
			t.DesiredState = api.TaskStateShutdown
			return store.UpdateTask(tx, t)
		})
		if err != nil {
			lastErr = err
		} else {
			removedTask = original
		}
	}

	if removedTask == nil {
		return nil, lastErr
	}
	return removedTask, nil
}
// StartNow moves the task into the RUNNING state so it will proceed to start
// up.
func (r *Supervisor) StartNow(tx store.Tx, taskID string) error {
	t := store.GetTask(tx, taskID)
	if t == nil || t.DesiredState >= api.TaskStateRunning {
		return nil
	}
	t.DesiredState = api.TaskStateRunning
	return store.UpdateTask(tx, t)
}
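// startTaskNow is a hypothetical helper showing how a caller might drive
// StartNow: the mutation happens inside a caller-supplied transaction, so it
// composes with store.Update here, or with batch.Update as initTasks does
// later in this listing. The function name and taskID value are illustrative.
func startTaskNow(s *store.MemoryStore, r *Supervisor, taskID string) error {
	return s.Update(func(tx store.Tx) error {
		return r.StartNow(tx, taskID)
	})
}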
func (u *Updater) updateTask(ctx context.Context, original, updated *api.Task) error {
	log.G(ctx).Debugf("replacing %s with %s", original.ID, updated.ID)

	// Kick off the watch before even creating the updated task. This is in order to avoid missing any event.
	taskUpdates, cancel := state.Watch(u.watchQueue, state.EventUpdateTask{
		Task:   &api.Task{ID: updated.ID},
		Checks: []state.TaskCheckFunc{state.TaskCheckID},
	})
	defer cancel()

	var delayStartCh <-chan struct{}
	// Atomically create the updated task and bring down the old one.
	err := u.store.Update(func(tx store.Tx) error {
		t := store.GetTask(tx, original.ID)
		if t == nil {
			return fmt.Errorf("task %s not found while trying to update it", original.ID)
		}
		if t.DesiredState > api.TaskStateRunning {
			return fmt.Errorf("task %s was already shut down when reached by updater", original.ID)
		}
		t.DesiredState = api.TaskStateShutdown
		if err := store.UpdateTask(tx, t); err != nil {
			return err
		}

		if err := store.CreateTask(tx, updated); err != nil {
			return err
		}

		// Wait for the old task to stop or time out, and then set the new one
		// to RUNNING.
		delayStartCh = u.restarts.DelayStart(ctx, tx, original, updated.ID, 0, true)

		return nil
	})
	if err != nil {
		return err
	}

	<-delayStartCh

	// Wait for the new task to come up.
	// TODO(aluzzardi): Consider adding a timeout here.
	for {
		select {
		case e := <-taskUpdates:
			updated = e.(state.EventUpdateTask).Task
			if updated.Status.State >= api.TaskStateRunning {
				return nil
			}
		case <-u.stopChan:
			return nil
		}
	}
}
func (d *Dispatcher) processTaskUpdates() {
	d.taskUpdatesLock.Lock()
	if len(d.taskUpdates) == 0 {
		d.taskUpdatesLock.Unlock()
		return
	}
	taskUpdates := d.taskUpdates
	d.taskUpdates = make(map[string]*api.TaskStatus)
	d.taskUpdatesLock.Unlock()

	log := log.G(d.ctx).WithFields(logrus.Fields{
		"method": "(*Dispatcher).processTaskUpdates",
	})

	_, err := d.store.Batch(func(batch *store.Batch) error {
		for taskID, status := range taskUpdates {
			err := batch.Update(func(tx store.Tx) error {
				logger := log.WithField("task.id", taskID)
				task := store.GetTask(tx, taskID)
				if task == nil {
					logger.Errorf("task unavailable")
					return nil
				}

				logger = logger.WithField("state.transition", fmt.Sprintf("%v->%v", task.Status.State, status.State))

				if task.Status == *status {
					logger.Debug("task status identical, ignoring")
					return nil
				}

				if task.Status.State > status.State {
					logger.Debug("task status invalid transition")
					return nil
				}

				task.Status = *status
				if err := store.UpdateTask(tx, task); err != nil {
					logger.WithError(err).Error("failed to update task status")
					return nil
				}
				logger.Debug("task status updated")
				return nil
			})
			if err != nil {
				log.WithError(err).Error("dispatcher transaction failed")
			}
		}
		return nil
	})
	if err != nil {
		log.WithError(err).Error("dispatcher batch failed")
	}
}
func (g *GlobalOrchestrator) removeTask(ctx context.Context, batch *store.Batch, t *api.Task) {
	// set existing task DesiredState to TaskStateShutdown
	// TODO(aaronl): optimistic update?
	err := batch.Update(func(tx store.Tx) error {
		t = store.GetTask(tx, t.ID)
		if t != nil {
			t.DesiredState = api.TaskStateShutdown
			return store.UpdateTask(tx, t)
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: removeTask failed to remove %s", t.ID)
	}
}
// restartTask calls the restart supervisor's Restart function, which
// sets a task's desired state to SHUTDOWN and restarts it if the restart
// policy calls for it to be restarted.
func (g *GlobalOrchestrator) restartTask(ctx context.Context, taskID string, serviceID string) {
	err := g.store.Update(func(tx store.Tx) error {
		t := store.GetTask(tx, taskID)
		if t == nil || t.DesiredState > api.TaskStateRunning {
			return nil
		}
		service := store.GetService(tx, serviceID)
		if service == nil {
			return nil
		}
		return g.restarts.Restart(ctx, tx, service, *t)
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: restartTask transaction failed")
	}
}
// GetTask returns a Task given a TaskID.
// - Returns `InvalidArgument` if TaskID is not provided.
// - Returns `NotFound` if the Task is not found.
func (s *Server) GetTask(ctx context.Context, request *api.GetTaskRequest) (*api.GetTaskResponse, error) {
	if request.TaskID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}

	var task *api.Task
	s.store.View(func(tx store.ReadTx) {
		task = store.GetTask(tx, request.TaskID)
	})
	if task == nil {
		return nil, grpc.Errorf(codes.NotFound, "task %s not found", request.TaskID)
	}

	return &api.GetTaskResponse{
		Task: task,
	}, nil
}
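// getTask is a minimal, hypothetical control-API caller for the handler above;
// the request and response shapes are taken from the handler, while the helper
// name and the assumption that `client` is a connected api.ControlClient are
// illustrative.
func getTask(ctx context.Context, client api.ControlClient, taskID string) (*api.Task, error) {
	resp, err := client.GetTask(ctx, &api.GetTaskRequest{TaskID: taskID})
	if err != nil {
		// A NotFound error is returned when the task does not exist.
		return nil, err
	}
	return resp.Task, nil
}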
func (r *ReplicatedOrchestrator) removeTasks(ctx context.Context, batch *store.Batch, service *api.Service, tasks []*api.Task) {
	for _, t := range tasks {
		err := batch.Update(func(tx store.Tx) error {
			// TODO(aaronl): optimistic update?
			t = store.GetTask(tx, t.ID)
			if t != nil {
				t.DesiredState = api.TaskStateShutdown
				return store.UpdateTask(tx, t)
			}
			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Errorf("removing task %s failed", t.ID)
		}
	}
}
func (s *Scheduler) applySchedulingDecisions(ctx context.Context, schedulingDecisions map[string]schedulingDecision) (successful, failed []schedulingDecision) {
	if len(schedulingDecisions) == 0 {
		return
	}

	successful = make([]schedulingDecision, 0, len(schedulingDecisions))

	// Apply changes to master store
	applied, err := s.store.Batch(func(batch *store.Batch) error {
		for len(schedulingDecisions) > 0 {
			err := batch.Update(func(tx store.Tx) error {
				// Update exactly one task inside this Update
				// callback.
				for taskID, decision := range schedulingDecisions {
					delete(schedulingDecisions, taskID)

					t := store.GetTask(tx, taskID)
					if t == nil {
						// Task no longer exists. Do nothing.
						failed = append(failed, decision)
						continue
					}

					if err := store.UpdateTask(tx, decision.new); err != nil {
						log.G(ctx).Debugf("scheduler failed to update task %s; will retry", taskID)
						failed = append(failed, decision)
						continue
					}
					successful = append(successful, decision)
					return nil
				}
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})

	if err != nil {
		log.G(ctx).WithError(err).Error("scheduler tick transaction failed")
		failed = append(failed, successful[applied:]...)
		successful = successful[:applied]
	}
	return
}
func (a *Allocator) commitAllocatedTask(ctx context.Context, batch *store.Batch, t *api.Task) error {
	return batch.Update(func(tx store.Tx) error {
		err := store.UpdateTask(tx, t)

		if err == store.ErrSequenceConflict {
			storeTask := store.GetTask(tx, t.ID)
			taskUpdateNetworks(storeTask, t.Networks)
			taskUpdateEndpoint(storeTask, t.Endpoint)
			if storeTask.Status.State < api.TaskStatePending {
				storeTask.Status = t.Status
			}
			err = store.UpdateTask(tx, storeTask)
		}

		return errors.Wrapf(err, "failed updating state in store transaction for task %s", t.ID)
	})
}
func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, removeTasks []*api.Task) {
	for _, original := range removeTasks {
		err := batch.Update(func(tx store.Tx) error {
			t := store.GetTask(tx, original.ID)
			if t == nil {
				return fmt.Errorf("task %s not found while trying to shut it down", original.ID)
			}
			if t.DesiredState > api.TaskStateRunning {
				return fmt.Errorf("task %s was already shut down when reached by updater", original.ID)
			}
			t.DesiredState = api.TaskStateShutdown
			return store.UpdateTask(tx, t)
		})
		if err != nil {
			log.G(ctx).WithError(err).Errorf("shutting down stale task %s failed", original.ID)
		}
	}
}
func (r *Orchestrator) tickTasks(ctx context.Context) {
	if len(r.restartTasks) > 0 {
		_, err := r.store.Batch(func(batch *store.Batch) error {
			for taskID := range r.restartTasks {
				err := batch.Update(func(tx store.Tx) error {
					// TODO(aaronl): optimistic update?
					t := store.GetTask(tx, taskID)
					if t != nil {
						if t.DesiredState > api.TaskStateRunning {
							return nil
						}

						service := store.GetService(tx, t.ServiceID)
						if !orchestrator.IsReplicatedService(service) {
							return nil
						}

						// Restart task if applicable
						if err := r.restarts.Restart(ctx, tx, r.cluster, service, *t); err != nil {
							return err
						}
					}
					return nil
				})
				if err != nil {
					log.G(ctx).WithError(err).Errorf("Orchestrator task reaping transaction failed")
				}
			}
			return nil
		})
		if err != nil {
			log.G(ctx).WithError(err).Errorf("orchestrator task removal batch failed")
		}

		r.restartTasks = make(map[string]struct{})
	}
}
func (s *subscription) match() {
	s.mu.Lock()
	defer s.mu.Unlock()

	add := func(t *api.Task) {
		if t.NodeID == "" {
			s.pendingTasks[t.ID] = struct{}{}
			return
		}
		if _, ok := s.nodes[t.NodeID]; !ok {
			s.nodes[t.NodeID] = struct{}{}
			s.wg.Add(1)
		}
	}

	s.store.View(func(tx store.ReadTx) {
		for _, nid := range s.message.Selector.NodeIDs {
			s.nodes[nid] = struct{}{}
		}

		for _, tid := range s.message.Selector.TaskIDs {
			if task := store.GetTask(tx, tid); task != nil {
				add(task)
			}
		}

		for _, sid := range s.message.Selector.ServiceIDs {
			tasks, err := store.FindTasks(tx, store.ByServiceID(sid))
			if err != nil {
				log.L.Warning(err)
				continue
			}
			for _, task := range tasks {
				add(task)
			}
		}
	})
}
func (a *Allocator) allocateTask(ctx context.Context, nc *networkContext, tx store.Tx, t *api.Task) (*api.Task, error) {
	taskUpdated := false

	// Get the latest task state from the store before updating.
	storeT := store.GetTask(tx, t.ID)
	if storeT == nil {
		return nil, fmt.Errorf("could not find task %s while trying to update network allocation", t.ID)
	}

	// We might be here even if a task allocation has already
	// happened but wasn't successfully committed to store. In such
	// cases skip allocation and go straight ahead to updating the
	// store.
	if !nc.nwkAllocator.IsTaskAllocated(t) {
		if t.ServiceID != "" {
			s := store.GetService(tx, t.ServiceID)
			if s == nil {
				return nil, fmt.Errorf("could not find service %s", t.ServiceID)
			}

			if !nc.nwkAllocator.IsServiceAllocated(s) {
				return nil, fmt.Errorf("service %s to which this task %s belongs has pending allocations", s.ID, t.ID)
			}

			taskUpdateEndpoint(t, s.Endpoint)
		}

		for _, na := range t.Networks {
			n := store.GetNetwork(tx, na.Network.ID)
			if n == nil {
				return nil, fmt.Errorf("failed to retrieve network %s while allocating task %s", na.Network.ID, t.ID)
			}

			if !nc.nwkAllocator.IsAllocated(n) {
				return nil, fmt.Errorf("network %s attached to task %s not allocated yet", n.ID, t.ID)
			}

			na.Network = n
		}

		if err := nc.nwkAllocator.AllocateTask(t); err != nil {
			return nil, fmt.Errorf("failed during network allocation for task %s: %v", t.ID, err)
		}
		if nc.nwkAllocator.IsTaskAllocated(t) {
			taskUpdateNetworks(storeT, t.Networks)
			taskUpdateEndpoint(storeT, t.Endpoint)
			taskUpdated = true
		}
	}

	// Update the network allocations and move to the
	// ALLOCATED state on top of the latest store state.
	if a.taskAllocateVote(networkVoter, t.ID) {
		if storeT.Status.State < api.TaskStateAllocated {
			updateTaskStatus(storeT, api.TaskStateAllocated, "allocated")
			taskUpdated = true
		}
	}

	if taskUpdated {
		if err := store.UpdateTask(tx, storeT); err != nil {
			return nil, fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err)
		}
	}

	return storeT, nil
}
func (r *Orchestrator) initTasks(ctx context.Context, readTx store.ReadTx) error {
	tasks, err := store.FindTasks(readTx, store.All)
	if err != nil {
		return err
	}
	for _, t := range tasks {
		if t.NodeID != "" {
			n := store.GetNode(readTx, t.NodeID)
			if invalidNode(n) && t.Status.State <= api.TaskStateRunning && t.DesiredState <= api.TaskStateRunning {
				r.restartTasks[t.ID] = struct{}{}
			}
		}
	}

	_, err = r.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			if t.ServiceID == "" {
				continue
			}

			// TODO(aluzzardi): We should NOT retrieve the service here.
			service := store.GetService(readTx, t.ServiceID)
			if service == nil {
				// Service was deleted
				err := batch.Update(func(tx store.Tx) error {
					return store.DeleteTask(tx, t.ID)
				})
				if err != nil {
					log.G(ctx).WithError(err).Error("failed to delete task for removed service")
				}
				continue
			}
			// TODO(aluzzardi): This is shady. We should have a more generic condition.
			if t.DesiredState != api.TaskStateReady || !orchestrator.IsReplicatedService(service) {
				continue
			}
			restartDelay := orchestrator.DefaultRestartDelay
			if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
				var err error
				restartDelay, err = gogotypes.DurationFromProto(t.Spec.Restart.Delay)
				if err != nil {
					log.G(ctx).WithError(err).Error("invalid restart delay")
					restartDelay = orchestrator.DefaultRestartDelay
				}
			}
			if restartDelay != 0 {
				timestamp, err := gogotypes.TimestampFromProto(t.Status.Timestamp)
				if err == nil {
					restartTime := timestamp.Add(restartDelay)
					calculatedRestartDelay := restartTime.Sub(time.Now())
					if calculatedRestartDelay < restartDelay {
						restartDelay = calculatedRestartDelay
					}
					if restartDelay > 0 {
						_ = batch.Update(func(tx store.Tx) error {
							t := store.GetTask(tx, t.ID)
							// TODO(aluzzardi): This is shady as well. We should have a more generic condition.
							if t == nil || t.DesiredState != api.TaskStateReady {
								return nil
							}
							r.restarts.DelayStart(ctx, tx, nil, t.ID, restartDelay, true)
							return nil
						})
						continue
					}
				} else {
					log.G(ctx).WithError(err).Error("invalid status timestamp")
				}
			}

			// Start now
			err := batch.Update(func(tx store.Tx) error {
				return r.restarts.StartNow(tx, t.ID)
			})
			if err != nil {
				log.G(ctx).WithError(err).WithField("task.id", t.ID).Error("moving task out of delayed state failed")
			}
		}

		return nil
	})

	return err
}
func TestLogBrokerSelector(t *testing.T) { ctx, ca, _, serverAddr, brokerAddr, done := testLogBrokerEnv(t) defer done() client, clientDone := testLogClient(t, serverAddr) defer clientDone() agent1, agent1Security, agent1Done := testBrokerClient(t, ca, brokerAddr) defer agent1Done() agent1subscriptions := listenSubscriptions(ctx, t, agent1) agent2, agent2Security, agent2Done := testBrokerClient(t, ca, brokerAddr) defer agent2Done() agent2subscriptions := listenSubscriptions(ctx, t, agent2) // Subscribe to a task. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "task", }) })) _, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: true, }, Selector: &api.LogSelector{ TaskIDs: []string{"task"}, }, }) require.NoError(t, err) // Since it's not assigned to any agent, nobody should receive it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Assign the task to agent-1. Make sure it's received by agent-1 but *not* // agent-2. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { task := store.GetTask(tx, "task") require.NotNil(t, task) task.NodeID = agent1Security.ServerTLSCreds.NodeID() return store.UpdateTask(tx, task) })) ensureSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Subscribe to a service. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateService(tx, &api.Service{ ID: "service", }) })) _, err = client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: true, }, Selector: &api.LogSelector{ ServiceIDs: []string{"service"}, }, }) require.NoError(t, err) // Since there are no corresponding tasks, nobody should receive it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Create a task that does *NOT* belong to our service and assign it to node-1. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "wrong-task", ServiceID: "wrong-service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // Ensure agent-1 doesn't receive it. ensureNoSubscription(t, agent1subscriptions) // Now create another task that does belong to our service and assign it to node-1. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-1", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // Make sure agent-1 receives it... ensureSubscription(t, agent1subscriptions) // ...and agent-2 does not. ensureNoSubscription(t, agent2subscriptions) // Create another task, same as above. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-2", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // agent-1 should *not* receive it anymore since the subscription was already delivered. // agent-2 should still not get it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Now, create another one and assign it to agent-2. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-3", ServiceID: "service", NodeID: agent2Security.ServerTLSCreds.NodeID(), }) })) // Make sure it's delivered to agent-2. 
ensureSubscription(t, agent2subscriptions) // it shouldn't do anything for agent-1. ensureNoSubscription(t, agent1subscriptions) }
func TestUpdaterRollback(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() var ( failImage1 uint32 failImage2 uint32 ) watchCreate, cancelCreate := state.Watch(s.WatchQueue(), state.EventCreateTask{}) defer cancelCreate() watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), state.EventUpdateService{}) defer cancelServiceUpdate() // Fail new tasks the updater tries to run watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancelUpdate() go func() { failedLast := false for { select { case e := <-watchUpdate: task := e.(state.EventUpdateTask).Task if task.DesiredState == task.Status.State { continue } if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed && task.Status.State != api.TaskStateRunning { err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) // Never fail two image2 tasks in a row, so there's a mix of // failed and successful tasks for the rollback. if task.Spec.GetContainer().Image == "image1" && atomic.LoadUint32(&failImage1) == 1 { task.Status.State = api.TaskStateFailed failedLast = true } else if task.Spec.GetContainer().Image == "image2" && atomic.LoadUint32(&failImage2) == 1 && !failedLast { task.Status.State = api.TaskStateFailed failedLast = true } else { task.Status.State = task.DesiredState failedLast = false } return store.UpdateTask(tx, task) }) assert.NoError(t, err) } else if task.DesiredState > api.TaskStateRunning { err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = task.DesiredState return store.UpdateTask(tx, task) }) assert.NoError(t, err) } } } }() // Create a service with four replicas specified before the orchestrator // is started. This should result in two tasks when the orchestrator // starts up. err := s.Update(func(tx store.Tx) error { s1 := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "image1", }, }, Restart: &api.RestartPolicy{ Condition: api.RestartOnNone, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: 4, }, }, Update: &api.UpdateConfig{ FailureAction: api.UpdateConfig_ROLLBACK, Parallelism: 1, Delay: *ptypes.DurationProto(10 * time.Millisecond), Monitor: ptypes.DurationProto(500 * time.Millisecond), MaxFailureRatio: 0.4, }, }, } assert.NoError(t, store.CreateService(tx, s1)) return nil }) assert.NoError(t, err) // Start the orchestrator. 
go func() { assert.NoError(t, orchestrator.Run(ctx)) }() observedTask := testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") atomic.StoreUint32(&failImage2, 1) // Start a rolling update err = s.Update(func(tx store.Tx) error { s1 := store.GetService(tx, "id1") require.NotNil(t, s1) s1.PreviousSpec = s1.Spec.Copy() s1.UpdateStatus = nil s1.Spec.Task.GetContainer().Image = "image2" assert.NoError(t, store.UpdateService(tx, s1)) return nil }) assert.NoError(t, err) // Should see three tasks started, then a rollback observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") // Should get to the ROLLBACK_STARTED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus == nil { continue } if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { break } } observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") // Should end up in ROLLBACK_COMPLETED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED { break } } atomic.StoreUint32(&failImage1, 1) // Repeat the rolling update but this time fail the tasks that the // rollback creates. It should end up in ROLLBACK_PAUSED. 
err = s.Update(func(tx store.Tx) error { s1 := store.GetService(tx, "id1") require.NotNil(t, s1) s1.PreviousSpec = s1.Spec.Copy() s1.UpdateStatus = nil s1.Spec.Task.GetContainer().Image = "image2" assert.NoError(t, store.UpdateService(tx, s1)) return nil }) assert.NoError(t, err) // Should see three tasks started, then a rollback observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2") // Should get to the ROLLBACK_STARTED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus == nil { continue } if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED { break } } observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") observedTask = testutils.WatchTaskCreate(t, watchCreate) assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") // Should end up in ROLLBACK_PAUSED state for { e := <-watchServiceUpdate if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED { break } } }
func TestUpdater(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) // Move tasks to their desired state. watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task if task.Status.State == task.DesiredState { continue } err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = task.DesiredState return store.UpdateTask(tx, task) }) assert.NoError(t, err) } } }() instances := 3 cluster := &api.Cluster{ // test cluster configuration propagation to task creation. Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: "default", }, }, } service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: uint64(instances), }, }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", // This won't apply in this test because we set the old tasks to DEAD. StopGracePeriod: ptypes.DurationProto(time.Hour), }, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateCluster(tx, cluster)) assert.NoError(t, store.CreateService(tx, service)) for i := 0; i < instances; i++ { assert.NoError(t, store.CreateTask(tx, newTask(cluster, service, uint64(i)))) } return nil }) assert.NoError(t, err) originalTasks := getRunnableServiceTasks(t, s, service) for _, task := range originalTasks { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) assert.Nil(t, task.LogDriver) // should be left alone } service.Spec.Task.GetContainer().Image = "v:2" service.Spec.Task.LogDriver = &api.Driver{Name: "tasklogdriver"} updater := NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks := getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:2", task.Spec.GetContainer().Image) assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // pick up from task } service.Spec.Task.GetContainer().Image = "v:3" cluster.Spec.DefaultLogDriver = &api.Driver{Name: "clusterlogdriver"} // make cluster default logdriver. service.Spec.Update = &api.UpdateConfig{ Parallelism: 1, } updater = NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks = getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:3", task.Spec.GetContainer().Image) assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // still pick up from task } service.Spec.Task.GetContainer().Image = "v:4" service.Spec.Task.LogDriver = nil // use cluster default now. service.Spec.Update = &api.UpdateConfig{ Parallelism: 1, Delay: *ptypes.DurationProto(10 * time.Millisecond), } updater = NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks = getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:4", task.Spec.GetContainer().Image) assert.Equal(t, cluster.Spec.DefaultLogDriver, task.LogDriver) // pick up from cluster } }
// UpdateTaskStatus updates the status of a task. A node should send such
// updates on every status change of its tasks.
func (d *Dispatcher) UpdateTaskStatus(ctx context.Context, r *api.UpdateTaskStatusRequest) (*api.UpdateTaskStatusResponse, error) {
	nodeInfo, err := ca.RemoteNode(ctx)
	if err != nil {
		return nil, err
	}
	nodeID := nodeInfo.NodeID
	fields := logrus.Fields{
		"node.id":      nodeID,
		"node.session": r.SessionID,
		"method":       "(*Dispatcher).UpdateTaskStatus",
	}
	if nodeInfo.ForwardedBy != nil {
		fields["forwarder.id"] = nodeInfo.ForwardedBy.NodeID
	}
	log := log.G(ctx).WithFields(fields)

	if err := d.isRunningLocked(); err != nil {
		return nil, err
	}

	if _, err := d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
		return nil, err
	}

	// Validate task updates
	for _, u := range r.Updates {
		if u.Status == nil {
			log.WithField("task.id", u.TaskID).Warn("task report has nil status")
			continue
		}

		var t *api.Task
		d.store.View(func(tx store.ReadTx) {
			t = store.GetTask(tx, u.TaskID)
		})

		if t == nil {
			log.WithField("task.id", u.TaskID).Warn("cannot find target task in store")
			continue
		}

		if t.NodeID != nodeID {
			err := grpc.Errorf(codes.PermissionDenied, "cannot update a task not assigned to this node")
			log.WithField("task.id", u.TaskID).Error(err)
			return nil, err
		}
	}

	d.taskUpdatesLock.Lock()
	// Enqueue task updates
	for _, u := range r.Updates {
		if u.Status == nil {
			continue
		}
		d.taskUpdates[u.TaskID] = u.Status
	}

	numUpdates := len(d.taskUpdates)
	d.taskUpdatesLock.Unlock()

	if numUpdates >= maxBatchItems {
		d.processTaskUpdatesTrigger <- struct{}{}
	}

	return nil, nil
}
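// reportTaskRunning is a hypothetical agent-side sketch of the request shape
// the handler above validates: one status entry per task, scoped to a session.
// It assumes `client` is a connected api.DispatcherClient and that the
// generated element type is named api.UpdateTaskStatusRequest_TaskStatusUpdate;
// the helper name and arguments are illustrative.
func reportTaskRunning(ctx context.Context, client api.DispatcherClient, sessionID, taskID string) error {
	_, err := client.UpdateTaskStatus(ctx, &api.UpdateTaskStatusRequest{
		SessionID: sessionID,
		Updates: []*api.UpdateTaskStatusRequest_TaskStatusUpdate{
			{
				TaskID: taskID,
				Status: &api.TaskStatus{State: api.TaskStateRunning},
			},
		},
	})
	// A PermissionDenied error means the task is not assigned to this node.
	return err
}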
func TestUpdaterStopGracePeriod(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) // Move tasks to their desired state. watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) // Explicitly do not set task state to // DEAD to trigger StopGracePeriod if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateRunning { task.Status.State = api.TaskStateRunning return store.UpdateTask(tx, task) } return nil }) assert.NoError(t, err) } } }() var instances uint64 = 3 service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", StopGracePeriod: ptypes.DurationProto(100 * time.Millisecond), }, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: instances, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, service)) for i := uint64(0); i < instances; i++ { task := newTask(nil, service, uint64(i)) task.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) originalTasks := getRunnableServiceTasks(t, s, service) for _, task := range originalTasks { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) } before := time.Now() service.Spec.Task.GetContainer().Image = "v:2" updater := NewUpdater(s, NewRestartSupervisor(s)) // Override the default (1 minute) to speed up the test. updater.restarts.taskTimeout = 100 * time.Millisecond updater.Run(ctx, nil, service, getRunnableServiceTasks(t, s, service)) updatedTasks := getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:2", task.Spec.GetContainer().Image) } after := time.Now() // At least 100 ms should have elapsed. Only check the lower bound, // because the system may be slow and it could have taken longer. if after.Sub(before) < 100*time.Millisecond { t.Fatal("stop timeout should have elapsed") } }
func (ce *ConstraintEnforcer) shutdownNoncompliantTasks(node *api.Node) {
	// If the availability is "drain", the orchestrator will
	// shut down all tasks.
	// If the availability is "pause", we shouldn't touch
	// the tasks on this node.
	if node.Spec.Availability != api.NodeAvailabilityActive {
		return
	}

	var (
		tasks []*api.Task
		err   error
	)

	ce.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
	})

	if err != nil {
		log.L.WithError(err).Errorf("failed to list tasks for node ID %s", node.ID)
	}

	var availableMemoryBytes, availableNanoCPUs int64
	if node.Description != nil && node.Description.Resources != nil {
		availableMemoryBytes = node.Description.Resources.MemoryBytes
		availableNanoCPUs = node.Description.Resources.NanoCPUs
	}

	removeTasks := make(map[string]*api.Task)

	// TODO(aaronl): The set of tasks removed will be
	// nondeterministic because it depends on the order of
	// the slice returned from FindTasks. We could do
	// a separate pass over the tasks for each type of
	// resource, and sort by the size of the reservation
	// to remove the most resource-intensive tasks.
	for _, t := range tasks {
		if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateRunning {
			continue
		}

		// Ensure that the task still meets scheduling
		// constraints.
		if t.Spec.Placement != nil && len(t.Spec.Placement.Constraints) != 0 {
			constraints, _ := constraint.Parse(t.Spec.Placement.Constraints)
			if !constraint.NodeMatches(constraints, node) {
				removeTasks[t.ID] = t
				continue
			}
		}

		// Ensure that the task assigned to the node
		// still satisfies the resource limits.
		if t.Spec.Resources != nil && t.Spec.Resources.Reservations != nil {
			if t.Spec.Resources.Reservations.MemoryBytes > availableMemoryBytes {
				removeTasks[t.ID] = t
				continue
			}
			if t.Spec.Resources.Reservations.NanoCPUs > availableNanoCPUs {
				removeTasks[t.ID] = t
				continue
			}
			availableMemoryBytes -= t.Spec.Resources.Reservations.MemoryBytes
			availableNanoCPUs -= t.Spec.Resources.Reservations.NanoCPUs
		}
	}

	if len(removeTasks) != 0 {
		_, err := ce.store.Batch(func(batch *store.Batch) error {
			for _, t := range removeTasks {
				err := batch.Update(func(tx store.Tx) error {
					t = store.GetTask(tx, t.ID)
					if t == nil || t.DesiredState > api.TaskStateRunning {
						return nil
					}

					t.DesiredState = api.TaskStateShutdown
					return store.UpdateTask(tx, t)
				})
				if err != nil {
					log.L.WithError(err).Errorf("failed to shut down task %s", t.ID)
				}
			}
			return nil
		})

		if err != nil {
			log.L.WithError(err).Errorf("failed to shut down tasks")
		}
	}
}
func TestSchedulerResourceConstraintDeadTask(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. node := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } bigTask1 := &api.Task{ DesiredState: api.TaskStateRunning, ID: "id1", Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 8e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "big", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } bigTask2 := bigTask1.Copy() bigTask2.ID = "id2" s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.CreateTask(tx, bigTask1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() // The task fits, so it should get assigned assignment := watchAssignment(t, watch) assert.Equal(t, "id1", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) err = s.Update(func(tx store.Tx) error { // Add a second task. It shouldn't get assigned because of // resource constraints. return store.CreateTask(tx, bigTask2) }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { tasks, err := store.FindTasks(tx, store.ByNodeID(node.ID)) assert.NoError(t, err) assert.Len(t, tasks, 1) }) err = s.Update(func(tx store.Tx) error { // The task becomes dead updatedTask := store.GetTask(tx, bigTask1.ID) updatedTask.Status.State = api.TaskStateShutdown return store.UpdateTask(tx, updatedTask) }) assert.NoError(t, err) // With the first task no longer consuming resources, the second // one can be scheduled. assignment = watchAssignment(t, watch) assert.Equal(t, "id2", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) }
func TestSchedulerPluginConstraint(t *testing.T) { ctx := context.Background() // Node1: vol plugin1 n1 := &api.Node{ ID: "node1_ID", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node1", }, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{ Plugins: []api.PluginDescription{ { Type: "Volume", Name: "plugin1", }, }, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } // Node2: vol plugin1, vol plugin2 n2 := &api.Node{ ID: "node2_ID", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node2", }, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{ Plugins: []api.PluginDescription{ { Type: "Volume", Name: "plugin1", }, { Type: "Volume", Name: "plugin2", }, }, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } // Node3: vol plugin1, network plugin1 n3 := &api.Node{ ID: "node3_ID", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node3", }, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{ Plugins: []api.PluginDescription{ { Type: "Volume", Name: "plugin1", }, { Type: "Network", Name: "plugin1", }, }, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } volumeOptionsDriver := func(driver string) *api.Mount_VolumeOptions { return &api.Mount_VolumeOptions{ DriverConfig: &api.Driver{ Name: driver, }, } } // Task1: vol plugin1 t1 := &api.Task{ ID: "task1_ID", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Mounts: []api.Mount{ { Source: "testVol1", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin1"), }, }, }, }, }, ServiceAnnotations: api.Annotations{ Name: "task1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } // Task2: vol plugin1, vol plugin2 t2 := &api.Task{ ID: "task2_ID", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Mounts: []api.Mount{ { Source: "testVol1", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin1"), }, { Source: "testVol2", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin2"), }, }, }, }, }, ServiceAnnotations: api.Annotations{ Name: "task2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } // Task3: vol plugin1, network plugin1 t3 := &api.Task{ ID: "task3_ID", DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: &api.Network{ ID: "testNwID1", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "testVol1", }, }, DriverState: &api.Driver{ Name: "plugin1", }, }, }, }, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Mounts: []api.Mount{ { Source: "testVol1", Target: "/foo", Type: api.MountTypeVolume, VolumeOptions: volumeOptionsDriver("plugin1"), }, }, }, }, }, ServiceAnnotations: api.Annotations{ Name: "task2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() // Add initial node and task err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, t1)) assert.NoError(t, store.CreateNode(tx, n1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() // t1 should get assigned assignment := watchAssignment(t, watch) assert.Equal(t, assignment.NodeID, 
"node1_ID") // Create t2; it should stay in the pending state because there is // no node that with volume plugin `plugin2` err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, t2)) return nil }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { task := store.GetTask(tx, "task2_ID") if task.Status.State >= api.TaskStateAssigned { t.Fatalf("task 'task2_ID' should not have been assigned to node %v", task.NodeID) } }) // Now add the second node err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, n2)) return nil }) assert.NoError(t, err) // Check that t2 has been assigned assignment1 := watchAssignment(t, watch) assert.Equal(t, assignment1.ID, "task2_ID") assert.Equal(t, assignment1.NodeID, "node2_ID") // Create t3; it should stay in the pending state because there is // no node that with network plugin `plugin1` err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, t3)) return nil }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { task := store.GetTask(tx, "task3_ID") if task.Status.State >= api.TaskStateAssigned { t.Fatal("task 'task3_ID' should not have been assigned") } }) // Now add the node3 err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNode(tx, n3)) return nil }) assert.NoError(t, err) // Check that t3 has been assigned assignment2 := watchAssignment(t, watch) assert.Equal(t, assignment2.ID, "task3_ID") assert.Equal(t, assignment2.NodeID, "node3_ID") }
func (d *Dispatcher) processUpdates(ctx context.Context) {
	var (
		taskUpdates map[string]*api.TaskStatus
		nodeUpdates map[string]nodeUpdate
	)

	d.taskUpdatesLock.Lock()
	if len(d.taskUpdates) != 0 {
		taskUpdates = d.taskUpdates
		d.taskUpdates = make(map[string]*api.TaskStatus)
	}
	d.taskUpdatesLock.Unlock()

	d.nodeUpdatesLock.Lock()
	if len(d.nodeUpdates) != 0 {
		nodeUpdates = d.nodeUpdates
		d.nodeUpdates = make(map[string]nodeUpdate)
	}
	d.nodeUpdatesLock.Unlock()

	if len(taskUpdates) == 0 && len(nodeUpdates) == 0 {
		return
	}

	log := log.G(ctx).WithFields(logrus.Fields{
		"method": "(*Dispatcher).processUpdates",
	})

	_, err := d.store.Batch(func(batch *store.Batch) error {
		for taskID, status := range taskUpdates {
			err := batch.Update(func(tx store.Tx) error {
				logger := log.WithField("task.id", taskID)
				task := store.GetTask(tx, taskID)
				if task == nil {
					logger.Errorf("task unavailable")
					return nil
				}

				logger = logger.WithField("state.transition", fmt.Sprintf("%v->%v", task.Status.State, status.State))

				if task.Status == *status {
					logger.Debug("task status identical, ignoring")
					return nil
				}

				if task.Status.State > status.State {
					logger.Debug("task status invalid transition")
					return nil
				}

				task.Status = *status
				if err := store.UpdateTask(tx, task); err != nil {
					logger.WithError(err).Error("failed to update task status")
					return nil
				}
				logger.Debug("task status updated")
				return nil
			})
			if err != nil {
				log.WithError(err).Error("dispatcher task update transaction failed")
			}
		}

		for nodeID, nodeUpdate := range nodeUpdates {
			err := batch.Update(func(tx store.Tx) error {
				logger := log.WithField("node.id", nodeID)
				node := store.GetNode(tx, nodeID)
				if node == nil {
					logger.Errorf("node unavailable")
					return nil
				}

				if nodeUpdate.status != nil {
					node.Status.State = nodeUpdate.status.State
					node.Status.Message = nodeUpdate.status.Message
					if nodeUpdate.status.Addr != "" {
						node.Status.Addr = nodeUpdate.status.Addr
					}
				}
				if nodeUpdate.description != nil {
					node.Description = nodeUpdate.description
				}

				if err := store.UpdateNode(tx, node); err != nil {
					logger.WithError(err).Error("failed to update node status")
					return nil
				}
				logger.Debug("node status updated")
				return nil
			})
			if err != nil {
				log.WithError(err).Error("dispatcher node update transaction failed")
			}
		}

		return nil
	})
	if err != nil {
		log.WithError(err).Error("dispatcher batch failed")
	}

	d.processUpdatesCond.Broadcast()
}
func TestAllocator(t *testing.T) { s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() a, err := New(s) assert.NoError(t, err) assert.NotNil(t, a) // Try adding some objects to store before allocator is started assert.NoError(t, s.Update(func(tx store.Tx) error { n1 := &api.Network{ ID: "testID1", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test1", }, }, } assert.NoError(t, store.CreateNetwork(tx, n1)) s1 := &api.Service{ ID: "testServiceID1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service1", }, Task: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID1", }, }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s1)) t1 := &api.Task{ ID: "testTaskID1", Status: api.TaskStatus{ State: api.TaskStateNew, }, Networks: []*api.NetworkAttachment{ { Network: n1, }, }, } assert.NoError(t, store.CreateTask(tx, t1)) return nil })) netWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateNetwork{}, state.EventDeleteNetwork{}) defer cancel() taskWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}, state.EventDeleteTask{}) defer cancel() serviceWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateService{}, state.EventDeleteService{}) defer cancel() // Start allocator go func() { assert.NoError(t, a.Run(context.Background())) }() // Now verify if we get network and tasks updated properly watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) watchService(t, serviceWatch, false, nil) // Add new networks/tasks/services after allocator is started. assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := &api.Network{ ID: "testID2", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test2", }, }, } assert.NoError(t, store.CreateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "testServiceID2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service2", }, Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s2)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := &api.Task{ ID: "testTaskID2", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID2", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) // Now try adding a task which depends on a network before adding the network. 
n3 := &api.Network{ ID: "testID3", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test3", }, }, } assert.NoError(t, s.Update(func(tx store.Tx) error { t3 := &api.Task{ ID: "testTaskID3", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n3, }, }, } assert.NoError(t, store.CreateTask(tx, t3)) return nil })) // Wait for a little bit of time before adding network just to // test network is not available while task allocation is // going through time.Sleep(10 * time.Millisecond) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n3)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteTask(tx, "testTaskID3")) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { t5 := &api.Task{ ID: "testTaskID5", Spec: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, }, Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, ServiceID: "testServiceID2", } assert.NoError(t, store.CreateTask(tx, t5)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, "testID3")) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, "testServiceID2")) return nil })) watchService(t, serviceWatch, false, nil) // Try to create a task with no network attachments and test // that it moves to ALLOCATED state. assert.NoError(t, s.Update(func(tx store.Tx) error { t4 := &api.Task{ ID: "testTaskID4", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t4)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := store.GetNetwork(tx, "testID2") require.NotEqual(t, nil, n2) assert.NoError(t, store.UpdateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchNetwork(t, netWatch, true, nil) // Try updating task which is already allocated assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := store.GetTask(tx, "testTaskID2") require.NotEqual(t, nil, t2) assert.NoError(t, store.UpdateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) watchTask(t, s, taskWatch, true, nil) // Try adding networks with conflicting network resources and // add task which attaches to a network which gets allocated // later and verify if task reconciles and moves to ALLOCATED. 
n4 := &api.Network{ ID: "testID4", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test4", }, DriverConfig: &api.Driver{ Name: "overlay", Options: map[string]string{ "com.docker.network.driver.overlay.vxlanid_list": "328", }, }, }, } n5 := n4.Copy() n5.ID = "testID5" n5.Spec.Annotations.Name = "test5" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n4)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n5)) return nil })) watchNetwork(t, netWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t6 := &api.Task{ ID: "testTaskID6", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n5, }, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting network. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, n4.ID)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) // Try adding services with conflicting port configs and add // task which is part of the service whose allocation hasn't // happened and when that happens later and verify if task // reconciles and moves to ALLOCATED. s3 := &api.Service{ ID: "testServiceID3", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service3", }, Endpoint: &api.EndpointSpec{ Ports: []*api.PortConfig{ { Name: "http", TargetPort: 80, PublishedPort: 8080, }, }, }, }, } s4 := s3.Copy() s4.ID = "testServiceID4" s4.Spec.Annotations.Name = "service4" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s3)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s4)) return nil })) watchService(t, serviceWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t7 := &api.Task{ ID: "testTaskID7", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID4", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t7)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting service. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, s3.ID)) return nil })) watchService(t, serviceWatch, false, nil) watchTask(t, s, taskWatch, false, isValidTask) a.Stop() }
func (a *Allocator) doNetworkInit(ctx context.Context) error {
	na, err := networkallocator.New()
	if err != nil {
		return err
	}

	nc := &networkContext{
		nwkAllocator:        na,
		unallocatedTasks:    make(map[string]*api.Task),
		unallocatedServices: make(map[string]*api.Service),
		unallocatedNetworks: make(map[string]*api.Network),
		ingressNetwork:      newIngressNetwork(),
	}

	// Check if we have the ingress network. If not found create
	// it before reading all network objects for allocation.
	var networks []*api.Network
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
		if len(networks) > 0 {
			nc.ingressNetwork = networks[0]
		}
	})
	if err != nil {
		return fmt.Errorf("failed to find ingress network during init: %v", err)
	}

	// If ingress network is not found, create one right away
	// using the predefined template.
	if len(networks) == 0 {
		if err := a.store.Update(func(tx store.Tx) error {
			nc.ingressNetwork.ID = identity.NewID()
			if err := store.CreateNetwork(tx, nc.ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return fmt.Errorf("failed to create ingress network: %v", err)
		}

		a.store.View(func(tx store.ReadTx) {
			networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
			if len(networks) > 0 {
				nc.ingressNetwork = networks[0]
			}
		})
		if err != nil {
			return fmt.Errorf("failed to find ingress network after creating it: %v", err)
		}
	}

	// Try to complete ingress network allocation before anything else so
	// that we can get the preferred subnet for the ingress network.
	if !na.IsAllocated(nc.ingressNetwork) {
		if err := a.allocateNetwork(ctx, nc, nc.ingressNetwork); err != nil {
			log.G(ctx).Errorf("failed allocating ingress network during init: %v", err)
		}

		// Update store after allocation
		if err := a.store.Update(func(tx store.Tx) error {
			if err := store.UpdateNetwork(tx, nc.ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return fmt.Errorf("failed to update ingress network: %v", err)
		}
	}

	// Allocate networks that were in the store before we started
	// watching.
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all networks in store while trying to allocate during init: %v", err)
	}

	for _, n := range networks {
		if na.IsAllocated(n) {
			continue
		}

		if err := a.allocateNetwork(ctx, nc, n); err != nil {
			log.G(ctx).Errorf("failed allocating network %s during init: %v", n.ID, err)
		}
	}

	// Allocate nodes that were in the store before we process watched events.
	var nodes []*api.Node
	a.store.View(func(tx store.ReadTx) {
		nodes, err = store.FindNodes(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all nodes in store while trying to allocate during init: %v", err)
	}

	for _, node := range nodes {
		if na.IsNodeAllocated(node) {
			continue
		}

		if node.Attachment == nil {
			node.Attachment = &api.NetworkAttachment{}
		}

		node.Attachment.Network = nc.ingressNetwork.Copy()
		if err := a.allocateNode(ctx, nc, node); err != nil {
			log.G(ctx).Errorf("Failed to allocate network resources for node %s during init: %v", node.ID, err)
		}
	}

	// Allocate services that were in the store before we process watched events.
	var services []*api.Service
	a.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err)
	}

	for _, s := range services {
		if nc.nwkAllocator.IsServiceAllocated(s) {
			continue
		}

		if err := a.allocateService(ctx, nc, s); err != nil {
			log.G(ctx).Errorf("failed allocating service %s during init: %v", s.ID, err)
		}
	}

	// Allocate tasks that were in the store before we started watching.
	var tasks []*api.Task
	a.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all tasks in store while trying to allocate during init: %v", err)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			if taskDead(t) {
				continue
			}

			var s *api.Service
			if t.ServiceID != "" {
				a.store.View(func(tx store.ReadTx) {
					s = store.GetService(tx, t.ServiceID)
				})
			}

			// Populate network attachments in the task
			// based on service spec.
			a.taskCreateNetworkAttachments(t, s)

			if taskReadyForNetworkVote(t, s, nc) {
				if t.Status.State >= api.TaskStateAllocated {
					continue
				}

				if a.taskAllocateVote(networkVoter, t.ID) {
					// If the task is not attached to any network, the network
					// allocator's job is done. Immediately cast a vote so
					// that the task can be moved to ALLOCATED state as
					// soon as possible.
					if err := batch.Update(func(tx store.Tx) error {
						storeT := store.GetTask(tx, t.ID)
						if storeT == nil {
							return fmt.Errorf("task %s not found while trying to update state", t.ID)
						}

						updateTaskStatus(storeT, api.TaskStateAllocated, "allocated")

						if err := store.UpdateTask(tx, storeT); err != nil {
							return fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err)
						}

						return nil
					}); err != nil {
						log.G(ctx).WithError(err).Error("error updating task network")
					}
				}
				continue
			}

			err := batch.Update(func(tx store.Tx) error {
				_, err := a.allocateTask(ctx, nc, tx, t)
				return err
			})
			if err != nil {
				log.G(ctx).Errorf("failed allocating task %s during init: %v", t.ID, err)
				nc.unallocatedTasks[t.ID] = t
			}
		}

		return nil
	}); err != nil {
		return err
	}

	a.netCtx = nc
	return nil
}
func TestSchedulerFaultyNode(t *testing.T) { ctx := context.Background() taskTemplate := &api.Task{ ServiceID: "service1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } node1 := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } node2 := &api.Node{ ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial nodes, and one task assigned to node id1 assert.NoError(t, store.CreateNode(tx, node1)) assert.NoError(t, store.CreateNode(tx, node2)) task1 := taskTemplate.Copy() task1.ID = "id1" task1.NodeID = "id1" task1.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() for i := 0; i != 8; i++ { // Simulate a task failure cycle newTask := taskTemplate.Copy() newTask.ID = identity.NewID() err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, newTask)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, newTask.ID, assignment.ID) if i < 5 { // The first 5 attempts should be assigned to node id2 because // it has no replicas of the service. assert.Equal(t, "id2", assignment.NodeID) } else { // The next ones should be assigned to id1, since we'll // flag id2 as potentially faulty. assert.Equal(t, "id1", assignment.NodeID) } err = s.Update(func(tx store.Tx) error { newTask := store.GetTask(tx, newTask.ID) require.NotNil(t, newTask) newTask.Status.State = api.TaskStateFailed assert.NoError(t, store.UpdateTask(tx, newTask)) return nil }) assert.NoError(t, err) } }