func (u *Updater) worker(ctx context.Context, queue <-chan orchestrator.Slot) {
	for slot := range queue {
		// Do we have a task with the new spec in desired state = RUNNING?
		// If so, all we have to do to complete the update is remove the
		// other tasks. Or if we have a task with the new spec that has
		// desired state < RUNNING, advance it to running and remove the
		// other tasks.
		var (
			runningTask *api.Task
			cleanTask   *api.Task
		)
		for _, t := range slot {
			if !u.isTaskDirty(t) {
				if t.DesiredState == api.TaskStateRunning {
					runningTask = t
					break
				}
				if t.DesiredState < api.TaskStateRunning {
					cleanTask = t
				}
			}
		}

		if runningTask != nil {
			if err := u.useExistingTask(ctx, slot, runningTask); err != nil {
				log.G(ctx).WithError(err).Error("update failed")
			}
		} else if cleanTask != nil {
			if err := u.useExistingTask(ctx, slot, cleanTask); err != nil {
				log.G(ctx).WithError(err).Error("update failed")
			}
		} else {
			updated := orchestrator.NewTask(u.cluster, u.newService, slot[0].Slot, "")
			if orchestrator.IsGlobalService(u.newService) {
				updated = orchestrator.NewTask(u.cluster, u.newService, slot[0].Slot, slot[0].NodeID)
			}
			updated.DesiredState = api.TaskStateReady

			if err := u.updateTask(ctx, slot, updated); err != nil {
				log.G(ctx).WithError(err).WithField("task.id", updated.ID).Error("update failed")
			}
		}

		if u.newService.Spec.Update != nil && (u.newService.Spec.Update.Delay.Seconds != 0 || u.newService.Spec.Update.Delay.Nanos != 0) {
			delay, err := ptypes.Duration(&u.newService.Spec.Update.Delay)
			if err != nil {
				log.G(ctx).WithError(err).Error("invalid update delay")
				continue
			}
			select {
			case <-time.After(delay):
			case <-u.stopChan:
				return
			}
		}
	}
}
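The tail of worker is a common Go pacing pattern: the configured protobuf Delay is converted with ptypes.Duration, and the pause between slots is a select against the updater's stop channel, so a stopped rollout never sleeps out the remainder of its delay. Below is a minimal standalone sketch of just that pattern; the queue contents, the processWithDelay name, and the stop wiring are illustrative, not swarmkit's API.

package main

import (
	"fmt"
	"time"
)

// processWithDelay drains work items, pausing for delay between items, and
// returns early if stop is closed. This mirrors the select at the end of
// the updater's worker loop.
func processWithDelay(queue <-chan string, delay time.Duration, stop <-chan struct{}) {
	for item := range queue {
		fmt.Println("updated", item)

		select {
		case <-time.After(delay): // pace the rollout
		case <-stop: // a stopped update must not keep sleeping
			return
		}
	}
}

func main() {
	queue := make(chan string, 3)
	queue <- "slot-1"
	queue <- "slot-2"
	queue <- "slot-3"
	close(queue)

	stop := make(chan struct{})
	processWithDelay(queue, 10*time.Millisecond, stop)
}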
func (r *Supervisor) shouldRestart(ctx context.Context, t *api.Task, service *api.Service) bool {
	// TODO(aluzzardi): This function should not depend on `service`.
	condition := orchestrator.RestartCondition(t)

	if condition != api.RestartOnAny &&
		(condition != api.RestartOnFailure || t.Status.State == api.TaskStateCompleted) {
		return false
	}

	if t.Spec.Restart == nil || t.Spec.Restart.MaxAttempts == 0 {
		return true
	}

	instanceTuple := instanceTuple{
		instance:  t.Slot,
		serviceID: t.ServiceID,
	}

	// Instance is not meaningful for "global" tasks, so they need to be
	// indexed by NodeID.
	if orchestrator.IsGlobalService(service) {
		instanceTuple.nodeID = t.NodeID
	}

	r.mu.Lock()
	defer r.mu.Unlock()

	restartInfo := r.history[instanceTuple]
	if restartInfo == nil {
		return true
	}

	if t.Spec.Restart.Window == nil || (t.Spec.Restart.Window.Seconds == 0 && t.Spec.Restart.Window.Nanos == 0) {
		return restartInfo.totalRestarts < t.Spec.Restart.MaxAttempts
	}

	if restartInfo.restartedInstances == nil {
		return true
	}

	window, err := ptypes.Duration(t.Spec.Restart.Window)
	if err != nil {
		log.G(ctx).WithError(err).Error("invalid restart lookback window")
		return restartInfo.totalRestarts < t.Spec.Restart.MaxAttempts
	}
	lookback := time.Now().Add(-window)

	var next *list.Element
	for e := restartInfo.restartedInstances.Front(); e != nil; e = next {
		next = e.Next()

		if e.Value.(restartedInstance).timestamp.After(lookback) {
			break
		}

		restartInfo.restartedInstances.Remove(e)
	}

	numRestarts := uint64(restartInfo.restartedInstances.Len())
	if numRestarts == 0 {
		restartInfo.restartedInstances = nil
	}

	return numRestarts < t.Spec.Restart.MaxAttempts
}
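The subtlest part of shouldRestart is the windowed accounting: restart timestamps are kept in chronological order in a container/list, entries older than the lookback window are pruned from the front, and whatever remains is compared against MaxAttempts. Here is a minimal standalone sketch of just that pruning step; the restartLog and countRecent names are hypothetical, not part of swarmkit.

package main

import (
	"container/list"
	"fmt"
	"time"
)

// countRecent removes entries older than the lookback window from the
// front of the chronologically ordered list and returns how many remain.
// Because the list is ordered, pruning can stop at the first recent entry.
func countRecent(restartLog *list.List, window time.Duration) int {
	lookback := time.Now().Add(-window)

	var next *list.Element
	for e := restartLog.Front(); e != nil; e = next {
		next = e.Next()
		if e.Value.(time.Time).After(lookback) {
			break // everything after this element is recent
		}
		restartLog.Remove(e)
	}
	return restartLog.Len()
}

func main() {
	restartLog := list.New()
	restartLog.PushBack(time.Now().Add(-2 * time.Hour)) // outside a 1h window
	restartLog.PushBack(time.Now().Add(-10 * time.Minute))
	restartLog.PushBack(time.Now().Add(-1 * time.Minute))

	const maxAttempts = 3
	recent := countRecent(restartLog, time.Hour)
	fmt.Printf("recent=%d restart allowed=%v\n", recent, recent < maxAttempts)
}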
// Run contains the global orchestrator event loop
func (g *Orchestrator) Run(ctx context.Context) error {
	defer close(g.doneChan)

	// Watch changes to services and tasks
	queue := g.store.WatchQueue()
	watcher, cancel := queue.Watch()
	defer cancel()

	// lookup the cluster
	var err error
	g.store.View(func(readTx store.ReadTx) {
		var clusters []*api.Cluster
		clusters, err = store.FindClusters(readTx, store.ByName("default"))

		if len(clusters) != 1 {
			return // just pick up the cluster when it is created.
		}
		g.cluster = clusters[0]
	})
	if err != nil {
		return err
	}

	// Get list of nodes
	var nodes []*api.Node
	g.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, n := range nodes {
		g.updateNode(n)
	}

	// Lookup global services
	var existingServices []*api.Service
	g.store.View(func(readTx store.ReadTx) {
		existingServices, err = store.FindServices(readTx, store.All)
	})
	if err != nil {
		return err
	}

	var reconcileServiceIDs []string
	for _, s := range existingServices {
		if orchestrator.IsGlobalService(s) {
			g.updateService(s)
			reconcileServiceIDs = append(reconcileServiceIDs, s.ID)
		}
	}
	g.reconcileServices(ctx, reconcileServiceIDs)

	for {
		select {
		case event := <-watcher:
			// TODO(stevvooe): Use ctx to limit running time of operation.
			switch v := event.(type) {
			case state.EventUpdateCluster:
				g.cluster = v.Cluster
			case state.EventCreateService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				g.updateService(v.Service)
				g.reconcileServices(ctx, []string{v.Service.ID})
			case state.EventUpdateService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				g.updateService(v.Service)
				g.reconcileServices(ctx, []string{v.Service.ID})
			case state.EventDeleteService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				orchestrator.DeleteServiceTasks(ctx, g.store, v.Service)
				// delete the service from the service map
				delete(g.globalServices, v.Service.ID)
				g.restarts.ClearServiceHistory(v.Service.ID)
			case state.EventCreateNode:
				g.updateNode(v.Node)
				g.reconcileOneNode(ctx, v.Node)
			case state.EventUpdateNode:
				g.updateNode(v.Node)
				switch v.Node.Status.State {
				// NodeStatus_DISCONNECTED is a transient state, no need to make any change
				case api.NodeStatus_DOWN:
					g.removeTasksFromNode(ctx, v.Node)
				case api.NodeStatus_READY:
					// node could come back to READY from DOWN or DISCONNECT
					g.reconcileOneNode(ctx, v.Node)
				}
			case state.EventDeleteNode:
				g.removeTasksFromNode(ctx, v.Node)
				delete(g.nodes, v.Node.ID)
			case state.EventUpdateTask:
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				// the global orchestrator needs to inspect when a task has terminated;
				// it should ignore tasks whose DesiredState is past running, which
				// means the task has already been processed
				if isTaskTerminated(v.Task) {
					g.restartTask(ctx, v.Task.ID, v.Task.ServiceID)
				}
			case state.EventDeleteTask:
				// the CLI allows deleting tasks
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				g.reconcileServicesOneNode(ctx, []string{v.Task.ServiceID}, v.Task.NodeID)
			}
		case <-g.stopChan:
			return nil
		}
	}
}
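Run follows the standard store-watcher shape: reconcile existing state once up front, then loop on a select over the event channel and the stop channel, dispatching on each event's concrete type. A stripped-down sketch of that shape with stand-in event types (the real loop switches on swarmkit's state.Event* types):

package main

import "fmt"

// Stand-in event types; these are illustrative, not swarmkit's.
type nodeCreated struct{ id string }
type taskUpdated struct{ id string }

// run dispatches events by concrete type until stop is closed,
// mirroring the select/type-switch structure of Orchestrator.Run.
func run(events <-chan interface{}, stop <-chan struct{}) {
	for {
		select {
		case ev := <-events:
			switch v := ev.(type) {
			case nodeCreated:
				fmt.Println("reconcile node", v.id)
			case taskUpdated:
				fmt.Println("maybe restart task", v.id)
			}
		case <-stop:
			return
		}
	}
}

func main() {
	events := make(chan interface{})
	stop := make(chan struct{})

	go func() {
		events <- nodeCreated{id: "n1"}
		events <- taskUpdated{id: "t1"}
		close(stop)
	}()
	run(events, stop)
}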
// Restart initiates a new task to replace t if appropriate under the service's
// restart policy.
func (r *Supervisor) Restart(ctx context.Context, tx store.Tx, cluster *api.Cluster, service *api.Service, t api.Task) error {
	// TODO(aluzzardi): This function should not depend on `service`.

	// Is the old task still in the process of restarting? If so, wait for
	// its restart delay to elapse, to avoid tight restart loops (for
	// example, when the image doesn't exist).
	r.mu.Lock()
	oldDelay, ok := r.delays[t.ID]
	if ok {
		if !oldDelay.waiter {
			oldDelay.waiter = true
			go r.waitRestart(ctx, oldDelay, cluster, t.ID)
		}
		r.mu.Unlock()
		return nil
	}
	r.mu.Unlock()

	// Sanity check: was the task shut down already by a separate call to
	// Restart? If so, we must avoid restarting it, because this would create
	// an extra task. This should never happen unless there is a bug.
	if t.DesiredState > api.TaskStateRunning {
		return errors.New("Restart called on task that was already shut down")
	}

	t.DesiredState = api.TaskStateShutdown
	err := store.UpdateTask(tx, &t)
	if err != nil {
		log.G(ctx).WithError(err).Error("failed to set task desired state to dead")
		return err
	}

	if !r.shouldRestart(ctx, &t, service) {
		return nil
	}

	var restartTask *api.Task

	if orchestrator.IsReplicatedService(service) {
		restartTask = orchestrator.NewTask(cluster, service, t.Slot, "")
	} else if orchestrator.IsGlobalService(service) {
		restartTask = orchestrator.NewTask(cluster, service, 0, t.NodeID)
	} else {
		log.G(ctx).Error("service not supported by restart supervisor")
		return nil
	}

	n := store.GetNode(tx, t.NodeID)

	restartTask.DesiredState = api.TaskStateReady

	var restartDelay time.Duration
	// Restart delay is not applied to drained nodes
	if n == nil || n.Spec.Availability != api.NodeAvailabilityDrain {
		if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
			var err error
			restartDelay, err = ptypes.Duration(t.Spec.Restart.Delay)
			if err != nil {
				log.G(ctx).WithError(err).Error("invalid restart delay; using default")
				restartDelay = orchestrator.DefaultRestartDelay
			}
		} else {
			restartDelay = orchestrator.DefaultRestartDelay
		}
	}

	waitStop := true

	// Normally we wait for the old task to stop running, but we skip this
	// if the old task is already dead or the node it's assigned to is down.
	if (n != nil && n.Status.State == api.NodeStatus_DOWN) || t.Status.State > api.TaskStateRunning {
		waitStop = false
	}

	if err := store.CreateTask(tx, restartTask); err != nil {
		log.G(ctx).WithError(err).WithField("task.id", restartTask.ID).Error("task create failed")
		return err
	}

	r.recordRestartHistory(restartTask)

	r.DelayStart(ctx, tx, &t, restartTask.ID, restartDelay, waitStop)
	return nil
}
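The delay selection in Restart reduces to a small decision table: drained nodes get no delay at all, a valid per-task delay wins, and a missing delay falls back to the default (the parse-error fallback is elided here). As a pure function, under assumptions: plain time.Duration instead of the protobuf Duration, and hypothetical names (pickRestartDelay, defaultRestartDelay standing in for orchestrator.DefaultRestartDelay).

package main

import (
	"fmt"
	"time"
)

const defaultRestartDelay = 5 * time.Second // stand-in for orchestrator.DefaultRestartDelay

// pickRestartDelay mirrors Restart's delay logic: no delay on drained
// nodes, the task's own delay when one is set, otherwise the default.
func pickRestartDelay(nodeDrained bool, taskDelay *time.Duration) time.Duration {
	if nodeDrained {
		return 0
	}
	if taskDelay != nil {
		return *taskDelay
	}
	return defaultRestartDelay
}

func main() {
	custom := 2 * time.Second
	fmt.Println(pickRestartDelay(true, &custom))  // 0s: drained node, no delay
	fmt.Println(pickRestartDelay(false, &custom)) // 2s: task-level delay
	fmt.Println(pickRestartDelay(false, nil))     // 5s: default
}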