func newTask(cluster *api.Cluster, service *api.Service, slot uint64, nodeID string) *api.Task {
	var logDriver *api.Driver
	if service.Spec.Task.LogDriver != nil {
		// use the log driver specific to the task, if we have it.
		logDriver = service.Spec.Task.LogDriver
	} else if cluster != nil {
		// pick up the cluster default, if available.
		logDriver = cluster.Spec.TaskDefaults.LogDriver // nil is okay here.
	}

	taskID := identity.NewID()
	task := api.Task{
		ID:                 taskID,
		ServiceAnnotations: service.Spec.Annotations,
		Spec:               service.Spec.Task,
		ServiceID:          service.ID,
		Slot:               slot,
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		Endpoint: &api.Endpoint{
			Spec: service.Spec.Endpoint.Copy(),
		},
		DesiredState: api.TaskStateRunning,
		LogDriver:    logDriver,
	}

	// In global mode we also set the NodeID
	if nodeID != "" {
		task.NodeID = nodeID
	}

	// Assign name based on task name schema
	name := store.TaskName(&task)
	task.Annotations = api.Annotations{Name: name}

	return &task
}
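// Usage sketch (illustrative only, not from the original source; the cluster
// and service values are assumed to come from the caller, and the slot and
// node ID below are hypothetical): newTask covers both service modes. A
// replicated service passes its replica slot and an empty nodeID; a global
// service passes slot 0 and the target node's ID, which newTask copies into
// task.NodeID.
//
//	replica := newTask(cluster, service, 3, "")           // replicated mode, fills slot 3
//	pinned := newTask(cluster, service, 0, "node-abc123") // global mode, pinned to one node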
// Restart initiates a new task to replace t if appropriate under the service's
// restart policy.
func (r *RestartSupervisor) Restart(ctx context.Context, tx store.Tx, service *api.Service, t api.Task) error {
	// TODO(aluzzardi): This function should not depend on `service`.

	t.DesiredState = api.TaskStateShutdown
	err := store.UpdateTask(tx, &t)
	if err != nil {
		log.G(ctx).WithError(err).Error("failed to set task desired state to shutdown")
		return err
	}

	if !r.shouldRestart(ctx, &t, service) {
		return nil
	}

	var restartTask *api.Task

	if isReplicatedService(service) {
		restartTask = newTask(service, t.Slot)
	} else if isGlobalService(service) {
		restartTask = newTask(service, 0)
		restartTask.NodeID = t.NodeID
	} else {
		log.G(ctx).Error("service not supported by restart supervisor")
		return nil
	}

	n := store.GetNode(tx, t.NodeID)

	restartTask.DesiredState = api.TaskStateAccepted

	var restartDelay time.Duration
	// Restart delay is not applied to drained nodes.
	if n == nil || n.Spec.Availability != api.NodeAvailabilityDrain {
		if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
			var err error
			restartDelay, err = ptypes.Duration(t.Spec.Restart.Delay)
			if err != nil {
				log.G(ctx).WithError(err).Error("invalid restart delay; using default")
				restartDelay = defaultRestartDelay
			}
		} else {
			restartDelay = defaultRestartDelay
		}
	}

	waitStop := true

	// Normally we wait for the old task to stop running, but we skip this
	// if the old task is already dead or the node it's assigned to is down.
	if (n != nil && n.Status.State == api.NodeStatus_DOWN) || t.Status.State > api.TaskStateRunning {
		waitStop = false
	}

	if err := store.CreateTask(tx, restartTask); err != nil {
		log.G(ctx).WithError(err).WithField("task.id", restartTask.ID).Error("task create failed")
		return err
	}

	r.recordRestartHistory(restartTask)

	r.DelayStart(ctx, tx, &t, restartTask.ID, restartDelay, waitStop)
	return nil
}
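// Illustrative helper (exampleRestartDelay is not part of the original
// source): the restart delay is stored on the task spec as a protobuf
// Duration, so it must be converted with ptypes.Duration before use. This
// mirrors the fallback logic above, where a missing or unparseable delay
// resolves to defaultRestartDelay.
func exampleRestartDelay(t *api.Task) time.Duration {
	if t.Spec.Restart == nil || t.Spec.Restart.Delay == nil {
		return defaultRestartDelay // no policy delay configured
	}
	delay, err := ptypes.Duration(t.Spec.Restart.Delay)
	if err != nil {
		return defaultRestartDelay // invalid delay; fall back to the default
	}
	return delay
}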
// Restart initiates a new task to replace t if appropriate under the service's
// restart policy.
func (r *RestartSupervisor) Restart(ctx context.Context, tx store.Tx, cluster *api.Cluster, service *api.Service, t api.Task) error {
	// TODO(aluzzardi): This function should not depend on `service`.

	// Is the old task still in the process of restarting? If so, wait for
	// its restart delay to elapse, to avoid tight restart loops (for
	// example, when the image doesn't exist).
	r.mu.Lock()
	oldDelay, ok := r.delays[t.ID]
	if ok {
		if !oldDelay.waiter {
			oldDelay.waiter = true
			go r.waitRestart(ctx, oldDelay, cluster, t.ID)
		}
		r.mu.Unlock()
		return nil
	}
	r.mu.Unlock()

	// Sanity check: was the task shut down already by a separate call to
	// Restart? If so, we must avoid restarting it, because this will create
	// an extra task. This should never happen unless there is a bug.
	if t.DesiredState > api.TaskStateRunning {
		return errors.New("Restart called on task that was already shut down")
	}

	t.DesiredState = api.TaskStateShutdown
	err := store.UpdateTask(tx, &t)
	if err != nil {
		log.G(ctx).WithError(err).Error("failed to set task desired state to shutdown")
		return err
	}

	if !r.shouldRestart(ctx, &t, service) {
		return nil
	}

	var restartTask *api.Task

	if isReplicatedService(service) {
		restartTask = newTask(cluster, service, t.Slot)
	} else if isGlobalService(service) {
		restartTask = newTask(cluster, service, 0)
		restartTask.NodeID = t.NodeID
	} else {
		log.G(ctx).Error("service not supported by restart supervisor")
		return nil
	}

	n := store.GetNode(tx, t.NodeID)

	restartTask.DesiredState = api.TaskStateReady

	var restartDelay time.Duration
	// Restart delay is not applied to drained nodes.
	if n == nil || n.Spec.Availability != api.NodeAvailabilityDrain {
		if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
			var err error
			restartDelay, err = ptypes.Duration(t.Spec.Restart.Delay)
			if err != nil {
				log.G(ctx).WithError(err).Error("invalid restart delay; using default")
				restartDelay = defaultRestartDelay
			}
		} else {
			restartDelay = defaultRestartDelay
		}
	}

	waitStop := true

	// Normally we wait for the old task to stop running, but we skip this
	// if the old task is already dead or the node it's assigned to is down.
	if (n != nil && n.Status.State == api.NodeStatus_DOWN) || t.Status.State > api.TaskStateRunning {
		waitStop = false
	}

	if err := store.CreateTask(tx, restartTask); err != nil {
		log.G(ctx).WithError(err).WithField("task.id", restartTask.ID).Error("task create failed")
		return err
	}

	r.recordRestartHistory(restartTask)

	r.DelayStart(ctx, tx, &t, restartTask.ID, restartDelay, waitStop)
	return nil
}
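// Usage sketch (illustrative only; restartOnFailure and the surrounding
// orchestrator loop are assumed, not taken from this source): Restart expects
// to run inside a store write transaction, so a caller reacting to a failed
// task would wrap it in MemoryStore.Update and re-read the task so the
// decision is made on fresh state.
func restartOnFailure(ctx context.Context, s *store.MemoryStore, r *RestartSupervisor, cluster *api.Cluster, service *api.Service, taskID string) error {
	return s.Update(func(tx store.Tx) error {
		t := store.GetTask(tx, taskID)
		if t == nil {
			return nil // task was removed in the meantime; nothing to restart
		}
		return r.Restart(ctx, tx, cluster, service, *t)
	})
}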