func TestExpireBlacklistedCerts(t *testing.T) {
	now := time.Now()

	longAgo := now.Add(-24 * time.Hour * 1000)
	justBeforeGrace := now.Add(-expiredCertGrace - 5*time.Minute)
	justAfterGrace := now.Add(-expiredCertGrace + 5*time.Minute)
	future := now.Add(time.Hour)

	cluster := &api.Cluster{
		BlacklistedCertificates: map[string]*api.BlacklistedCertificate{
			"longAgo":         {Expiry: ptypes.MustTimestampProto(longAgo)},
			"justBeforeGrace": {Expiry: ptypes.MustTimestampProto(justBeforeGrace)},
			"justAfterGrace":  {Expiry: ptypes.MustTimestampProto(justAfterGrace)},
			"future":          {Expiry: ptypes.MustTimestampProto(future)},
		},
	}

	expireBlacklistedCerts(cluster)

	assert.Len(t, cluster.BlacklistedCertificates, 2)

	_, hasJustAfterGrace := cluster.BlacklistedCertificates["justAfterGrace"]
	assert.True(t, hasJustAfterGrace)

	_, hasFuture := cluster.BlacklistedCertificates["future"]
	assert.True(t, hasFuture)
}
// taskFitNode checks if a node has enough resources to accommodate a task.
func (s *Scheduler) taskFitNode(ctx context.Context, t *api.Task, nodeID string) *api.Task {
	nodeInfo, err := s.nodeSet.nodeInfo(nodeID)
	if err != nil {
		// node does not exist in set (it may have been deleted)
		return nil
	}
	newT := *t
	s.pipeline.SetTask(t)
	if !s.pipeline.Process(&nodeInfo) {
		// this node cannot accommodate this task
		newT.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
		newT.Status.Message = s.pipeline.Explain()
		s.allTasks[t.ID] = &newT

		return &newT
	}
	newT.Status = api.TaskStatus{
		State:     api.TaskStateAssigned,
		Timestamp: ptypes.MustTimestampProto(time.Now()),
		Message:   "scheduler confirmed task can run on preassigned node",
	}
	s.allTasks[t.ID] = &newT

	if nodeInfo.addTask(&newT) {
		s.nodeSet.updateNode(nodeInfo)
	}
	return &newT
}
func (u *Updater) completeUpdate(ctx context.Context, serviceID string) {
	log.G(ctx).Debugf("update of service %s complete", serviceID)

	err := u.store.Update(func(tx store.Tx) error {
		service := store.GetService(tx, serviceID)
		if service == nil {
			return nil
		}
		if service.UpdateStatus == nil {
			// The service was changed since we started this update
			return nil
		}
		if service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_COMPLETED
			service.UpdateStatus.Message = "rollback completed"
		} else {
			service.UpdateStatus.State = api.UpdateStatus_COMPLETED
			service.UpdateStatus.Message = "update completed"
		}
		service.UpdateStatus.CompletedAt = ptypes.MustTimestampProto(time.Now())

		return store.UpdateService(tx, service)
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to mark update of service %s complete", serviceID)
	}
}
// newLogMessage is just a helper to build a new log message.
func newLogMessage(msgctx api.LogContext, format string, vs ...interface{}) api.LogMessage {
	return api.LogMessage{
		Context:   msgctx,
		Timestamp: ptypes.MustTimestampProto(time.Now()),
		Data:      []byte(fmt.Sprintf(format, vs...)),
	}
}
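// A minimal usage sketch for newLogMessage (hypothetical helper, not from the
// original source). It assumes api.LogContext carries ServiceID, NodeID, and
// TaskID fields, as in swarmkit's log broker API.
func taskStartedMessage(serviceID, nodeID, taskID string) api.LogMessage {
	return newLogMessage(api.LogContext{
		ServiceID: serviceID,
		NodeID:    nodeID,
		TaskID:    taskID,
	}, "task %s started on node %s", taskID, nodeID)
}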
func newTask(cluster *api.Cluster, service *api.Service, slot uint64) *api.Task {
	var logDriver *api.Driver
	if service.Spec.Task.LogDriver != nil {
		// use the log driver specific to the task, if we have it.
		logDriver = service.Spec.Task.LogDriver
	} else if cluster != nil {
		// pick up the cluster default, if available.
		logDriver = cluster.Spec.TaskDefaults.LogDriver // nil is okay here.
	}

	taskID := identity.NewID()
	// We use the following scheme to assign Task names to Annotations:
	// Annotations.Name := <ServiceAnnotations.Name>.<Slot>.<TaskID>
	name := fmt.Sprintf("%v.%v.%v", service.Spec.Annotations.Name, slot, taskID)

	return &api.Task{
		ID:                 taskID,
		Annotations:        api.Annotations{Name: name},
		ServiceAnnotations: service.Spec.Annotations,
		Spec:               service.Spec.Task,
		ServiceID:          service.ID,
		Slot:               slot,
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		Endpoint: &api.Endpoint{
			Spec: service.Spec.Endpoint.Copy(),
		},
		DesiredState: api.TaskStateRunning,
		LogDriver:    logDriver,
	}
}
func newTask(cluster *api.Cluster, service *api.Service, instance uint64) *api.Task {
	var logDriver *api.Driver
	if service.Spec.Task.LogDriver != nil {
		// use the log driver specific to the task, if we have it.
		logDriver = service.Spec.Task.LogDriver
	} else if cluster != nil {
		// pick up the cluster default, if available.
		logDriver = cluster.Spec.TaskDefaults.LogDriver // nil is okay here.
	}

	// NOTE(stevvooe): For now, we don't override the container naming and
	// labeling scheme in the agent. If we decide to do this in the future,
	// they should be overridden here.
	return &api.Task{
		ID:                 identity.NewID(),
		ServiceAnnotations: service.Spec.Annotations,
		Spec:               service.Spec.Task,
		ServiceID:          service.ID,
		Slot:               instance,
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		Endpoint: &api.Endpoint{
			Spec: service.Spec.Endpoint.Copy(),
		},
		DesiredState: api.TaskStateRunning,
		LogDriver:    logDriver,
	}
}
// AttachNetwork allows the node to request the resource allocation
// needed for a network attachment on the specific node.
// - Returns `InvalidArgument` if the Spec is malformed.
// - Returns `NotFound` if the Network is not found.
// - Returns `PermissionDenied` if the Network is not manually attachable.
// - Returns an error if the creation fails.
func (ra *ResourceAllocator) AttachNetwork(ctx context.Context, request *api.AttachNetworkRequest) (*api.AttachNetworkResponse, error) {
	nodeInfo, err := ca.RemoteNode(ctx)
	if err != nil {
		return nil, err
	}

	var network *api.Network
	ra.store.View(func(tx store.ReadTx) {
		network = store.GetNetwork(tx, request.Config.Target)
		if network == nil {
			if networks, err := store.FindNetworks(tx, store.ByName(request.Config.Target)); err == nil && len(networks) == 1 {
				network = networks[0]
			}
		}
	})
	if network == nil {
		return nil, grpc.Errorf(codes.NotFound, "network %s not found", request.Config.Target)
	}

	if !network.Spec.Attachable {
		return nil, grpc.Errorf(codes.PermissionDenied, "network %s not manually attachable", request.Config.Target)
	}

	t := &api.Task{
		ID:     identity.NewID(),
		NodeID: nodeInfo.NodeID,
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Attachment{
				Attachment: &api.NetworkAttachmentSpec{
					ContainerID: request.ContainerID,
				},
			},
			Networks: []*api.NetworkAttachmentConfig{
				{
					Target:    network.ID,
					Addresses: request.Config.Addresses,
				},
			},
		},
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		DesiredState: api.TaskStateRunning,
		// TODO: Add Network attachment.
	}

	if err := ra.store.Update(func(tx store.Tx) error {
		return store.CreateTask(tx, t)
	}); err != nil {
		return nil, err
	}

	return &api.AttachNetworkResponse{AttachmentID: t.ID}, nil
}
func (s *Scheduler) noSuitableNode(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
	explanation := s.pipeline.Explain()
	for _, t := range taskGroup {
		log.G(ctx).WithField("task.id", t.ID).Debug("no suitable node available for task")

		newT := *t
		newT.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
		if explanation != "" {
			newT.Status.Message = "no suitable node (" + explanation + ")"
		} else {
			newT.Status.Message = "no suitable node"
		}
		s.allTasks[t.ID] = &newT
		schedulingDecisions[t.ID] = schedulingDecision{old: t, new: &newT}

		s.enqueue(&newT)
	}
}
func newTask(service *api.Service, instance uint64) *api.Task {
	// NOTE(stevvooe): For now, we don't override the container naming and
	// labeling scheme in the agent. If we decide to do this in the future,
	// they should be overridden here.
	return &api.Task{
		ID:                 identity.NewID(),
		ServiceAnnotations: service.Spec.Annotations,
		Spec:               service.Spec.Task,
		ServiceID:          service.ID,
		Slot:               instance,
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		DesiredState: api.TaskStateRunning,
	}
}
// taskFitNode checks if a node has enough resources to accommodate a task.
func (s *Scheduler) taskFitNode(ctx context.Context, t *api.Task, nodeID string) *api.Task {
	nodeInfo := s.nodeHeap.nodeInfo(nodeID)
	s.pipeline.SetTask(t)
	if !s.pipeline.Process(&nodeInfo) {
		// this node cannot accommodate this task
		return nil
	}
	newT := *t
	newT.Status = api.TaskStatus{
		State:     api.TaskStateAssigned,
		Timestamp: ptypes.MustTimestampProto(time.Now()),
		Message:   "scheduler confirmed task can run on preassigned node",
	}
	s.allTasks[t.ID] = &newT

	if nodeInfo.addTask(&newT) {
		s.nodeHeap.updateNode(nodeInfo)
	}
	return &newT
}
func newTask(cluster *api.Cluster, service *api.Service, slot uint64, nodeID string) *api.Task {
	var logDriver *api.Driver
	if service.Spec.Task.LogDriver != nil {
		// use the log driver specific to the task, if we have it.
		logDriver = service.Spec.Task.LogDriver
	} else if cluster != nil {
		// pick up the cluster default, if available.
		logDriver = cluster.Spec.TaskDefaults.LogDriver // nil is okay here.
	}

	taskID := identity.NewID()
	task := api.Task{
		ID:                 taskID,
		ServiceAnnotations: service.Spec.Annotations,
		Spec:               service.Spec.Task,
		ServiceID:          service.ID,
		Slot:               slot,
		Status: api.TaskStatus{
			State:     api.TaskStateNew,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "created",
		},
		Endpoint: &api.Endpoint{
			Spec: service.Spec.Endpoint.Copy(),
		},
		DesiredState: api.TaskStateRunning,
		LogDriver:    logDriver,
	}

	// In global mode we also set the NodeID
	if nodeID != "" {
		task.NodeID = nodeID
	}

	// Assign name based on task name schema
	name := store.TaskName(&task)
	task.Annotations = api.Annotations{Name: name}

	return &task
}
func (u *Updater) startUpdate(ctx context.Context, serviceID string) {
	err := u.store.Update(func(tx store.Tx) error {
		service := store.GetService(tx, serviceID)
		if service == nil {
			return nil
		}
		if service.UpdateStatus != nil {
			return nil
		}

		service.UpdateStatus = &api.UpdateStatus{
			State:     api.UpdateStatus_UPDATING,
			Message:   "update in progress",
			StartedAt: ptypes.MustTimestampProto(time.Now()),
		}

		return store.UpdateService(tx, service)
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to mark update of service %s in progress", serviceID)
	}
}
// scheduleTask schedules a single task.
func (s *Scheduler) scheduleTask(ctx context.Context, t *api.Task) *api.Task {
	s.pipeline.SetTask(t)
	n, _ := s.nodeHeap.findMin(s.pipeline.Process, s.scanAllNodes)
	if n == nil {
		log.G(ctx).WithField("task.id", t.ID).Debug("No suitable node available for task")
		return nil
	}

	log.G(ctx).WithField("task.id", t.ID).Debugf("Assigning to node %s", n.ID)
	newT := *t
	newT.NodeID = n.ID
	newT.Status = api.TaskStatus{
		State:     api.TaskStateAssigned,
		Timestamp: ptypes.MustTimestampProto(time.Now()),
		Message:   "scheduler assigned task to node",
	}
	s.allTasks[t.ID] = &newT

	nodeInfo := s.nodeHeap.nodeInfo(n.ID)
	if nodeInfo.addTask(&newT) {
		s.nodeHeap.updateNode(nodeInfo)
	}
	return &newT
}
// updateTaskStatus sets TaskStatus and updates timestamp.
func updateTaskStatus(t *api.Task, newStatus api.TaskState, message string) {
	t.Status.State = newStatus
	t.Status.Message = message
	t.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
}
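// A minimal usage sketch (hypothetical helper, not from the original source):
// move a task to the shutdown state and stamp the transition time, using the
// api.TaskStateShutdown constant that appears in the snippets below.
func markShutdown(t *api.Task) {
	updateTaskStatus(t, api.TaskStateShutdown, "shutdown requested")
}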
// Do progresses the task state using the controller performing a single
// operation on the controller. The returned TaskStatus should be marked as the
// new state of the task.
//
// The returned status should be reported and placed back onto the task
// before the next call. The operation can be cancelled by cancelling the
// provided context.
//
// Errors from the task controller will be reported on the returned status. Any
// errors coming from this function should not be reported as related to the
// individual task.
//
// If ErrTaskNoop is returned, it means a second call to Do will result in no
// change. If ErrTaskDead is returned, calls to Do will no longer result in any
// action.
func Do(ctx context.Context, task *api.Task, ctlr Controller) (*api.TaskStatus, error) {
	status := task.Status.Copy()

	// stay in the current state.
	noop := func(errs ...error) (*api.TaskStatus, error) {
		return status, ErrTaskNoop
	}

	retry := func() (*api.TaskStatus, error) {
		// while we retry on all errors, this allows us to explicitly declare
		// retry cases.
		return status, ErrTaskRetry
	}

	// transition moves the task to the next state.
	transition := func(state api.TaskState, msg string) (*api.TaskStatus, error) {
		current := status.State
		status.State = state
		status.Message = msg

		if current > state {
			panic("invalid state transition")
		}
		return status, nil
	}

	// containerStatus, portStatus, and exitCode keep track of whether we've
	// set them in this particular method. Eventually, we assemble these as
	// part of a defer.
	var (
		containerStatus *api.ContainerStatus
		portStatus      *api.PortStatus
		exitCode        int
	)

	// fatal is called when execution of the task has failed fatally. In this
	// case, we proceed to a terminal error state and set the appropriate
	// fields.
	//
	// Common checks for the nature of an error should be included here. If the
	// error is determined not to be fatal for the task, we retry instead of
	// moving to a terminal state.
	fatal := func(err error) (*api.TaskStatus, error) {
		if err == nil {
			panic("err must not be nil when fatal")
		}

		if cs, ok := err.(ContainerStatuser); ok {
			var err error
			containerStatus, err = cs.ContainerStatus(ctx)
			if err != nil && !contextDoneError(err) {
				log.G(ctx).WithError(err).Error("error resolving container status on fatal")
			}
		}

		// make sure we've set the *correct* exit code
		if ec, ok := err.(ExitCoder); ok {
			exitCode = ec.ExitCode()
		}

		if cause := errors.Cause(err); cause == context.DeadlineExceeded || cause == context.Canceled {
			return retry()
		}

		status.Err = err.Error() // still reported on temporary
		if IsTemporary(err) {
			return retry()
		}

		// only at this point do we consider the error fatal to the task.
		log.G(ctx).WithError(err).Error("fatal task error")

		// NOTE(stevvooe): The following switch dictates the terminal failure
		// state based on the state in which the failure was encountered.
		switch {
		case status.State < api.TaskStateStarting:
			status.State = api.TaskStateRejected
		case status.State >= api.TaskStateStarting:
			status.State = api.TaskStateFailed
		}

		return status, nil
	}

	// below, we have several callbacks that are run after the state transition
	// is completed.
	defer func() {
		logStateChange(ctx, task.DesiredState, task.Status.State, status.State)

		if !equality.TaskStatusesEqualStable(status, &task.Status) {
			status.Timestamp = ptypes.MustTimestampProto(time.Now())
		}
	}()

	// extract the container status from the container, if supported.
	defer func() {
		// only do this if in an active state
		if status.State < api.TaskStateStarting {
			return
		}

		if containerStatus == nil {
			// collect this, if we haven't
			cctlr, ok := ctlr.(ContainerStatuser)
			if !ok {
				return
			}

			var err error
			containerStatus, err = cctlr.ContainerStatus(ctx)
			if err != nil && !contextDoneError(err) {
				log.G(ctx).WithError(err).Error("container status unavailable")
			}

			// at this point, things have gone fairly wrong. Remain positive
			// and let's get something out the door.
			if containerStatus == nil {
				containerStatus = new(api.ContainerStatus)
				containerStatusTask := task.Status.GetContainer()
				if containerStatusTask != nil {
					*containerStatus = *containerStatusTask // copy it over.
				}
			}
		}

		// at this point, we *must* have a containerStatus.
		if exitCode != 0 {
			containerStatus.ExitCode = int32(exitCode)
		}

		status.RuntimeStatus = &api.TaskStatus_Container{
			Container: containerStatus,
		}

		if portStatus == nil {
			pctlr, ok := ctlr.(PortStatuser)
			if !ok {
				return
			}

			var err error
			portStatus, err = pctlr.PortStatus(ctx)
			if err != nil && !contextDoneError(err) {
				log.G(ctx).WithError(err).Error("container port status unavailable")
			}
		}

		status.PortStatus = portStatus
	}()

	if task.DesiredState == api.TaskStateShutdown {
		if status.State >= api.TaskStateCompleted {
			return noop()
		}

		if err := ctlr.Shutdown(ctx); err != nil {
			return fatal(err)
		}

		return transition(api.TaskStateShutdown, "shutdown")
	}

	if status.State > task.DesiredState {
		return noop() // way beyond desired state, pause
	}

	// the following states may proceed past desired state.
	switch status.State {
	case api.TaskStatePreparing:
		if err := ctlr.Prepare(ctx); err != nil && err != ErrTaskPrepared {
			return fatal(err)
		}

		return transition(api.TaskStateReady, "prepared")
	case api.TaskStateStarting:
		if err := ctlr.Start(ctx); err != nil && err != ErrTaskStarted {
			return fatal(err)
		}

		return transition(api.TaskStateRunning, "started")
	case api.TaskStateRunning:
		if err := ctlr.Wait(ctx); err != nil {
			return fatal(err)
		}

		return transition(api.TaskStateCompleted, "finished")
	}

	// The following represent "pause" states. We can only proceed when the
	// desired state is beyond our current state.
	if status.State >= task.DesiredState {
		return noop()
	}

	switch status.State {
	case api.TaskStateNew, api.TaskStatePending, api.TaskStateAssigned:
		return transition(api.TaskStateAccepted, "accepted")
	case api.TaskStateAccepted:
		return transition(api.TaskStatePreparing, "preparing")
	case api.TaskStateReady:
		return transition(api.TaskStateStarting, "starting")
	default: // terminal states
		return noop()
	}
}
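// A minimal caller sketch (hypothetical, not from the original source): drive a
// task by calling Do repeatedly, copying the returned status back onto the task
// before the next call, as the doc comment on Do requires. The fixed one-second
// backoff on retry is an assumption for illustration only.
func runTask(ctx context.Context, task *api.Task, ctlr Controller) error {
	for {
		status, err := Do(ctx, task, ctlr)
		if status != nil {
			task.Status = *status
		}
		switch err {
		case nil:
			// a transition happened; keep progressing toward the desired state.
		case ErrTaskRetry:
			// transient failure; wait briefly before the next attempt.
			time.Sleep(time.Second)
		case ErrTaskNoop:
			// nothing left to do until the desired state changes.
			return nil
		default:
			// ErrTaskDead or another error from Do itself.
			return err
		}
	}
}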
// scheduleTaskGroup schedules a batch of tasks that are part of the same
// service and share the same version of the spec.
func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
	// Pick a task at random from taskGroup to use for constraint
	// evaluation. It doesn't matter which one we pick because all the
	// tasks in the group are equal in terms of the fields the constraint
	// filters consider.
	var t *api.Task
	for _, t = range taskGroup {
		break
	}

	s.pipeline.SetTask(t)

	now := time.Now()
	nodeLess := func(a *NodeInfo, b *NodeInfo) bool {
		// If either node has at least maxFailures recent failures,
		// that's the deciding factor.
		recentFailuresA := a.countRecentFailures(now, t.ServiceID)
		recentFailuresB := b.countRecentFailures(now, t.ServiceID)

		if recentFailuresA >= maxFailures || recentFailuresB >= maxFailures {
			if recentFailuresA > recentFailuresB {
				return false
			}
			if recentFailuresB > recentFailuresA {
				return true
			}
		}

		tasksByServiceA := a.DesiredRunningTasksCountByService[t.ServiceID]
		tasksByServiceB := b.DesiredRunningTasksCountByService[t.ServiceID]

		if tasksByServiceA < tasksByServiceB {
			return true
		}
		if tasksByServiceA > tasksByServiceB {
			return false
		}

		// Total number of tasks breaks ties.
		return a.DesiredRunningTasksCount < b.DesiredRunningTasksCount
	}

	nodes := s.nodeSet.findBestNodes(len(taskGroup), s.pipeline.Process, nodeLess)
	nodeCount := len(nodes)
	if nodeCount == 0 {
		s.noSuitableNode(ctx, taskGroup, schedulingDecisions)
		return
	}

	failedConstraints := make(map[int]bool) // key is index in nodes slice
	nodeIter := 0
	for taskID, t := range taskGroup {
		n := &nodes[nodeIter%nodeCount]

		log.G(ctx).WithField("task.id", t.ID).Debugf("assigning to node %s", n.ID)
		newT := *t
		newT.NodeID = n.ID
		newT.Status = api.TaskStatus{
			State:     api.TaskStateAssigned,
			Timestamp: ptypes.MustTimestampProto(time.Now()),
			Message:   "scheduler assigned task to node",
		}
		s.allTasks[t.ID] = &newT

		nodeInfo, err := s.nodeSet.nodeInfo(n.ID)
		if err == nil && nodeInfo.addTask(&newT) {
			s.nodeSet.updateNode(nodeInfo)
			nodes[nodeIter%nodeCount] = nodeInfo
		}

		schedulingDecisions[taskID] = schedulingDecision{old: t, new: &newT}
		delete(taskGroup, taskID)

		if nodeIter+1 < nodeCount {
			// First pass fills the nodes until they have the same
			// number of tasks from this service.
			nextNode := nodes[(nodeIter+1)%nodeCount]
			if nodeLess(&nextNode, &nodeInfo) {
				nodeIter++
			}
		} else {
			// In later passes, we just assign one task at a time
			// to each node that still meets the constraints.
			nodeIter++
		}

		origNodeIter := nodeIter
		for failedConstraints[nodeIter%nodeCount] || !s.pipeline.Process(&nodes[nodeIter%nodeCount]) {
			failedConstraints[nodeIter%nodeCount] = true
			nodeIter++
			if nodeIter-origNodeIter == nodeCount {
				// None of the nodes meet the constraints anymore.
				s.noSuitableNode(ctx, taskGroup, schedulingDecisions)
				return
			}
		}
	}
}