Example #1
func (g *GlobalOrchestrator) removeTasksFromNode(ctx context.Context, node *api.Node) {
	var (
		tasks []*api.Task
		err   error
	)
	g.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: removeTasksFromNode failed finding tasks")
		return
	}

	_, err = g.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			// GlobalOrchestrator only removes tasks from globalServices
			if _, exists := g.globalServices[t.ServiceID]; exists {
				g.removeTask(ctx, batch, t)
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: removeTasksFromNode failed")
	}
}
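Example #1 follows the read-then-write pattern shared by most of the snippets below: collect tasks inside a read-only store.View, then apply changes inside store.Batch. The sketch below distills that pattern on its own, assuming the same api and store packages used above; shutdownTasksOnNode and its shutdown-marking body are illustrative, not taken from the original code.

func shutdownTasksOnNode(s *store.MemoryStore, nodeID string) error {
	var (
		tasks []*api.Task
		err   error
	)
	// Read phase: gather the tasks in a read-only transaction.
	s.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID))
	})
	if err != nil {
		return err
	}

	// Write phase: one batch.Update per task, so the store can split the
	// writes across several transactions.
	_, err = s.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			t := t // capture the loop variable for the closure
			if batchErr := batch.Update(func(tx store.Tx) error {
				current := store.GetTask(tx, t.ID)
				if current == nil || current.DesiredState > api.TaskStateRunning {
					return nil // already gone or already shutting down
				}
				current.DesiredState = api.TaskStateShutdown
				return store.UpdateTask(tx, current)
			}); batchErr != nil {
				return batchErr
			}
		}
		return nil
	})
	return err
}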
Example #2
func deleteServiceTasks(ctx context.Context, s *store.MemoryStore, service *api.Service) {
	var (
		tasks []*api.Task
		err   error
	)
	s.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByServiceID(service.ID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to list tasks")
		return
	}

	_, err = s.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			err := batch.Update(func(tx store.Tx) error {
				if err := store.DeleteTask(tx, t.ID); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to delete task")
				}
				return nil
			})
			if err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("task search transaction failed")
	}
}
Example #3
// reconcileServiceOneNode checks one service on one node
func (g *GlobalOrchestrator) reconcileServiceOneNode(ctx context.Context, serviceID string, nodeID string) {
	_, exists := g.nodes[nodeID]
	if !exists {
		return
	}
	service, exists := g.globalServices[serviceID]
	if !exists {
		return
	}
	// whether this node has completed the service
	completed := false
	// tasks for this node and service
	var (
		tasks []*api.Task
		err   error
	)
	g.store.View(func(tx store.ReadTx) {
		var tasksOnNode []*api.Task
		tasksOnNode, err = store.FindTasks(tx, store.ByNodeID(nodeID))
		if err != nil {
			return
		}
		for _, t := range tasksOnNode {
			// only interested in one service
			if t.ServiceID != serviceID {
				continue
			}
			if isTaskRunning(t) {
				tasks = append(tasks, t)
			} else {
				if isTaskCompleted(t, restartCondition(t)) {
					completed = true
				}
			}
		}
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcile failed finding tasks")
		return
	}

	_, err = g.store.Batch(func(batch *store.Batch) error {
		// if the restart policy considers this node to have finished its task,
		// remove all of its running tasks
		if completed {
			g.removeTasks(ctx, batch, service, tasks)
			return nil
		}
		// this node needs to run 1 copy of the task
		if len(tasks) == 0 {
			g.addTask(ctx, batch, service, nodeID)
		} else {
			g.removeTasks(ctx, batch, service, tasks[1:])
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServiceOneNode batch failed")
	}
}
Example #4
// getRunnableAndDeadSlots returns two maps of slots. The first contains slots
// that have at least one task with a desired state above NEW and less than or
// equal to RUNNING. The second is for slots that only contain tasks with a
// desired state above RUNNING.
func getRunnableAndDeadSlots(s *store.MemoryStore, serviceID string) (map[uint64]slot, map[uint64]slot, error) {
	var (
		tasks []*api.Task
		err   error
	)
	s.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByServiceID(serviceID))
	})
	if err != nil {
		return nil, nil, err
	}

	runningSlots := make(map[uint64]slot)
	for _, t := range tasks {
		if t.DesiredState <= api.TaskStateRunning {
			runningSlots[t.Slot] = append(runningSlots[t.Slot], t)
		}
	}

	deadSlots := make(map[uint64]slot)
	for _, t := range tasks {
		if _, exists := runningSlots[t.Slot]; !exists {
			deadSlots[t.Slot] = append(deadSlots[t.Slot], t)
		}
	}

	return runningSlots, deadSlots, nil
}
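A hedged sketch of how the two maps might be consumed, assuming slot is a slice of tasks (as the append calls above suggest); cleanupDeadSlots is a hypothetical caller, not part of the original code.

func cleanupDeadSlots(s *store.MemoryStore, serviceID string) error {
	_, deadSlots, err := getRunnableAndDeadSlots(s, serviceID)
	if err != nil {
		return err
	}
	_, err = s.Batch(func(batch *store.Batch) error {
		// Every task in a dead slot has a desired state above RUNNING,
		// so the whole slot can be deleted.
		for _, slotTasks := range deadSlots {
			for _, t := range slotTasks {
				taskID := t.ID
				if batchErr := batch.Update(func(tx store.Tx) error {
					return store.DeleteTask(tx, taskID)
				}); batchErr != nil {
					return batchErr
				}
			}
		}
		return nil
	})
	return err
}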
Example #5
func (g *GlobalOrchestrator) reconcileOneService(ctx context.Context, service *api.Service) {
	var (
		tasks []*api.Task
		err   error
	)
	g.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByServiceID(service.ID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileOneService failed finding tasks")
		return
	}
	// a node may have completed this service
	nodeCompleted := make(map[string]struct{})
	// nodeID -> task list
	nodeTasks := make(map[string][]*api.Task)

	for _, t := range tasks {
		if isTaskRunning(t) {
			// Collect all running instances of this service
			nodeTasks[t.NodeID] = append(nodeTasks[t.NodeID], t)
		} else {
			// for finished tasks, check restartPolicy
			if isTaskCompleted(t, restartCondition(t)) {
				nodeCompleted[t.NodeID] = struct{}{}
			}
		}
	}

	_, err = g.store.Batch(func(batch *store.Batch) error {
		var updateTasks []*api.Task
		for nodeID := range g.nodes {
			ntasks := nodeTasks[nodeID]
			// if the restart policy considers this node to have finished its task,
			// remove all of its running tasks
			if _, exists := nodeCompleted[nodeID]; exists {
				g.removeTasks(ctx, batch, service, ntasks)
				return nil
			}
			// this node needs to run 1 copy of the task
			if len(ntasks) == 0 {
				g.addTask(ctx, batch, service, nodeID)
			} else {
				updateTasks = append(updateTasks, ntasks[0])
				g.removeTasks(ctx, batch, service, ntasks[1:])
			}
		}
		if len(updateTasks) > 0 {
			g.updater.Update(ctx, service, updateTasks)
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileOneService transaction failed")
	}
}
Example #6
func (r *ReplicatedOrchestrator) reconcile(ctx context.Context, service *api.Service) {
	var (
		tasks []*api.Task
		err   error
	)
	r.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByServiceID(service.ID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("reconcile failed finding tasks")
		return
	}

	runningTasks := make([]*api.Task, 0, len(tasks))
	runningInstances := make(map[uint64]struct{}) // this could be a bitfield...
	for _, t := range tasks {
		// Technically the check below could just be
		// t.DesiredState <= api.TaskStateRunning, but ignoring tasks
		// with DesiredState == NEW simplifies the drainer unit tests.
		if t.DesiredState > api.TaskStateNew && t.DesiredState <= api.TaskStateRunning {
			runningTasks = append(runningTasks, t)
			runningInstances[t.Slot] = struct{}{}
		}
	}
	numTasks := len(runningTasks)

	deploy := service.Spec.GetMode().(*api.ServiceSpec_Replicated)
	specifiedInstances := int(deploy.Replicated.Replicas)

	// TODO(aaronl): Add support for restart delays.

	_, err = r.store.Batch(func(batch *store.Batch) error {
		switch {
		case specifiedInstances > numTasks:
			log.G(ctx).Debugf("Service %s was scaled up from %d to %d instances", service.ID, numTasks, specifiedInstances)
			// Update all current tasks then add missing tasks
			r.updater.Update(ctx, service, runningTasks)
			r.addTasks(ctx, batch, service, runningInstances, specifiedInstances-numTasks)

		case specifiedInstances < numTasks:
			// Update up to N tasks then remove the extra
			log.G(ctx).Debugf("Service %s was scaled down from %d to %d instances", service.ID, numTasks, specifiedInstances)
			r.updater.Update(ctx, service, runningTasks[:specifiedInstances])
			r.removeTasks(ctx, batch, service, runningTasks[specifiedInstances:])

		case specifiedInstances == numTasks:
			// Simple update, no scaling - update all tasks.
			r.updater.Update(ctx, service, runningTasks)
		}
		return nil
	})

	if err != nil {
		log.G(ctx).WithError(err).Errorf("reconcile batch failed")
	}
}
Example #7
func getRunnableServiceTasks(t *testing.T, s *store.MemoryStore, service *api.Service) []*api.Task {
	var (
		err   error
		tasks []*api.Task
	)

	s.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByServiceID(service.ID))
	})
	assert.NoError(t, err)

	runnable := []*api.Task{}
	for _, task := range tasks {
		if task.DesiredState == api.TaskStateRunning {
			runnable = append(runnable, task)
		}
	}
	return runnable
}
Example #8
// RemoveNetwork removes a Network referenced by NetworkID.
// - Returns `InvalidArgument` if NetworkID is not provided.
// - Returns `NotFound` if the Network is not found.
// - Returns an error if the deletion fails.
func (s *Server) RemoveNetwork(ctx context.Context, request *api.RemoveNetworkRequest) (*api.RemoveNetworkResponse, error) {
	if request.NetworkID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}

	err := s.store.Update(func(tx store.Tx) error {
		services, err := store.FindServices(tx, store.ByReferencedNetworkID(request.NetworkID))
		if err != nil {
			return grpc.Errorf(codes.Internal, "could not find services using network %s: %v", request.NetworkID, err)
		}

		if len(services) != 0 {
			return grpc.Errorf(codes.FailedPrecondition, "network %s is in use by service %s", request.NetworkID, services[0].ID)
		}

		tasks, err := store.FindTasks(tx, store.ByReferencedNetworkID(request.NetworkID))
		if err != nil {
			return grpc.Errorf(codes.Internal, "could not find tasks using network %s: %v", request.NetworkID, err)
		}

		if len(tasks) != 0 {
			return grpc.Errorf(codes.FailedPrecondition, "network %s is in use by task %s", request.NetworkID, tasks[0].ID)
		}

		nw := store.GetNetwork(tx, request.NetworkID)
		if nw == nil {
			// Let the caller map a missing network to codes.NotFound below.
			return store.ErrNotExist
		}
		if _, ok := nw.Spec.Annotations.Labels["com.docker.swarm.internal"]; ok {
			networkDescription := nw.ID
			if nw.Spec.Annotations.Name != "" {
				networkDescription = fmt.Sprintf("%s (%s)", nw.Spec.Annotations.Name, nw.ID)
			}
			return grpc.Errorf(codes.PermissionDenied, "%s is a pre-defined network and cannot be removed", networkDescription)
		}
		return store.DeleteNetwork(tx, request.NetworkID)
	})
	if err != nil {
		if err == store.ErrNotExist {
			return nil, grpc.Errorf(codes.NotFound, "network %s not found", request.NetworkID)
		}
		return nil, err
	}
	return &api.RemoveNetworkResponse{}, nil
}
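A small, hypothetical caller of RemoveNetwork, showing how the gRPC status codes returned above might be consumed. grpc.Code is an assumption from the same grpc package the handler already uses; removeNetworkIgnoringMissing is not part of the original code.

func removeNetworkIgnoringMissing(ctx context.Context, s *Server, networkID string) error {
	_, err := s.RemoveNetwork(ctx, &api.RemoveNetworkRequest{NetworkID: networkID})
	if err != nil && grpc.Code(err) != codes.NotFound {
		// InvalidArgument, FailedPrecondition, PermissionDenied and internal
		// errors are surfaced; a missing network is treated as already removed.
		return err
	}
	return nil
}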
Example #9
func (s *subscription) match() {
	s.mu.Lock()
	defer s.mu.Unlock()

	add := func(t *api.Task) {
		if t.NodeID == "" {
			s.pendingTasks[t.ID] = struct{}{}
			return
		}
		if _, ok := s.nodes[t.NodeID]; !ok {
			s.nodes[t.NodeID] = struct{}{}
			s.wg.Add(1)
		}
	}

	s.store.View(func(tx store.ReadTx) {
		for _, nid := range s.message.Selector.NodeIDs {
			s.nodes[nid] = struct{}{}
		}

		for _, tid := range s.message.Selector.TaskIDs {
			if task := store.GetTask(tx, tid); task != nil {
				add(task)
			}
		}

		for _, sid := range s.message.Selector.ServiceIDs {
			tasks, err := store.FindTasks(tx, store.ByServiceID(sid))
			if err != nil {
				log.L.Warning(err)
				continue
			}
			for _, task := range tasks {
				add(task)
			}
		}
	})
}
Example #10
func (r *Orchestrator) restartTasksByNodeID(ctx context.Context, nodeID string) {
	var err error
	r.store.View(func(tx store.ReadTx) {
		var tasks []*api.Task
		tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID))
		if err != nil {
			return
		}

		for _, t := range tasks {
			if t.DesiredState > api.TaskStateRunning {
				continue
			}
			service := store.GetService(tx, t.ServiceID)
			if orchestrator.IsReplicatedService(service) {
				r.restartTasks[t.ID] = struct{}{}
			}
		}
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("failed to list tasks to remove")
	}
}
Example #11
func (d *Dispatcher) moveTasksToOrphaned(nodeID string) error {
	_, err := d.store.Batch(func(batch *store.Batch) error {
		var (
			tasks []*api.Task
			err   error
		)

		d.store.View(func(tx store.ReadTx) {
			tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID))
		})
		if err != nil {
			return err
		}

		for _, task := range tasks {
			if task.Status.State < api.TaskStateOrphaned {
				task.Status.State = api.TaskStateOrphaned
			}

			if err := batch.Update(func(tx store.Tx) error {
				err := store.UpdateTask(tx, task)
				if err != nil {
					return err
				}

				return nil
			}); err != nil {
				return err
			}

		}

		return nil
	})

	return err
}
Example #12
func (s *Scheduler) setupTasksList(tx store.ReadTx) error {
	tasks, err := store.FindTasks(tx, store.All)
	if err != nil {
		return err
	}

	tasksByNode := make(map[string]map[string]*api.Task)
	for _, t := range tasks {
		// Ignore all tasks that have not reached ALLOCATED
		// state.
		if t.Status.State < api.TaskStateAllocated {
			continue
		}

		s.allTasks[t.ID] = t
		if t.NodeID == "" {
			s.enqueue(t)
			continue
		}
		// preassigned tasks need their resource requirements validated on the corresponding node
		if t.Status.State == api.TaskStateAllocated {
			s.preassignedTasks[t.ID] = t
			continue
		}

		if tasksByNode[t.NodeID] == nil {
			tasksByNode[t.NodeID] = make(map[string]*api.Task)
		}
		tasksByNode[t.NodeID][t.ID] = t
	}

	if err := s.buildNodeHeap(tx, tasksByNode); err != nil {
		return err
	}

	return nil
}
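setupTasksList only needs a read transaction, so it can be driven through store.ViewAndWatch just like the dispatcher examples further down. The wiring below is a minimal sketch under that assumption, not the scheduler's actual startup code; watchAfterSetup is a hypothetical name.

func watchAfterSetup(s *Scheduler, ms *store.MemoryStore) error {
	// The setup callback runs inside a read transaction; the returned channel
	// then delivers events that happened after the snapshot.
	updates, cancel, err := store.ViewAndWatch(ms, func(readTx store.ReadTx) error {
		return s.setupTasksList(readTx)
	})
	if err != nil {
		return err
	}
	defer cancel()
	<-updates // consume a single event, purely for illustration
	return nil
}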
Example #13
// getRunnableSlots returns a map of slots that have at least one task with
// a desired state above NEW and less than or equal to RUNNING.
func getRunnableSlots(s *store.MemoryStore, serviceID string) (map[uint64]slot, error) {
	var (
		tasks []*api.Task
		err   error
	)
	s.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByServiceID(serviceID))
	})
	if err != nil {
		return nil, err
	}

	runningSlots := make(map[uint64]slot)
	for _, t := range tasks {
		// Technically the check below could just be
		// t.DesiredState <= api.TaskStateRunning, but ignoring tasks
		// with DesiredState == NEW simplifies the drainer unit tests.
		if t.DesiredState > api.TaskStateNew && t.DesiredState <= api.TaskStateRunning {
			runningSlots[t.Slot] = append(runningSlots[t.Slot], t)
		}
	}

	return runningSlots, nil
}
Example #14
// Tasks is a stream of task states for a node. Each message contains the full
// list of tasks that should be run on the node; if a task is not present in
// that list, it should be terminated.
func (d *Dispatcher) Tasks(r *api.TasksRequest, stream api.Dispatcher_TasksServer) error {
	nodeInfo, err := ca.RemoteNode(stream.Context())
	if err != nil {
		return err
	}
	nodeID := nodeInfo.NodeID

	if err := d.isRunningLocked(); err != nil {
		return err
	}

	fields := logrus.Fields{
		"node.id":      nodeID,
		"node.session": r.SessionID,
		"method":       "(*Dispatcher).Tasks",
	}
	if nodeInfo.ForwardedBy != nil {
		fields["forwarder.id"] = nodeInfo.ForwardedBy.NodeID
	}
	log.G(stream.Context()).WithFields(fields).Debugf("")

	if _, err = d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
		return err
	}

	tasksMap := make(map[string]*api.Task)
	nodeTasks, cancel, err := store.ViewAndWatch(
		d.store,
		func(readTx store.ReadTx) error {
			tasks, err := store.FindTasks(readTx, store.ByNodeID(nodeID))
			if err != nil {
				return err
			}
			for _, t := range tasks {
				tasksMap[t.ID] = t
			}
			return nil
		},
		state.EventCreateTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventUpdateTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventDeleteTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
	)
	if err != nil {
		return err
	}
	defer cancel()

	for {
		if _, err := d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
			return err
		}

		var tasks []*api.Task
		for _, t := range tasksMap {
			// dispatcher only sends tasks that have been assigned to a node
			if t != nil && t.Status.State >= api.TaskStateAssigned {
				tasks = append(tasks, t)
			}
		}

		if err := stream.Send(&api.TasksMessage{Tasks: tasks}); err != nil {
			return err
		}

		select {
		case event := <-nodeTasks:
			switch v := event.(type) {
			case state.EventCreateTask:
				tasksMap[v.Task.ID] = v.Task
			case state.EventUpdateTask:
				tasksMap[v.Task.ID] = v.Task
			case state.EventDeleteTask:
				delete(tasksMap, v.Task.ID)
			}
		case <-stream.Context().Done():
			return stream.Context().Err()
		case <-d.ctx.Done():
			return d.ctx.Err()
		}
	}
}
Example #15
func TestReplicatedScaleDown(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	s1 := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 6,
				},
			},
		},
	}

	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, s1))

		nodes := []*api.Node{
			{
				ID: "node1",
				Spec: api.NodeSpec{
					Annotations: api.Annotations{
						Name: "name1",
					},
					Availability: api.NodeAvailabilityActive,
				},
				Status: api.NodeStatus{
					State: api.NodeStatus_READY,
				},
			},
			{
				ID: "node2",
				Spec: api.NodeSpec{
					Annotations: api.Annotations{
						Name: "name2",
					},
					Availability: api.NodeAvailabilityActive,
				},
				Status: api.NodeStatus{
					State: api.NodeStatus_READY,
				},
			},
			{
				ID: "node3",
				Spec: api.NodeSpec{
					Annotations: api.Annotations{
						Name: "name3",
					},
					Availability: api.NodeAvailabilityActive,
				},
				Status: api.NodeStatus{
					State: api.NodeStatus_READY,
				},
			},
		}
		for _, node := range nodes {
			assert.NoError(t, store.CreateNode(tx, node))
		}

		// task1 is assigned to node1
		// task2 - task3 are assigned to node2
		// task4 - task6 are assigned to node3
		// task7 is unassigned

		tasks := []*api.Task{
			{
				ID:           "task1",
				Slot:         1,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateStarting,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task1",
				},
				ServiceID: "id1",
				NodeID:    "node1",
			},
			{
				ID:           "task2",
				Slot:         2,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateRunning,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task2",
				},
				ServiceID: "id1",
				NodeID:    "node2",
			},
			{
				ID:           "task3",
				Slot:         3,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateRunning,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task3",
				},
				ServiceID: "id1",
				NodeID:    "node2",
			},
			{
				ID:           "task4",
				Slot:         4,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateRunning,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task4",
				},
				ServiceID: "id1",
				NodeID:    "node3",
			},
			{
				ID:           "task5",
				Slot:         5,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateRunning,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task5",
				},
				ServiceID: "id1",
				NodeID:    "node3",
			},
			{
				ID:           "task6",
				Slot:         6,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateRunning,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task6",
				},
				ServiceID: "id1",
				NodeID:    "node3",
			},
			{
				ID:           "task7",
				Slot:         7,
				DesiredState: api.TaskStateRunning,
				Status: api.TaskStatus{
					State: api.TaskStateNew,
				},
				ServiceAnnotations: api.Annotations{
					Name: "task7",
				},
				ServiceID: "id1",
			},
		}
		for _, task := range tasks {
			assert.NoError(t, store.CreateTask(tx, task))
		}

		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	// Replicas was set to 6, but we started with 7 tasks. task7 should
	// be the one the orchestrator chose to shut down because it was not
	// assigned yet.

	observedShutdown := watchShutdownTask(t, watch)
	assert.Equal(t, "task7", observedShutdown.ID)

	// Now scale down to 2 instances.
	err = s.Update(func(tx store.Tx) error {
		s1.Spec.Mode = &api.ServiceSpec_Replicated{
			Replicated: &api.ReplicatedService{
				Replicas: 2,
			},
		}
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Tasks should be shut down in a way that balances the remaining tasks.
	// node2 and node3 should be preferred over node1 because node1's task
	// is not running yet.

	shutdowns := make(map[string]int)
	for i := 0; i != 4; i++ {
		observedShutdown := watchShutdownTask(t, watch)
		shutdowns[observedShutdown.NodeID]++
	}

	assert.Equal(t, 1, shutdowns["node1"])
	assert.Equal(t, 1, shutdowns["node2"])
	assert.Equal(t, 2, shutdowns["node3"])

	// There should be remaining tasks on node2 and node3.
	s.View(func(readTx store.ReadTx) {
		tasks, err := store.FindTasks(readTx, store.ByDesiredState(api.TaskStateRunning))
		require.NoError(t, err)
		require.Len(t, tasks, 2)
		if tasks[0].NodeID == "node2" {
			assert.Equal(t, "node3", tasks[1].NodeID)
		} else {
			assert.Equal(t, "node3", tasks[0].NodeID)
			assert.Equal(t, "node2", tasks[1].NodeID)
		}
	})
}
Example #16
func (tr *TaskReaper) tick() {
	if len(tr.dirty) == 0 {
		return
	}

	defer func() {
		tr.dirty = make(map[instanceTuple]struct{})
	}()

	var deleteTasks []string

	tr.store.View(func(tx store.ReadTx) {
		for dirty := range tr.dirty {
			service := store.GetService(tx, dirty.serviceID)
			if service == nil {
				continue
			}

			taskHistory := tr.taskHistory

			if taskHistory < 0 {
				continue
			}

			var historicTasks []*api.Task

			switch service.Spec.GetMode().(type) {
			case *api.ServiceSpec_Replicated:
				var err error
				historicTasks, err = store.FindTasks(tx, store.BySlot(dirty.serviceID, dirty.instance))
				if err != nil {
					continue
				}

			case *api.ServiceSpec_Global:
				tasksByNode, err := store.FindTasks(tx, store.ByNodeID(dirty.nodeID))
				if err != nil {
					continue
				}

				for _, t := range tasksByNode {
					if t.ServiceID == dirty.serviceID {
						historicTasks = append(historicTasks, t)
					}
				}
			}

			if int64(len(historicTasks)) <= taskHistory {
				continue
			}

			// TODO(aaronl): This could filter for non-running tasks and use quickselect
			// instead of sorting the whole slice.
			sort.Sort(tasksByTimestamp(historicTasks))

			for _, t := range historicTasks {
				if t.DesiredState <= api.TaskStateRunning {
					// Don't delete running tasks
					continue
				}

				deleteTasks = append(deleteTasks, t.ID)

				taskHistory++
				if int64(len(historicTasks)) <= taskHistory {
					break
				}
			}

		}
	})

	if len(deleteTasks) > 0 {
		tr.store.Batch(func(batch *store.Batch) error {
			for _, taskID := range deleteTasks {
				batch.Update(func(tx store.Tx) error {
					return store.DeleteTask(tx, taskID)
				})
			}
			return nil
		})
	}
}
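The reaper sorts with tasksByTimestamp, whose implementation is not shown here. A hedged sketch of such a sort.Interface follows, ordering tasks by their status timestamp with the oldest first and a nil timestamp treated as oldest; it assumes the protobuf-style Seconds and Nanos fields on the timestamp, and the type name tasksOldestFirst is hypothetical.

type tasksOldestFirst []*api.Task

func (t tasksOldestFirst) Len() int      { return len(t) }
func (t tasksOldestFirst) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t tasksOldestFirst) Less(i, j int) bool {
	ti, tj := t[i].Status.Timestamp, t[j].Status.Timestamp
	if ti == nil {
		return tj != nil // nil timestamps sort first
	}
	if tj == nil {
		return false
	}
	if ti.Seconds != tj.Seconds {
		return ti.Seconds < tj.Seconds
	}
	return ti.Nanos < tj.Nanos
}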
Example #17
func (g *Orchestrator) reconcileServices(ctx context.Context, serviceIDs []string) {
	nodeCompleted := make(map[string]map[string]struct{})
	nodeTasks := make(map[string]map[string][]*api.Task)

	g.store.View(func(tx store.ReadTx) {
		for _, serviceID := range serviceIDs {
			tasks, err := store.FindTasks(tx, store.ByServiceID(serviceID))
			if err != nil {
				log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices failed finding tasks for service %s", serviceID)
				continue
			}

			// a node may have completed this service
			nodeCompleted[serviceID] = make(map[string]struct{})
			// nodeID -> task list
			nodeTasks[serviceID] = make(map[string][]*api.Task)

			for _, t := range tasks {
				if isTaskRunning(t) {
					// Collect all running instances of this service
					nodeTasks[serviceID][t.NodeID] = append(nodeTasks[serviceID][t.NodeID], t)
				} else {
					// for finished tasks, check restartPolicy
					if isTaskCompleted(t, orchestrator.RestartCondition(t)) {
						nodeCompleted[serviceID][t.NodeID] = struct{}{}
					}
				}
			}
		}
	})

	_, err := g.store.Batch(func(batch *store.Batch) error {
		var updateTasks []orchestrator.Slot
		for _, serviceID := range serviceIDs {
			if _, exists := nodeTasks[serviceID]; !exists {
				continue
			}

			service := g.globalServices[serviceID]

			for nodeID, node := range g.nodes {
				meetsConstraints := constraint.NodeMatches(service.constraints, node)
				ntasks := nodeTasks[serviceID][nodeID]
				delete(nodeTasks[serviceID], nodeID)

				// if the restart policy considers this node to have finished its task,
				// remove all of its running tasks
				if _, exists := nodeCompleted[serviceID][nodeID]; exists || !meetsConstraints {
					g.removeTasks(ctx, batch, ntasks)
					continue
				}

				if node.Spec.Availability == api.NodeAvailabilityPause {
					// the node is paused, so we won't add or update
					// any tasks
					continue
				}

				// this node needs to run 1 copy of the task
				if len(ntasks) == 0 {
					g.addTask(ctx, batch, service.Service, nodeID)
				} else {
					updateTasks = append(updateTasks, ntasks)
				}
			}
			if len(updateTasks) > 0 {
				g.updater.Update(ctx, g.cluster, service.Service, updateTasks)
			}

			// Remove any tasks assigned to nodes not found in g.nodes.
			// These must be associated with nodes that are drained, or
			// nodes that no longer exist.
			for _, ntasks := range nodeTasks[serviceID] {
				g.removeTasks(ctx, batch, ntasks)
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServices transaction failed")
	}
}
Example #18
// Assignments is a stream of assignments for a node. Each message contains
// either a full list of tasks and secrets for the node, or an incremental update.
func (d *Dispatcher) Assignments(r *api.AssignmentsRequest, stream api.Dispatcher_AssignmentsServer) error {
	nodeInfo, err := ca.RemoteNode(stream.Context())
	if err != nil {
		return err
	}
	nodeID := nodeInfo.NodeID

	dctx, err := d.isRunningLocked()
	if err != nil {
		return err
	}

	fields := logrus.Fields{
		"node.id":      nodeID,
		"node.session": r.SessionID,
		"method":       "(*Dispatcher).Assignments",
	}
	if nodeInfo.ForwardedBy != nil {
		fields["forwarder.id"] = nodeInfo.ForwardedBy.NodeID
	}
	log := log.G(stream.Context()).WithFields(fields)
	log.Debugf("")

	if _, err = d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
		return err
	}

	var (
		sequence  int64
		appliesTo string
		initial   api.AssignmentsMessage
	)
	tasksMap := make(map[string]*api.Task)
	tasksUsingSecret := make(map[string]map[string]struct{})

	sendMessage := func(msg api.AssignmentsMessage, assignmentType api.AssignmentsMessage_Type) error {
		sequence++
		msg.AppliesTo = appliesTo
		msg.ResultsIn = strconv.FormatInt(sequence, 10)
		appliesTo = msg.ResultsIn
		msg.Type = assignmentType

		if err := stream.Send(&msg); err != nil {
			return err
		}
		return nil
	}

	// returns a slice of new secrets to send down
	addSecretsForTask := func(readTx store.ReadTx, t *api.Task) []*api.Secret {
		container := t.Spec.GetContainer()
		if container == nil {
			return nil
		}
		var newSecrets []*api.Secret
		for _, secretRef := range container.Secrets {
			// Empty ID prefix will return all secrets. Bail if there is no SecretID
			if secretRef.SecretID == "" {
				log.Debugf("invalid secret reference")
				continue
			}
			secretID := secretRef.SecretID
			log := log.WithFields(logrus.Fields{
				"secret.id":   secretID,
				"secret.name": secretRef.SecretName,
			})

			if len(tasksUsingSecret[secretID]) == 0 {
				tasksUsingSecret[secretID] = make(map[string]struct{})

				secrets, err := store.FindSecrets(readTx, store.ByIDPrefix(secretID))
				if err != nil {
					log.WithError(err).Errorf("error retrieving secret")
					continue
				}
				if len(secrets) != 1 {
					log.Debugf("secret not found")
					continue
				}

				// If the secret was found and there was one result
				// (there should never be more than one because of the
				// uniqueness constraint), add this secret to our
				// initial set that we send down.
				newSecrets = append(newSecrets, secrets[0])
			}
			tasksUsingSecret[secretID][t.ID] = struct{}{}
		}

		return newSecrets
	}

	// TODO(aaronl): Also send node secrets that should be exposed to
	// this node.
	nodeTasks, cancel, err := store.ViewAndWatch(
		d.store,
		func(readTx store.ReadTx) error {
			tasks, err := store.FindTasks(readTx, store.ByNodeID(nodeID))
			if err != nil {
				return err
			}

			for _, t := range tasks {
				// We only care about tasks that are ASSIGNED or
				// higher. If the state is below ASSIGNED, the
				// task may not meet the constraints for this
				// node, so we have to be careful about sending
				// secrets associated with it.
				if t.Status.State < api.TaskStateAssigned {
					continue
				}

				tasksMap[t.ID] = t
				taskChange := &api.AssignmentChange{
					Assignment: &api.Assignment{
						Item: &api.Assignment_Task{
							Task: t,
						},
					},
					Action: api.AssignmentChange_AssignmentActionUpdate,
				}
				initial.Changes = append(initial.Changes, taskChange)
				// Only send secrets down if these tasks are in < RUNNING
				if t.Status.State <= api.TaskStateRunning {
					newSecrets := addSecretsForTask(readTx, t)
					for _, secret := range newSecrets {
						secretChange := &api.AssignmentChange{
							Assignment: &api.Assignment{
								Item: &api.Assignment_Secret{
									Secret: secret,
								},
							},
							Action: api.AssignmentChange_AssignmentActionUpdate,
						}

						initial.Changes = append(initial.Changes, secretChange)
					}
				}
			}
			return nil
		},
		state.EventUpdateTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventDeleteTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventUpdateSecret{},
		state.EventDeleteSecret{},
	)
	if err != nil {
		return err
	}
	defer cancel()

	if err := sendMessage(initial, api.AssignmentsMessage_COMPLETE); err != nil {
		return err
	}

	for {
		// Check for session expiration
		if _, err := d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
			return err
		}

		// bursty events should be processed in batches and sent out together
		var (
			update          api.AssignmentsMessage
			modificationCnt int
			batchingTimer   *time.Timer
			batchingTimeout <-chan time.Time
			updateTasks     = make(map[string]*api.Task)
			updateSecrets   = make(map[string]*api.Secret)
			removeTasks     = make(map[string]struct{})
			removeSecrets   = make(map[string]struct{})
		)

		oneModification := func() {
			modificationCnt++

			if batchingTimer != nil {
				batchingTimer.Reset(batchingWaitTime)
			} else {
				batchingTimer = time.NewTimer(batchingWaitTime)
				batchingTimeout = batchingTimer.C
			}
		}

		// Release the secrets references from this task
		releaseSecretsForTask := func(t *api.Task) bool {
			var modified bool
			container := t.Spec.GetContainer()
			if container == nil {
				return modified
			}

			for _, secretRef := range container.Secrets {
				secretID := secretRef.SecretID
				delete(tasksUsingSecret[secretID], t.ID)
				if len(tasksUsingSecret[secretID]) == 0 {
					// No tasks are using the secret anymore
					delete(tasksUsingSecret, secretID)
					removeSecrets[secretID] = struct{}{}
					modified = true
				}
			}

			return modified
		}

		// The batching loop waits for 50 ms after the most recent
		// change, or until modificationBatchLimit is reached. The
		// worst case latency is modificationBatchLimit * batchingWaitTime,
		// which is 10 seconds.
	batchingLoop:
		for modificationCnt < modificationBatchLimit {
			select {
			case event := <-nodeTasks:
				switch v := event.(type) {
				// We don't monitor EventCreateTask because tasks are
				// never created in the ASSIGNED state. First tasks are
				// created by the orchestrator, then the scheduler moves
				// them to ASSIGNED. If this ever changes, we will need
				// to monitor task creations as well.
				case state.EventUpdateTask:
					// We only care about tasks that are ASSIGNED or
					// higher.
					if v.Task.Status.State < api.TaskStateAssigned {
						continue
					}

					if oldTask, exists := tasksMap[v.Task.ID]; exists {
						// States ASSIGNED and below are set by the orchestrator/scheduler,
						// not the agent, so tasks in these states need to be sent to the
						// agent even if nothing else has changed.
						if equality.TasksEqualStable(oldTask, v.Task) && v.Task.Status.State > api.TaskStateAssigned {
							// this update should not trigger a task change for the agent
							tasksMap[v.Task.ID] = v.Task
							// If this task got updated to a final state, let's release
							// the secrets that are being used by the task
							if v.Task.Status.State > api.TaskStateRunning {
								// If releasing the secrets caused a secret to be
								// removed from an agent, mark one modification
								if releaseSecretsForTask(v.Task) {
									oneModification()
								}
							}
							continue
						}
					} else if v.Task.Status.State <= api.TaskStateRunning {
						// If this task wasn't part of the assignment set before, and it's <= RUNNING,
						// add the secrets it references to the secrets assignment.
						// Task states > RUNNING are worker-reported only; tasks are never created
						// in a > RUNNING state.
						var newSecrets []*api.Secret
						d.store.View(func(readTx store.ReadTx) {
							newSecrets = addSecretsForTask(readTx, v.Task)
						})
						for _, secret := range newSecrets {
							updateSecrets[secret.ID] = secret
						}
					}
					tasksMap[v.Task.ID] = v.Task
					updateTasks[v.Task.ID] = v.Task

					oneModification()
				case state.EventDeleteTask:
					if _, exists := tasksMap[v.Task.ID]; !exists {
						continue
					}

					removeTasks[v.Task.ID] = struct{}{}

					delete(tasksMap, v.Task.ID)

					// Release the secrets being used by this task
					// Ignoring the return here. We will always mark
					// this as a modification, since a task is being
					// removed.
					releaseSecretsForTask(v.Task)

					oneModification()
				// TODO(aaronl): For node secrets, we'll need to handle
				// EventCreateSecret.
				case state.EventUpdateSecret:
					if _, exists := tasksUsingSecret[v.Secret.ID]; !exists {
						continue
					}
					log.Debugf("Secret %s (ID: %d) was updated though it was still referenced by one or more tasks",
						v.Secret.Spec.Annotations.Name, v.Secret.ID)

				case state.EventDeleteSecret:
					if _, exists := tasksUsingSecret[v.Secret.ID]; !exists {
						continue
					}
					log.Debugf("Secret %s (ID: %d) was deleted though it was still referenced by one or more tasks",
						v.Secret.Spec.Annotations.Name, v.Secret.ID)
				}
			case <-batchingTimeout:
				break batchingLoop
			case <-stream.Context().Done():
				return stream.Context().Err()
			case <-dctx.Done():
				return dctx.Err()
			}
		}

		if batchingTimer != nil {
			batchingTimer.Stop()
		}

		if modificationCnt > 0 {
			for id, task := range updateTasks {
				if _, ok := removeTasks[id]; !ok {
					taskChange := &api.AssignmentChange{
						Assignment: &api.Assignment{
							Item: &api.Assignment_Task{
								Task: task,
							},
						},
						Action: api.AssignmentChange_AssignmentActionUpdate,
					}

					update.Changes = append(update.Changes, taskChange)
				}
			}
			for id, secret := range updateSecrets {
				// If, due to multiple updates, this secret is no longer in use,
				// don't send it down.
				if len(tasksUsingSecret[id]) == 0 {
					// delete this secret for the secrets to be updated
					// so that deleteSecrets knows the current list
					delete(updateSecrets, id)
					continue
				}
				secretChange := &api.AssignmentChange{
					Assignment: &api.Assignment{
						Item: &api.Assignment_Secret{
							Secret: secret,
						},
					},
					Action: api.AssignmentChange_AssignmentActionUpdate,
				}

				update.Changes = append(update.Changes, secretChange)
			}
			for id := range removeTasks {
				taskChange := &api.AssignmentChange{
					Assignment: &api.Assignment{
						Item: &api.Assignment_Task{
							Task: &api.Task{ID: id},
						},
					},
					Action: api.AssignmentChange_AssignmentActionRemove,
				}

				update.Changes = append(update.Changes, taskChange)
			}
			for id := range removeSecrets {
				// If this secret is also being sent on the updated set
				// don't also add it to the removed set
				if _, ok := updateSecrets[id]; ok {
					continue
				}

				secretChange := &api.AssignmentChange{
					Assignment: &api.Assignment{
						Item: &api.Assignment_Secret{
							Secret: &api.Secret{ID: id},
						},
					},
					Action: api.AssignmentChange_AssignmentActionRemove,
				}

				update.Changes = append(update.Changes, secretChange)
			}

			if err := sendMessage(update, api.AssignmentsMessage_INCREMENTAL); err != nil {
				return err
			}
		}
	}
}
Example #19
// Tasks is a stream of task states for a node. Each message contains the full
// list of tasks that should be run on the node; if a task is not present in
// that list, it should be terminated.
func (d *Dispatcher) Tasks(r *api.TasksRequest, stream api.Dispatcher_TasksServer) error {
	nodeInfo, err := ca.RemoteNode(stream.Context())
	if err != nil {
		return err
	}
	nodeID := nodeInfo.NodeID

	dctx, err := d.isRunningLocked()
	if err != nil {
		return err
	}

	fields := logrus.Fields{
		"node.id":      nodeID,
		"node.session": r.SessionID,
		"method":       "(*Dispatcher).Tasks",
	}
	if nodeInfo.ForwardedBy != nil {
		fields["forwarder.id"] = nodeInfo.ForwardedBy.NodeID
	}
	log.G(stream.Context()).WithFields(fields).Debugf("")

	if _, err = d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
		return err
	}

	tasksMap := make(map[string]*api.Task)
	nodeTasks, cancel, err := store.ViewAndWatch(
		d.store,
		func(readTx store.ReadTx) error {
			tasks, err := store.FindTasks(readTx, store.ByNodeID(nodeID))
			if err != nil {
				return err
			}
			for _, t := range tasks {
				tasksMap[t.ID] = t
			}
			return nil
		},
		state.EventCreateTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventUpdateTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventDeleteTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
	)
	if err != nil {
		return err
	}
	defer cancel()

	for {
		if _, err := d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
			return err
		}

		var tasks []*api.Task
		for _, t := range tasksMap {
			// dispatcher only sends tasks that have been assigned to a node
			if t != nil && t.Status.State >= api.TaskStateAssigned {
				tasks = append(tasks, t)
			}
		}

		if err := stream.Send(&api.TasksMessage{Tasks: tasks}); err != nil {
			return err
		}

		// bursty events should be processed in batches and sent out as a snapshot
		var (
			modificationCnt int
			batchingTimer   *time.Timer
			batchingTimeout <-chan time.Time
		)

	batchingLoop:
		for modificationCnt < modificationBatchLimit {
			select {
			case event := <-nodeTasks:
				switch v := event.(type) {
				case state.EventCreateTask:
					tasksMap[v.Task.ID] = v.Task
					modificationCnt++
				case state.EventUpdateTask:
					if oldTask, exists := tasksMap[v.Task.ID]; exists {
						// States ASSIGNED and below are set by the orchestrator/scheduler,
						// not the agent, so tasks in these states need to be sent to the
						// agent even if nothing else has changed.
						if equality.TasksEqualStable(oldTask, v.Task) && v.Task.Status.State > api.TaskStateAssigned {
							// this update should not trigger action at agent
							tasksMap[v.Task.ID] = v.Task
							continue
						}
					}
					tasksMap[v.Task.ID] = v.Task
					modificationCnt++
				case state.EventDeleteTask:
					delete(tasksMap, v.Task.ID)
					modificationCnt++
				}
				if batchingTimer != nil {
					batchingTimer.Reset(batchingWaitTime)
				} else {
					batchingTimer = time.NewTimer(batchingWaitTime)
					batchingTimeout = batchingTimer.C
				}
			case <-batchingTimeout:
				break batchingLoop
			case <-stream.Context().Done():
				return stream.Context().Err()
			case <-dctx.Done():
				return dctx.Err()
			}
		}

		if batchingTimer != nil {
			batchingTimer.Stop()
		}
	}
}
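The two streaming handlers above share a batching idiom: after the first event arrives, keep draining the channel until either batchingWaitTime passes with no new event or modificationBatchLimit events have been handled. The sketch below isolates that idiom with plain channels and hypothetical names; it is not code from the dispatcher itself.

func drainWithBatching(events <-chan string, batchingWaitTime time.Duration, modificationBatchLimit int) []string {
	var (
		collected       []string
		batchingTimer   *time.Timer
		batchingTimeout <-chan time.Time // nil until the first event arrives
	)
batchingLoop:
	for len(collected) < modificationBatchLimit {
		select {
		case e, ok := <-events:
			if !ok {
				break batchingLoop
			}
			collected = append(collected, e)
			// Restart the quiet-period timer on every event, mirroring the
			// handlers above.
			if batchingTimer != nil {
				batchingTimer.Reset(batchingWaitTime)
			} else {
				batchingTimer = time.NewTimer(batchingWaitTime)
				batchingTimeout = batchingTimer.C
			}
		case <-batchingTimeout:
			break batchingLoop
		}
	}
	if batchingTimer != nil {
		batchingTimer.Stop()
	}
	return collected
}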
Example #20
func (a *Allocator) doNetworkInit(ctx context.Context) error {
	na, err := networkallocator.New()
	if err != nil {
		return err
	}

	nc := &networkContext{
		nwkAllocator:        na,
		unallocatedTasks:    make(map[string]*api.Task),
		unallocatedServices: make(map[string]*api.Service),
		unallocatedNetworks: make(map[string]*api.Network),
	}

	// Check if we have the ingress network. If not found create
	// it before reading all network objects for allocation.
	var networks []*api.Network
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
		if len(networks) > 0 {
			ingressNetwork = networks[0]
		}
	})
	if err != nil {
		return fmt.Errorf("failed to find ingress network during init: %v", err)
	}

	// If ingress network is not found, create one right away
	// using the predefined template.
	if len(networks) == 0 {
		if err := a.store.Update(func(tx store.Tx) error {
			ingressNetwork.ID = identity.NewID()
			if err := store.CreateNetwork(tx, ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return fmt.Errorf("failed to create ingress network: %v", err)
		}

		a.store.View(func(tx store.ReadTx) {
			networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
			if len(networks) > 0 {
				ingressNetwork = networks[0]
			}
		})
		if err != nil {
			return fmt.Errorf("failed to find ingress network after creating it: %v", err)
		}

	}

	// Try to complete ingress network allocation before anything else so
// that we can get the preferred subnet for ingress
	// network.
	if !na.IsAllocated(ingressNetwork) {
		if err := a.allocateNetwork(ctx, nc, ingressNetwork); err != nil {
			log.G(ctx).Errorf("failed allocating ingress network during init: %v", err)
		}

		// Update store after allocation
		if err := a.store.Update(func(tx store.Tx) error {
			if err := store.UpdateNetwork(tx, ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return fmt.Errorf("failed to create ingress network: %v", err)
		}
	}

	// Allocate networks in the store so far before we started
	// watching.
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all networks in store while trying to allocate during init: %v", err)
	}

	for _, n := range networks {
		if na.IsAllocated(n) {
			continue
		}

		if err := a.allocateNetwork(ctx, nc, n); err != nil {
			log.G(ctx).Errorf("failed allocating network %s during init: %v", n.ID, err)
		}
	}

	// Allocate nodes in the store so far before we process watched events.
	var nodes []*api.Node
	a.store.View(func(tx store.ReadTx) {
		nodes, err = store.FindNodes(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err)
	}

	for _, node := range nodes {
		if na.IsNodeAllocated(node) {
			continue
		}

		if node.Attachment == nil {
			node.Attachment = &api.NetworkAttachment{}
		}

		node.Attachment.Network = ingressNetwork.Copy()
		if err := a.allocateNode(ctx, nc, node); err != nil {
			log.G(ctx).Errorf("Failed to allocate network resources for node %s during init: %v", node.ID, err)
		}
	}

	// Allocate services in the store so far before we process watched events.
	var services []*api.Service
	a.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err)
	}

	for _, s := range services {
		if s.Spec.Endpoint == nil {
			continue
		}

		if na.IsServiceAllocated(s) {
			continue
		}

		if err := a.allocateService(ctx, nc, s); err != nil {
			log.G(ctx).Errorf("failed allocating service %s during init: %v", s.ID, err)
		}
	}

	// Allocate tasks in the store so far before we started watching.
	var tasks []*api.Task
	a.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all tasks in store while trying to allocate during init: %v", err)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			if taskDead(t) {
				continue
			}

			var s *api.Service
			if t.ServiceID != "" {
				a.store.View(func(tx store.ReadTx) {
					s = store.GetService(tx, t.ServiceID)
				})
			}

			// Populate network attachments in the task
			// based on service spec.
			a.taskCreateNetworkAttachments(t, s)

			if taskReadyForNetworkVote(t, s, nc) {
				if t.Status.State >= api.TaskStateAllocated {
					continue
				}

				if a.taskAllocateVote(networkVoter, t.ID) {
					// If the task is not attached to any network, network
					// allocators job is done. Immediately cast a vote so
					// that the task can be moved to ALLOCATED state as
					// soon as possible.
					if err := batch.Update(func(tx store.Tx) error {
						storeT := store.GetTask(tx, t.ID)
						if storeT == nil {
							return fmt.Errorf("task %s not found while trying to update state", t.ID)
						}

						updateTaskStatus(storeT, api.TaskStateAllocated, "allocated")

						if err := store.UpdateTask(tx, storeT); err != nil {
							return fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err)
						}

						return nil
					}); err != nil {
						log.G(ctx).WithError(err).Error("error updating task network")
					}
				}
				continue
			}

			err := batch.Update(func(tx store.Tx) error {
				_, err := a.allocateTask(ctx, nc, tx, t)
				return err
			})
			if err != nil {
				log.G(ctx).Errorf("failed allocating task %s during init: %v", t.ID, err)
				nc.unallocatedTasks[t.ID] = t
			}
		}

		return nil
	}); err != nil {
		return err
	}

	a.netCtx = nc
	return nil
}
Example #21
func (ce *ConstraintEnforcer) shutdownNoncompliantTasks(node *api.Node) {
	// If the availability is "drain", the orchestrator will
	// shut down all tasks.
	// If the availability is "pause", we shouldn't touch
	// the tasks on this node.
	if node.Spec.Availability != api.NodeAvailabilityActive {
		return
	}

	var (
		tasks []*api.Task
		err   error
	)

	ce.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
	})

	if err != nil {
		log.L.WithError(err).Errorf("failed to list tasks for node ID %s", node.ID)
	}

	var availableMemoryBytes, availableNanoCPUs int64
	if node.Description != nil && node.Description.Resources != nil {
		availableMemoryBytes = node.Description.Resources.MemoryBytes
		availableNanoCPUs = node.Description.Resources.NanoCPUs
	}

	removeTasks := make(map[string]*api.Task)

	// TODO(aaronl): The set of tasks removed will be
	// nondeterministic because it depends on the order of
	// the slice returned from FindTasks. We could do
	// a separate pass over the tasks for each type of
	// resource, and sort by the size of the reservation
	// to remove the most resource-intensive tasks.
	for _, t := range tasks {
		if t.DesiredState < api.TaskStateAssigned || t.DesiredState > api.TaskStateRunning {
			continue
		}

		// Ensure that the task still meets scheduling
		// constraints.
		if t.Spec.Placement != nil && len(t.Spec.Placement.Constraints) != 0 {
			constraints, _ := constraint.Parse(t.Spec.Placement.Constraints)
			if !constraint.NodeMatches(constraints, node) {
				removeTasks[t.ID] = t
				continue
			}
		}

		// Ensure that the task assigned to the node
		// still satisfies the resource limits.
		if t.Spec.Resources != nil && t.Spec.Resources.Reservations != nil {
			if t.Spec.Resources.Reservations.MemoryBytes > availableMemoryBytes {
				removeTasks[t.ID] = t
				continue
			}
			if t.Spec.Resources.Reservations.NanoCPUs > availableNanoCPUs {
				removeTasks[t.ID] = t
				continue
			}
			availableMemoryBytes -= t.Spec.Resources.Reservations.MemoryBytes
			availableNanoCPUs -= t.Spec.Resources.Reservations.NanoCPUs
		}
	}

	if len(removeTasks) != 0 {
		_, err := ce.store.Batch(func(batch *store.Batch) error {
			for _, t := range removeTasks {
				err := batch.Update(func(tx store.Tx) error {
					t = store.GetTask(tx, t.ID)
					if t == nil || t.DesiredState > api.TaskStateRunning {
						return nil
					}

					t.DesiredState = api.TaskStateShutdown
					return store.UpdateTask(tx, t)
				})
				if err != nil {
					log.L.WithError(err).Errorf("failed to shut down task %s", t.ID)
				}
			}
			return nil
		})

		if err != nil {
			log.L.WithError(err).Errorf("failed to shut down tasks")
		}
	}
}
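The constraint check above can be read on its own: a task is noncompliant when it declares placement constraints that its node no longer satisfies. A minimal sketch, assuming the same constraint package used in the examples; nodeViolatesConstraints is a hypothetical helper, not part of the original code.

func nodeViolatesConstraints(t *api.Task, node *api.Node) bool {
	if t.Spec.Placement == nil || len(t.Spec.Placement.Constraints) == 0 {
		return false
	}
	constraints, err := constraint.Parse(t.Spec.Placement.Constraints)
	if err != nil {
		// Unparsable constraints are ignored here, mirroring the example
		// above, which discards the parse error.
		return false
	}
	return !constraint.NodeMatches(constraints, node)
}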
Example #22
func (r *Orchestrator) initTasks(ctx context.Context, readTx store.ReadTx) error {
	tasks, err := store.FindTasks(readTx, store.All)
	if err != nil {
		return err
	}
	for _, t := range tasks {
		if t.NodeID != "" {
			n := store.GetNode(readTx, t.NodeID)
			if invalidNode(n) && t.Status.State <= api.TaskStateRunning && t.DesiredState <= api.TaskStateRunning {
				r.restartTasks[t.ID] = struct{}{}
			}
		}
	}

	_, err = r.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			if t.ServiceID == "" {
				continue
			}

			// TODO(aluzzardi): We should NOT retrieve the service here.
			service := store.GetService(readTx, t.ServiceID)
			if service == nil {
				// Service was deleted
				err := batch.Update(func(tx store.Tx) error {
					return store.DeleteTask(tx, t.ID)
				})
				if err != nil {
					log.G(ctx).WithError(err).Error("failed to set task desired state to dead")
				}
				continue
			}
			// TODO(aluzzardi): This is shady. We should have a more generic condition.
			if t.DesiredState != api.TaskStateReady || !orchestrator.IsReplicatedService(service) {
				continue
			}
			restartDelay := orchestrator.DefaultRestartDelay
			if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
				var err error
				restartDelay, err = gogotypes.DurationFromProto(t.Spec.Restart.Delay)
				if err != nil {
					log.G(ctx).WithError(err).Error("invalid restart delay")
					restartDelay = orchestrator.DefaultRestartDelay
				}
			}
			if restartDelay != 0 {
				timestamp, err := gogotypes.TimestampFromProto(t.Status.Timestamp)
				if err == nil {
					restartTime := timestamp.Add(restartDelay)
					calculatedRestartDelay := restartTime.Sub(time.Now())
					if calculatedRestartDelay < restartDelay {
						restartDelay = calculatedRestartDelay
					}
					if restartDelay > 0 {
						_ = batch.Update(func(tx store.Tx) error {
							t := store.GetTask(tx, t.ID)
							// TODO(aluzzardi): This is shady as well. We should have a more generic condition.
							if t == nil || t.DesiredState != api.TaskStateReady {
								return nil
							}
							r.restarts.DelayStart(ctx, tx, nil, t.ID, restartDelay, true)
							return nil
						})
						continue
					}
				} else {
					log.G(ctx).WithError(err).Error("invalid status timestamp")
				}
			}

			// Start now
			err := batch.Update(func(tx store.Tx) error {
				return r.restarts.StartNow(tx, t.ID)
			})
			if err != nil {
				log.G(ctx).WithError(err).WithField("task.id", t.ID).Error("moving task out of delayed state failed")
			}
		}
		return nil
	})

	return err
}
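The restart-delay arithmetic in the middle of initTasks can be read in isolation: the task keeps its configured delay unless part of it has already elapsed since the status timestamp, in which case only the remainder is waited. A hedged helper capturing that calculation follows; remainingRestartDelay is a hypothetical name, and the original code lets a non-positive result fall through to StartNow.

func remainingRestartDelay(statusTimestamp time.Time, configured time.Duration) time.Duration {
	remaining := statusTimestamp.Add(configured).Sub(time.Now())
	if remaining > configured {
		// Clock skew: never wait longer than the configured delay.
		return configured
	}
	if remaining < 0 {
		return 0 // the delay has already elapsed; start immediately
	}
	return remaining
}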
Example #23
func TestReplicatedOrchestrator(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		s1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask1 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Create a second service.
	err = s.Update(func(tx store.Tx) error {
		s2 := &api.Service{
			ID: "id2",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name2",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 1,
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, s2))
		return nil
	})
	assert.NoError(t, err)

	observedTask3 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name2")

	// Update a service to scale it out to 3 instances
	err = s.Update(func(tx store.Tx) error {
		s2 := &api.Service{
			ID: "id2",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name2",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 3,
					},
				},
			},
		}
		assert.NoError(t, store.UpdateService(tx, s2))
		return nil
	})
	assert.NoError(t, err)

	observedTask4 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name2")

	observedTask5 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask5.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask5.ServiceAnnotations.Name, "name2")

	// Now scale it back down to 1 instance
	err = s.Update(func(tx store.Tx) error {
		s2 := &api.Service{
			ID: "id2",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name2",
				},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{},
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 1,
					},
				},
			},
		}
		assert.NoError(t, store.UpdateService(tx, s2))
		return nil
	})
	assert.NoError(t, err)

	observedDeletion1 := watchShutdownTask(t, watch)
	assert.Equal(t, observedDeletion1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedDeletion1.ServiceAnnotations.Name, "name2")

	observedDeletion2 := watchShutdownTask(t, watch)
	assert.Equal(t, observedDeletion2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedDeletion2.ServiceAnnotations.Name, "name2")

	// There should be one remaining task attached to service id2/name2.
	var liveTasks []*api.Task
	s.View(func(readTx store.ReadTx) {
		var tasks []*api.Task
		tasks, err = store.FindTasks(readTx, store.ByServiceID("id2"))
		for _, t := range tasks {
			if t.DesiredState == api.TaskStateRunning {
				liveTasks = append(liveTasks, t)
			}
		}
	})
	assert.NoError(t, err)
	assert.Len(t, liveTasks, 1)

	// Delete the remaining task directly. It should be recreated by the
	// orchestrator.
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteTask(tx, liveTasks[0].ID))
		return nil
	})
	assert.NoError(t, err)

	observedTask6 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask6.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name2")

	// Delete the service. Its remaining task should go away.
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteService(tx, "id2"))
		return nil
	})
	assert.NoError(t, err)

	deletedTask := watchTaskDelete(t, watch)
	assert.Equal(t, deletedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, deletedTask.ServiceAnnotations.Name, "name2")
}
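The test above relies on watchTaskCreate, watchShutdownTask and watchTaskDelete helpers that are not part of this listing. A minimal sketch of the first two, assuming the watch channel is the events.Event channel returned by state.Watch; the one-second timeout and the failure messages are assumptions, not the project's actual helpers:

func watchTaskCreate(t *testing.T, watch chan events.Event) *api.Task {
	for {
		select {
		case event := <-watch:
			// Only creation events matter here; everything else is skipped.
			if task, ok := event.(state.EventCreateTask); ok {
				return task.Task
			}
		case <-time.After(time.Second):
			t.Fatal("no task creation event received")
		}
	}
}

func watchShutdownTask(t *testing.T, watch chan events.Event) *api.Task {
	for {
		select {
		case event := <-watch:
			// A scale-down shows up as an update that moves DesiredState to SHUTDOWN.
			if task, ok := event.(state.EventUpdateTask); ok && task.Task.DesiredState == api.TaskStateShutdown {
				return task.Task
			}
		case <-time.After(time.Second):
			t.Fatal("no task shutdown event received")
		}
	}
}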
Example #24
0
func TestHA(t *testing.T) {
	ctx := context.Background()
	initialNodeSet := []*api.Node{
		{
			ID: "id1",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id2",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id3",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id4",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id5",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
	}

	taskTemplate1 := &api.Task{
		DesiredState: api.TaskStateRunning,
		ServiceID:    "service1",
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Image: "v:1",
				},
			},
		},
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
	}

	taskTemplate2 := &api.Task{
		DesiredState: api.TaskStateRunning,
		ServiceID:    "service2",
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Image: "v:2",
				},
			},
		},
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	t1Instances := 18

	err := s.Update(func(tx store.Tx) error {
		// Prepopulate nodes
		for _, n := range initialNodeSet {
			assert.NoError(t, store.CreateNode(tx, n))
		}

		// Prepopulate tasks from template 1
		for i := 0; i != t1Instances; i++ {
			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
		}
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	t1Assignments := make(map[string]int)
	for i := 0; i != t1Instances; i++ {
		assignment := watchAssignment(t, watch)
		if !strings.HasPrefix(assignment.ID, "t1") {
			t.Fatal("got assignment for different kind of task")
		}
		t1Assignments[assignment.NodeID]++
	}

	assert.Len(t, t1Assignments, 5)

	nodesWith3T1Tasks := 0
	nodesWith4T1Tasks := 0
	for nodeID, taskCount := range t1Assignments {
		if taskCount == 3 {
			nodesWith3T1Tasks++
		} else if taskCount == 4 {
			nodesWith4T1Tasks++
		} else {
			t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
		}
	}

	assert.Equal(t, 3, nodesWith4T1Tasks)
	assert.Equal(t, 2, nodesWith3T1Tasks)

	t2Instances := 2

	// Add a new service with two instances. They should fill the two nodes
	// that currently only have three tasks.
	err = s.Update(func(tx store.Tx) error {
		for i := 0; i != t2Instances; i++ {
			taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
			assert.NoError(t, store.CreateTask(tx, taskTemplate2))
		}
		return nil
	})
	assert.NoError(t, err)

	t2Assignments := make(map[string]int)
	for i := 0; i != t2Instances; i++ {
		assignment := watchAssignment(t, watch)
		if !strings.HasPrefix(assignment.ID, "t2") {
			t.Fatal("got assignment for different kind of task")
		}
		t2Assignments[assignment.NodeID]++
	}

	assert.Len(t, t2Assignments, 2)

	for nodeID := range t2Assignments {
		assert.Equal(t, 3, t1Assignments[nodeID])
	}

	// Scale up service 1 to 21 tasks. It should cover the two nodes that
	// service 2 was assigned to, and also one other node.
	err = s.Update(func(tx store.Tx) error {
		for i := t1Instances; i != t1Instances+3; i++ {
			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
		}
		return nil
	})
	assert.NoError(t, err)

	var sharedNodes [2]string

	for i := 0; i != 3; i++ {
		assignment := watchAssignment(t, watch)
		if !strings.HasPrefix(assignment.ID, "t1") {
			t.Fatal("got assignment for different kind of task")
		}
		if t1Assignments[assignment.NodeID] == 5 {
			t.Fatal("more than one new task assigned to the same node")
		}
		t1Assignments[assignment.NodeID]++

		if t2Assignments[assignment.NodeID] != 0 {
			if sharedNodes[0] == "" {
				sharedNodes[0] = assignment.NodeID
			} else if sharedNodes[1] == "" {
				sharedNodes[1] = assignment.NodeID
			} else {
				t.Fatal("all three assignments went to nodes with service2 tasks")
			}
		}
	}

	assert.NotEmpty(t, sharedNodes[0])
	assert.NotEmpty(t, sharedNodes[1])
	assert.NotEqual(t, sharedNodes[0], sharedNodes[1])

	nodesWith4T1Tasks = 0
	nodesWith5T1Tasks := 0
	for nodeID, taskCount := range t1Assignments {
		if taskCount == 4 {
			nodesWith4T1Tasks++
		} else if taskCount == 5 {
			nodesWith5T1Tasks++
		} else {
			t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
		}
	}

	assert.Equal(t, 4, nodesWith4T1Tasks)
	assert.Equal(t, 1, nodesWith5T1Tasks)

	// Add another task from service2. It must not land on the node that
	// has 5 service1 tasks.
	err = s.Update(func(tx store.Tx) error {
		taskTemplate2.ID = "t2id4"
		assert.NoError(t, store.CreateTask(tx, taskTemplate2))
		return nil
	})
	assert.NoError(t, err)

	assignment := watchAssignment(t, watch)
	if assignment.ID != "t2id4" {
		t.Fatal("got assignment for different task")
	}

	if t2Assignments[assignment.NodeID] != 0 {
		t.Fatal("was scheduled on a node that already has a service2 task")
	}
	if t1Assignments[assignment.NodeID] == 5 {
		t.Fatal("was scheduled on the node that has the most service1 tasks")
	}
	t2Assignments[assignment.NodeID]++

	// Remove all tasks on node id1.
	err = s.Update(func(tx store.Tx) error {
		tasks, err := store.FindTasks(tx, store.ByNodeID("id1"))
		assert.NoError(t, err)
		for _, task := range tasks {
			assert.NoError(t, store.DeleteTask(tx, task.ID))
		}
		return nil
	})
	assert.NoError(t, err)

	t1Assignments["id1"] = 0
	t2Assignments["id1"] = 0

	// Add four instances of service1 and two instances of service2.
	// All instances of service1 should land on node "id1", and one
	// of the two service2 instances should as well.
	// Put these in a map to randomize the order in which they are
	// created.
	err = s.Update(func(tx store.Tx) error {
		tasksMap := make(map[string]*api.Task)
		for i := 22; i <= 25; i++ {
			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
			tasksMap[taskTemplate1.ID] = taskTemplate1.Copy()
		}
		for i := 5; i <= 6; i++ {
			taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
			tasksMap[taskTemplate2.ID] = taskTemplate2.Copy()
		}
		for _, task := range tasksMap {
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	for i := 0; i != 4+2; i++ {
		assignment := watchAssignment(t, watch)
		if strings.HasPrefix(assignment.ID, "t1") {
			t1Assignments[assignment.NodeID]++
		} else if strings.HasPrefix(assignment.ID, "t2") {
			t2Assignments[assignment.NodeID]++
		}
	}

	assert.Equal(t, 4, t1Assignments["id1"])
	assert.Equal(t, 1, t2Assignments["id1"])
}
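watchAssignment is another helper these scheduler tests assume but never list. A sketch consistent with how it is used above, treating any task update whose observed state has reached ASSIGNED as an assignment; the timeout is an assumption:

func watchAssignment(t *testing.T, watch chan events.Event) *api.Task {
	for {
		select {
		case event := <-watch:
			// The scheduler signals an assignment by updating the task with a
			// node ID and a status of ASSIGNED or later.
			if task, ok := event.(state.EventUpdateTask); ok && task.Task.Status.State >= api.TaskStateAssigned {
				return task.Task
			}
		case <-time.After(time.Second):
			t.Fatal("no task assignment received")
		}
	}
}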
Example #25
0
func TestSchedulerResourceConstraintDeadTask(t *testing.T) {
	ctx := context.Background()
	// Create a ready node with only enough memory to run one of the big tasks at a time.
	node := &api.Node{
		ID: "id1",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{
				Name: "node",
			},
		},
		Status: api.NodeStatus{
			State: api.NodeStatus_READY,
		},
		Description: &api.NodeDescription{
			Resources: &api.Resources{
				NanoCPUs:    1e9,
				MemoryBytes: 1e9,
			},
		},
	}

	bigTask1 := &api.Task{
		DesiredState: api.TaskStateRunning,
		ID:           "id1",
		Spec: api.TaskSpec{
			Resources: &api.ResourceRequirements{
				Reservations: &api.Resources{
					MemoryBytes: 8e8,
				},
			},
		},
		ServiceAnnotations: api.Annotations{
			Name: "big",
		},
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
	}

	bigTask2 := bigTask1.Copy()
	bigTask2.ID = "id2"

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Add initial node and task
		assert.NoError(t, store.CreateNode(tx, node))
		assert.NoError(t, store.CreateTask(tx, bigTask1))
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	// The task fits, so it should get assigned
	assignment := watchAssignment(t, watch)
	assert.Equal(t, "id1", assignment.ID)
	assert.Equal(t, "id1", assignment.NodeID)

	err = s.Update(func(tx store.Tx) error {
		// Add a second task. It shouldn't get assigned because of
		// resource constraints.
		return store.CreateTask(tx, bigTask2)
	})
	assert.NoError(t, err)

	time.Sleep(100 * time.Millisecond)
	s.View(func(tx store.ReadTx) {
		tasks, err := store.FindTasks(tx, store.ByNodeID(node.ID))
		assert.NoError(t, err)
		assert.Len(t, tasks, 1)
	})

	err = s.Update(func(tx store.Tx) error {
		// The task becomes dead
		updatedTask := store.GetTask(tx, bigTask1.ID)
		updatedTask.Status.State = api.TaskStateShutdown
		return store.UpdateTask(tx, updatedTask)
	})
	assert.NoError(t, err)

	// With the first task no longer consuming resources, the second
	// one can be scheduled.
	assignment = watchAssignment(t, watch)
	assert.Equal(t, "id2", assignment.ID)
	assert.Equal(t, "id1", assignment.NodeID)
}
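The fixed 100 ms sleep above is the simplest way to check that nothing was scheduled, but it can be flaky on a loaded machine. A hedged alternative is sketched below; expectNoAssignment is a hypothetical helper, not part of the original tests:

// expectNoAssignment fails the test if any task reaches ASSIGNED (or a later
// state) before the timeout elapses.
func expectNoAssignment(t *testing.T, watch chan events.Event, timeout time.Duration) {
	deadline := time.After(timeout)
	for {
		select {
		case event := <-watch:
			if task, ok := event.(state.EventUpdateTask); ok && task.Task.Status.State >= api.TaskStateAssigned {
				t.Fatalf("unexpected assignment of task %s to node %s", task.Task.ID, task.Task.NodeID)
			}
		case <-deadline:
			return
		}
	}
}

It still spends the full timeout when nothing happens, so it trades a little latency for a deterministic failure message rather than removing the wait.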
Example #26
0
func TestTaskHistory(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		store.CreateCluster(tx, &api.Cluster{
			ID: identity.NewID(),
			Spec: api.ClusterSpec{
				Annotations: api.Annotations{
					Name: store.DefaultClusterName,
				},
				Orchestration: api.OrchestrationConfig{
					TaskHistoryRetentionLimit: 2,
				},
			},
		})
		return nil
	}))

	taskReaper := NewTaskReaper(s)
	defer taskReaper.Stop()
	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	watch, cancel := state.Watch(s.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	// Create a service with two instances specified before the orchestrator is
	// started. This should result in two tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		j1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{
						Replicas: 2,
					},
				},
				Task: api.TaskSpec{
					Restart: &api.RestartPolicy{
						Condition: api.RestartOnAny,
						Delay:     ptypes.DurationProto(0),
					},
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, j1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()
	go taskReaper.Run()

	observedTask1 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")

	observedTask2 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")

	// Fail both tasks. They should both get restarted.
	updatedTask1 := observedTask1.Copy()
	updatedTask1.Status.State = api.TaskStateFailed
	updatedTask1.ServiceAnnotations = api.Annotations{Name: "original"}
	updatedTask2 := observedTask2.Copy()
	updatedTask2.Status.State = api.TaskStateFailed
	updatedTask2.ServiceAnnotations = api.Annotations{Name: "original"}
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask1))
		assert.NoError(t, store.UpdateTask(tx, updatedTask2))
		return nil
	})
	assert.NoError(t, err)

	expectCommit(t, watch)
	expectTaskUpdate(t, watch)
	expectTaskUpdate(t, watch)
	expectCommit(t, watch)

	expectTaskUpdate(t, watch)
	observedTask3 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask3.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1")

	expectTaskUpdate(t, watch)
	observedTask4 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask4.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1")

	// Fail these replacement tasks. Since TaskHistory is set to 2, this
	// should cause the oldest tasks for each instance to get deleted.
	updatedTask3 := observedTask3.Copy()
	updatedTask3.Status.State = api.TaskStateFailed
	updatedTask4 := observedTask4.Copy()
	updatedTask4.Status.State = api.TaskStateFailed
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.UpdateTask(tx, updatedTask3))
		assert.NoError(t, store.UpdateTask(tx, updatedTask4))
		return nil
	})
	assert.NoError(t, err)

	deletedTask1 := watchTaskDelete(t, watch)
	deletedTask2 := watchTaskDelete(t, watch)

	assert.Equal(t, api.TaskStateFailed, deletedTask1.Status.State)
	assert.Equal(t, "original", deletedTask1.ServiceAnnotations.Name)
	assert.Equal(t, api.TaskStateFailed, deletedTask2.Status.State)
	assert.Equal(t, "original", deletedTask2.ServiceAnnotations.Name)

	var foundTasks []*api.Task
	s.View(func(tx store.ReadTx) {
		foundTasks, err = store.FindTasks(tx, store.All)
	})
	assert.NoError(t, err)
	assert.Len(t, foundTasks, 4)
}
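expectTaskUpdate and expectCommit are also assumed by this test. Because the watch in TestTaskHistory subscribes to the whole queue with no event filter, commit markers arrive interleaved with task events and have to be consumed explicitly. A sketch of both helpers, assuming state.EventCommit is the commit marker type and that a short timeout is acceptable in tests:

func expectTaskUpdate(t *testing.T, watch chan events.Event) {
	select {
	case event := <-watch:
		if _, ok := event.(state.EventUpdateTask); !ok {
			t.Fatalf("expected task update event, got %T", event)
		}
	case <-time.After(time.Second):
		t.Fatal("no task update event received")
	}
}

func expectCommit(t *testing.T, watch chan events.Event) {
	select {
	case event := <-watch:
		if _, ok := event.(state.EventCommit); !ok {
			t.Fatalf("expected commit event, got %T", event)
		}
	case <-time.After(time.Second):
		t.Fatal("no commit event received")
	}
}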
Example #27
0
func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
	na, err := networkallocator.New()
	if err != nil {
		return err
	}

	nc := &networkContext{
		nwkAllocator:        na,
		unallocatedTasks:    make(map[string]*api.Task),
		unallocatedServices: make(map[string]*api.Service),
		unallocatedNetworks: make(map[string]*api.Network),
		ingressNetwork:      newIngressNetwork(),
	}
	a.netCtx = nc
	defer func() {
		// Clear a.netCtx if initialization was unsuccessful.
		if err != nil {
			a.netCtx = nil
		}
	}()

	// Check if we already have the ingress network. If it is not found,
	// create it before reading all network objects for allocation.
	var networks []*api.Network
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
		if len(networks) > 0 {
			nc.ingressNetwork = networks[0]
		}
	})
	if err != nil {
		return errors.Wrap(err, "failed to find ingress network during init")
	}

	// If ingress network is not found, create one right away
	// using the predefined template.
	if len(networks) == 0 {
		if err := a.store.Update(func(tx store.Tx) error {
			nc.ingressNetwork.ID = identity.NewID()
			if err := store.CreateNetwork(tx, nc.ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return errors.Wrap(err, "failed to create ingress network")
		}

		a.store.View(func(tx store.ReadTx) {
			networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
			if len(networks) > 0 {
				nc.ingressNetwork = networks[0]
			}
		})
		if err != nil {
			return errors.Wrap(err, "failed to find ingress network after creating it")
		}

	}

	// Try to complete ingress network allocation before anything else so
	// that we can get the preferred subnet for the ingress network.
	if !na.IsAllocated(nc.ingressNetwork) {
		if err := a.allocateNetwork(ctx, nc.ingressNetwork); err != nil {
			log.G(ctx).WithError(err).Error("failed allocating ingress network during init")
		} else if _, err := a.store.Batch(func(batch *store.Batch) error {
			if err := a.commitAllocatedNetwork(ctx, batch, nc.ingressNetwork); err != nil {
				log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
			}
			return nil
		}); err != nil {
			log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
		}
	}

	// Allocate the networks already in the store before we start
	// watching for new events.
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all networks in store while trying to allocate during init")
	}

	var allocatedNetworks []*api.Network
	for _, n := range networks {
		if na.IsAllocated(n) {
			continue
		}

		if err := a.allocateNetwork(ctx, n); err != nil {
			log.G(ctx).WithError(err).Errorf("failed allocating network %s during init", n.ID)
			continue
		}
		allocatedNetworks = append(allocatedNetworks, n)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, n := range allocatedNetworks {
			if err := a.commitAllocatedNetwork(ctx, batch, n); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of network %s during init", n.ID)
			}
		}
		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("failed committing allocation of networks during init")
	}

	// Allocate the nodes already in the store before we process watched events.
	var nodes []*api.Node
	a.store.View(func(tx store.ReadTx) {
		nodes, err = store.FindNodes(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all nodes in store while trying to allocate during init")
	}

	var allocatedNodes []*api.Node
	for _, node := range nodes {
		if na.IsNodeAllocated(node) {
			continue
		}

		if node.Attachment == nil {
			node.Attachment = &api.NetworkAttachment{}
		}

		node.Attachment.Network = nc.ingressNetwork.Copy()
		if err := a.allocateNode(ctx, node); err != nil {
			log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s during init", node.ID)
			continue
		}

		allocatedNodes = append(allocatedNodes, node)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, node := range allocatedNodes {
			if err := a.commitAllocatedNode(ctx, batch, node); err != nil {
				log.G(ctx).WithError(err).Errorf("Failed to commit allocation of network resources for node %s during init", node.ID)
			}
		}
		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("Failed to commit allocation of network resources for nodes during init")
	}

	// Allocate the services already in the store before we process watched events.
	var services []*api.Service
	a.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all services in store while trying to allocate during init")
	}

	var allocatedServices []*api.Service
	for _, s := range services {
		if nc.nwkAllocator.IsServiceAllocated(s) {
			continue
		}

		if err := a.allocateService(ctx, s); err != nil {
			log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID)
			continue
		}
		allocatedServices = append(allocatedServices, s)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, s := range allocatedServices {
			if err := a.commitAllocatedService(ctx, batch, s); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of service %s during init", s.ID)
			}
		}
		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("failed committing allocation of services during init")
	}

	// Allocate the tasks already in the store before we start watching.
	var (
		tasks          []*api.Task
		allocatedTasks []*api.Task
	)
	a.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all tasks in store while trying to allocate during init")
	}

	for _, t := range tasks {
		if taskDead(t) {
			continue
		}

		var s *api.Service
		if t.ServiceID != "" {
			a.store.View(func(tx store.ReadTx) {
				s = store.GetService(tx, t.ServiceID)
			})
		}

		// Populate network attachments in the task
		// based on service spec.
		a.taskCreateNetworkAttachments(t, s)

		if taskReadyForNetworkVote(t, s, nc) {
			if t.Status.State >= api.TaskStatePending {
				continue
			}

			if a.taskAllocateVote(networkVoter, t.ID) {
				// If the task is not attached to any network, the network
				// allocator's job is done. Immediately cast a vote so
				// that the task can be moved to the ALLOCATED state as
				// soon as possible.
				allocatedTasks = append(allocatedTasks, t)
			}
			continue
		}

		err := a.allocateTask(ctx, t)
		if err == nil {
			allocatedTasks = append(allocatedTasks, t)
		} else if err != errNoChanges {
			log.G(ctx).WithError(err).Errorf("failed allocating task %s during init", t.ID)
			nc.unallocatedTasks[t.ID] = t
		}
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, t := range allocatedTasks {
			if err := a.commitAllocatedTask(ctx, batch, t); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of task %s during init", t.ID)
			}
		}

		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("failed committing allocation of tasks during init")
	}

	return nil
}
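doNetworkInit repeats one shape four times: read the existing objects in a View, allocate whatever is not yet allocated, then commit the successes inside a single Batch so that one failed commit does not abort the rest. A hedged sketch of that shape for networks only; allocateExistingNetworks is a hypothetical refactoring that reuses the allocateNetwork and commitAllocatedNetwork methods shown above:

func (a *Allocator) allocateExistingNetworks(ctx context.Context, nc *networkContext) error {
	var (
		networks []*api.Network
		err      error
	)
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all networks in store")
	}

	var allocated []*api.Network
	for _, n := range networks {
		if nc.nwkAllocator.IsAllocated(n) {
			continue
		}
		if err := a.allocateNetwork(ctx, n); err != nil {
			log.G(ctx).WithError(err).Errorf("failed allocating network %s", n.ID)
			continue
		}
		allocated = append(allocated, n)
	}

	// Commit every successful allocation in one batch; individual commit
	// failures are logged instead of aborting the remaining commits.
	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, n := range allocated {
			if err := a.commitAllocatedNetwork(ctx, batch, n); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of network %s", n.ID)
			}
		}
		return nil
	}); err != nil {
		return errors.Wrap(err, "failed committing allocation of networks")
	}
	return nil
}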
Example #28
0
File: task.go Project: Mic92/docker
// ListTasks returns a list of all tasks.
func (s *Server) ListTasks(ctx context.Context, request *api.ListTasksRequest) (*api.ListTasksResponse, error) {
	var (
		tasks []*api.Task
		err   error
	)

	s.store.View(func(tx store.ReadTx) {
		switch {
		case request.Filters != nil && len(request.Filters.Names) > 0:
			tasks, err = store.FindTasks(tx, buildFilters(store.ByName, request.Filters.Names))
		case request.Filters != nil && len(request.Filters.NamePrefixes) > 0:
			tasks, err = store.FindTasks(tx, buildFilters(store.ByNamePrefix, request.Filters.NamePrefixes))
		case request.Filters != nil && len(request.Filters.IDPrefixes) > 0:
			tasks, err = store.FindTasks(tx, buildFilters(store.ByIDPrefix, request.Filters.IDPrefixes))
		case request.Filters != nil && len(request.Filters.ServiceIDs) > 0:
			tasks, err = store.FindTasks(tx, buildFilters(store.ByServiceID, request.Filters.ServiceIDs))
		case request.Filters != nil && len(request.Filters.NodeIDs) > 0:
			tasks, err = store.FindTasks(tx, buildFilters(store.ByNodeID, request.Filters.NodeIDs))
		case request.Filters != nil && len(request.Filters.DesiredStates) > 0:
			filters := make([]store.By, 0, len(request.Filters.DesiredStates))
			for _, v := range request.Filters.DesiredStates {
				filters = append(filters, store.ByDesiredState(v))
			}
			tasks, err = store.FindTasks(tx, store.Or(filters...))
		default:
			tasks, err = store.FindTasks(tx, store.All)
		}
	})
	if err != nil {
		return nil, err
	}

	if request.Filters != nil {
		tasks = filterTasks(tasks,
			func(e *api.Task) bool {
				return filterContains(naming.Task(e), request.Filters.Names)
			},
			func(e *api.Task) bool {
				return filterContainsPrefix(naming.Task(e), request.Filters.NamePrefixes)
			},
			func(e *api.Task) bool {
				return filterContainsPrefix(e.ID, request.Filters.IDPrefixes)
			},
			func(e *api.Task) bool {
				return filterMatchLabels(e.ServiceAnnotations.Labels, request.Filters.Labels)
			},
			func(e *api.Task) bool {
				return filterContains(e.ServiceID, request.Filters.ServiceIDs)
			},
			func(e *api.Task) bool {
				return filterContains(e.NodeID, request.Filters.NodeIDs)
			},
			func(e *api.Task) bool {
				if len(request.Filters.DesiredStates) == 0 {
					return true
				}
				for _, c := range request.Filters.DesiredStates {
					if c == e.DesiredState {
						return true
					}
				}
				return false
			},
		)
	}

	return &api.ListTasksResponse{
		Tasks: tasks,
	}, nil
}
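buildFilters is referenced in the switch above but not listed. A minimal sketch that matches its usage, turning one selector constructor plus a list of values into a single OR filter; the project's actual helper may differ in detail:

func buildFilters(by func(string) store.By, values []string) store.By {
	filters := make([]store.By, 0, len(values))
	for _, v := range values {
		filters = append(filters, by(v))
	}
	return store.Or(filters...)
}

With this shape, store.ByName, store.ByNamePrefix, store.ByIDPrefix, store.ByServiceID and store.ByNodeID can all be passed directly as the by argument, which is exactly how the switch uses it.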
Example #29
0
// Assignments is a stream of assignments for a node. Each message contains
// either a full list of tasks and secrets for the node or an incremental update.
func (d *Dispatcher) Assignments(r *api.AssignmentsRequest, stream api.Dispatcher_AssignmentsServer) error {
	nodeInfo, err := ca.RemoteNode(stream.Context())
	if err != nil {
		return err
	}
	nodeID := nodeInfo.NodeID

	if err := d.isRunningLocked(); err != nil {
		return err
	}

	fields := logrus.Fields{
		"node.id":      nodeID,
		"node.session": r.SessionID,
		"method":       "(*Dispatcher).Assignments",
	}
	if nodeInfo.ForwardedBy != nil {
		fields["forwarder.id"] = nodeInfo.ForwardedBy.NodeID
	}
	log := log.G(stream.Context()).WithFields(fields)
	log.Debugf("")

	if _, err = d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
		return err
	}

	var (
		sequence  int64
		appliesTo string
		initial   api.AssignmentsMessage
	)
	tasksMap := make(map[string]*api.Task)

	sendMessage := func(msg api.AssignmentsMessage, assignmentType api.AssignmentsMessage_Type) error {
		sequence++
		msg.AppliesTo = appliesTo
		msg.ResultsIn = strconv.FormatInt(sequence, 10)
		appliesTo = msg.ResultsIn
		msg.Type = assignmentType

		if err := stream.Send(&msg); err != nil {
			return err
		}
		return nil
	}

	// TODO(aaronl): Also send node secrets that should be exposed to
	// this node.
	nodeTasks, cancel, err := store.ViewAndWatch(
		d.store,
		func(readTx store.ReadTx) error {
			tasks, err := store.FindTasks(readTx, store.ByNodeID(nodeID))
			if err != nil {
				return err
			}

			for _, t := range tasks {
				// We only care about tasks that are ASSIGNED or
				// higher. If the state is below ASSIGNED, the
				// task may not meet the constraints for this
				// node, so we have to be careful about sending
				// secrets associated with it.
				if t.Status.State < api.TaskStateAssigned {
					continue
				}

				tasksMap[t.ID] = t
				initial.UpdateTasks = append(initial.UpdateTasks, t)
			}
			return nil
		},
		state.EventUpdateTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
		state.EventDeleteTask{Task: &api.Task{NodeID: nodeID},
			Checks: []state.TaskCheckFunc{state.TaskCheckNodeID}},
	)
	if err != nil {
		return err
	}
	defer cancel()

	if err := sendMessage(initial, api.AssignmentsMessage_COMPLETE); err != nil {
		return err
	}

	for {
		// Check for session expiration
		if _, err := d.nodes.GetWithSession(nodeID, r.SessionID); err != nil {
			return err
		}

		// bursty events should be processed in batches and sent out together
		var (
			update          api.AssignmentsMessage
			modificationCnt int
			batchingTimer   *time.Timer
			batchingTimeout <-chan time.Time
			updateTasks     = make(map[string]*api.Task)
			removeTasks     = make(map[string]struct{})
		)

		oneModification := func() {
			modificationCnt++

			if batchingTimer != nil {
				batchingTimer.Reset(batchingWaitTime)
			} else {
				batchingTimer = time.NewTimer(batchingWaitTime)
				batchingTimeout = batchingTimer.C
			}
		}

		// The batching loop waits for batchingWaitTime after the most
		// recent change, or until modificationBatchLimit changes have
		// accumulated, so the worst-case flush latency is
		// modificationBatchLimit * batchingWaitTime.
	batchingLoop:
		for modificationCnt < modificationBatchLimit {
			select {
			case event := <-nodeTasks:
				switch v := event.(type) {
				// We don't monitor EventCreateTask because tasks are
				// never created in the ASSIGNED state. Tasks are first
				// created by the orchestrator, then the scheduler moves
				// them to ASSIGNED. If this ever changes, we will need
				// to monitor task creations as well.
				case state.EventUpdateTask:
					// We only care about tasks that are ASSIGNED or
					// higher.
					if v.Task.Status.State < api.TaskStateAssigned {
						continue
					}

					if oldTask, exists := tasksMap[v.Task.ID]; exists {
						// States ASSIGNED and below are set by the orchestrator/scheduler,
						// not the agent, so tasks in these states need to be sent to the
						// agent even if nothing else has changed.
						if equality.TasksEqualStable(oldTask, v.Task) && v.Task.Status.State > api.TaskStateAssigned {
							// this update should not trigger a task change for the agent
							tasksMap[v.Task.ID] = v.Task
							continue
						}
					}
					tasksMap[v.Task.ID] = v.Task
					updateTasks[v.Task.ID] = v.Task

					oneModification()
				case state.EventDeleteTask:

					if _, exists := tasksMap[v.Task.ID]; !exists {
						continue
					}

					removeTasks[v.Task.ID] = struct{}{}

					delete(tasksMap, v.Task.ID)

					oneModification()
				}
			case <-batchingTimeout:
				break batchingLoop
			case <-stream.Context().Done():
				return stream.Context().Err()
			case <-d.ctx.Done():
				return d.ctx.Err()
			}
		}

		if batchingTimer != nil {
			batchingTimer.Stop()
		}

		if modificationCnt > 0 {
			for id, task := range updateTasks {
				if _, ok := removeTasks[id]; !ok {
					update.UpdateTasks = append(update.UpdateTasks, task)
				}
			}
			for id := range removeTasks {
				update.RemoveTasks = append(update.RemoveTasks, id)
			}
			if err := sendMessage(update, api.AssignmentsMessage_INCREMENTAL); err != nil {
				return err
			}
		}
	}
}
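The core of the Assignments stream is the coalescing pattern in the batching loop: every change resets a quiet-period timer, and the batch is flushed either when the timer fires or when the modification limit is reached. A stripped-down sketch of just that pattern, independent of the dispatcher; the names coalesce, wait and maxBatch are illustrative only:

func coalesce(events <-chan string, wait time.Duration, maxBatch int) []string {
	var (
		batch   []string
		timer   *time.Timer
		timeout <-chan time.Time
	)
	for len(batch) < maxBatch {
		select {
		case e, ok := <-events:
			if !ok {
				return batch
			}
			batch = append(batch, e)
			// Restart the quiet-period timer on every new event.
			if timer != nil {
				timer.Reset(wait)
			} else {
				timer = time.NewTimer(wait)
				timeout = timer.C
			}
		case <-timeout:
			// No new event arrived within the quiet period; flush.
			return batch
		}
	}
	if timer != nil {
		timer.Stop()
	}
	return batch
}

Resetting the timer on every event, rather than using a fixed ticker, is what bounds the worst-case flush latency at roughly maxBatch * wait while keeping quiet streams cheap.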
Example #30
0
// reconcileServicesOneNode checks the specified services on one node
func (g *Orchestrator) reconcileServicesOneNode(ctx context.Context, serviceIDs []string, nodeID string) {
	node, exists := g.nodes[nodeID]
	if !exists {
		return
	}

	// whether each service has completed on the node
	completed := make(map[string]bool)
	// tasks by service
	tasks := make(map[string][]*api.Task)

	var (
		tasksOnNode []*api.Task
		err         error
	)

	g.store.View(func(tx store.ReadTx) {
		tasksOnNode, err = store.FindTasks(tx, store.ByNodeID(nodeID))
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcile failed finding tasks on node %s", nodeID)
		return
	}

	for _, serviceID := range serviceIDs {
		for _, t := range tasksOnNode {
			if t.ServiceID != serviceID {
				continue
			}
			if isTaskRunning(t) {
				tasks[serviceID] = append(tasks[serviceID], t)
			} else {
				if isTaskCompleted(t, orchestrator.RestartCondition(t)) {
					completed[serviceID] = true
				}
			}
		}
	}

	_, err = g.store.Batch(func(batch *store.Batch) error {
		for _, serviceID := range serviceIDs {
			service, exists := g.globalServices[serviceID]
			if !exists {
				continue
			}

			if !constraint.NodeMatches(service.constraints, node) {
				continue
			}

			// If the restart policy considers this node to have finished
			// its work for this service, remove all of its running tasks.
			if completed[serviceID] {
				g.removeTasks(ctx, batch, tasks[serviceID])
				continue
			}

			if node.Spec.Availability == api.NodeAvailabilityPause {
				// the node is paused, so we won't add or update tasks
				continue
			}

			if len(tasks[serviceID]) == 0 {
				g.addTask(ctx, batch, service.Service, nodeID)
			} else {
				// If task is out of date, update it. This can happen
				// on node reconciliation if, for example, we pause a
				// node, update the service, and then activate the node
				// later.

				// We don't use g.updater here for two reasons:
				// - This is not a rolling update. Since it was not
				//   triggered directly by updating the service, it
				//   should not observe the rolling update parameters
				//   or show status in UpdateStatus.
				// - Calling Update cancels any current rolling updates
				//   for the service, such as one triggered by service
				//   reconciliation.

				var (
					dirtyTasks []*api.Task
					cleanTasks []*api.Task
				)

				for _, t := range tasks[serviceID] {
					if orchestrator.IsTaskDirty(service.Service, t) {
						dirtyTasks = append(dirtyTasks, t)
					} else {
						cleanTasks = append(cleanTasks, t)
					}
				}

				if len(cleanTasks) == 0 {
					g.addTask(ctx, batch, service.Service, nodeID)
				} else {
					dirtyTasks = append(dirtyTasks, cleanTasks[1:]...)
				}
				g.removeTasks(ctx, batch, dirtyTasks)
			}
		}
		return nil
	})
	if err != nil {
		log.G(ctx).WithError(err).Errorf("global orchestrator: reconcileServiceOneNode batch failed")
	}
}
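isTaskRunning and isTaskCompleted are used by both global-orchestrator reconcilers above but are not part of this listing. A sketch consistent with that usage, assuming the restart-condition constants from the api package; the real helpers may differ in detail:

func isTaskRunning(t *api.Task) bool {
	// A task counts as running while neither its desired state nor its
	// observed state has moved past RUNNING.
	return t != nil && t.DesiredState <= api.TaskStateRunning && t.Status.State <= api.TaskStateRunning
}

func isTaskCompleted(t *api.Task, restartPolicy api.RestartPolicy_RestartCondition) bool {
	if t == nil || isTaskRunning(t) {
		return false
	}
	// A finished task only counts as completed for reconciliation purposes
	// if the restart policy would not bring it back.
	return restartPolicy == api.RestartOnNone ||
		(restartPolicy == api.RestartOnFailure && t.Status.State == api.TaskStateCompleted)
}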