Example #1
0
// ListServices returns a list of all services.
func (s *Server) ListServices(ctx context.Context, request *api.ListServicesRequest) (*api.ListServicesResponse, error) {
	var (
		services []*api.Service
		err      error
	)

	s.store.View(func(tx store.ReadTx) {
		switch {
		case request.Filters != nil && len(request.Filters.Names) > 0:
			services, err = store.FindServices(tx, buildFilters(store.ByName, request.Filters.Names))
		case request.Filters != nil && len(request.Filters.NamePrefixes) > 0:
			services, err = store.FindServices(tx, buildFilters(store.ByNamePrefix, request.Filters.NamePrefixes))
		case request.Filters != nil && len(request.Filters.IDPrefixes) > 0:
			services, err = store.FindServices(tx, buildFilters(store.ByIDPrefix, request.Filters.IDPrefixes))
		default:
			services, err = store.FindServices(tx, store.All)
		}
	})
	if err != nil {
		return nil, err
	}

	if request.Filters != nil {
		services = filterServices(services,
			func(e *api.Service) bool {
				return filterContains(e.Spec.Annotations.Name, request.Filters.Names)
			},
			func(e *api.Service) bool {
				return filterContainsPrefix(e.Spec.Annotations.Name, request.Filters.NamePrefixes)
			},
			func(e *api.Service) bool {
				return filterContainsPrefix(e.ID, request.Filters.IDPrefixes)
			},
			func(e *api.Service) bool {
				return filterMatchLabels(e.Spec.Annotations.Labels, request.Filters.Labels)
			},
		)
	}

	return &api.ListServicesResponse{
		Services: services,
	}, nil
}
Example #2
0
// checkPortConflicts does a best effort to find if the passed in spec has port
// conflicts with existing services.
// `serviceID string` is the service ID of the spec in service update. If
// `serviceID` is not "", then conflicts check will be skipped against this
// service (the service being updated).
func (s *Server) checkPortConflicts(spec *api.ServiceSpec, serviceID string) error {
	if spec.Endpoint == nil {
		return nil
	}

	pcToString := func(pc *api.PortConfig) string {
		port := strconv.FormatUint(uint64(pc.PublishedPort), 10)
		return port + "/" + pc.Protocol.String()
	}

	reqPorts := make(map[string]bool)
	for _, pc := range spec.Endpoint.Ports {
		if pc.PublishedPort > 0 {
			reqPorts[pcToString(pc)] = true
		}
	}
	if len(reqPorts) == 0 {
		return nil
	}

	var (
		services []*api.Service
		err      error
	)

	s.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return err
	}

	for _, service := range services {
		// If service ID is the same (and not "") then this is an update
		if serviceID != "" && serviceID == service.ID {
			continue
		}
		if service.Spec.Endpoint != nil {
			for _, pc := range service.Spec.Endpoint.Ports {
				if reqPorts[pcToString(pc)] {
					return grpc.Errorf(codes.InvalidArgument, "port '%d' is already in use by service '%s' (%s)", pc.PublishedPort, service.Spec.Annotations.Name, service.ID)
				}
			}
		}
		if service.Endpoint != nil {
			for _, pc := range service.Endpoint.Ports {
				if reqPorts[pcToString(pc)] {
					return grpc.Errorf(codes.InvalidArgument, "port '%d' is already in use by service '%s' (%s)", pc.PublishedPort, service.Spec.Annotations.Name, service.ID)
				}
			}
		}
	}
	return nil
}
Example #3
0
func (r *ReplicatedOrchestrator) initServices(readTx store.ReadTx) error {
	services, err := store.FindServices(readTx, store.All)
	if err != nil {
		return err
	}
	for _, s := range services {
		if isReplicatedService(s) {
			r.reconcileServices[s.ID] = s
		}
	}
	return nil
}
Example #4
0
// RemoveSecret removes the secret referenced by `RemoveSecretRequest.ID`.
// - Returns `InvalidArgument` if `RemoveSecretRequest.ID` is empty.
// - Returns `NotFound` if the a secret named `RemoveSecretRequest.ID` is not found.
// - Returns `SecretInUse` if the secret is currently in use
// - Returns an error if the deletion fails.
func (s *Server) RemoveSecret(ctx context.Context, request *api.RemoveSecretRequest) (*api.RemoveSecretResponse, error) {
	if request.SecretID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, "secret ID must be provided")
	}

	err := s.store.Update(func(tx store.Tx) error {
		// Check if the secret exists
		secret := store.GetSecret(tx, request.SecretID)
		if secret == nil {
			return grpc.Errorf(codes.NotFound, "could not find secret %s", request.SecretID)
		}

		// Check if any services currently reference this secret, return error if so
		services, err := store.FindServices(tx, store.ByReferencedSecretID(request.SecretID))
		if err != nil {
			return grpc.Errorf(codes.Internal, "could not find services using secret %s: %v", request.SecretID, err)
		}

		if len(services) != 0 {
			serviceNames := make([]string, 0, len(services))
			for _, service := range services {
				serviceNames = append(serviceNames, service.Spec.Annotations.Name)
			}

			secretName := secret.Spec.Annotations.Name
			serviceNameStr := strings.Join(serviceNames, ", ")
			serviceStr := "services"
			if len(serviceNames) == 1 {
				serviceStr = "service"
			}

			return grpc.Errorf(codes.InvalidArgument, "secret '%s' is in use by the following %s: %v", secretName, serviceStr, serviceNameStr)
		}

		return store.DeleteSecret(tx, request.SecretID)
	})
	switch err {
	case store.ErrNotExist:
		return nil, grpc.Errorf(codes.NotFound, "secret %s not found", request.SecretID)
	case nil:
		log.G(ctx).WithFields(logrus.Fields{
			"secret.ID": request.SecretID,
			"method":    "RemoveSecret",
		}).Debugf("secret removed")

		return &api.RemoveSecretResponse{}, nil
	default:
		return nil, err
	}
}
Example #5
0
// RemoveNetwork removes a Network referenced by NetworkID.
// - Returns `InvalidArgument` if NetworkID is not provided.
// - Returns `NotFound` if the Network is not found.
// - Returns an error if the deletion fails.
func (s *Server) RemoveNetwork(ctx context.Context, request *api.RemoveNetworkRequest) (*api.RemoveNetworkResponse, error) {
	var (
		services []*api.Service
		err      error
	)

	if request.NetworkID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}

	s.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return nil, grpc.Errorf(codes.Internal, "could not find services using network %s", request.NetworkID)
	}

	for _, s := range services {
		specNetworks := s.Spec.Task.Networks
		if len(specNetworks) == 0 {
			specNetworks = s.Spec.Networks
		}

		for _, na := range specNetworks {
			if na.Target == request.NetworkID {
				return nil, grpc.Errorf(codes.FailedPrecondition, "network %s is in use", request.NetworkID)
			}
		}
	}

	err = s.store.Update(func(tx store.Tx) error {
		nw := store.GetNetwork(tx, request.NetworkID)
		if _, ok := nw.Spec.Annotations.Labels["com.docker.swarm.internal"]; ok {
			networkDescription := nw.ID
			if nw.Spec.Annotations.Name != "" {
				networkDescription = fmt.Sprintf("%s (%s)", nw.Spec.Annotations.Name, nw.ID)
			}
			return grpc.Errorf(codes.PermissionDenied, "%s is a pre-defined network and cannot be removed", networkDescription)
		}
		return store.DeleteNetwork(tx, request.NetworkID)
	})
	if err != nil {
		if err == store.ErrNotExist {
			return nil, grpc.Errorf(codes.NotFound, "network %s not found", request.NetworkID)
		}
		return nil, err
	}
	return &api.RemoveNetworkResponse{}, nil
}
Example #6
0
// RemoveNetwork removes a Network referenced by NetworkID.
// - Returns `InvalidArgument` if NetworkID is not provided.
// - Returns `NotFound` if the Network is not found.
// - Returns an error if the deletion fails.
func (s *Server) RemoveNetwork(ctx context.Context, request *api.RemoveNetworkRequest) (*api.RemoveNetworkResponse, error) {
	if request.NetworkID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}

	err := s.store.Update(func(tx store.Tx) error {
		services, err := store.FindServices(tx, store.ByReferencedNetworkID(request.NetworkID))
		if err != nil {
			return grpc.Errorf(codes.Internal, "could not find services using network %s: %v", request.NetworkID, err)
		}

		if len(services) != 0 {
			return grpc.Errorf(codes.FailedPrecondition, "network %s is in use by service %s", request.NetworkID, services[0].ID)
		}

		tasks, err := store.FindTasks(tx, store.ByReferencedNetworkID(request.NetworkID))
		if err != nil {
			return grpc.Errorf(codes.Internal, "could not find tasks using network %s: %v", request.NetworkID, err)
		}

		if len(tasks) != 0 {
			return grpc.Errorf(codes.FailedPrecondition, "network %s is in use by task %s", request.NetworkID, tasks[0].ID)
		}

		nw := store.GetNetwork(tx, request.NetworkID)
		if _, ok := nw.Spec.Annotations.Labels["com.docker.swarm.internal"]; ok {
			networkDescription := nw.ID
			if nw.Spec.Annotations.Name != "" {
				networkDescription = fmt.Sprintf("%s (%s)", nw.Spec.Annotations.Name, nw.ID)
			}
			return grpc.Errorf(codes.PermissionDenied, "%s is a pre-defined network and cannot be removed", networkDescription)
		}
		return store.DeleteNetwork(tx, request.NetworkID)
	})
	if err != nil {
		if err == store.ErrNotExist {
			return nil, grpc.Errorf(codes.NotFound, "network %s not found", request.NetworkID)
		}
		return nil, err
	}
	return &api.RemoveNetworkResponse{}, nil
}
Example #7
0
func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
	na, err := networkallocator.New()
	if err != nil {
		return err
	}

	nc := &networkContext{
		nwkAllocator:        na,
		unallocatedTasks:    make(map[string]*api.Task),
		unallocatedServices: make(map[string]*api.Service),
		unallocatedNetworks: make(map[string]*api.Network),
		ingressNetwork:      newIngressNetwork(),
	}
	a.netCtx = nc
	defer func() {
		// Clear a.netCtx if initialization was unsuccessful.
		if err != nil {
			a.netCtx = nil
		}
	}()

	// Check if we have the ingress network. If not found create
	// it before reading all network objects for allocation.
	var networks []*api.Network
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
		if len(networks) > 0 {
			nc.ingressNetwork = networks[0]
		}
	})
	if err != nil {
		return errors.Wrap(err, "failed to find ingress network during init")
	}

	// If ingress network is not found, create one right away
	// using the predefined template.
	if len(networks) == 0 {
		if err := a.store.Update(func(tx store.Tx) error {
			nc.ingressNetwork.ID = identity.NewID()
			if err := store.CreateNetwork(tx, nc.ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return errors.Wrap(err, "failed to create ingress network")
		}

		a.store.View(func(tx store.ReadTx) {
			networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
			if len(networks) > 0 {
				nc.ingressNetwork = networks[0]
			}
		})
		if err != nil {
			return errors.Wrap(err, "failed to find ingress network after creating it")
		}

	}

	// Try to complete ingress network allocation before anything else so
	// that the we can get the preferred subnet for ingress
	// network.
	if !na.IsAllocated(nc.ingressNetwork) {
		if err := a.allocateNetwork(ctx, nc.ingressNetwork); err != nil {
			log.G(ctx).WithError(err).Error("failed allocating ingress network during init")
		} else if _, err := a.store.Batch(func(batch *store.Batch) error {
			if err := a.commitAllocatedNetwork(ctx, batch, nc.ingressNetwork); err != nil {
				log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
			}
			return nil
		}); err != nil {
			log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
		}
	}

	// Allocate networks in the store so far before we started
	// watching.
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all networks in store while trying to allocate during init")
	}

	var allocatedNetworks []*api.Network
	for _, n := range networks {
		if na.IsAllocated(n) {
			continue
		}

		if err := a.allocateNetwork(ctx, n); err != nil {
			log.G(ctx).WithError(err).Errorf("failed allocating network %s during init", n.ID)
			continue
		}
		allocatedNetworks = append(allocatedNetworks, n)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, n := range allocatedNetworks {
			if err := a.commitAllocatedNetwork(ctx, batch, n); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of network %s during init", n.ID)
			}
		}
		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("failed committing allocation of networks during init")
	}

	// Allocate nodes in the store so far before we process watched events.
	var nodes []*api.Node
	a.store.View(func(tx store.ReadTx) {
		nodes, err = store.FindNodes(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all nodes in store while trying to allocate during init")
	}

	var allocatedNodes []*api.Node
	for _, node := range nodes {
		if na.IsNodeAllocated(node) {
			continue
		}

		if node.Attachment == nil {
			node.Attachment = &api.NetworkAttachment{}
		}

		node.Attachment.Network = nc.ingressNetwork.Copy()
		if err := a.allocateNode(ctx, node); err != nil {
			log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s during init", node.ID)
			continue
		}

		allocatedNodes = append(allocatedNodes, node)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, node := range allocatedNodes {
			if err := a.commitAllocatedNode(ctx, batch, node); err != nil {
				log.G(ctx).WithError(err).Errorf("Failed to commit allocation of network resources for node %s during init", node.ID)
			}
		}
		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("Failed to commit allocation of network resources for nodes during init")
	}

	// Allocate services in the store so far before we process watched events.
	var services []*api.Service
	a.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all services in store while trying to allocate during init")
	}

	var allocatedServices []*api.Service
	for _, s := range services {
		if nc.nwkAllocator.IsServiceAllocated(s) {
			continue
		}

		if err := a.allocateService(ctx, s); err != nil {
			log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID)
			continue
		}
		allocatedServices = append(allocatedServices, s)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, s := range allocatedServices {
			if err := a.commitAllocatedService(ctx, batch, s); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of service %s during init", s.ID)
			}
		}
		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("failed committing allocation of services during init")
	}

	// Allocate tasks in the store so far before we started watching.
	var (
		tasks          []*api.Task
		allocatedTasks []*api.Task
	)
	a.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.All)
	})
	if err != nil {
		return errors.Wrap(err, "error listing all tasks in store while trying to allocate during init")
	}

	for _, t := range tasks {
		if taskDead(t) {
			continue
		}

		var s *api.Service
		if t.ServiceID != "" {
			a.store.View(func(tx store.ReadTx) {
				s = store.GetService(tx, t.ServiceID)
			})
		}

		// Populate network attachments in the task
		// based on service spec.
		a.taskCreateNetworkAttachments(t, s)

		if taskReadyForNetworkVote(t, s, nc) {
			if t.Status.State >= api.TaskStatePending {
				continue
			}

			if a.taskAllocateVote(networkVoter, t.ID) {
				// If the task is not attached to any network, network
				// allocators job is done. Immediately cast a vote so
				// that the task can be moved to ALLOCATED state as
				// soon as possible.
				allocatedTasks = append(allocatedTasks, t)
			}
			continue
		}

		err := a.allocateTask(ctx, t)
		if err == nil {
			allocatedTasks = append(allocatedTasks, t)
		} else if err != errNoChanges {
			log.G(ctx).WithError(err).Errorf("failed allocating task %s during init", t.ID)
			nc.unallocatedTasks[t.ID] = t
		}
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, t := range allocatedTasks {
			if err := a.commitAllocatedTask(ctx, batch, t); err != nil {
				log.G(ctx).WithError(err).Errorf("failed committing allocation of task %s during init", t.ID)
			}
		}

		return nil
	}); err != nil {
		log.G(ctx).WithError(err).Error("failed committing allocation of tasks during init")
	}

	return nil
}
Example #8
0
// Run contains the GlobalOrchestrator event loop
func (g *GlobalOrchestrator) Run(ctx context.Context) error {
	defer close(g.doneChan)

	// Watch changes to services and tasks
	queue := g.store.WatchQueue()
	watcher, cancel := queue.Watch()
	defer cancel()

	// Get list of nodes
	var (
		nodes []*api.Node
		err   error
	)
	g.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, n := range nodes {
		// if a node is in drain state, do not add it
		if isValidNode(n) {
			g.nodes[n.ID] = struct{}{}
		}
	}

	// Lookup global services
	var existingServices []*api.Service
	g.store.View(func(readTx store.ReadTx) {
		existingServices, err = store.FindServices(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, s := range existingServices {
		if isGlobalService(s) {
			g.globalServices[s.ID] = s
			g.reconcileOneService(ctx, s)
		}
	}

	for {
		select {
		case event := <-watcher:
			// TODO(stevvooe): Use ctx to limit running time of operation.
			switch v := event.(type) {
			case state.EventCreateService:
				if !isGlobalService(v.Service) {
					continue
				}
				g.globalServices[v.Service.ID] = v.Service
				g.reconcileOneService(ctx, v.Service)
			case state.EventUpdateService:
				if !isGlobalService(v.Service) {
					continue
				}
				g.globalServices[v.Service.ID] = v.Service
				g.reconcileOneService(ctx, v.Service)
			case state.EventDeleteService:
				if !isGlobalService(v.Service) {
					continue
				}
				deleteServiceTasks(ctx, g.store, v.Service)
				// delete the service from service map
				delete(g.globalServices, v.Service.ID)
				g.restarts.ClearServiceHistory(v.Service.ID)
			case state.EventCreateNode:
				g.reconcileOneNode(ctx, v.Node)
			case state.EventUpdateNode:
				switch v.Node.Status.State {
				// NodeStatus_DISCONNECTED is a transient state, no need to make any change
				case api.NodeStatus_DOWN:
					g.removeTasksFromNode(ctx, v.Node)
				case api.NodeStatus_READY:
					// node could come back to READY from DOWN or DISCONNECT
					g.reconcileOneNode(ctx, v.Node)
				}
			case state.EventDeleteNode:
				g.removeTasksFromNode(ctx, v.Node)
				delete(g.nodes, v.Node.ID)
			case state.EventUpdateTask:
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				// global orchestrator needs to inspect when a task has terminated
				// it should ignore tasks whose DesiredState is past running, which
				// means the task has been processed
				if isTaskTerminated(v.Task) {
					g.restartTask(ctx, v.Task.ID, v.Task.ServiceID)
				}
			case state.EventDeleteTask:
				// CLI allows deleting task
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				g.reconcileServiceOneNode(ctx, v.Task.ServiceID, v.Task.NodeID)
			}
		case <-g.stopChan:
			return nil
		}
	}
}
Example #9
0
func (a *Allocator) doNetworkInit(ctx context.Context) error {
	na, err := networkallocator.New()
	if err != nil {
		return err
	}

	nc := &networkContext{
		nwkAllocator:        na,
		unallocatedTasks:    make(map[string]*api.Task),
		unallocatedServices: make(map[string]*api.Service),
		unallocatedNetworks: make(map[string]*api.Network),
	}

	// Check if we have the ingress network. If not found create
	// it before reading all network objects for allocation.
	var networks []*api.Network
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
		if len(networks) > 0 {
			ingressNetwork = networks[0]
		}
	})
	if err != nil {
		return fmt.Errorf("failed to find ingress network during init: %v", err)
	}

	// If ingress network is not found, create one right away
	// using the predefined template.
	if len(networks) == 0 {
		if err := a.store.Update(func(tx store.Tx) error {
			ingressNetwork.ID = identity.NewID()
			if err := store.CreateNetwork(tx, ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return fmt.Errorf("failed to create ingress network: %v", err)
		}

		a.store.View(func(tx store.ReadTx) {
			networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
			if len(networks) > 0 {
				ingressNetwork = networks[0]
			}
		})
		if err != nil {
			return fmt.Errorf("failed to find ingress network after creating it: %v", err)
		}

	}

	// Try to complete ingress network allocation before anything else so
	// that the we can get the preferred subnet for ingress
	// network.
	if !na.IsAllocated(ingressNetwork) {
		if err := a.allocateNetwork(ctx, nc, ingressNetwork); err != nil {
			log.G(ctx).Errorf("failed allocating ingress network during init: %v", err)
		}

		// Update store after allocation
		if err := a.store.Update(func(tx store.Tx) error {
			if err := store.UpdateNetwork(tx, ingressNetwork); err != nil {
				return err
			}

			return nil
		}); err != nil {
			return fmt.Errorf("failed to create ingress network: %v", err)
		}
	}

	// Allocate networks in the store so far before we started
	// watching.
	a.store.View(func(tx store.ReadTx) {
		networks, err = store.FindNetworks(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all networks in store while trying to allocate during init: %v", err)
	}

	for _, n := range networks {
		if na.IsAllocated(n) {
			continue
		}

		if err := a.allocateNetwork(ctx, nc, n); err != nil {
			log.G(ctx).Errorf("failed allocating network %s during init: %v", n.ID, err)
		}
	}

	// Allocate nodes in the store so far before we process watched events.
	var nodes []*api.Node
	a.store.View(func(tx store.ReadTx) {
		nodes, err = store.FindNodes(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err)
	}

	for _, node := range nodes {
		if na.IsNodeAllocated(node) {
			continue
		}

		if node.Attachment == nil {
			node.Attachment = &api.NetworkAttachment{}
		}

		node.Attachment.Network = ingressNetwork.Copy()
		if err := a.allocateNode(ctx, nc, node); err != nil {
			log.G(ctx).Errorf("Failed to allocate network resources for node %s during init: %v", node.ID, err)
		}
	}

	// Allocate services in the store so far before we process watched events.
	var services []*api.Service
	a.store.View(func(tx store.ReadTx) {
		services, err = store.FindServices(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err)
	}

	for _, s := range services {
		if s.Spec.Endpoint == nil {
			continue
		}

		if na.IsServiceAllocated(s) {
			continue
		}

		if err := a.allocateService(ctx, nc, s); err != nil {
			log.G(ctx).Errorf("failed allocating service %s during init: %v", s.ID, err)
		}
	}

	// Allocate tasks in the store so far before we started watching.
	var tasks []*api.Task
	a.store.View(func(tx store.ReadTx) {
		tasks, err = store.FindTasks(tx, store.All)
	})
	if err != nil {
		return fmt.Errorf("error listing all tasks in store while trying to allocate during init: %v", err)
	}

	if _, err := a.store.Batch(func(batch *store.Batch) error {
		for _, t := range tasks {
			if taskDead(t) {
				continue
			}

			var s *api.Service
			if t.ServiceID != "" {
				a.store.View(func(tx store.ReadTx) {
					s = store.GetService(tx, t.ServiceID)
				})
			}

			// Populate network attachments in the task
			// based on service spec.
			a.taskCreateNetworkAttachments(t, s)

			if taskReadyForNetworkVote(t, s, nc) {
				if t.Status.State >= api.TaskStateAllocated {
					continue
				}

				if a.taskAllocateVote(networkVoter, t.ID) {
					// If the task is not attached to any network, network
					// allocators job is done. Immediately cast a vote so
					// that the task can be moved to ALLOCATED state as
					// soon as possible.
					if err := batch.Update(func(tx store.Tx) error {
						storeT := store.GetTask(tx, t.ID)
						if storeT == nil {
							return fmt.Errorf("task %s not found while trying to update state", t.ID)
						}

						updateTaskStatus(storeT, api.TaskStateAllocated, "allocated")

						if err := store.UpdateTask(tx, storeT); err != nil {
							return fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err)
						}

						return nil
					}); err != nil {
						log.G(ctx).WithError(err).Error("error updating task network")
					}
				}
				continue
			}

			err := batch.Update(func(tx store.Tx) error {
				_, err := a.allocateTask(ctx, nc, tx, t)
				return err
			})
			if err != nil {
				log.G(ctx).Errorf("failed allocating task %s during init: %v", t.ID, err)
				nc.unallocatedTasks[t.ID] = t
			}
		}

		return nil
	}); err != nil {
		return err
	}

	a.netCtx = nc
	return nil
}
Example #10
0
// Run contains the global orchestrator event loop
func (g *Orchestrator) Run(ctx context.Context) error {
	defer close(g.doneChan)

	// Watch changes to services and tasks
	queue := g.store.WatchQueue()
	watcher, cancel := queue.Watch()
	defer cancel()

	// lookup the cluster
	var err error
	g.store.View(func(readTx store.ReadTx) {
		var clusters []*api.Cluster
		clusters, err = store.FindClusters(readTx, store.ByName("default"))

		if len(clusters) != 1 {
			return // just pick up the cluster when it is created.
		}
		g.cluster = clusters[0]
	})
	if err != nil {
		return err
	}

	// Get list of nodes
	var nodes []*api.Node
	g.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, n := range nodes {
		g.updateNode(n)
	}

	// Lookup global services
	var existingServices []*api.Service
	g.store.View(func(readTx store.ReadTx) {
		existingServices, err = store.FindServices(readTx, store.All)
	})
	if err != nil {
		return err
	}

	var reconcileServiceIDs []string
	for _, s := range existingServices {
		if orchestrator.IsGlobalService(s) {
			g.updateService(s)
			reconcileServiceIDs = append(reconcileServiceIDs, s.ID)
		}
	}
	g.reconcileServices(ctx, reconcileServiceIDs)

	for {
		select {
		case event := <-watcher:
			// TODO(stevvooe): Use ctx to limit running time of operation.
			switch v := event.(type) {
			case state.EventUpdateCluster:
				g.cluster = v.Cluster
			case state.EventCreateService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				g.updateService(v.Service)
				g.reconcileServices(ctx, []string{v.Service.ID})
			case state.EventUpdateService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				g.updateService(v.Service)
				g.reconcileServices(ctx, []string{v.Service.ID})
			case state.EventDeleteService:
				if !orchestrator.IsGlobalService(v.Service) {
					continue
				}
				orchestrator.DeleteServiceTasks(ctx, g.store, v.Service)
				// delete the service from service map
				delete(g.globalServices, v.Service.ID)
				g.restarts.ClearServiceHistory(v.Service.ID)
			case state.EventCreateNode:
				g.updateNode(v.Node)
				g.reconcileOneNode(ctx, v.Node)
			case state.EventUpdateNode:
				g.updateNode(v.Node)
				switch v.Node.Status.State {
				// NodeStatus_DISCONNECTED is a transient state, no need to make any change
				case api.NodeStatus_DOWN:
					g.removeTasksFromNode(ctx, v.Node)
				case api.NodeStatus_READY:
					// node could come back to READY from DOWN or DISCONNECT
					g.reconcileOneNode(ctx, v.Node)
				}
			case state.EventDeleteNode:
				g.removeTasksFromNode(ctx, v.Node)
				delete(g.nodes, v.Node.ID)
			case state.EventUpdateTask:
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				// global orchestrator needs to inspect when a task has terminated
				// it should ignore tasks whose DesiredState is past running, which
				// means the task has been processed
				if isTaskTerminated(v.Task) {
					g.restartTask(ctx, v.Task.ID, v.Task.ServiceID)
				}
			case state.EventDeleteTask:
				// CLI allows deleting task
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				g.reconcileServicesOneNode(ctx, []string{v.Task.ServiceID}, v.Task.NodeID)
			}
		case <-g.stopChan:
			return nil
		}
	}
}