Example #1
// SyncServices syncs the services of the task that the executor is running with
// Consul
func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error {
	e.logger.Printf("[INFO] executor: registering services")
	e.consulCtx = ctx
	if e.consulSyncer == nil {
		cs, err := consul.NewSyncer(ctx.ConsulConfig, e.shutdownCh, e.logger)
		if err != nil {
			return err
		}
		e.consulSyncer = cs
		go e.consulSyncer.Run()
	}
	e.interpolateServices(e.ctx.Task)
	e.consulSyncer.SetDelegatedChecks(e.createCheckMap(), e.createCheck)
	e.consulSyncer.SetAddrFinder(e.ctx.Task.FindHostAndPortFor)
	domain := consul.NewExecutorDomain(e.ctx.AllocID, e.ctx.Task.Name)
	serviceMap := generateServiceKeys(e.ctx.AllocID, e.ctx.Task.Services)
	e.consulSyncer.SetServices(domain, serviceMap)
	return nil
}
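
SyncServices above creates the long-lived syncer lazily on first use and then pushes the current task's service set under a per-task domain key on every call. Below is a minimal, self-contained sketch of that create-once/reconfigure-on-each-call shape; the Service, Syncer, and Executor types here are invented stand-ins for illustration, not the real consul.Syncer API.

package main

import "fmt"

// Hypothetical stand-ins for illustration only.
type Service struct{ Name string }

type Syncer struct {
	services map[string][]Service // keyed by a per-task "domain"
}

func NewSyncer() *Syncer {
	return &Syncer{services: make(map[string][]Service)}
}

// SetServices replaces the full service set for one domain.
func (s *Syncer) SetServices(domain string, svcs []Service) {
	s.services[domain] = svcs
}

type Executor struct {
	syncer *Syncer // nil until the first sync
}

// Sync mirrors the shape of SyncServices: create the syncer once,
// then overwrite the domain's services on every call.
func (e *Executor) Sync(domain string, svcs []Service) {
	if e.syncer == nil {
		e.syncer = NewSyncer()
	}
	e.syncer.SetServices(domain, svcs)
}

func main() {
	e := &Executor{}
	e.Sync("alloc-1234/web", []Service{{Name: "web"}})
	e.Sync("alloc-1234/web", []Service{{Name: "web"}, {Name: "metrics"}})
	fmt.Println(len(e.syncer.services["alloc-1234/web"])) // 2
}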
Example #2
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
	e.ctx.Task = task

	// Updating Log Config
	fileSize := int64(task.LogConfig.MaxFileSizeMB) * 1024 * 1024
	e.lro.MaxFiles = task.LogConfig.MaxFiles
	e.lro.FileSize = fileSize
	e.lre.MaxFiles = task.LogConfig.MaxFiles
	e.lre.FileSize = fileSize

	// Re-syncing task with Consul agent
	if e.consulSyncer != nil {
		e.interpolateServices(e.ctx.Task)
		domain := consul.NewExecutorDomain(e.ctx.AllocID, task.Name)
		serviceMap := generateServiceKeys(e.ctx.AllocID, task.Services)
		e.consulSyncer.SetServices(domain, serviceMap)
	}
	return nil
}
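
A rough sketch of the UpdateTask flow, using hypothetical rotator/executor types rather than Nomad's real ones: the log limits are always refreshed (MB converted to bytes once and applied to both the stdout and stderr rotators), while Consul is only re-synced when a syncer already exists from an earlier SyncServices call.

package main

import "fmt"

// Hypothetical stand-ins for illustration; not Nomad's real types.
type LogConfig struct {
	MaxFiles      int
	MaxFileSizeMB int
}

type rotator struct {
	MaxFiles int
	FileSize int64 // bytes
}

type syncer struct{ services map[string][]string }

type executor struct {
	stdout, stderr *rotator
	syncer         *syncer // nil until the first service sync
}

func (e *executor) UpdateTask(domain string, lc LogConfig, services []string) {
	// Convert the configured limit from MB to bytes once, then apply it
	// to both rotators.
	fileSize := int64(lc.MaxFileSizeMB) * 1024 * 1024
	e.stdout.MaxFiles, e.stdout.FileSize = lc.MaxFiles, fileSize
	e.stderr.MaxFiles, e.stderr.FileSize = lc.MaxFiles, fileSize

	// Re-register with Consul only if a syncer was created earlier.
	if e.syncer != nil {
		e.syncer.services[domain] = services
	}
}

func main() {
	e := &executor{stdout: &rotator{}, stderr: &rotator{}}
	e.UpdateTask("alloc-1234/web", LogConfig{MaxFiles: 3, MaxFileSizeMB: 10}, []string{"web"})
	fmt.Println(e.stdout.FileSize) // 10485760
}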
Example #3
// setupConsulSyncer creates a Client-mode consul.Syncer that periodically
// executes its registered callbacks on a fixed interval.
//
// TODO(sean@): this could eventually be moved to a priority queue and give
// each task an interval, but that is not necessary at this time.
func (c *Client) setupConsulSyncer() error {
	// The bootstrapFn callback handler periodically polls Consul to look
	// up the Nomad Servers registered there.  In the event the heartbeat
	// deadline has been exceeded and this Client is orphaned from its
	// servers, this lets the Client reattach to its cluster and
	// automatically recover from a detached state.
	bootstrapFn := func() error {
		now := time.Now()
		c.heartbeatLock.Lock()

		// If the last heartbeat came from a server with a leader and
		// the pull deadline has not yet passed, the heartbeat path is
		// still healthy, so skip polling Consul this round.
		if atomic.LoadInt32(&c.lastHeartbeatFromQuorum) == 1 && now.Before(c.consulPullHeartbeatDeadline) {
			c.heartbeatLock.Unlock()
			return nil
		}
		c.heartbeatLock.Unlock()

		consulCatalog := c.consulSyncer.ConsulClient().Catalog()
		dcs, err := consulCatalog.Datacenters()
		if err != nil {
			return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
		}
		if len(dcs) > 2 {
			// Query the local DC first, then shuffle the
			// remaining DCs.  Future heartbeats will cause Nomad
			// Clients to fixate on their local datacenter so
			// it's okay to talk with remote DCs.  If no Nomad
			// servers are available within datacenterQueryLimit,
			// the next heartbeat will pick a new set of servers
			// so it's okay.
			nearestDC := dcs[0]
			otherDCs := make([]string, 0, len(dcs))
			otherDCs = append(otherDCs, dcs[1:lib.MinInt(len(dcs), datacenterQueryLimit)]...)
			shuffleStrings(otherDCs)

			dcs = append([]string{nearestDC}, otherDCs...)
		}

		// Forward RPCs to our region
		nomadRPCArgs := structs.GenericRequest{
			QueryOptions: structs.QueryOptions{
				Region: c.Region(),
			},
		}

		nomadServerServiceName := c.config.ConsulConfig.ServerServiceName
		var mErr multierror.Error
		const defaultMaxNumNomadServers = 8
		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
		c.logger.Printf("[DEBUG] client.consul: bootstrap contacting following Consul DCs: %+q", dcs)
		for _, dc := range dcs {
			consulOpts := &consulapi.QueryOptions{
				AllowStale: true,
				Datacenter: dc,
				Near:       "_agent",
				WaitTime:   consul.DefaultQueryWaitDuration,
			}
			consulServices, _, err := consulCatalog.Service(nomadServerServiceName, consul.ServiceTagRPC, consulOpts)
			if err != nil {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("unable to query service %+q from Consul datacenter %+q: %v", nomadServerServiceName, dc, err))
				continue
			}

			for _, s := range consulServices {
				port := strconv.FormatInt(int64(s.ServicePort), 10)
				addr := s.ServiceAddress
				if addr == "" {
					addr = s.Address
				}
				serverAddr := net.JoinHostPort(addr, port)
				serverEndpoint, err := rpcproxy.NewServerEndpoint(serverAddr)
				if err != nil {
					mErr.Errors = append(mErr.Errors, err)
					continue
				}
				var peers []string
				if err := c.connPool.RPC(c.Region(), serverEndpoint.Addr, c.RPCMajorVersion(), "Status.Peers", nomadRPCArgs, &peers); err != nil {
					mErr.Errors = append(mErr.Errors, err)
					continue
				}
				// Successfully received the Server peers list of the correct
				// region
				if len(peers) != 0 {
					nomadServerServices = append(nomadServerServices, peers...)
					break
				}
			}
			// Break if at least one Nomad Server was successfully pinged
			if len(nomadServerServices) > 0 {
				break
			}
		}
		if len(nomadServerServices) == 0 {
			if len(mErr.Errors) > 0 {
				return mErr.ErrorOrNil()
			}

			return fmt.Errorf("no Nomad Servers advertising service %q in Consul datacenters: %q", nomadServerServiceName, dcs)
		}

		// Log the servers we are adding
		c.logger.Printf("[DEBUG] client.consul: bootstrap adding following Servers: %q", nomadServerServices)

		c.heartbeatLock.Lock()
		if atomic.LoadInt32(&c.lastHeartbeatFromQuorum) == 1 && now.Before(c.consulPullHeartbeatDeadline) {
			c.heartbeatLock.Unlock()
			// Common, healthy path
			if err := c.rpcProxy.SetBackupServers(nomadServerServices); err != nil {
				return fmt.Errorf("client.consul: unable to set backup servers: %v", err)
			}
		} else {
			c.heartbeatLock.Unlock()
			// If this Client is talking with a Server that
			// doesn't have a leader, and we have exceeded the
			// consulPullHeartbeatDeadline, change the call from
			// SetBackupServers() to calling AddPrimaryServer()
			// in order to allow the Clients to randomly begin
			// considering all known Nomad servers and
			// eventually, hopefully, find their way to a Nomad
			// Server that has quorum (assuming Consul has a
			// server list that is in the majority).
			for _, s := range nomadServerServices {
				c.rpcProxy.AddPrimaryServer(s)
			}
		}

		return nil
	}
	if c.config.ConsulConfig.ClientAutoJoin {
		c.consulSyncer.AddPeriodicHandler("Nomad Client Fallback Server Handler", bootstrapFn)
	}

	consulServicesReaperFn := func() error {
		const estInitialExecutorDomains = 8

		// Create the domains to keep and add the server and client
		domains := make([]consul.ServiceDomain, 2, estInitialExecutorDomains)
		domains[0] = consul.ServerDomain
		domains[1] = consul.ClientDomain

		for allocID, ar := range c.getAllocRunners() {
			ar.taskStatusLock.RLock()
			taskStates := copyTaskStates(ar.taskStates)
			ar.taskStatusLock.RUnlock()
			for taskName, taskState := range taskStates {
				// Only keep running tasks
				if taskState.State == structs.TaskStateRunning {
					d := consul.NewExecutorDomain(allocID, taskName)
					domains = append(domains, d)
				}
			}
		}

		return c.consulSyncer.ReapUnmatched(domains)
	}
	if c.config.ConsulConfig.AutoAdvertise {
		c.consulSyncer.AddPeriodicHandler("Nomad Client Services Sync Handler", consulServicesReaperFn)
	}

	return nil
}
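
The Syncer in Example #3 is described as periodically executing registered callbacks (AddPeriodicHandler) on a fixed interval. Below is a minimal sketch of such a handler registry under that assumption; the periodicRunner type and its methods are invented for illustration and are not the real consul.Syncer implementation.

package main

import (
	"log"
	"time"
)

// periodicRunner invokes every registered handler once per interval.
type periodicRunner struct {
	interval time.Duration
	handlers map[string]func() error
	shutdown chan struct{}
}

func newPeriodicRunner(interval time.Duration) *periodicRunner {
	return &periodicRunner{
		interval: interval,
		handlers: make(map[string]func() error),
		shutdown: make(chan struct{}),
	}
}

// AddHandler registers a named callback to run on every tick.
func (r *periodicRunner) AddHandler(name string, fn func() error) {
	r.handlers[name] = fn
}

// Run blocks, invoking the registered handlers on each tick until the
// shutdown channel is closed. Handler errors are logged, not fatal.
func (r *periodicRunner) Run() {
	t := time.NewTicker(r.interval)
	defer t.Stop()
	for {
		select {
		case <-t.C:
			for name, fn := range r.handlers {
				if err := fn(); err != nil {
					log.Printf("[WARN] periodic handler %q failed: %v", name, err)
				}
			}
		case <-r.shutdown:
			return
		}
	}
}

func main() {
	r := newPeriodicRunner(100 * time.Millisecond)
	r.AddHandler("demo", func() error { log.Println("tick"); return nil })
	go r.Run()
	time.Sleep(350 * time.Millisecond)
	close(r.shutdown)
}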