// SyncServices syncs the services of the task that the executor is running
// with Consul.
func (e *UniversalExecutor) SyncServices(ctx *ConsulContext) error {
	e.logger.Printf("[INFO] executor: registering services")
	e.consulCtx = ctx
	if e.consulSyncer == nil {
		cs, err := consul.NewSyncer(ctx.ConsulConfig, e.shutdownCh, e.logger)
		if err != nil {
			return err
		}
		e.consulSyncer = cs
		go e.consulSyncer.Run()
	}

	e.interpolateServices(e.ctx.Task)
	e.consulSyncer.SetDelegatedChecks(e.createCheckMap(), e.createCheck)
	e.consulSyncer.SetAddrFinder(e.ctx.Task.FindHostAndPortFor)
	domain := consul.NewExecutorDomain(e.ctx.AllocID, e.ctx.Task.Name)
	serviceMap := generateServiceKeys(e.ctx.AllocID, e.ctx.Task.Services)
	e.consulSyncer.SetServices(domain, serviceMap)
	return nil
}
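// generateServiceKeys is called above and in UpdateTask below but is not
// shown in this section. A minimal sketch of what it might look like
// follows; the consul.ServiceKey type and the consul.GenerateServiceKey
// helper are assumptions inferred from how SetServices is called here, not
// confirmed API.

// generateServiceKeys takes a list of interpolated Nomad services and
// returns a map of service keys to the services they identify. The allocID
// parameter is accepted for symmetry with NewExecutorDomain even though this
// sketch does not use it.
func generateServiceKeys(allocID string, services []*structs.Service) map[consul.ServiceKey]*structs.Service {
	keys := make(map[consul.ServiceKey]*structs.Service, len(services))
	for _, service := range services {
		keys[consul.GenerateServiceKey(service)] = service
	}
	return keys
}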
// UpdateTask updates the executor's view of a running task: the log rotators
// are resized to the new log config, and the task's services are re-synced
// with the Consul agent.
func (e *UniversalExecutor) UpdateTask(task *structs.Task) error {
	e.ctx.Task = task

	// Update the log rotators with the new log config
	fileSize := int64(task.LogConfig.MaxFileSizeMB * 1024 * 1024)
	e.lro.MaxFiles = task.LogConfig.MaxFiles
	e.lro.FileSize = fileSize
	e.lre.MaxFiles = task.LogConfig.MaxFiles
	e.lre.FileSize = fileSize

	// Re-sync the task's services with the Consul agent
	if e.consulSyncer != nil {
		e.interpolateServices(e.ctx.Task)
		domain := consul.NewExecutorDomain(e.ctx.AllocID, task.Name)
		serviceMap := generateServiceKeys(e.ctx.AllocID, task.Services)
		e.consulSyncer.SetServices(domain, serviceMap)
	}
	return nil
}
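// A hypothetical driver-side call to UpdateTask, for illustration only: the
// executor honors changes to the task's log config and services, so the
// caller passes a task that differs in exactly those fields. The Copy method
// and the field values below are assumptions made for the example.
//
//	updated := runningTask.Copy()
//	updated.LogConfig.MaxFiles = 5
//	updated.LogConfig.MaxFileSizeMB = 20
//	if err := executor.UpdateTask(updated); err != nil {
//		log.Printf("[ERR] driver: failed to update task: %v", err)
//	}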
// setupConsulSyncer creates a Client-mode consul.Syncer which periodically
// executes callbacks on a fixed interval.
//
// TODO(sean@): this could eventually be moved to a priority queue and give
// each task an interval, but that is not necessary at this time.
func (c *Client) setupConsulSyncer() error {
	// The bootstrapFn callback handler is used to periodically poll
	// Consul to look up the Nomad Servers in Consul. In the event the
	// heartbeat deadline has been exceeded and this Client is orphaned
	// from its servers, periodically poll Consul to reattach this Client
	// to its cluster and automatically recover from a detached state.
	bootstrapFn := func() error {
		now := time.Now()

		c.heartbeatLock.Lock()
		// If the last heartbeat didn't contain a leader, give the
		// Nomad server this Agent is talking to one more attempt at
		// providing a heartbeat that does contain a leader.
		if atomic.LoadInt32(&c.lastHeartbeatFromQuorum) == 1 && now.Before(c.consulPullHeartbeatDeadline) {
			c.heartbeatLock.Unlock()
			return nil
		}
		c.heartbeatLock.Unlock()

		consulCatalog := c.consulSyncer.ConsulClient().Catalog()
		dcs, err := consulCatalog.Datacenters()
		if err != nil {
			return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err)
		}
		if len(dcs) > 2 {
			// Query the local DC first, then shuffle the
			// remaining DCs. Future heartbeats will cause Nomad
			// Clients to fixate on their local datacenter, so
			// it's okay to talk with remote DCs. If no Nomad
			// servers are available within the first
			// datacenterQueryLimit datacenters, the next
			// heartbeat will pick a new set of servers, so
			// that's okay too.
			nearestDC := dcs[0]
			otherDCs := make([]string, 0, len(dcs))
			otherDCs = append(otherDCs, dcs[1:lib.MinInt(len(dcs), datacenterQueryLimit)]...)
			shuffleStrings(otherDCs)

			dcs = append([]string{nearestDC}, otherDCs...)
		}

		// Forward RPCs to our region
		nomadRPCArgs := structs.GenericRequest{
			QueryOptions: structs.QueryOptions{
				Region: c.Region(),
			},
		}

		nomadServerServiceName := c.config.ConsulConfig.ServerServiceName
		var mErr multierror.Error
		const defaultMaxNumNomadServers = 8
		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
		c.logger.Printf("[DEBUG] client.consul: bootstrap contacting following Consul DCs: %+q", dcs)
		for _, dc := range dcs {
			consulOpts := &consulapi.QueryOptions{
				AllowStale: true,
				Datacenter: dc,
				Near:       "_agent",
				WaitTime:   consul.DefaultQueryWaitDuration,
			}
			consulServices, _, err := consulCatalog.Service(nomadServerServiceName, consul.ServiceTagRPC, consulOpts)
			if err != nil {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("unable to query service %+q from Consul datacenter %+q: %v", nomadServerServiceName, dc, err))
				continue
			}

			for _, s := range consulServices {
				port := strconv.FormatInt(int64(s.ServicePort), 10)
				addr := s.ServiceAddress
				if addr == "" {
					addr = s.Address
				}
				serverAddr := net.JoinHostPort(addr, port)
				serverEndpoint, err := rpcproxy.NewServerEndpoint(serverAddr)
				if err != nil {
					mErr.Errors = append(mErr.Errors, err)
					continue
				}

				var peers []string
				if err := c.connPool.RPC(c.Region(), serverEndpoint.Addr, c.RPCMajorVersion(), "Status.Peers", nomadRPCArgs, &peers); err != nil {
					mErr.Errors = append(mErr.Errors, err)
					continue
				}

				// Successfully received the Server peers list
				// of the correct region
				if len(peers) != 0 {
					nomadServerServices = append(nomadServerServices, peers...)
					break
				}
			}

			// Break if at least one Nomad Server was successfully pinged
			if len(nomadServerServices) > 0 {
				break
			}
		}
		if len(nomadServerServices) == 0 {
			if len(mErr.Errors) > 0 {
				return mErr.ErrorOrNil()
			}
			return fmt.Errorf("no Nomad Servers advertising service %q in Consul datacenters: %q", nomadServerServiceName, dcs)
		}

		// Log the servers we are adding
		c.logger.Printf("[DEBUG] client.consul: bootstrap adding following Servers: %q", nomadServerServices)

		c.heartbeatLock.Lock()
		if atomic.LoadInt32(&c.lastHeartbeatFromQuorum) == 1 && now.Before(c.consulPullHeartbeatDeadline) {
			c.heartbeatLock.Unlock()
			// Common, healthy path
			if err := c.rpcProxy.SetBackupServers(nomadServerServices); err != nil {
				return fmt.Errorf("client.consul: unable to set backup servers: %v", err)
			}
		} else {
			c.heartbeatLock.Unlock()
			// If this Client is talking with a Server that
			// doesn't have a leader, and we have exceeded the
			// consulPullHeartbeatDeadline, call
			// AddPrimaryServer() instead of SetBackupServers()
			// in order to allow the Clients to randomly begin
			// considering all known Nomad servers and
			// eventually, hopefully, find their way to a Nomad
			// Server that has quorum (assuming Consul has a
			// server list that is in the majority).
			for _, s := range nomadServerServices {
				c.rpcProxy.AddPrimaryServer(s)
			}
		}

		return nil
	}
	if c.config.ConsulConfig.ClientAutoJoin {
		c.consulSyncer.AddPeriodicHandler("Nomad Client Fallback Server Handler", bootstrapFn)
	}

	consulServicesReaperFn := func() error {
		const estInitialExecutorDomains = 8

		// Create the set of domains to keep, seeded with the server
		// and client domains
		domains := make([]consul.ServiceDomain, 2, estInitialExecutorDomains)
		domains[0] = consul.ServerDomain
		domains[1] = consul.ClientDomain

		for allocID, ar := range c.getAllocRunners() {
			ar.taskStatusLock.RLock()
			taskStates := copyTaskStates(ar.taskStates)
			ar.taskStatusLock.RUnlock()
			for taskName, taskState := range taskStates {
				// Only keep the domains of running tasks
				if taskState.State == structs.TaskStateRunning {
					d := consul.NewExecutorDomain(allocID, taskName)
					domains = append(domains, d)
				}
			}
		}
		return c.consulSyncer.ReapUnmatched(domains)
	}
	if c.config.ConsulConfig.AutoAdvertise {
		c.consulSyncer.AddPeriodicHandler("Nomad Client Services Sync Handler", consulServicesReaperFn)
	}

	return nil
}
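// bootstrapFn above relies on a shuffleStrings helper, and the services
// reaper on a copyTaskStates helper, neither of which appears in this
// section. The sketches below show plausible minimal implementations; the
// use of math/rand (and where it gets seeded) and the existence of a Copy
// method on structs.TaskState are assumptions, not confirmed details of the
// real helpers.

// shuffleStrings randomly permutes the given list in place using a
// Fisher-Yates shuffle.
func shuffleStrings(list []string) {
	for i := len(list) - 1; i > 0; i-- {
		j := rand.Intn(i + 1) // requires "math/rand"
		list[i], list[j] = list[j], list[i]
	}
}

// copyTaskStates returns a copy of the passed task-state map so that callers
// can release taskStatusLock before iterating over the states.
func copyTaskStates(states map[string]*structs.TaskState) map[string]*structs.TaskState {
	copied := make(map[string]*structs.TaskState, len(states))
	for task, state := range states {
		copied[task] = state.Copy() // assumes TaskState has a Copy method
	}
	return copied
}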