func HandleEngineEvents(taskEngine engine.TaskEngine, client api.ECSClient, saver statemanager.Saver) { statesaver = saver for { taskEvents, containerEvents := taskEngine.TaskEvents() for taskEvents != nil && containerEvents != nil { select { case event, open := <-containerEvents: if !open { containerEvents = nil log.Error("Container events closed") break } AddContainerEvent(event, client) case event, open := <-taskEvents: if !open { taskEvents = nil log.Crit("Task events closed") break } AddTaskEvent(event, client) } } } }
// FinalSave should be called immediately before exiting, and only before // exiting, in order to flush tasks to disk. It waits a short timeout for state // to settle if necessary. If unable to reach a steady-state and save within // this short timeout, it returns an error func FinalSave(saver statemanager.Saver, taskEngine engine.TaskEngine) error { engineDisabled := make(chan error) disableTimer := time.AfterFunc(engineDisableTimeout, func() { engineDisabled <- errors.New("Timed out waiting for TaskEngine to settle") }) go func() { log.Debug("Shutting down task engine") taskEngine.Disable() disableTimer.Stop() engineDisabled <- nil }() disableErr := <-engineDisabled stateSaved := make(chan error) saveTimer := time.AfterFunc(finalSaveTimeout, func() { stateSaved <- errors.New("Timed out trying to save to disk") }) go func() { log.Debug("Saving state before shutting down") stateSaved <- saver.ForceSave() saveTimer.Stop() }() saveErr := <-stateSaved if disableErr != nil || saveErr != nil { return utils.NewMultiError(disableErr, saveErr) } return nil }
func addNonstoppedTasks(tasks []*api.Task, taskEngine engine.TaskEngine) bool { allTasksOk := true for _, task := range tasks { if task.DesiredStatus == api.TaskStopped { continue } err := taskEngine.AddTask(task) if err != nil { log.Warn("Could not add task; taskengine probably disabled") // Don't ack allTasksOk = false } } return allTasksOk }
// AcsWsUrl returns the websocket url for ACS given the endpoint. func AcsWsUrl(endpoint, cluster, containerInstanceArn string, taskEngine engine.TaskEngine) string { acsUrl := endpoint if endpoint[len(endpoint)-1] != '/' { acsUrl += "/" } acsUrl += "ws" query := url.Values{} query.Set("clusterArn", cluster) query.Set("containerInstanceArn", containerInstanceArn) query.Set("agentHash", version.GitHashString()) query.Set("agentVersion", version.Version) query.Set("seqNum", strconv.FormatInt(SequenceNumber.Get(), 10)) if dockerVersion, err := taskEngine.Version(); err == nil { query.Set("dockerVersion", dockerVersion) } return acsUrl + "?" + query.Encode() }
// acsWsURL returns the websocket url for ACS given the endpoint func acsWsURL(endpoint, cluster, containerInstanceArn string, taskEngine engine.TaskEngine, acsSessionState sessionState) string { acsUrl := endpoint if endpoint[len(endpoint)-1] != '/' { acsUrl += "/" } acsUrl += "ws" query := url.Values{} query.Set("clusterArn", cluster) query.Set("containerInstanceArn", containerInstanceArn) query.Set("agentHash", version.GitHashString()) query.Set("agentVersion", version.Version) query.Set("seqNum", "1") if dockerVersion, err := taskEngine.Version(); err == nil { query.Set("dockerVersion", dockerVersion) } query.Set(sendCredentialsURLParameterName, acsSessionState.getSendCredentialsURLParameter()) return acsUrl + "?" + query.Encode() }
func _main() int { defer log.Flush() versionFlag := flag.Bool("version", false, "Print the agent version information and exit") acceptInsecureCert := flag.Bool("k", false, "Do not verify ssl certs") logLevel := flag.String("loglevel", "", "Loglevel: [<crit>|<error>|<warn>|<info>|<debug>]") flag.Parse() logger.SetLevel(*logLevel) log.Infof("Starting Agent: %v", version.String()) log.Info("Loading configuration") cfg, err := config.NewConfig() // Load cfg before doing 'versionFlag' so that it has the DOCKER_HOST // variable loaded if needed if *versionFlag { versionableEngine := engine.NewTaskEngine(cfg) version.PrintVersion(versionableEngine) return exitcodes.ExitSuccess } if err != nil { log.Criticalf("Error loading config: %v", err) // All required config values can be inferred from EC2 Metadata, so this error could be transient. return exitcodes.ExitError } log.Debug("Loaded config: " + cfg.String()) var currentEc2InstanceID, containerInstanceArn string var taskEngine engine.TaskEngine if cfg.Checkpoint { log.Info("Checkpointing is enabled. Attempting to load state") var previousCluster, previousEc2InstanceID, previousContainerInstanceArn string previousTaskEngine := engine.NewTaskEngine(cfg) // previousState is used to verify that our current runtime configuration is // compatible with our past configuration as reflected by our state-file previousState, err := initializeStateManager(cfg, previousTaskEngine, &previousCluster, &previousContainerInstanceArn, &previousEc2InstanceID, acshandler.SequenceNumber) if err != nil { log.Criticalf("Error creating state manager: %v", err) return exitcodes.ExitTerminal } err = previousState.Load() if err != nil { log.Criticalf("Error loading previously saved state: %v", err) return exitcodes.ExitTerminal } if previousCluster != "" { // TODO Handle default cluster in a sane and unified way across the codebase configuredCluster := cfg.Cluster if configuredCluster == "" { log.Debug("Setting cluster to default; none configured") configuredCluster = config.DEFAULT_CLUSTER_NAME } if previousCluster != configuredCluster { log.Criticalf("Data mismatch; saved cluster '%v' does not match configured cluster '%v'. Perhaps you want to delete the configured checkpoint file?", previousCluster, configuredCluster) return exitcodes.ExitTerminal } cfg.Cluster = previousCluster log.Infof("Restored cluster '%v'", cfg.Cluster) } if instanceIdentityDoc, err := ec2.GetInstanceIdentityDocument(); err == nil { currentEc2InstanceID = instanceIdentityDoc.InstanceId } else { log.Criticalf("Unable to access EC2 Metadata service to determine EC2 ID: %v", err) } if previousEc2InstanceID != "" && previousEc2InstanceID != currentEc2InstanceID { log.Warnf("Data mismatch; saved InstanceID '%v' does not match current InstanceID '%v'. Overwriting old datafile", previousEc2InstanceID, currentEc2InstanceID) // Reset taskEngine; all the other values are still default taskEngine = engine.NewTaskEngine(cfg) } else { // Use the values we loaded if there's no issue containerInstanceArn = previousContainerInstanceArn taskEngine = previousTaskEngine } } else { log.Info("Checkpointing not enabled; a new container instance will be created each time the agent is run") taskEngine = engine.NewTaskEngine(cfg) } stateManager, err := initializeStateManager(cfg, taskEngine, &cfg.Cluster, &containerInstanceArn, ¤tEc2InstanceID, acshandler.SequenceNumber) if err != nil { log.Criticalf("Error creating state manager: %v", err) return exitcodes.ExitTerminal } credentialProvider := auth.NewBasicAWSCredentialProvider() awsCreds := auth.ToSDK(credentialProvider) // Preflight request to make sure they're good if preflightCreds, err := awsCreds.Credentials(); err != nil || preflightCreds.AccessKeyID == "" { if preflightCreds != nil { log.Warnf("Error getting valid credentials (AKID %v): %v", preflightCreds.AccessKeyID, err) } else { log.Warnf("Error getting preflight credentials: %v", err) } } client := api.NewECSClient(awsCreds, cfg, *acceptInsecureCert) if containerInstanceArn == "" { log.Info("Registering Instance with ECS") containerInstanceArn, err = client.RegisterContainerInstance() if err != nil { log.Errorf("Error registering: %v", err) if retriable, ok := err.(utils.Retriable); ok && !retriable.Retry() { return exitcodes.ExitTerminal } return exitcodes.ExitError } log.Infof("Registration completed successfully. I am running as '%v' in cluster '%v'", containerInstanceArn, cfg.Cluster) // Save our shiny new containerInstanceArn stateManager.Save() } else { log.Infof("Restored from checkpoint file. I am running as '%v' in cluster '%v'", containerInstanceArn, cfg.Cluster) } // Begin listening to the docker daemon and saving changes taskEngine.SetSaver(stateManager) taskEngine.MustInit() go sighandlers.StartTerminationHandler(stateManager, taskEngine) // Agent introspection api go handlers.ServeHttp(&containerInstanceArn, taskEngine, cfg) // Start sending events to the backend go eventhandler.HandleEngineEvents(taskEngine, client, stateManager) log.Info("Beginning Polling for updates") err = acshandler.StartSession(containerInstanceArn, credentialProvider, cfg, taskEngine, client, stateManager, *acceptInsecureCert) if err != nil { log.Criticalf("Unretriable error starting communicating with ACS: %v", err) return exitcodes.ExitTerminal } log.Critical("ACS Session handler should never exit") return exitcodes.ExitError }
func _main() int { defer log.Flush() flagset := flag.NewFlagSet("Amazon ECS Agent", flag.ContinueOnError) versionFlag := flagset.Bool("version", false, "Print the agent version information and exit") logLevel := flagset.String("loglevel", "", "Loglevel: [<crit>|<error>|<warn>|<info>|<debug>]") acceptInsecureCert := flagset.Bool("k", false, "Disable SSL certificate verification. We do not recommend setting this option.") licenseFlag := flagset.Bool("license", false, "Print the LICENSE and NOTICE files and exit") blackholeEc2Metadata := flagset.Bool("blackhole-ec2-metadata", false, "Blackhole the EC2 Metadata requests. Setting this option can cause the ECS Agent to fail to work properly. We do not recommend setting this option") err := flagset.Parse(os.Args[1:]) if err != nil { return exitcodes.ExitTerminal } if *licenseFlag { license := utils.NewLicenseProvider() text, err := license.GetText() if err != nil { fmt.Fprintln(os.Stderr, err) return exitcodes.ExitError } fmt.Println(text) return exitcodes.ExitSuccess } logger.SetLevel(*logLevel) ec2MetadataClient := ec2.DefaultClient if *blackholeEc2Metadata { ec2MetadataClient = ec2.NewBlackholeEC2MetadataClient() } log.Infof("Starting Agent: %s", version.String()) if *acceptInsecureCert { log.Warn("SSL certificate verification disabled. This is not recommended.") } log.Info("Loading configuration") cfg, cfgErr := config.NewConfig(ec2MetadataClient) // Load cfg and create Docker client before doing 'versionFlag' so that it has the DOCKER_HOST variable loaded if needed clientFactory := dockerclient.NewFactory(cfg.DockerEndpoint) dockerClient, err := engine.NewDockerGoClient(clientFactory, *acceptInsecureCert, cfg) if err != nil { log.Criticalf("Error creating Docker client: %v", err) return exitcodes.ExitError } ctx := context.Background() // Create the DockerContainerChange event stream for tcs containerChangeEventStream := eventstream.NewEventStream(ContainerChangeEventStream, ctx) containerChangeEventStream.StartListening() // Create credentials manager. This will be used by the task engine and // the credentials handler credentialsManager := credentials.NewManager() // Create image manager. This will be used by the task engine for saving image states state := dockerstate.NewDockerTaskEngineState() imageManager := engine.NewImageManager(cfg, dockerClient, state) if *versionFlag { versionableEngine := engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state) version.PrintVersion(versionableEngine) return exitcodes.ExitSuccess } sighandlers.StartDebugHandler() if cfgErr != nil { log.Criticalf("Error loading config: %v", err) // All required config values can be inferred from EC2 Metadata, so this error could be transient. return exitcodes.ExitError } log.Debug("Loaded config: " + cfg.String()) var currentEc2InstanceID, containerInstanceArn string var taskEngine engine.TaskEngine if cfg.Checkpoint { log.Info("Checkpointing is enabled. Attempting to load state") var previousCluster, previousEc2InstanceID, previousContainerInstanceArn string previousTaskEngine := engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state) // previousState is used to verify that our current runtime configuration is // compatible with our past configuration as reflected by our state-file previousState, err := initializeStateManager(cfg, previousTaskEngine, &previousCluster, &previousContainerInstanceArn, &previousEc2InstanceID) if err != nil { log.Criticalf("Error creating state manager: %v", err) return exitcodes.ExitTerminal } err = previousState.Load() if err != nil { log.Criticalf("Error loading previously saved state: %v", err) return exitcodes.ExitTerminal } if previousCluster != "" { // TODO Handle default cluster in a sane and unified way across the codebase configuredCluster := cfg.Cluster if configuredCluster == "" { log.Debug("Setting cluster to default; none configured") configuredCluster = config.DefaultClusterName } if previousCluster != configuredCluster { log.Criticalf("Data mismatch; saved cluster '%v' does not match configured cluster '%v'. Perhaps you want to delete the configured checkpoint file?", previousCluster, configuredCluster) return exitcodes.ExitTerminal } cfg.Cluster = previousCluster log.Infof("Restored cluster '%v'", cfg.Cluster) } if instanceIdentityDoc, err := ec2MetadataClient.InstanceIdentityDocument(); err == nil { currentEc2InstanceID = instanceIdentityDoc.InstanceId } else { log.Criticalf("Unable to access EC2 Metadata service to determine EC2 ID: %v", err) } if previousEc2InstanceID != "" && previousEc2InstanceID != currentEc2InstanceID { log.Warnf("Data mismatch; saved InstanceID '%s' does not match current InstanceID '%s'. Overwriting old datafile", previousEc2InstanceID, currentEc2InstanceID) // Reset taskEngine; all the other values are still default taskEngine = engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state) } else { // Use the values we loaded if there's no issue containerInstanceArn = previousContainerInstanceArn taskEngine = previousTaskEngine } } else { log.Info("Checkpointing not enabled; a new container instance will be created each time the agent is run") taskEngine = engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state) } stateManager, err := initializeStateManager(cfg, taskEngine, &cfg.Cluster, &containerInstanceArn, ¤tEc2InstanceID) if err != nil { log.Criticalf("Error creating state manager: %v", err) return exitcodes.ExitTerminal } capabilities := taskEngine.Capabilities() // We instantiate our own credentialProvider for use in acs/tcs. This tries // to mimic roughly the way it's instantiated by the SDK for a default // session. credentialProvider := defaults.CredChain(defaults.Config(), defaults.Handlers()) // Preflight request to make sure they're good if preflightCreds, err := credentialProvider.Get(); err != nil || preflightCreds.AccessKeyID == "" { log.Warnf("Error getting valid credentials (AKID %s): %v", preflightCreds.AccessKeyID, err) } client := api.NewECSClient(credentialProvider, cfg, httpclient.New(api.RoundtripTimeout, *acceptInsecureCert), ec2MetadataClient) if containerInstanceArn == "" { log.Info("Registering Instance with ECS") containerInstanceArn, err = client.RegisterContainerInstance("", capabilities) if err != nil { log.Errorf("Error registering: %v", err) if retriable, ok := err.(utils.Retriable); ok && !retriable.Retry() { return exitcodes.ExitTerminal } return exitcodes.ExitError } log.Infof("Registration completed successfully. I am running as '%s' in cluster '%s'", containerInstanceArn, cfg.Cluster) // Save our shiny new containerInstanceArn stateManager.Save() } else { log.Infof("Restored from checkpoint file. I am running as '%s' in cluster '%s'", containerInstanceArn, cfg.Cluster) _, err = client.RegisterContainerInstance(containerInstanceArn, capabilities) if err != nil { log.Errorf("Error re-registering: %v", err) if awserr, ok := err.(awserr.Error); ok && api.IsInstanceTypeChangedError(awserr) { log.Criticalf("The current instance type does not match the registered instance type. Please revert the instance type change, or alternatively launch a new instance. Error: %v", err) return exitcodes.ExitTerminal } return exitcodes.ExitError } } // Begin listening to the docker daemon and saving changes taskEngine.SetSaver(stateManager) imageManager.SetSaver(stateManager) taskEngine.MustInit() // start of the periodic image cleanup process if !cfg.ImageCleanupDisabled { go imageManager.StartImageCleanupProcess(ctx) } go sighandlers.StartTerminationHandler(stateManager, taskEngine) // Agent introspection api go handlers.ServeHttp(&containerInstanceArn, taskEngine, cfg) // Start serving the endpoint to fetch IAM Role credentials go credentialshandler.ServeHttp(credentialsManager, containerInstanceArn, cfg) // Start sending events to the backend go eventhandler.HandleEngineEvents(taskEngine, client, stateManager) deregisterInstanceEventStream := eventstream.NewEventStream(DeregisterContainerInstanceEventStream, ctx) deregisterInstanceEventStream.StartListening() telemetrySessionParams := tcshandler.TelemetrySessionParams{ ContainerInstanceArn: containerInstanceArn, CredentialProvider: credentialProvider, Cfg: cfg, DeregisterInstanceEventStream: deregisterInstanceEventStream, ContainerChangeEventStream: containerChangeEventStream, DockerClient: dockerClient, AcceptInvalidCert: *acceptInsecureCert, EcsClient: client, TaskEngine: taskEngine, } // Start metrics session in a go routine go tcshandler.StartMetricsSession(telemetrySessionParams) log.Info("Beginning Polling for updates") err = acshandler.StartSession(ctx, acshandler.StartSessionArguments{ AcceptInvalidCert: *acceptInsecureCert, Config: cfg, DeregisterInstanceEventStream: deregisterInstanceEventStream, ContainerInstanceArn: containerInstanceArn, CredentialProvider: credentialProvider, ECSClient: client, StateManager: stateManager, TaskEngine: taskEngine, CredentialsManager: credentialsManager, }) if err != nil { log.Criticalf("Unretriable error starting communicating with ACS: %v", err) return exitcodes.ExitTerminal } log.Critical("ACS Session handler should never exit") return exitcodes.ExitError }