func mocks(t *testing.T, cfg *config.Config) (*gomock.Controller, *mock_engine.MockDockerClient, engine.TaskEngine) {
	ctrl := gomock.NewController(t)
	client := mock_engine.NewMockDockerClient(ctrl)
	taskEngine := engine.NewTaskEngine(cfg)
	taskEngine.(*engine.DockerTaskEngine).SetDockerClient(client)
	return ctrl, client, taskEngine
}
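// Hypothetical caller for the mocks helper above (a sketch for illustration,
// not a test from the original suite): it wires a mock Docker client into a
// fresh task engine and tears the controller down at the end of the test.
func TestMocksHelperUsage(t *testing.T) {
	ctrl, client, taskEngine := mocks(t, &config.Config{})
	defer ctrl.Finish()
	// Expectations would be set on `client` here before exercising taskEngine.
	_, _ = client, taskEngine
}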
func TestLoadsV1DataCorrectly(t *testing.T) {
	cfg := &config.Config{DataDir: filepath.Join(".", "testdata", "v1", "1")}
	taskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil, nil)
	var containerInstanceArn, cluster, savedInstanceID string
	var sequenceNumber int64

	stateManager, err := statemanager.NewStateManager(cfg,
		statemanager.AddSaveable("TaskEngine", taskEngine),
		statemanager.AddSaveable("ContainerInstanceArn", &containerInstanceArn),
		statemanager.AddSaveable("Cluster", &cluster),
		statemanager.AddSaveable("EC2InstanceID", &savedInstanceID),
		statemanager.AddSaveable("SeqNum", &sequenceNumber),
	)
	if err != nil {
		t.Fatal(err)
	}
	err = stateManager.Load()
	if err != nil {
		t.Fatal("Error loading state", err)
	}

	if cluster != "test" {
		t.Fatal("Wrong cluster: " + cluster)
	}
	if sequenceNumber != 0 {
		t.Fatal("v1 should give a sequence number of 0")
	}

	tasks, err := taskEngine.ListTasks()
	if err != nil {
		t.Fatal(err)
	}
	var deadTask *api.Task
	for _, task := range tasks {
		if task.Arn == "arn:aws:ecs:us-west-2:1234567890:task/f44b4fc9-adb0-4f4f-9dff-871512310588" {
			deadTask = task
		}
	}
	if deadTask == nil {
		t.Fatal("Could not find task expected to be in state")
	}
	if deadTask.SentStatus != api.TaskStopped {
		t.Fatal("task dead should be stopped now")
	}
	if deadTask.Containers[0].SentStatus != api.ContainerStopped {
		t.Fatal("container Dead should go to stopped")
	}
	if deadTask.Containers[0].DesiredStatus != api.ContainerStopped {
		t.Fatal("container Dead should go to stopped")
	}
	if deadTask.Containers[0].KnownStatus != api.ContainerStopped {
		t.Fatal("container Dead should go to stopped")
	}
	expected, _ := time.Parse(time.RFC3339, "2015-04-28T17:29:48.129140193Z")
	if deadTask.KnownStatusTime != expected {
		t.Fatal("Time was not correct")
	}
}
func backendMappingTestHelper(containers []*api.Container, testTask *api.Task, desiredStatus string, knownStatus string, t *testing.T) {
	taskEngine := engine.NewTaskEngine(&config.Config{})
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(testTask)
	dockerTaskEngine.State().AddContainer(&api.DockerContainer{DockerId: "docker1", DockerName: "someName", Container: containers[0]}, testTask)

	taskHandler := TasksV1RequestHandlerMaker(taskEngine)
	server := httptest.NewServer(http.HandlerFunc(taskHandler))
	defer server.Close()

	resp, err := http.Get(server.URL + "/v1/tasks")
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	var taskResponse TasksResponse
	json.Unmarshal(body, &taskResponse)
	tasks := taskResponse.Tasks
	if tasks[0].DesiredStatus != desiredStatus {
		t.Error("Incorrect desired status in response: ", tasks[0].DesiredStatus)
	}
	if tasks[0].KnownStatus != knownStatus {
		t.Error("Incorrect known status in response: ", tasks[0].KnownStatus)
	}
}
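// A hypothetical table-style caller for backendMappingTestHelper (the actual
// status-mapping cases are not part of this excerpt; the container/task shape
// and the "RUNNING" response strings are assumptions for illustration):
func TestBackendMappingSketch(t *testing.T) {
	containers := []*api.Container{
		&api.Container{
			Name:          "c1",
			KnownStatus:   api.ContainerRunning,
			DesiredStatus: api.ContainerRunning,
		},
	}
	testTask := &api.Task{
		Arn:           "task1",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	backendMappingTestHelper(containers, testTask, "RUNNING", "RUNNING", t)
}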
func TestUndownloadedUpdate(t *testing.T) {
	u, ctrl, cfg, _, mockacs, _ := mocks(t, nil)
	defer ctrl.Finish()

	mockacs.EXPECT().MakeRequest(&nackRequestMatcher{&ecsacs.NackRequest{
		Cluster:           ptr("cluster").(*string),
		ContainerInstance: ptr("containerInstance").(*string),
		MessageId:         ptr("mid").(*string),
	}})

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid").(*string),
	})
}
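// The ptr helper used by these updater tests (and the updater package's own
// mocks helper, which is distinct from the engine mocks helper above) is not
// part of this excerpt. A minimal sketch of ptr, assuming it returns an
// interface{} holding a pointer to a copy of its argument, which call sites
// then type-assert (requires the "reflect" import):
func ptr(obj interface{}) interface{} {
	// Allocate a new value of the same type and copy obj into it, so the
	// returned pointer does not alias the caller's variable.
	p := reflect.New(reflect.TypeOf(obj))
	p.Elem().Set(reflect.ValueOf(obj))
	return p.Interface()
}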
func TestFullUpdateFlow(t *testing.T) {
	u, ctrl, cfg, mockfs, mockacs, mockhttp := mocks(t, nil)
	defer ctrl.Finish()

	var writtenFile bytes.Buffer
	gomock.InOrder(
		mockhttp.EXPECT().RoundTrip(mock_http.NewHTTPSimpleMatcher("GET", "https://s3.amazonaws.com/amazon-ecs-agent/update.tar")).Return(mock_http.SuccessResponse("update-tar-data"), nil),
		mockfs.EXPECT().Create(gomock.Any()).Return(mock_os.NopReadWriteCloser(&writtenFile), nil),
		mockfs.EXPECT().WriteFile("/tmp/test/desired-image", gomock.Any(), gomock.Any()).Return(nil),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("mid").(*string),
		})),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("mid2").(*string),
		})),
		mockfs.EXPECT().Exit(exitcodes.ExitUpdate),
	)

	u.stageUpdateHandler()(&ecsacs.StageUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("6caeef375a080e3241781725b357890758d94b15d7ce63f6b2ff1cb5589f2007").(*string),
		},
	})
	if writtenFile.String() != "update-tar-data" {
		t.Error("Incorrect data written")
	}

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid2").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("c54518806ff4d14b680c35784113e1e7478491fe").(*string),
		},
	})
}
func TestPerformUpdateWithUpdatesDisabled(t *testing.T) {
	u, ctrl, cfg, _, mockacs, _ := mocks(t, &config.Config{
		UpdatesEnabled: false,
	})
	defer ctrl.Finish()

	mockacs.EXPECT().MakeRequest(&nackRequestMatcher{&ecsacs.NackRequest{
		Cluster:           ptr("cluster").(*string),
		ContainerInstance: ptr("containerInstance").(*string),
		MessageId:         ptr("mid").(*string),
		Reason:            ptr("Updates are disabled").(*string),
	}})

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg, nil, nil, nil, nil, nil))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("c54518806ff4d14b680c35784113e1e7478491fe").(*string),
		},
	})
}
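// nackRequestMatcher, used by the NACK expectations in the tests above, is
// assumed to be a gomock.Matcher comparing the fields of an ecsacs.NackRequest;
// a minimal sketch (the real matcher may compare more or fewer fields):
type nackRequestMatcher struct {
	*ecsacs.NackRequest
}

func (m *nackRequestMatcher) Matches(x interface{}) bool {
	req, ok := x.(*ecsacs.NackRequest)
	if !ok {
		return false
	}
	// Only compare Reason when the expectation specifies one.
	if m.Reason != nil && (req.Reason == nil || *req.Reason != *m.Reason) {
		return false
	}
	return *req.Cluster == *m.Cluster &&
		*req.ContainerInstance == *m.ContainerInstance &&
		*req.MessageId == *m.MessageId
}

func (m *nackRequestMatcher) String() string {
	return fmt.Sprintf("NackRequest matching %v", m.NackRequest)
}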
func _main() int {
	defer log.Flush()
	versionFlag := flag.Bool("version", false, "Print the agent version information and exit")
	acceptInsecureCert := flag.Bool("k", false, "Do not verify ssl certs")
	logLevel := flag.String("loglevel", "", "Loglevel: [<crit>|<error>|<warn>|<info>|<debug>]")
	flag.Parse()
	logger.SetLevel(*logLevel)

	log.Infof("Starting Agent: %v", version.String())

	log.Info("Loading configuration")
	cfg, err := config.NewConfig()
	// Load cfg before doing 'versionFlag' so that it has the DOCKER_HOST
	// variable loaded if needed
	if *versionFlag {
		versionableEngine := engine.NewTaskEngine(cfg)
		version.PrintVersion(versionableEngine)
		return exitcodes.ExitSuccess
	}
	if err != nil {
		log.Criticalf("Error loading config: %v", err)
		// All required config values can be inferred from EC2 Metadata, so this error could be transient.
		return exitcodes.ExitError
	}
	log.Debug("Loaded config: " + cfg.String())

	var currentEc2InstanceID, containerInstanceArn string
	var taskEngine engine.TaskEngine
	if cfg.Checkpoint {
		log.Info("Checkpointing is enabled. Attempting to load state")
		var previousCluster, previousEc2InstanceID, previousContainerInstanceArn string
		previousTaskEngine := engine.NewTaskEngine(cfg)
		// previousState is used to verify that our current runtime configuration is
		// compatible with our past configuration as reflected by our state-file
		previousState, err := initializeStateManager(cfg, previousTaskEngine, &previousCluster, &previousContainerInstanceArn, &previousEc2InstanceID, acshandler.SequenceNumber)
		if err != nil {
			log.Criticalf("Error creating state manager: %v", err)
			return exitcodes.ExitTerminal
		}

		err = previousState.Load()
		if err != nil {
			log.Criticalf("Error loading previously saved state: %v", err)
			return exitcodes.ExitTerminal
		}

		if previousCluster != "" {
			// TODO Handle default cluster in a sane and unified way across the codebase
			configuredCluster := cfg.Cluster
			if configuredCluster == "" {
				log.Debug("Setting cluster to default; none configured")
				configuredCluster = config.DEFAULT_CLUSTER_NAME
			}
			if previousCluster != configuredCluster {
				log.Criticalf("Data mismatch; saved cluster '%v' does not match configured cluster '%v'. Perhaps you want to delete the configured checkpoint file?", previousCluster, configuredCluster)
				return exitcodes.ExitTerminal
			}
			cfg.Cluster = previousCluster
			log.Infof("Restored cluster '%v'", cfg.Cluster)
		}

		if instanceIdentityDoc, err := ec2.GetInstanceIdentityDocument(); err == nil {
			currentEc2InstanceID = instanceIdentityDoc.InstanceId
		} else {
			log.Criticalf("Unable to access EC2 Metadata service to determine EC2 ID: %v", err)
		}

		if previousEc2InstanceID != "" && previousEc2InstanceID != currentEc2InstanceID {
			log.Warnf("Data mismatch; saved InstanceID '%v' does not match current InstanceID '%v'. Overwriting old datafile", previousEc2InstanceID, currentEc2InstanceID)
			// Reset taskEngine; all the other values are still default
			taskEngine = engine.NewTaskEngine(cfg)
		} else {
			// Use the values we loaded if there's no issue
			containerInstanceArn = previousContainerInstanceArn
			taskEngine = previousTaskEngine
		}
	} else {
		log.Info("Checkpointing not enabled; a new container instance will be created each time the agent is run")
		taskEngine = engine.NewTaskEngine(cfg)
	}

	stateManager, err := initializeStateManager(cfg, taskEngine, &cfg.Cluster, &containerInstanceArn, &currentEc2InstanceID, acshandler.SequenceNumber)
	if err != nil {
		log.Criticalf("Error creating state manager: %v", err)
		return exitcodes.ExitTerminal
	}

	credentialProvider := auth.NewBasicAWSCredentialProvider()
	awsCreds := auth.ToSDK(credentialProvider)
	// Preflight request to make sure they're good
	if preflightCreds, err := awsCreds.Credentials(); err != nil || preflightCreds.AccessKeyID == "" {
		if preflightCreds != nil {
			log.Warnf("Error getting valid credentials (AKID %v): %v", preflightCreds.AccessKeyID, err)
		} else {
			log.Warnf("Error getting preflight credentials: %v", err)
		}
	}
	client := api.NewECSClient(awsCreds, cfg, *acceptInsecureCert)

	if containerInstanceArn == "" {
		log.Info("Registering Instance with ECS")
		containerInstanceArn, err = client.RegisterContainerInstance()
		if err != nil {
			log.Errorf("Error registering: %v", err)
			if retriable, ok := err.(utils.Retriable); ok && !retriable.Retry() {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
		log.Infof("Registration completed successfully. I am running as '%v' in cluster '%v'", containerInstanceArn, cfg.Cluster)
		// Save our shiny new containerInstanceArn
		stateManager.Save()
	} else {
		log.Infof("Restored from checkpoint file. I am running as '%v' in cluster '%v'", containerInstanceArn, cfg.Cluster)
	}

	// Begin listening to the docker daemon and saving changes
	taskEngine.SetSaver(stateManager)
	taskEngine.MustInit()

	go sighandlers.StartTerminationHandler(stateManager, taskEngine)

	// Agent introspection api
	go handlers.ServeHttp(&containerInstanceArn, taskEngine, cfg)

	// Start sending events to the backend
	go eventhandler.HandleEngineEvents(taskEngine, client, stateManager)

	log.Info("Beginning Polling for updates")
	err = acshandler.StartSession(containerInstanceArn, credentialProvider, cfg, taskEngine, client, stateManager, *acceptInsecureCert)
	if err != nil {
		log.Criticalf("Unretriable error starting communicating with ACS: %v", err)
		return exitcodes.ExitTerminal
	}
	log.Critical("ACS Session handler should never exit")
	return exitcodes.ExitError
}
func TestStatsEngineWithDockerTaskEngine(t *testing.T) {
	// This should be a functional test. Upgrading to docker 1.6 breaks our
	// ability to read the state.json file for containers.
	t.Skip("Skipping integ test as this is really a functional test")
	taskEngine := engine.NewTaskEngine(&config.Config{})
	container, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	unmappedContainer, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    unmappedContainer.ID,
		Force: true,
	})
	containers := []*api.Container{
		&api.Container{
			Name: "gremlin",
		},
	}
	testTask := api.Task{
		Arn:           "gremlin-task",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(
		&api.DockerContainer{
			DockerId:   container.ID,
			DockerName: "gremlin",
			Container:  containers[0],
		},
		&testTask)

	statsEngine := NewDockerStatsEngine(&cfg)
	statsEngine.client, err = engine.NewDockerGoClient(nil, "", config.NewSensitiveRawMessage([]byte("")))
	if err != nil {
		t.Fatal("Error initializing docker client: ", err)
	}
	err = statsEngine.MustInit(taskEngine, defaultCluster, defaultContainerInstance)
	if err != nil {
		t.Error("Error initializing stats engine: ", err)
	}

	err = client.StartContainer(container.ID, nil)
	defer client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", container.ID, " error: ", err)
	}
	err = client.StartContainer(unmappedContainer.ID, nil)
	defer client.StopContainer(unmappedContainer.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", unmappedContainer.ID, " error: ", err)
	}

	// Wait for the stats collection go routine to start.
	time.Sleep(checkPointSleep)
	metadata, taskMetrics, err := statsEngine.GetInstanceMetrics()
	if err != nil {
		t.Error("Error getting instance metrics: ", err)
	}
	if len(taskMetrics) != 1 {
		t.Error("Incorrect number of tasks. Expected: 1, got: ", len(taskMetrics))
	}
	err = validateMetricsMetadata(metadata)
	if err != nil {
		t.Error("Error validating metadata: ", err)
	}
	err = validateContainerMetrics(taskMetrics[0].ContainerMetrics, 1)
	if err != nil {
		t.Error("Error validating container metrics: ", err)
	}

	err = client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error stopping container: ", container.ID, " error: ", err)
	}

	time.Sleep(waitForCleanupSleep)
	// Should not contain any metrics after cleanup.
	err = validateIdleContainerMetrics(statsEngine)
	if err != nil {
		t.Fatal("Error validating idle metrics: ", err)
	}
}
func TestNewerUpdateMessages(t *testing.T) {
	u, ctrl, cfg, mockfs, mockacs, mockhttp := mocks(t, nil)
	defer ctrl.Finish()

	var writtenFile bytes.Buffer
	gomock.InOrder(
		mockhttp.EXPECT().RoundTrip(mock_http.NewHTTPSimpleMatcher("GET", "https://s3.amazonaws.com/amazon-ecs-agent/update.tar")).Return(mock_http.SuccessResponse("update-tar-data"), nil),
		mockfs.EXPECT().Create(gomock.Any()).Return(mock_os.NopReadWriteCloser(&writtenFile), nil),
		mockfs.EXPECT().WriteFile("/tmp/test/desired-image", gomock.Any(), gomock.Any()).Return(nil),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("StageMID").(*string),
		})),
		mockacs.EXPECT().MakeRequest(&nackRequestMatcher{&ecsacs.NackRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("StageMID").(*string),
			Reason:            ptr("New update arrived: StageMIDNew").(*string),
		}}),
		mockhttp.EXPECT().RoundTrip(mock_http.NewHTTPSimpleMatcher("GET", "https://s3.amazonaws.com/amazon-ecs-agent/new.tar")).Return(mock_http.SuccessResponse("newer-update-tar-data"), nil),
		mockfs.EXPECT().Create(gomock.Any()).Return(mock_os.NopReadWriteCloser(&writtenFile), nil),
		mockfs.EXPECT().WriteFile("/tmp/test/desired-image", gomock.Any(), gomock.Any()).Return(nil),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("StageMIDNew").(*string),
		})),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("mid2").(*string),
		})),
		mockfs.EXPECT().Exit(exitcodes.ExitUpdate),
	)

	u.stageUpdateHandler()(&ecsacs.StageUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("StageMID").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("6caeef375a080e3241781725b357890758d94b15d7ce63f6b2ff1cb5589f2007").(*string),
		},
	})
	if writtenFile.String() != "update-tar-data" {
		t.Error("Incorrect data written")
	}
	writtenFile.Reset()

	// Never perform; make sure a new hash results in a new stage
	u.stageUpdateHandler()(&ecsacs.StageUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("StageMIDNew").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/new.tar").(*string),
			Signature: ptr("9c6ea7bd7d49f95b6d516517e453b965897109bf8a1d6ff3a6e57287049eb2de").(*string),
		},
	})
	if writtenFile.String() != "newer-update-tar-data" {
		t.Error("Incorrect data written")
	}

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid2").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("c54518806ff4d14b680c35784113e1e7478491fe").(*string),
		},
	})
}
func _main() int {
	defer log.Flush()
	flagset := flag.NewFlagSet("Amazon ECS Agent", flag.ContinueOnError)
	versionFlag := flagset.Bool("version", false, "Print the agent version information and exit")
	logLevel := flagset.String("loglevel", "", "Loglevel: [<crit>|<error>|<warn>|<info>|<debug>]")
	acceptInsecureCert := flagset.Bool("k", false, "Disable SSL certificate verification. We do not recommend setting this option.")
	licenseFlag := flagset.Bool("license", false, "Print the LICENSE and NOTICE files and exit")
	blackholeEc2Metadata := flagset.Bool("blackhole-ec2-metadata", false, "Blackhole the EC2 Metadata requests. Setting this option can cause the ECS Agent to fail to work properly. We do not recommend setting this option")
	err := flagset.Parse(os.Args[1:])
	if err != nil {
		return exitcodes.ExitTerminal
	}

	if *licenseFlag {
		license := utils.NewLicenseProvider()
		text, err := license.GetText()
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			return exitcodes.ExitError
		}
		fmt.Println(text)
		return exitcodes.ExitSuccess
	}

	logger.SetLevel(*logLevel)
	ec2MetadataClient := ec2.DefaultClient
	if *blackholeEc2Metadata {
		ec2MetadataClient = ec2.NewBlackholeEC2MetadataClient()
	}

	log.Infof("Starting Agent: %s", version.String())
	if *acceptInsecureCert {
		log.Warn("SSL certificate verification disabled. This is not recommended.")
	}
	log.Info("Loading configuration")
	cfg, cfgErr := config.NewConfig(ec2MetadataClient)
	// Load cfg and create Docker client before doing 'versionFlag' so that it has the DOCKER_HOST variable loaded if needed
	clientFactory := dockerclient.NewFactory(cfg.DockerEndpoint)
	dockerClient, err := engine.NewDockerGoClient(clientFactory, *acceptInsecureCert, cfg)
	if err != nil {
		log.Criticalf("Error creating Docker client: %v", err)
		return exitcodes.ExitError
	}

	ctx := context.Background()
	// Create the DockerContainerChange event stream for tcs
	containerChangeEventStream := eventstream.NewEventStream(ContainerChangeEventStream, ctx)
	containerChangeEventStream.StartListening()

	// Create credentials manager. This will be used by the task engine and
	// the credentials handler
	credentialsManager := credentials.NewManager()
	// Create image manager. This will be used by the task engine for saving image states
	state := dockerstate.NewDockerTaskEngineState()
	imageManager := engine.NewImageManager(cfg, dockerClient, state)

	if *versionFlag {
		versionableEngine := engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
		version.PrintVersion(versionableEngine)
		return exitcodes.ExitSuccess
	}

	sighandlers.StartDebugHandler()

	if cfgErr != nil {
		log.Criticalf("Error loading config: %v", cfgErr)
		// All required config values can be inferred from EC2 Metadata, so this error could be transient.
		return exitcodes.ExitError
	}
	log.Debug("Loaded config: " + cfg.String())

	var currentEc2InstanceID, containerInstanceArn string
	var taskEngine engine.TaskEngine
	if cfg.Checkpoint {
		log.Info("Checkpointing is enabled. Attempting to load state")
		var previousCluster, previousEc2InstanceID, previousContainerInstanceArn string
		previousTaskEngine := engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
		// previousState is used to verify that our current runtime configuration is
		// compatible with our past configuration as reflected by our state-file
		previousState, err := initializeStateManager(cfg, previousTaskEngine, &previousCluster, &previousContainerInstanceArn, &previousEc2InstanceID)
		if err != nil {
			log.Criticalf("Error creating state manager: %v", err)
			return exitcodes.ExitTerminal
		}

		err = previousState.Load()
		if err != nil {
			log.Criticalf("Error loading previously saved state: %v", err)
			return exitcodes.ExitTerminal
		}

		if previousCluster != "" {
			// TODO Handle default cluster in a sane and unified way across the codebase
			configuredCluster := cfg.Cluster
			if configuredCluster == "" {
				log.Debug("Setting cluster to default; none configured")
				configuredCluster = config.DefaultClusterName
			}
			if previousCluster != configuredCluster {
				log.Criticalf("Data mismatch; saved cluster '%v' does not match configured cluster '%v'. Perhaps you want to delete the configured checkpoint file?", previousCluster, configuredCluster)
				return exitcodes.ExitTerminal
			}
			cfg.Cluster = previousCluster
			log.Infof("Restored cluster '%v'", cfg.Cluster)
		}

		if instanceIdentityDoc, err := ec2MetadataClient.InstanceIdentityDocument(); err == nil {
			currentEc2InstanceID = instanceIdentityDoc.InstanceId
		} else {
			log.Criticalf("Unable to access EC2 Metadata service to determine EC2 ID: %v", err)
		}

		if previousEc2InstanceID != "" && previousEc2InstanceID != currentEc2InstanceID {
			log.Warnf("Data mismatch; saved InstanceID '%s' does not match current InstanceID '%s'. Overwriting old datafile", previousEc2InstanceID, currentEc2InstanceID)
			// Reset taskEngine; all the other values are still default
			taskEngine = engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
		} else {
			// Use the values we loaded if there's no issue
			containerInstanceArn = previousContainerInstanceArn
			taskEngine = previousTaskEngine
		}
	} else {
		log.Info("Checkpointing not enabled; a new container instance will be created each time the agent is run")
		taskEngine = engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
	}

	stateManager, err := initializeStateManager(cfg, taskEngine, &cfg.Cluster, &containerInstanceArn, &currentEc2InstanceID)
	if err != nil {
		log.Criticalf("Error creating state manager: %v", err)
		return exitcodes.ExitTerminal
	}

	capabilities := taskEngine.Capabilities()

	// We instantiate our own credentialProvider for use in acs/tcs. This tries
	// to mimic roughly the way it's instantiated by the SDK for a default
	// session.
	credentialProvider := defaults.CredChain(defaults.Config(), defaults.Handlers())
	// Preflight request to make sure they're good
	if preflightCreds, err := credentialProvider.Get(); err != nil || preflightCreds.AccessKeyID == "" {
		log.Warnf("Error getting valid credentials (AKID %s): %v", preflightCreds.AccessKeyID, err)
	}
	client := api.NewECSClient(credentialProvider, cfg, httpclient.New(api.RoundtripTimeout, *acceptInsecureCert), ec2MetadataClient)

	if containerInstanceArn == "" {
		log.Info("Registering Instance with ECS")
		containerInstanceArn, err = client.RegisterContainerInstance("", capabilities)
		if err != nil {
			log.Errorf("Error registering: %v", err)
			if retriable, ok := err.(utils.Retriable); ok && !retriable.Retry() {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
		log.Infof("Registration completed successfully. I am running as '%s' in cluster '%s'", containerInstanceArn, cfg.Cluster)
		// Save our shiny new containerInstanceArn
		stateManager.Save()
	} else {
		log.Infof("Restored from checkpoint file. I am running as '%s' in cluster '%s'", containerInstanceArn, cfg.Cluster)
		_, err = client.RegisterContainerInstance(containerInstanceArn, capabilities)
		if err != nil {
			log.Errorf("Error re-registering: %v", err)
			if awserr, ok := err.(awserr.Error); ok && api.IsInstanceTypeChangedError(awserr) {
				log.Criticalf("The current instance type does not match the registered instance type. Please revert the instance type change, or alternatively launch a new instance. Error: %v", err)
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
	}

	// Begin listening to the docker daemon and saving changes
	taskEngine.SetSaver(stateManager)
	imageManager.SetSaver(stateManager)
	taskEngine.MustInit()

	// Start the periodic image cleanup process
	if !cfg.ImageCleanupDisabled {
		go imageManager.StartImageCleanupProcess(ctx)
	}

	go sighandlers.StartTerminationHandler(stateManager, taskEngine)

	// Agent introspection api
	go handlers.ServeHttp(&containerInstanceArn, taskEngine, cfg)

	// Start serving the endpoint to fetch IAM Role credentials
	go credentialshandler.ServeHttp(credentialsManager, containerInstanceArn, cfg)

	// Start sending events to the backend
	go eventhandler.HandleEngineEvents(taskEngine, client, stateManager)

	deregisterInstanceEventStream := eventstream.NewEventStream(DeregisterContainerInstanceEventStream, ctx)
	deregisterInstanceEventStream.StartListening()

	telemetrySessionParams := tcshandler.TelemetrySessionParams{
		ContainerInstanceArn:          containerInstanceArn,
		CredentialProvider:            credentialProvider,
		Cfg:                           cfg,
		DeregisterInstanceEventStream: deregisterInstanceEventStream,
		ContainerChangeEventStream:    containerChangeEventStream,
		DockerClient:                  dockerClient,
		AcceptInvalidCert:             *acceptInsecureCert,
		EcsClient:                     client,
		TaskEngine:                    taskEngine,
	}

	// Start metrics session in a go routine
	go tcshandler.StartMetricsSession(telemetrySessionParams)

	log.Info("Beginning Polling for updates")
	err = acshandler.StartSession(ctx, acshandler.StartSessionArguments{
		AcceptInvalidCert:             *acceptInsecureCert,
		Config:                        cfg,
		DeregisterInstanceEventStream: deregisterInstanceEventStream,
		ContainerInstanceArn:          containerInstanceArn,
		CredentialProvider:            credentialProvider,
		ECSClient:                     client,
		StateManager:                  stateManager,
		TaskEngine:                    taskEngine,
		CredentialsManager:            credentialsManager,
	})
	if err != nil {
		log.Criticalf("Unretriable error starting communicating with ACS: %v", err)
		return exitcodes.ExitTerminal
	}
	log.Critical("ACS Session handler should never exit")
	return exitcodes.ExitError
}
func TestStateManager(t *testing.T) {
	tmpDir, err := ioutil.TempDir("/tmp", "ecs_statemanager_test")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)
	cfg := &config.Config{DataDir: tmpDir}
	manager, err := statemanager.NewStateManager(cfg)
	if err != nil {
		t.Fatal("Error loading manager", err)
	}
	err = manager.Load()
	if err != nil {
		t.Error("Expected loading a non-existent file to not be an error")
	}

	// Now let's make some state to save
	containerInstanceArn := ""
	taskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil, nil)
	manager, err = statemanager.NewStateManager(cfg,
		statemanager.AddSaveable("TaskEngine", taskEngine),
		statemanager.AddSaveable("ContainerInstanceArn", &containerInstanceArn))
	if err != nil {
		t.Fatal(err)
	}

	containerInstanceArn = "containerInstanceArn"
	testTask := &api.Task{Arn: "test-arn"}
	taskEngine.(*engine.DockerTaskEngine).State().AddTask(testTask)

	err = manager.Save()
	if err != nil {
		t.Fatal("Error saving state", err)
	}

	// Now make sure we can load that state sanely
	loadedTaskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil, nil)
	var loadedContainerInstanceArn string
	manager, err = statemanager.NewStateManager(cfg,
		statemanager.AddSaveable("TaskEngine", &loadedTaskEngine),
		statemanager.AddSaveable("ContainerInstanceArn", &loadedContainerInstanceArn))
	if err != nil {
		t.Fatal(err)
	}
	err = manager.Load()
	if err != nil {
		t.Fatal("Error loading state", err)
	}

	if loadedContainerInstanceArn != containerInstanceArn {
		t.Error("Did not load containerInstanceArn correctly; got ", loadedContainerInstanceArn, " instead of ", containerInstanceArn)
	}

	if !engine_testutils.DockerTaskEnginesEqual(loadedTaskEngine.(*engine.DockerTaskEngine), taskEngine.(*engine.DockerTaskEngine)) {
		t.Error("Did not load taskEngine correctly")
	}

	// I'd rather double check .Equal there; let's make sure ListTasks agrees.
	tasks, err := loadedTaskEngine.ListTasks()
	if err != nil {
		t.Error("Error listing tasks", err)
	}
	if len(tasks) != 1 {
		t.Error("Should have a task!")
	} else if tasks[0].Arn != "test-arn" {
		t.Error("Wrong arn, expected test-arn but got ", tasks[0].Arn)
	}
}
func TestStatsEngineWithDockerTaskEngineMissingRemoveEvent(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integ test in short mode")
	}
	containerChangeEventStream := eventStream("TestStatsEngineWithDockerTaskEngine")
	taskEngine := ecsengine.NewTaskEngine(&config.Config{}, nil, nil, containerChangeEventStream, nil, dockerstate.NewDockerTaskEngineState())
	container, err := createGremlin(client)
	if err != nil {
		t.Fatalf("Error creating container: %v", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	containers := []*api.Container{
		&api.Container{
			Name:        "gremlin",
			KnownStatus: api.ContainerStopped,
		},
	}
	testTask := api.Task{
		Arn:           "gremlin-task",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*ecsengine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(
		&api.DockerContainer{
			DockerId:   container.ID,
			DockerName: "gremlin",
			Container:  containers[0],
		},
		&testTask)

	// Create a new docker stats engine
	// TODO make dockerStatsEngine not a singleton object
	dockerStatsEngine = nil
	statsEngine := NewDockerStatsEngine(&cfg, dockerClient, containerChangeEventStream)
	err = statsEngine.MustInit(taskEngine, defaultCluster, defaultContainerInstance)
	if err != nil {
		t.Errorf("Error initializing stats engine: %v", err)
	}
	defer statsEngine.removeAll()
	defer statsEngine.containerChangeEventStream.Unsubscribe(containerChangeHandler)

	err = client.StartContainer(container.ID, nil)
	defer client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Errorf("Error starting container: %s, err: %v", container.ID, err)
	}

	err = containerChangeEventStream.WriteToEventStream(ecsengine.DockerContainerChangeEvent{
		Status: api.ContainerRunning,
		DockerContainerMetadata: ecsengine.DockerContainerMetadata{
			DockerId: container.ID,
		},
	})
	if err != nil {
		t.Errorf("Failed to write to container change event stream, err: %v", err)
	}

	// Wait for the stats collection go routine to start.
	time.Sleep(checkPointSleep)
	err = client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Fatalf("Error stopping container: %s, err: %v", container.ID, err)
	}
	err = client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	if err != nil {
		t.Fatalf("Error removing container: %s, err: %v", container.ID, err)
	}

	time.Sleep(checkPointSleep)
	// Simulate tcs client invoking GetInstanceMetrics.
	_, _, err = statsEngine.GetInstanceMetrics()
	if err == nil {
		t.Fatalf("Expected error 'no task metrics to report' when getting instance metrics")
	}

	// Should not contain any metrics after cleanup.
	err = validateIdleContainerMetrics(statsEngine)
	if err != nil {
		t.Fatalf("Error validating idle metrics: %v", err)
	}
}
func TestStatsEngineWithDockerTaskEngine(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integ test in short mode")
	}
	taskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil)
	container, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	unmappedContainer, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    unmappedContainer.ID,
		Force: true,
	})
	containers := []*api.Container{
		&api.Container{
			Name: "gremlin",
		},
	}
	testTask := api.Task{
		Arn:           "gremlin-task",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(
		&api.DockerContainer{
			DockerId:   container.ID,
			DockerName: "gremlin",
			Container:  containers[0],
		},
		&testTask)

	statsEngine := NewDockerStatsEngine(&cfg, dockerClient)
	err = statsEngine.MustInit(taskEngine, defaultCluster, defaultContainerInstance)
	if err != nil {
		t.Error("Error initializing stats engine: ", err)
	}
	defer statsEngine.unsubscribeContainerEvents()

	err = client.StartContainer(container.ID, nil)
	defer client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", container.ID, " error: ", err)
	}
	err = client.StartContainer(unmappedContainer.ID, nil)
	defer client.StopContainer(unmappedContainer.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", unmappedContainer.ID, " error: ", err)
	}

	// Wait for the stats collection go routine to start.
	time.Sleep(checkPointSleep)
	metadata, taskMetrics, err := statsEngine.GetInstanceMetrics()
	if err != nil {
		t.Error("Error getting instance metrics: ", err)
	}
	if len(taskMetrics) != 1 {
		t.Error("Incorrect number of tasks. Expected: 1, got: ", len(taskMetrics))
	}
	err = validateMetricsMetadata(metadata)
	if err != nil {
		t.Error("Error validating metadata: ", err)
	}
	err = validateContainerMetrics(taskMetrics[0].ContainerMetrics, 1)
	if err != nil {
		t.Error("Error validating container metrics: ", err)
	}

	err = client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error stopping container: ", container.ID, " error: ", err)
	}

	time.Sleep(waitForCleanupSleep)
	// Should not contain any metrics after cleanup.
	err = validateIdleContainerMetrics(statsEngine)
	if err != nil {
		t.Fatal("Error validating idle metrics: ", err)
	}
}
func TestServeHttp(t *testing.T) {
	taskEngine := engine.NewTaskEngine(&config.Config{})
	containers := []*api.Container{
		&api.Container{
			Name: "c1",
		},
	}
	testTask := api.Task{
		Arn:           "task1",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(&api.DockerContainer{DockerId: "docker1", DockerName: "someName", Container: containers[0]}, &testTask)
	go ServeHttp(utils.Strptr(TestContainerInstanceArn), taskEngine, &config.Config{Cluster: TestClusterArn})

	body := getResponseBodyFromLocalHost("/v1/metadata", t)
	var metadata MetadataResponse
	json.Unmarshal(body, &metadata)

	if metadata.Cluster != TestClusterArn {
		t.Error("Metadata returned the wrong cluster arn")
	}
	if *metadata.ContainerInstanceArn != TestContainerInstanceArn {
		t.Error("Metadata returned the wrong container instance arn")
	}

	var tasksResponse TasksResponse
	body = getResponseBodyFromLocalHost("/v1/tasks", t)
	json.Unmarshal(body, &tasksResponse)
	tasks := tasksResponse.Tasks

	if len(tasks) != 1 {
		t.Error("Incorrect number of tasks in response: ", len(tasks))
	}
	if tasks[0].Arn != "task1" {
		t.Error("Incorrect task arn in response: ", tasks[0].Arn)
	}
	containersResponse := tasks[0].Containers
	if len(containersResponse) != 1 {
		t.Error("Incorrect number of containers in response: ", len(containersResponse))
	}
	if containersResponse[0].Name != "c1" {
		t.Error("Incorrect container name in response: ", containersResponse[0].Name)
	}

	var taskResponse TaskResponse
	body = getResponseBodyFromLocalHost("/v1/tasks?dockerid=docker1", t)
	json.Unmarshal(body, &taskResponse)
	if taskResponse.Arn != "task1" {
		t.Error("Incorrect task arn in response")
	}
	if taskResponse.Containers[0].Name != "c1" {
		t.Error("Incorrect container name in response")
	}

	resp, err := http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?dockerid=docker2")
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status for invalid docker id")
	}

	body = getResponseBodyFromLocalHost("/v1/tasks?taskarn=task1", t)
	json.Unmarshal(body, &taskResponse)
	if taskResponse.Arn != "task1" {
		t.Error("Incorrect task arn in response")
	}

	resp, err = http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?taskarn=task2")
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status for invalid task arn")
	}

	resp, err = http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?taskarn=")
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status for empty task arn")
	}

	resp, err = http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?taskarn=task1&dockerid=docker1")
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status when both dockerid and taskarn are specified.")
	}
}
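// getResponseBodyFromLocalHost is not shown in this excerpt; a plausible
// minimal implementation, assuming it issues a GET against the introspection
// port and fails the test on any transport or read error:
func getResponseBodyFromLocalHost(path string, t *testing.T) []byte {
	resp, err := http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + path)
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	return body
}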