func mocks(t *testing.T, cfg *config.Config) (*gomock.Controller, *mock_engine.MockDockerClient, engine.TaskEngine) {
	ctrl := gomock.NewController(t)
	client := mock_engine.NewMockDockerClient(ctrl)
	taskEngine := engine.NewTaskEngine(cfg)
	taskEngine.(*engine.DockerTaskEngine).SetDockerClient(client)
	return ctrl, client, taskEngine
}
func TestLoadsV1DataCorrectly(t *testing.T) {
	cfg := &config.Config{DataDir: filepath.Join(".", "testdata", "v1", "1")}

	taskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil, nil)
	var containerInstanceArn, cluster, savedInstanceID string
	var sequenceNumber int64

	stateManager, err := statemanager.NewStateManager(cfg,
		statemanager.AddSaveable("TaskEngine", taskEngine),
		statemanager.AddSaveable("ContainerInstanceArn", &containerInstanceArn),
		statemanager.AddSaveable("Cluster", &cluster),
		statemanager.AddSaveable("EC2InstanceID", &savedInstanceID),
		statemanager.AddSaveable("SeqNum", &sequenceNumber),
	)
	if err != nil {
		t.Fatal(err)
	}

	err = stateManager.Load()
	if err != nil {
		t.Fatal("Error loading state", err)
	}

	if cluster != "test" {
		t.Fatal("Wrong cluster: " + cluster)
	}

	if sequenceNumber != 0 {
		t.Fatal("v1 should give a sequence number of 0")
	}
	tasks, err := taskEngine.ListTasks()
	if err != nil {
		t.Fatal(err)
	}
	var deadTask *api.Task
	for _, task := range tasks {
		if task.Arn == "arn:aws:ecs:us-west-2:1234567890:task/f44b4fc9-adb0-4f4f-9dff-871512310588" {
			deadTask = task
		}
	}
	if deadTask == nil {
		t.Fatal("Could not find task expected to be in state")
	}
	if deadTask.SentStatus != api.TaskStopped {
		t.Fatal("task dead should be stopped now")
	}
	if deadTask.Containers[0].SentStatus != api.ContainerStopped {
		t.Fatal("container Dead should go to stopped")
	}
	if deadTask.Containers[0].DesiredStatus != api.ContainerStopped {
		t.Fatal("container Dead should go to stopped")
	}
	if deadTask.Containers[0].KnownStatus != api.ContainerStopped {
		t.Fatal("container Dead should go to stopped")
	}
	expected, _ := time.Parse(time.RFC3339, "2015-04-28T17:29:48.129140193Z")
	if deadTask.KnownStatusTime != expected {
		t.Fatal("Time was not correct")
	}
}
func backendMappingTestHelper(containers []*api.Container, testTask *api.Task, desiredStatus string, knownStatus string, t *testing.T) {
	taskEngine := engine.NewTaskEngine(&config.Config{})
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(testTask)
	dockerTaskEngine.State().AddContainer(&api.DockerContainer{DockerId: "docker1", DockerName: "someName", Container: containers[0]}, testTask)
	taskHandler := TasksV1RequestHandlerMaker(taskEngine)
	server := httptest.NewServer(http.HandlerFunc(taskHandler))
	defer server.Close()
	resp, err := http.Get(server.URL + "/v1/tasks")
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	var taskResponse TasksResponse
	json.Unmarshal(body, &taskResponse)
	tasks := taskResponse.Tasks
	if tasks[0].DesiredStatus != desiredStatus {
		t.Error("Incorrect known status in response: ", tasks[0].DesiredStatus)
	}
	if tasks[0].KnownStatus != knownStatus {
		t.Error("Incorrect known status in response: ", tasks[0].KnownStatus)
	}
}
func TestUndownloadedUpdate(t *testing.T) {
	u, ctrl, cfg, _, mockacs, _ := mocks(t, nil)
	defer ctrl.Finish()

	mockacs.EXPECT().MakeRequest(&nackRequestMatcher{&ecsacs.NackRequest{
		Cluster:           ptr("cluster").(*string),
		ContainerInstance: ptr("containerInstance").(*string),
		MessageId:         ptr("mid").(*string),
	}})

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid").(*string),
	})
}
func TestFullUpdateFlow(t *testing.T) {
	u, ctrl, cfg, mockfs, mockacs, mockhttp := mocks(t, nil)
	defer ctrl.Finish()

	var writtenFile bytes.Buffer
	gomock.InOrder(
		mockhttp.EXPECT().RoundTrip(mock_http.NewHTTPSimpleMatcher("GET", "https://s3.amazonaws.com/amazon-ecs-agent/update.tar")).Return(mock_http.SuccessResponse("update-tar-data"), nil),
		mockfs.EXPECT().Create(gomock.Any()).Return(mock_os.NopReadWriteCloser(&writtenFile), nil),
		mockfs.EXPECT().WriteFile("/tmp/test/desired-image", gomock.Any(), gomock.Any()).Return(nil),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("mid").(*string),
		})),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("mid2").(*string),
		})),
		mockfs.EXPECT().Exit(exitcodes.ExitUpdate),
	)

	u.stageUpdateHandler()(&ecsacs.StageUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("6caeef375a080e3241781725b357890758d94b15d7ce63f6b2ff1cb5589f2007").(*string),
		},
	})

	if writtenFile.String() != "update-tar-data" {
		t.Error("Incorrect data written")
	}

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid2").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("c54518806ff4d14b680c35784113e1e7478491fe").(*string),
		},
	})
}
func TestPerformUpdateWithUpdatesDisabled(t *testing.T) {
	u, ctrl, cfg, _, mockacs, _ := mocks(t, &config.Config{
		UpdatesEnabled: false,
	})
	defer ctrl.Finish()

	mockacs.EXPECT().MakeRequest(&nackRequestMatcher{&ecsacs.NackRequest{
		Cluster:           ptr("cluster").(*string),
		ContainerInstance: ptr("containerInstance").(*string),
		MessageId:         ptr("mid").(*string),
		Reason:            ptr("Updates are disabled").(*string),
	}})

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg, nil, nil, nil, nil, nil))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("c54518806ff4d14b680c35784113e1e7478491fe").(*string),
		},
	})
}
Beispiel #7
0
func _main() int {
	defer log.Flush()
	versionFlag := flag.Bool("version", false, "Print the agent version information and exit")
	acceptInsecureCert := flag.Bool("k", false, "Do not verify ssl certs")
	logLevel := flag.String("loglevel", "", "Loglevel: [<crit>|<error>|<warn>|<info>|<debug>]")
	flag.Parse()

	logger.SetLevel(*logLevel)

	log.Infof("Starting Agent: %v", version.String())

	log.Info("Loading configuration")
	cfg, err := config.NewConfig()
	// Load cfg before doing 'versionFlag' so that it has the DOCKER_HOST
	// variable loaded if needed
	if *versionFlag {
		versionableEngine := engine.NewTaskEngine(cfg)
		version.PrintVersion(versionableEngine)
		return exitcodes.ExitSuccess
	}

	if err != nil {
		log.Criticalf("Error loading config: %v", err)
		// All required config values can be inferred from EC2 Metadata, so this error could be transient.
		return exitcodes.ExitError
	}
	log.Debug("Loaded config: " + cfg.String())

	var currentEc2InstanceID, containerInstanceArn string
	var taskEngine engine.TaskEngine

	if cfg.Checkpoint {
		log.Info("Checkpointing is enabled. Attempting to load state")
		var previousCluster, previousEc2InstanceID, previousContainerInstanceArn string
		previousTaskEngine := engine.NewTaskEngine(cfg)
		// previousState is used to verify that our current runtime configuration is
		// compatible with our past configuration as reflected by our state-file
		previousState, err := initializeStateManager(cfg, previousTaskEngine, &previousCluster, &previousContainerInstanceArn, &previousEc2InstanceID, acshandler.SequenceNumber)
		if err != nil {
			log.Criticalf("Error creating state manager: %v", err)
			return exitcodes.ExitTerminal
		}

		err = previousState.Load()
		if err != nil {
			log.Criticalf("Error loading previously saved state: %v", err)
			return exitcodes.ExitTerminal
		}

		if previousCluster != "" {
			// TODO Handle default cluster in a sane and unified way across the codebase
			configuredCluster := cfg.Cluster
			if configuredCluster == "" {
				log.Debug("Setting cluster to default; none configured")
				configuredCluster = config.DEFAULT_CLUSTER_NAME
			}
			if previousCluster != configuredCluster {
				log.Criticalf("Data mismatch; saved cluster '%v' does not match configured cluster '%v'. Perhaps you want to delete the configured checkpoint file?", previousCluster, configuredCluster)
				return exitcodes.ExitTerminal
			}
			cfg.Cluster = previousCluster
			log.Infof("Restored cluster '%v'", cfg.Cluster)
		}

		if instanceIdentityDoc, err := ec2.GetInstanceIdentityDocument(); err == nil {
			currentEc2InstanceID = instanceIdentityDoc.InstanceId
		} else {
			log.Criticalf("Unable to access EC2 Metadata service to determine EC2 ID: %v", err)
		}

		if previousEc2InstanceID != "" && previousEc2InstanceID != currentEc2InstanceID {
			log.Warnf("Data mismatch; saved InstanceID '%v' does not match current InstanceID '%v'. Overwriting old datafile", previousEc2InstanceID, currentEc2InstanceID)

			// Reset taskEngine; all the other values are still default
			taskEngine = engine.NewTaskEngine(cfg)
		} else {
			// Use the values we loaded if there's no issue
			containerInstanceArn = previousContainerInstanceArn
			taskEngine = previousTaskEngine
		}
	} else {
		log.Info("Checkpointing not enabled; a new container instance will be created each time the agent is run")
		taskEngine = engine.NewTaskEngine(cfg)
	}

	stateManager, err := initializeStateManager(cfg, taskEngine, &cfg.Cluster, &containerInstanceArn, &currentEc2InstanceID, acshandler.SequenceNumber)
	if err != nil {
		log.Criticalf("Error creating state manager: %v", err)
		return exitcodes.ExitTerminal
	}

	credentialProvider := auth.NewBasicAWSCredentialProvider()
	awsCreds := auth.ToSDK(credentialProvider)
	// Preflight request to make sure they're good
	if preflightCreds, err := awsCreds.Credentials(); err != nil || preflightCreds.AccessKeyID == "" {
		if preflightCreds != nil {
			log.Warnf("Error getting valid credentials (AKID %v): %v", preflightCreds.AccessKeyID, err)
		} else {
			log.Warnf("Error getting preflight credentials: %v", err)
		}
	}
	client := api.NewECSClient(awsCreds, cfg, *acceptInsecureCert)

	if containerInstanceArn == "" {
		log.Info("Registering Instance with ECS")
		containerInstanceArn, err = client.RegisterContainerInstance()
		if err != nil {
			log.Errorf("Error registering: %v", err)
			if retriable, ok := err.(utils.Retriable); ok && !retriable.Retry() {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
		log.Infof("Registration completed successfully. I am running as '%v' in cluster '%v'", containerInstanceArn, cfg.Cluster)
		// Save our shiny new containerInstanceArn
		stateManager.Save()
	} else {
		log.Infof("Restored from checkpoint file. I am running as '%v' in cluster '%v'", containerInstanceArn, cfg.Cluster)
	}

	// Begin listening to the docker daemon and saving changes
	taskEngine.SetSaver(stateManager)
	taskEngine.MustInit()

	go sighandlers.StartTerminationHandler(stateManager, taskEngine)

	// Agent introspection api
	go handlers.ServeHttp(&containerInstanceArn, taskEngine, cfg)

	// Start sending events to the backend
	go eventhandler.HandleEngineEvents(taskEngine, client, stateManager)

	log.Info("Beginning Polling for updates")
	err = acshandler.StartSession(containerInstanceArn, credentialProvider, cfg, taskEngine, client, stateManager, *acceptInsecureCert)
	if err != nil {
		log.Criticalf("Unretriable error starting communicating with ACS: %v", err)
		return exitcodes.ExitTerminal
	}
	log.Critical("ACS Session handler should never exit")
	return exitcodes.ExitError
}
func TestStatsEngineWithDockerTaskEngine(t *testing.T) {
	// This should be a functional test. Upgrading to docker 1.6 breaks our ability to
	// read state.json file for containers.
	t.Skip("Skipping integ test as this is really a functional test")
	taskEngine := engine.NewTaskEngine(&config.Config{})
	container, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	unmappedContainer, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    unmappedContainer.ID,
		Force: true,
	})
	containers := []*api.Container{
		&api.Container{
			Name: "gremlin",
		},
	}
	testTask := api.Task{
		Arn:           "gremlin-task",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(
		&api.DockerContainer{
			DockerId:   container.ID,
			DockerName: "gremlin",
			Container:  containers[0],
		},
		&testTask)
	statsEngine := NewDockerStatsEngine(&cfg)
	statsEngine.client, err = engine.NewDockerGoClient(nil, "", config.NewSensitiveRawMessage([]byte("")))
	if err != nil {
		t.Fatal("Error initializing docker client: ", err)
	}
	err = statsEngine.MustInit(taskEngine, defaultCluster, defaultContainerInstance)
	if err != nil {
		t.Error("Error initializing stats engine: ", err)
	}

	err = client.StartContainer(container.ID, nil)
	defer client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", container.ID, " error: ", err)
	}

	err = client.StartContainer(unmappedContainer.ID, nil)
	defer client.StopContainer(unmappedContainer.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", unmappedContainer.ID, " error: ", err)
	}

	// Wait for the stats collection go routine to start.
	time.Sleep(checkPointSleep)

	metadata, taskMetrics, err := statsEngine.GetInstanceMetrics()
	if err != nil {
		t.Error("Error gettting instance metrics: ", err)
	}

	if len(taskMetrics) != 1 {
		t.Error("Incorrect number of tasks. Expected: 1, got: ", len(taskMetrics))
	}
	err = validateMetricsMetadata(metadata)
	if err != nil {
		t.Error("Error validating metadata: ", err)
	}

	err = validateContainerMetrics(taskMetrics[0].ContainerMetrics, 1)
	if err != nil {
		t.Error("Error validating container metrics: ", err)
	}

	err = client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error stopping container: ", container.ID, " error: ", err)
	}

	time.Sleep(waitForCleanupSleep)

	// Should not contain any metrics after cleanup.
	err = validateIdleContainerMetrics(statsEngine)
	if err != nil {
		t.Fatal("Error validating metadata: ", err)
	}
}
func TestNewerUpdateMessages(t *testing.T) {
	u, ctrl, cfg, mockfs, mockacs, mockhttp := mocks(t, nil)
	defer ctrl.Finish()

	var writtenFile bytes.Buffer
	gomock.InOrder(
		mockhttp.EXPECT().RoundTrip(mock_http.NewHTTPSimpleMatcher("GET", "https://s3.amazonaws.com/amazon-ecs-agent/update.tar")).Return(mock_http.SuccessResponse("update-tar-data"), nil),
		mockfs.EXPECT().Create(gomock.Any()).Return(mock_os.NopReadWriteCloser(&writtenFile), nil),
		mockfs.EXPECT().WriteFile("/tmp/test/desired-image", gomock.Any(), gomock.Any()).Return(nil),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("StageMID").(*string),
		})),
		mockacs.EXPECT().MakeRequest(&nackRequestMatcher{&ecsacs.NackRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("StageMID").(*string),
			Reason:            ptr("New update arrived: StageMIDNew").(*string),
		}}),
		mockhttp.EXPECT().RoundTrip(mock_http.NewHTTPSimpleMatcher("GET", "https://s3.amazonaws.com/amazon-ecs-agent/new.tar")).Return(mock_http.SuccessResponse("newer-update-tar-data"), nil),
		mockfs.EXPECT().Create(gomock.Any()).Return(mock_os.NopReadWriteCloser(&writtenFile), nil),
		mockfs.EXPECT().WriteFile("/tmp/test/desired-image", gomock.Any(), gomock.Any()).Return(nil),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("StageMIDNew").(*string),
		})),
		mockacs.EXPECT().MakeRequest(gomock.Eq(&ecsacs.AckRequest{
			Cluster:           ptr("cluster").(*string),
			ContainerInstance: ptr("containerInstance").(*string),
			MessageId:         ptr("mid2").(*string),
		})),
		mockfs.EXPECT().Exit(exitcodes.ExitUpdate),
	)

	u.stageUpdateHandler()(&ecsacs.StageUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("StageMID").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("6caeef375a080e3241781725b357890758d94b15d7ce63f6b2ff1cb5589f2007").(*string),
		},
	})

	if writtenFile.String() != "update-tar-data" {
		t.Error("Incorrect data written")
	}
	writtenFile.Reset()

	// Never perform, make sure a new hash results in a new stage
	u.stageUpdateHandler()(&ecsacs.StageUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("StageMIDNew").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/new.tar").(*string),
			Signature: ptr("9c6ea7bd7d49f95b6d516517e453b965897109bf8a1d6ff3a6e57287049eb2de").(*string),
		},
	})

	if writtenFile.String() != "newer-update-tar-data" {
		t.Error("Incorrect data written")
	}

	u.performUpdateHandler(statemanager.NewNoopStateManager(), engine.NewTaskEngine(cfg))(&ecsacs.PerformUpdateMessage{
		ClusterArn:           ptr("cluster").(*string),
		ContainerInstanceArn: ptr("containerInstance").(*string),
		MessageId:            ptr("mid2").(*string),
		UpdateInfo: &ecsacs.UpdateInfo{
			Location:  ptr("https://s3.amazonaws.com/amazon-ecs-agent/update.tar").(*string),
			Signature: ptr("c54518806ff4d14b680c35784113e1e7478491fe").(*string),
		},
	})
}
func _main() int {
	defer log.Flush()
	flagset := flag.NewFlagSet("Amazon ECS Agent", flag.ContinueOnError)
	versionFlag := flagset.Bool("version", false, "Print the agent version information and exit")
	logLevel := flagset.String("loglevel", "", "Loglevel: [<crit>|<error>|<warn>|<info>|<debug>]")
	acceptInsecureCert := flagset.Bool("k", false, "Disable SSL certificate verification. We do not recommend setting this option.")
	licenseFlag := flagset.Bool("license", false, "Print the LICENSE and NOTICE files and exit")
	blackholeEc2Metadata := flagset.Bool("blackhole-ec2-metadata", false, "Blackhole the EC2 Metadata requests. Setting this option can cause the ECS Agent to fail to work properly.  We do not recommend setting this option")
	err := flagset.Parse(os.Args[1:])
	if err != nil {
		return exitcodes.ExitTerminal
	}

	if *licenseFlag {
		license := utils.NewLicenseProvider()
		text, err := license.GetText()
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			return exitcodes.ExitError
		}
		fmt.Println(text)
		return exitcodes.ExitSuccess
	}

	logger.SetLevel(*logLevel)
	ec2MetadataClient := ec2.DefaultClient
	if *blackholeEc2Metadata {
		ec2MetadataClient = ec2.NewBlackholeEC2MetadataClient()
	}

	log.Infof("Starting Agent: %s", version.String())
	if *acceptInsecureCert {
		log.Warn("SSL certificate verification disabled. This is not recommended.")
	}
	log.Info("Loading configuration")
	cfg, cfgErr := config.NewConfig(ec2MetadataClient)
	// Load cfg and create Docker client before doing 'versionFlag' so that it has the DOCKER_HOST variable loaded if needed
	clientFactory := dockerclient.NewFactory(cfg.DockerEndpoint)
	dockerClient, err := engine.NewDockerGoClient(clientFactory, *acceptInsecureCert, cfg)
	if err != nil {
		log.Criticalf("Error creating Docker client: %v", err)
		return exitcodes.ExitError
	}

	ctx := context.Background()
	// Create the DockerContainerChange event stream for tcs
	containerChangeEventStream := eventstream.NewEventStream(ContainerChangeEventStream, ctx)
	containerChangeEventStream.StartListening()

	// Create credentials manager. This will be used by the task engine and
	// the credentials handler
	credentialsManager := credentials.NewManager()
	// Create image manager. This will be used by the task engine for saving image states
	state := dockerstate.NewDockerTaskEngineState()
	imageManager := engine.NewImageManager(cfg, dockerClient, state)
	if *versionFlag {
		versionableEngine := engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
		version.PrintVersion(versionableEngine)
		return exitcodes.ExitSuccess
	}

	sighandlers.StartDebugHandler()

	if cfgErr != nil {
		log.Criticalf("Error loading config: %v", err)
		// All required config values can be inferred from EC2 Metadata, so this error could be transient.
		return exitcodes.ExitError
	}
	log.Debug("Loaded config: " + cfg.String())

	var currentEc2InstanceID, containerInstanceArn string
	var taskEngine engine.TaskEngine

	if cfg.Checkpoint {
		log.Info("Checkpointing is enabled. Attempting to load state")
		var previousCluster, previousEc2InstanceID, previousContainerInstanceArn string
		previousTaskEngine := engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
		// previousState is used to verify that our current runtime configuration is
		// compatible with our past configuration as reflected by our state-file
		previousState, err := initializeStateManager(cfg, previousTaskEngine, &previousCluster, &previousContainerInstanceArn, &previousEc2InstanceID)
		if err != nil {
			log.Criticalf("Error creating state manager: %v", err)
			return exitcodes.ExitTerminal
		}

		err = previousState.Load()
		if err != nil {
			log.Criticalf("Error loading previously saved state: %v", err)
			return exitcodes.ExitTerminal
		}

		if previousCluster != "" {
			// TODO Handle default cluster in a sane and unified way across the codebase
			configuredCluster := cfg.Cluster
			if configuredCluster == "" {
				log.Debug("Setting cluster to default; none configured")
				configuredCluster = config.DefaultClusterName
			}
			if previousCluster != configuredCluster {
				log.Criticalf("Data mismatch; saved cluster '%v' does not match configured cluster '%v'. Perhaps you want to delete the configured checkpoint file?", previousCluster, configuredCluster)
				return exitcodes.ExitTerminal
			}
			cfg.Cluster = previousCluster
			log.Infof("Restored cluster '%v'", cfg.Cluster)
		}

		if instanceIdentityDoc, err := ec2MetadataClient.InstanceIdentityDocument(); err == nil {
			currentEc2InstanceID = instanceIdentityDoc.InstanceId
		} else {
			log.Criticalf("Unable to access EC2 Metadata service to determine EC2 ID: %v", err)
		}

		if previousEc2InstanceID != "" && previousEc2InstanceID != currentEc2InstanceID {
			log.Warnf("Data mismatch; saved InstanceID '%s' does not match current InstanceID '%s'. Overwriting old datafile", previousEc2InstanceID, currentEc2InstanceID)

			// Reset taskEngine; all the other values are still default
			taskEngine = engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
		} else {
			// Use the values we loaded if there's no issue
			containerInstanceArn = previousContainerInstanceArn
			taskEngine = previousTaskEngine
		}
	} else {
		log.Info("Checkpointing not enabled; a new container instance will be created each time the agent is run")
		taskEngine = engine.NewTaskEngine(cfg, dockerClient, credentialsManager, containerChangeEventStream, imageManager, state)
	}

	stateManager, err := initializeStateManager(cfg, taskEngine, &cfg.Cluster, &containerInstanceArn, &currentEc2InstanceID)
	if err != nil {
		log.Criticalf("Error creating state manager: %v", err)
		return exitcodes.ExitTerminal
	}

	capabilities := taskEngine.Capabilities()

	// We instantiate our own credentialProvider for use in acs/tcs. This tries
	// to mimic roughly the way it's instantiated by the SDK for a default
	// session.
	credentialProvider := defaults.CredChain(defaults.Config(), defaults.Handlers())
	// Preflight request to make sure they're good
	if preflightCreds, err := credentialProvider.Get(); err != nil || preflightCreds.AccessKeyID == "" {
		log.Warnf("Error getting valid credentials (AKID %s): %v", preflightCreds.AccessKeyID, err)
	}
	client := api.NewECSClient(credentialProvider, cfg, httpclient.New(api.RoundtripTimeout, *acceptInsecureCert), ec2MetadataClient)

	if containerInstanceArn == "" {
		log.Info("Registering Instance with ECS")
		containerInstanceArn, err = client.RegisterContainerInstance("", capabilities)
		if err != nil {
			log.Errorf("Error registering: %v", err)
			if retriable, ok := err.(utils.Retriable); ok && !retriable.Retry() {
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
		log.Infof("Registration completed successfully. I am running as '%s' in cluster '%s'", containerInstanceArn, cfg.Cluster)
		// Save our shiny new containerInstanceArn
		stateManager.Save()
	} else {
		log.Infof("Restored from checkpoint file. I am running as '%s' in cluster '%s'", containerInstanceArn, cfg.Cluster)
		_, err = client.RegisterContainerInstance(containerInstanceArn, capabilities)
		if err != nil {
			log.Errorf("Error re-registering: %v", err)
			if awserr, ok := err.(awserr.Error); ok && api.IsInstanceTypeChangedError(awserr) {
				log.Criticalf("The current instance type does not match the registered instance type. Please revert the instance type change, or alternatively launch a new instance. Error: %v", err)
				return exitcodes.ExitTerminal
			}
			return exitcodes.ExitError
		}
	}

	// Begin listening to the docker daemon and saving changes
	taskEngine.SetSaver(stateManager)
	imageManager.SetSaver(stateManager)
	taskEngine.MustInit()

	// start of the periodic image cleanup process
	if !cfg.ImageCleanupDisabled {
		go imageManager.StartImageCleanupProcess(ctx)
	}

	go sighandlers.StartTerminationHandler(stateManager, taskEngine)

	// Agent introspection api
	go handlers.ServeHttp(&containerInstanceArn, taskEngine, cfg)

	// Start serving the endpoint to fetch IAM Role credentials
	go credentialshandler.ServeHttp(credentialsManager, containerInstanceArn, cfg)

	// Start sending events to the backend
	go eventhandler.HandleEngineEvents(taskEngine, client, stateManager)

	deregisterInstanceEventStream := eventstream.NewEventStream(DeregisterContainerInstanceEventStream, ctx)
	deregisterInstanceEventStream.StartListening()

	telemetrySessionParams := tcshandler.TelemetrySessionParams{
		ContainerInstanceArn: containerInstanceArn,
		CredentialProvider:   credentialProvider,
		Cfg:                  cfg,
		DeregisterInstanceEventStream: deregisterInstanceEventStream,
		ContainerChangeEventStream:    containerChangeEventStream,
		DockerClient:                  dockerClient,
		AcceptInvalidCert:             *acceptInsecureCert,
		EcsClient:                     client,
		TaskEngine:                    taskEngine,
	}

	// Start metrics session in a go routine
	go tcshandler.StartMetricsSession(telemetrySessionParams)

	log.Info("Beginning Polling for updates")
	err = acshandler.StartSession(ctx, acshandler.StartSessionArguments{
		AcceptInvalidCert: *acceptInsecureCert,
		Config:            cfg,
		DeregisterInstanceEventStream: deregisterInstanceEventStream,
		ContainerInstanceArn:          containerInstanceArn,
		CredentialProvider:            credentialProvider,
		ECSClient:                     client,
		StateManager:                  stateManager,
		TaskEngine:                    taskEngine,
		CredentialsManager:            credentialsManager,
	})
	if err != nil {
		log.Criticalf("Unretriable error starting communicating with ACS: %v", err)
		return exitcodes.ExitTerminal
	}
	log.Critical("ACS Session handler should never exit")
	return exitcodes.ExitError
}
func TestStateManager(t *testing.T) {
	tmpDir, err := ioutil.TempDir("/tmp", "ecs_statemanager_test")
	if err != nil {
		t.Fatal(err)
	}
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(tmpDir)
	cfg := &config.Config{DataDir: tmpDir}
	manager, err := statemanager.NewStateManager(cfg)
	if err != nil {
		t.Fatal("Error loading manager", err)
	}

	err = manager.Load()
	if err != nil {
		t.Error("Expected loading a non-existant file to not be an error")
	}

	// Now let's make some state to save
	containerInstanceArn := ""
	taskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil, nil)

	manager, err = statemanager.NewStateManager(cfg, statemanager.AddSaveable("TaskEngine", taskEngine), statemanager.AddSaveable("ContainerInstanceArn", &containerInstanceArn))
	if err != nil {
		t.Fatal(err)
	}

	containerInstanceArn = "containerInstanceArn"

	testTask := &api.Task{Arn: "test-arn"}
	taskEngine.(*engine.DockerTaskEngine).State().AddTask(testTask)

	err = manager.Save()
	if err != nil {
		t.Fatal("Error saving state", err)
	}

	// Now make sure we can load that state sanely
	loadedTaskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil, nil)
	var loadedContainerInstanceArn string

	manager, err = statemanager.NewStateManager(cfg, statemanager.AddSaveable("TaskEngine", &loadedTaskEngine), statemanager.AddSaveable("ContainerInstanceArn", &loadedContainerInstanceArn))
	if err != nil {
		t.Fatal(err)
	}

	err = manager.Load()
	if err != nil {
		t.Fatal("Error loading state", err)
	}

	if loadedContainerInstanceArn != containerInstanceArn {
		t.Error("Did not load containerInstanceArn correctly; got ", loadedContainerInstanceArn, " instead of ", containerInstanceArn)
	}

	if !engine_testutils.DockerTaskEnginesEqual(loadedTaskEngine.(*engine.DockerTaskEngine), (taskEngine.(*engine.DockerTaskEngine))) {
		t.Error("Did not load taskEngine correctly")
	}

	// I'd rather double check .Equal there; let's make sure ListTasks agrees.
	tasks, err := loadedTaskEngine.ListTasks()
	if err != nil {
		t.Error("Error listing tasks", err)
	}
	if len(tasks) != 1 {
		t.Error("Should have a task!")
	} else {
		if tasks[0].Arn != "test-arn" {
			t.Error("Wrong arn, expected test-arn but got ", tasks[0].Arn)
		}
	}
}
func TestStatsEngineWithDockerTaskEngineMissingRemoveEvent(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integ test in short mode")
	}

	containerChangeEventStream := eventStream("TestStatsEngineWithDockerTaskEngine")
	taskEngine := ecsengine.NewTaskEngine(&config.Config{}, nil, nil, containerChangeEventStream, nil, dockerstate.NewDockerTaskEngineState())

	container, err := createGremlin(client)
	if err != nil {
		t.Fatalf("Error creating container: %v", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	containers := []*api.Container{
		&api.Container{
			Name:        "gremlin",
			KnownStatus: api.ContainerStopped,
		},
	}
	testTask := api.Task{
		Arn:           "gremlin-task",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*ecsengine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(
		&api.DockerContainer{
			DockerId:   container.ID,
			DockerName: "gremlin",
			Container:  containers[0],
		},
		&testTask)

	// Create a new docker stats engine
	// TODO make dockerStatsEngine not a singleton object
	dockerStatsEngine = nil
	statsEngine := NewDockerStatsEngine(&cfg, dockerClient, containerChangeEventStream)
	err = statsEngine.MustInit(taskEngine, defaultCluster, defaultContainerInstance)
	if err != nil {
		t.Errorf("Error initializing stats engine: %v", err)
	}
	defer statsEngine.removeAll()
	defer statsEngine.containerChangeEventStream.Unsubscribe(containerChangeHandler)

	err = client.StartContainer(container.ID, nil)
	defer client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Errorf("Error starting container: %s, err: %v", container.ID, err)
	}

	err = containerChangeEventStream.WriteToEventStream(ecsengine.DockerContainerChangeEvent{
		Status: api.ContainerRunning,
		DockerContainerMetadata: ecsengine.DockerContainerMetadata{
			DockerId: container.ID,
		},
	})
	if err != nil {
		t.Errorf("Failed to write to container change event stream err: %v", err)
	}

	// Wait for the stats collection go routine to start.
	time.Sleep(checkPointSleep)
	err = client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Fatalf("Error stopping container: %s, err: %v", container.ID, err)
	}
	err = client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	if err != nil {
		t.Fatalf("Error removing container: %s, err: %v", container.ID, err)
	}

	time.Sleep(checkPointSleep)

	// Simulate tcs client invoking GetInstanceMetrics.
	_, _, err = statsEngine.GetInstanceMetrics()
	if err == nil {
		t.Fatalf("Expected error 'no task metrics tp report' when getting instance metrics")
	}

	// Should not contain any metrics after cleanup.
	err = validateIdleContainerMetrics(statsEngine)
	if err != nil {
		t.Fatalf("Error validating idle metrics: %v", err)
	}
}
func TestStatsEngineWithDockerTaskEngine(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integ test in short mode")
	}
	taskEngine := engine.NewTaskEngine(&config.Config{}, nil, nil)
	container, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    container.ID,
		Force: true,
	})
	unmappedContainer, err := createGremlin(client)
	if err != nil {
		t.Fatal("Error creating container", err)
	}
	defer client.RemoveContainer(docker.RemoveContainerOptions{
		ID:    unmappedContainer.ID,
		Force: true,
	})
	containers := []*api.Container{
		&api.Container{
			Name: "gremlin",
		},
	}
	testTask := api.Task{
		Arn:           "gremlin-task",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(
		&api.DockerContainer{
			DockerId:   container.ID,
			DockerName: "gremlin",
			Container:  containers[0],
		},
		&testTask)
	statsEngine := NewDockerStatsEngine(&cfg, dockerClient)
	err = statsEngine.MustInit(taskEngine, defaultCluster, defaultContainerInstance)
	if err != nil {
		t.Error("Error initializing stats engine: ", err)
	}
	defer statsEngine.unsubscribeContainerEvents()

	err = client.StartContainer(container.ID, nil)
	defer client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", container.ID, " error: ", err)
	}

	err = client.StartContainer(unmappedContainer.ID, nil)
	defer client.StopContainer(unmappedContainer.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error starting container: ", unmappedContainer.ID, " error: ", err)
	}

	// Wait for the stats collection go routine to start.
	time.Sleep(checkPointSleep)

	metadata, taskMetrics, err := statsEngine.GetInstanceMetrics()
	if err != nil {
		t.Error("Error gettting instance metrics: ", err)
	}

	if len(taskMetrics) != 1 {
		t.Error("Incorrect number of tasks. Expected: 1, got: ", len(taskMetrics))
	}
	err = validateMetricsMetadata(metadata)
	if err != nil {
		t.Error("Error validating metadata: ", err)
	}

	err = validateContainerMetrics(taskMetrics[0].ContainerMetrics, 1)
	if err != nil {
		t.Error("Error validating container metrics: ", err)
	}

	err = client.StopContainer(container.ID, defaultDockerTimeoutSeconds)
	if err != nil {
		t.Error("Error stopping container: ", container.ID, " error: ", err)
	}

	time.Sleep(waitForCleanupSleep)

	// Should not contain any metrics after cleanup.
	err = validateIdleContainerMetrics(statsEngine)
	if err != nil {
		t.Fatal("Error validating idle metrics: ", err)
	}
}
func TestServeHttp(t *testing.T) {
	taskEngine := engine.NewTaskEngine(&config.Config{})
	containers := []*api.Container{
		&api.Container{
			Name: "c1",
		},
	}
	testTask := api.Task{
		Arn:           "task1",
		DesiredStatus: api.TaskRunning,
		KnownStatus:   api.TaskRunning,
		Family:        "test",
		Version:       "1",
		Containers:    containers,
	}
	// Populate Tasks and Container map in the engine.
	dockerTaskEngine, _ := taskEngine.(*engine.DockerTaskEngine)
	dockerTaskEngine.State().AddTask(&testTask)
	dockerTaskEngine.State().AddContainer(&api.DockerContainer{DockerId: "docker1", DockerName: "someName", Container: containers[0]}, &testTask)
	go ServeHttp(utils.Strptr(TestContainerInstanceArn), taskEngine, &config.Config{Cluster: TestClusterArn})

	body := getResponseBodyFromLocalHost("/v1/metadata", t)
	var metadata MetadataResponse
	json.Unmarshal(body, &metadata)

	if metadata.Cluster != TestClusterArn {
		t.Error("Metadata returned the wrong cluster arn")
	}
	if *metadata.ContainerInstanceArn != TestContainerInstanceArn {
		t.Error("Metadata returned the wrong cluster arn")
	}
	var tasksResponse TasksResponse
	body = getResponseBodyFromLocalHost("/v1/tasks", t)
	json.Unmarshal(body, &tasksResponse)
	tasks := tasksResponse.Tasks

	if len(tasks) != 1 {
		t.Error("Incorrect number of tasks in response: ", len(tasks))
	}
	if tasks[0].Arn != "task1" {
		t.Error("Incorrect task arn in response: ", tasks[0].Arn)
	}
	containersResponse := tasks[0].Containers
	if len(containersResponse) != 1 {
		t.Error("Incorrect number of containers in response: ", len(containersResponse))
	}
	if containersResponse[0].Name != "c1" {
		t.Error("Incorrect container name in response: ", containersResponse[0].Name)
	}
	var taskResponse TaskResponse
	body = getResponseBodyFromLocalHost("/v1/tasks?dockerid=docker1", t)
	json.Unmarshal(body, &taskResponse)
	if taskResponse.Arn != "task1" {
		t.Error("Incorrect task arn in response")
	}
	if taskResponse.Containers[0].Name != "c1" {
		t.Error("Incorrect task arn in response")
	}

	resp, err := http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?dockerid=docker2")
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status for invalid docker id")
	}

	body = getResponseBodyFromLocalHost("/v1/tasks?taskarn=task1", t)
	json.Unmarshal(body, &taskResponse)
	if taskResponse.Arn != "task1" {
		t.Error("Incorrect task arn in response")
	}

	resp, err = http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?taskarn=task2")
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status for invalid task id")
	}

	resp, err = http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?taskarn=")
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status for invalid task id")
	}

	resp, err = http.Get("http://localhost:" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT) + "/v1/tasks?taskarn=task1&dockerid=docker1")
	if err != nil {
		t.Fatal(err)
	}
	if resp.StatusCode != 400 {
		t.Error("API did not return bad request status when both dockerid and taskarn are specified.")
	}
}