func (c *ec2MetadataClientImpl) ReadResource(path string) ([]byte, error) {
	endpoint := c.ResourceServiceUrl(path)

	var err error
	var resp *http.Response
	utils.RetryNWithBackoff(utils.NewSimpleBackoff(metadataRetryStartDelay, metadataRetryMaxDelay, metadataRetryDelayMultiple, 0.2), metadataRetries, func() error {
		resp, err = c.client.Get(endpoint)
		if err == nil && resp.StatusCode == 200 {
			return nil
		}
		if resp != nil && resp.Body != nil {
			resp.Body.Close()
		}
		if err == nil {
			seelog.Warnf("Error accessing the EC2 Metadata Service; non-200 response: %v", resp.StatusCode)
			// Assign to the captured err so that a non-200 response on the
			// final attempt is not mistaken for success below.
			err = fmt.Errorf("Error contacting EC2 Metadata service; non-200 response: %v", resp.StatusCode)
			return err
		}
		seelog.Warnf("Error accessing the EC2 Metadata Service; retrying: %v", err)
		return err
	})
	if resp != nil && resp.Body != nil {
		defer resp.Body.Close()
	}
	if err != nil {
		return nil, err
	}

	return ioutil.ReadAll(resp.Body)
}
Example #2
// ServeHttp serves IAM Role Credentials for Tasks being managed by the agent.
func ServeHttp(credentialsManager credentials.Manager, containerInstanceArn string, cfg *config.Config) {
	// Create and initialize the audit log
	// TODO Use seelog's programmatic configuration instead of xml.
	logger, err := log.LoggerFromConfigAsString(audit.AuditLoggerConfig(cfg))
	if err != nil {
		log.Errorf("Error initializing the audit log: %v", err)
		// If the logger cannot be initialized, use the provided dummy seelog.LoggerInterface, seelog.Disabled.
		logger = log.Disabled
	}

	auditLogger := audit.NewAuditLog(containerInstanceArn, cfg, logger)

	server := setupServer(credentialsManager, auditLogger)

	for {
		utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
			// TODO, make this cancellable and use the passed in context; for
			// now, not critical if this gets interrupted
			err := server.ListenAndServe()
			if err != nil {
				log.Errorf("Error running http api: %v", err)
			}
			return err
		})
	}
}
// TestHandlerReconnectsCorrectlySetsSendCredentialsURLParameter tests if
// the 'sendCredentials' URL parameter is set correctly for successive
// invocations of startACSSession
func TestHandlerReconnectsCorrectlySetsSendCredentialsURLParameter(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	taskEngine := engine.NewMockTaskEngine(ctrl)
	ecsClient := mock_api.NewMockECSClient(ctrl)
	statemanager := statemanager.NewNoopStateManager()

	ctx, cancel := context.WithCancel(context.Background())
	mockWsClient := mock_wsclient.NewMockClientServer(ctrl)

	args := StartSessionArguments{
		ContainerInstanceArn: "myArn",
		CredentialProvider:   credentials.AnonymousCredentials,
		Config:               &config.Config{Cluster: "someCluster"},
		TaskEngine:           taskEngine,
		ECSClient:            ecsClient,
		StateManager:         statemanager,
		AcceptInvalidCert:    true,
		_heartbeatTimeout:    20 * time.Millisecond,
		_heartbeatJitter:     10 * time.Millisecond,
	}
	session := newSessionResources(args)

	mockWsClient.EXPECT().SetAnyRequestHandler(gomock.Any()).AnyTimes()
	mockWsClient.EXPECT().AddRequestHandler(gomock.Any()).AnyTimes()
	mockWsClient.EXPECT().Close().Return(nil).AnyTimes()
	mockWsClient.EXPECT().Serve().Return(io.EOF).AnyTimes()
	gomock.InOrder(
		// When the websocket client connects to ACS for the first
		// time, 'sendCredentials' should be set to true
		mockWsClient.EXPECT().Connect().Do(func() {
			validateSendCredentialsInSession(t, session, "true")
		}).Return(nil),
		// For all subsequent connections to ACS, 'sendCredentials'
		// should be set to false
		mockWsClient.EXPECT().Connect().Do(func() {
			validateSendCredentialsInSession(t, session, "false")
		}).Return(nil).AnyTimes(),
	)

	backoff := utils.NewSimpleBackoff(connectionBackoffMin, connectionBackoffMax, connectionBackoffJitter, connectionBackoffMultiplier)
	timer := newDisconnectionTimer(mockWsClient, args.time(), args.heartbeatTimeout(), args.heartbeatJitter())
	defer timer.Stop()
	go func() {
		for i := 0; i < 10; i++ {
			startACSSession(ctx, mockWsClient, timer, args, backoff, session)
		}
		cancel()
	}()

	// Wait for context to be cancelled
	<-ctx.Done()
}
Example #4
// StartSession creates a session with the backend and handles requests
// using the passed in arguments.
// The engine is expected to be initialized and gathering container metrics by
// the time the websocket client starts using it.
func StartSession(params TelemetrySessionParams, statsEngine stats.Engine) error {
	backoff := utils.NewSimpleBackoff(time.Second, 1*time.Minute, 0.2, 2)
	for {
		tcsError := startTelemetrySession(params, statsEngine)
		if tcsError == nil || tcsError == io.EOF {
			backoff.Reset()
		} else {
			log.Info("Error from tcs; backing off", "err", tcsError)
			ttime.Sleep(backoff.Duration())
		}
	}
}
// TestHandlerReconnectsOnServeErrors tests if the handler retries to
// establish the session with ACS when ClientServer.Serve() returns errors
func TestHandlerReconnectsOnServeErrors(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	taskEngine := engine.NewMockTaskEngine(ctrl)
	taskEngine.EXPECT().Version().Return("Docker: 1.5.0", nil).AnyTimes()

	ecsClient := mock_api.NewMockECSClient(ctrl)
	ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).AnyTimes()

	statemanager := statemanager.NewNoopStateManager()

	ctx, cancel := context.WithCancel(context.Background())
	mockWsClient := mock_wsclient.NewMockClientServer(ctrl)
	mockWsClient.EXPECT().SetAnyRequestHandler(gomock.Any()).AnyTimes()
	mockWsClient.EXPECT().AddRequestHandler(gomock.Any()).AnyTimes()
	mockWsClient.EXPECT().Connect().Return(nil).AnyTimes()
	mockWsClient.EXPECT().Close().Return(nil).AnyTimes()
	gomock.InOrder(
		// Serve fails 10 times
		mockWsClient.EXPECT().Serve().Return(io.EOF).Times(10),
		// Cancel trying to Serve ACS requests on the 11th attempt
		// Failure to retry on Serve() errors should cause the
		// test to time out as the context is never cancelled
		mockWsClient.EXPECT().Serve().Do(func() {
			cancel()
		}).Return(io.EOF),
	)
	session := &mockSession{mockWsClient}

	args := StartSessionArguments{
		ContainerInstanceArn: "myArn",
		CredentialProvider:   credentials.AnonymousCredentials,
		Config:               &config.Config{Cluster: "someCluster"},
		TaskEngine:           taskEngine,
		ECSClient:            ecsClient,
		StateManager:         statemanager,
		AcceptInvalidCert:    true,
		_heartbeatTimeout:    20 * time.Millisecond,
		_heartbeatJitter:     10 * time.Millisecond,
	}
	backoff := utils.NewSimpleBackoff(connectionBackoffMin, connectionBackoffMax, connectionBackoffJitter, connectionBackoffMultiplier)
	go func() {
		startSession(ctx, args, backoff, session)
	}()

	// Wait for context to be cancelled
	<-ctx.Done()
}
Example #6
func ServeHttp(containerInstanceArn *string, taskEngine engine.TaskEngine, cfg *config.Config) {
	serverFunctions := map[string]func(w http.ResponseWriter, r *http.Request){
		"/v1/metadata": MetadataV1RequestHandlerMaker(containerInstanceArn, cfg),
		"/v1/tasks":    TasksV1RequestHandlerMaker(taskEngine),
		"/license":     LicenseHandler,
	}

	paths := make([]string, 0, len(serverFunctions))
	for path := range serverFunctions {
		paths = append(paths, path)
	}
	availableCommands := &RootResponse{paths}
	// Autogenerated list of the above serverFunctions paths
	availableCommandResponse, _ := json.Marshal(&availableCommands)

	defaultHandler := func(w http.ResponseWriter, r *http.Request) {
		w.Write(availableCommandResponse)
	}

	serverMux := http.NewServeMux()
	serverMux.HandleFunc("/", defaultHandler)
	for key, fn := range serverFunctions {
		serverMux.HandleFunc(key, fn)
	}

	// Log all requests and then pass through to serverMux
	loggingServeMux := http.NewServeMux()
	loggingServeMux.Handle("/", LoggingHandler{serverMux})

	server := http.Server{
		Addr:         ":" + strconv.Itoa(config.AGENT_INTROSPECTION_PORT),
		Handler:      loggingServeMux,
		ReadTimeout:  5 * time.Second,
		WriteTimeout: 5 * time.Second,
	}

	for {
		once := sync.Once{}
		utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
			// TODO, make this cancellable and use the passed in context; for
			// now, not critical if this gets interrupted
			err := server.ListenAndServe()
			once.Do(func() {
				log.Error("Error running http api", "err", err)
			})
			return err
		})
	}
}
// TestConnectionIsClosedOnIdle tests if the connection to ACS is closed
// when the channel is idle
func TestConnectionIsClosedOnIdle(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	taskEngine := engine.NewMockTaskEngine(ctrl)
	taskEngine.EXPECT().Version().Return("Docker: 1.5.0", nil).AnyTimes()

	ecsClient := mock_api.NewMockECSClient(ctrl)
	statemanager := statemanager.NewNoopStateManager()

	mockWsClient := mock_wsclient.NewMockClientServer(ctrl)
	mockWsClient.EXPECT().SetAnyRequestHandler(gomock.Any()).Do(func(v interface{}) {}).AnyTimes()
	mockWsClient.EXPECT().AddRequestHandler(gomock.Any()).Do(func(v interface{}) {}).AnyTimes()
	mockWsClient.EXPECT().Connect().Return(nil)
	mockWsClient.EXPECT().Serve().Do(func() {
		// Pretend as if the maximum heartbeatTimeout duration has
		// been breached while Serving requests
		time.Sleep(30 * time.Millisecond)
	}).Return(io.EOF)

	connectionClosed := make(chan bool)
	mockWsClient.EXPECT().Close().Do(func() {
		// Record connection closed
		connectionClosed <- true
	}).Return(nil)
	ctx := context.Background()
	backoff := utils.NewSimpleBackoff(connectionBackoffMin, connectionBackoffMax, connectionBackoffJitter, connectionBackoffMultiplier)
	args := StartSessionArguments{
		ContainerInstanceArn: "myArn",
		CredentialProvider:   credentials.AnonymousCredentials,
		Config:               &config.Config{Cluster: "someCluster"},
		TaskEngine:           taskEngine,
		ECSClient:            ecsClient,
		StateManager:         statemanager,
		AcceptInvalidCert:    true,
		_heartbeatTimeout:    20 * time.Millisecond,
		_heartbeatJitter:     10 * time.Millisecond,
	}
	go func() {
		timer := newDisconnectionTimer(mockWsClient, args.time(), args.heartbeatTimeout(), args.heartbeatJitter())
		defer timer.Stop()
		startACSSession(ctx, mockWsClient, timer, args, backoff, &mockSession{})
	}()

	// Wait for connection to be closed. If the connection is not closed
	// due to inactivity, the test will time out
	<-connectionClosed
}
// MustInit blocks and retries until an engine can be initialized.
func (engine *DockerTaskEngine) MustInit() {
	if engine.client != nil {
		return
	}

	errorOnce := sync.Once{}
	taskEngineConnectBackoff := utils.NewSimpleBackoff(200*time.Millisecond, 2*time.Second, 0.20, 1.5)
	utils.RetryWithBackoff(taskEngineConnectBackoff, func() error {
		err := engine.Init()
		if err != nil {
			errorOnce.Do(func() {
				log.Error("Could not connect to docker daemon", "err", err)
			})
		}
		return err
	})
}
// ServeHttp serves information about this agent / containerInstance and tasks
// running on it.
func ServeHttp(containerInstanceArn *string, taskEngine engine.TaskEngine, cfg *config.Config) {
	// Is this the right level to type assert, assuming we'd abstract multiple taskengines here?
	// Revisit if we ever add another type.
	dockerTaskEngine := taskEngine.(*engine.DockerTaskEngine)

	server := setupServer(containerInstanceArn, dockerTaskEngine, cfg)
	for {
		once := sync.Once{}
		utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
			// TODO, make this cancellable and use the passed in context; for
			// now, not critical if this gets interrupted
			err := server.ListenAndServe()
			once.Do(func() {
				log.Error("Error running http api", "err", err)
			})
			return err
		})
	}
}
Example #10
// StartSession creates a session with the backend and handles requests
// using the passed in arguments.
// The engine is expected to be initialized and gathering container metrics by
// the time the websocket client starts using it.
func StartSession(params TelemetrySessionParams, statsEngine stats.Engine) error {
	backoff := utils.NewSimpleBackoff(time.Second, 1*time.Minute, 0.2, 2)
	for {
		tcsEndpoint, err := params.EcsClient.DiscoverTelemetryEndpoint(params.ContainerInstanceArn)
		if err != nil {
			log.Error("Unable to discover poll endpoint", "err", err)
			return err
		}
		log.Debug("Connecting to TCS endpoint " + tcsEndpoint)
		url := formatURL(tcsEndpoint, params.Cfg.Cluster, params.ContainerInstanceArn)
		tcsError := startSession(url, params.Cfg.AWSRegion, params.CredentialProvider, params.AcceptInvalidCert, statsEngine, defaultPublishMetricsInterval)
		if tcsError == nil || tcsError == io.EOF {
			backoff.Reset()
		} else {
			log.Info("Error from tcs; backing off", "err", tcsError)
			ttime.Sleep(backoff.Duration())
		}
	}
}
Example #11
// StartSession creates a session with ACS and handles requests from ACS.
// It creates the resources required by the package scoped 'startSession()'
// method and invokes it to reconnect to ACS whenever the connection is dropped
func StartSession(ctx context.Context, args StartSessionArguments) error {
	backoff := utils.NewSimpleBackoff(connectionBackoffMin, connectionBackoffMax, connectionBackoffJitter, connectionBackoffMultiplier)
	session := newSessionResources(args)
	return startSession(ctx, args, backoff, session)
}
// SubmitTaskEvents continuously retries sending an event until it succeeds,
// sleeping between each attempt
func SubmitTaskEvents(events *eventList, client api.ECSClient) {
	backoff := utils.NewSimpleBackoff(1*time.Second, 30*time.Second, 0.20, 1.3)

	// Mirror events.sending, but without the need to lock since this is local
	// to our goroutine
	done := false

	for !done {
		// If we looped back up here, we successfully submitted an event, but
		// we haven't emptied the list so we should keep submitting
		backoff.Reset()
		utils.RetryWithBackoff(backoff, func() error {
			// Lock and unlock within this function, allowing the list to be added
			// to while we're not actively sending an event
			log.Debug("Waiting on semaphore to send...")
			handler.submitSemaphore.Wait()
			defer handler.submitSemaphore.Post()

			log.Debug("Aquiring lock for sending event...")
			events.Lock()
			defer events.Unlock()
			log.Debug("Aquired lock!")

			var err utils.RetriableError

			if events.Len() == 0 {
				log.Debug("No events left; not retrying more")

				events.sending = false
				done = true
				return nil
			}

			eventToSubmit := events.Front()
			event := eventToSubmit.Value.(*sendableEvent)
			llog := log.New("event", event)

			if event.containerShouldBeSent() {
				llog.Info("Sending container change", "change", event.containerChange)
				err = client.SubmitContainerStateChange(event.containerChange)
				if err == nil || !err.Retry() {
					// submitted or can't be retried; ensure we don't retry it
					event.containerSent = true
					if event.containerChange.SentStatus != nil {
						*event.containerChange.SentStatus = event.containerChange.Status
					}
					statesaver.Save()
					if err != nil {
						llog.Error("Unretriable error submitting container state change", "err", err)
					} else {
						llog.Debug("Submitted container")
					}
					events.Remove(eventToSubmit)
			} // else, leave the event on the list and retry it on the next pass
			} else if event.taskShouldBeSent() {
				llog.Info("Sending task change", "change", event.taskChange)
				err = client.SubmitTaskStateChange(event.taskChange)
				if err == nil || !err.Retry() {
					// submitted or can't be retried; ensure we don't retry it
					event.taskSent = true
					if event.taskChange.SentStatus != nil {
						*event.taskChange.SentStatus = event.taskChange.Status
					}
					statesaver.Save()
					if err != nil {
						llog.Error("Unretriable error submitting container state change", "err", err)
					} else {
						llog.Debug("Submitted container")
						backoff.Reset()
					}
					events.Remove(eventToSubmit)
				}
			} else {
				// Shouldn't be sent as either a task or container change event; must have been already sent
				llog.Info("Not submitting redundant event; just removing")
				events.Remove(eventToSubmit)
			}

			if events.Len() == 0 {
				llog.Debug("Removed the last element, no longer sending")
				events.sending = false
				done = true
				return nil
			}

			return err
		})
	}
}
Example #13
// StartSession creates a session with ACS and handles requests using the passed
// in arguments.
func StartSession(ctx context.Context, args StartSessionArguments) error {
	ecsclient := args.ECSClient
	cfg := args.Config
	backoff := utils.NewSimpleBackoff(connectionBackoffMin, connectionBackoffMax, connectionBackoffJitter, connectionBackoffMultiplier)

	payloadBuffer := make(chan *ecsacs.PayloadMessage, payloadMessageBufferSize)
	ackBuffer := make(chan string, payloadMessageBufferSize)

	go func() {
		// Handle any payloads async. For correctness, they must be handled in order, hence the buffered channel which is added to synchronously.
		for {
			select {
			case payload := <-payloadBuffer:
				handlePayloadMessage(ackBuffer, cfg.Cluster, args.ContainerInstanceArn, payload, args.TaskEngine, ecsclient, args.StateManager)
			case <-ctx.Done():
				return
			}
		}
	}()

	for {
		acsError := func() error {
			acsEndpoint, err := ecsclient.DiscoverPollEndpoint(args.ContainerInstanceArn)
			if err != nil {
				log.Error("Unable to discover poll endpoint", "err", err)
				return err
			}
			log.Debug("Connecting to ACS endpoint " + acsEndpoint)

			url := AcsWsUrl(acsEndpoint, cfg.Cluster, args.ContainerInstanceArn, args.TaskEngine)

			clearStrChannel(ackBuffer)
			client := acsclient.New(url, cfg.AWSRegion, args.CredentialProvider, args.AcceptInvalidCert)
			defer client.Close()
			// Clear the ack buffer whenever we get a new client because acks of
			// message ids don't have any value across sessions
			defer clearStrChannel(ackBuffer)

			timer := ttime.AfterFunc(utils.AddJitter(heartbeatTimeout, heartbeatJitter), func() {
				log.Warn("ACS Connection hasn't had any activity for too long; closing connection")
				closeErr := client.Close()
				if closeErr != nil {
					log.Warn("Error disconnecting: " + closeErr.Error())
				}
			})
			defer timer.Stop()
			// Any message from the server resets the disconnect timeout
			client.SetAnyRequestHandler(anyMessageHandler(timer))
			client.AddRequestHandler(payloadMessageHandler(payloadBuffer))
			// Ignore heartbeat messages; anyMessageHandler gets 'em
			client.AddRequestHandler(func(*ecsacs.HeartbeatMessage) {})

			updater.AddAgentUpdateHandlers(client, cfg, args.StateManager, args.TaskEngine)

			err = client.Connect()
			if err != nil {
				log.Error("Error connecting to ACS: " + err.Error())
				return err
			}
			ttime.AfterFunc(utils.AddJitter(heartbeatTimeout, heartbeatJitter), func() {
				// If we do not have an error connecting and remain connected for at
				// least 5 or so minutes, reset the backoff. This prevents infrequent
				// disconnect errors from inflating the reconnect backoff
				// unnecessarily.
				backoff.Reset()
			})

			serveErr := make(chan error, 1)
			go func() {
				serveErr <- client.Serve()
			}()

			for {
				select {
				case mid := <-ackBuffer:
					ackMessageId(client, cfg.Cluster, args.ContainerInstanceArn, mid)
				case <-ctx.Done():
					return ctx.Err()
				case err := <-serveErr:
					return err
				}
			}
		}()

		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		if acsError == nil || acsError == io.EOF {
			backoff.Reset()
		} else {
			log.Info("Error from acs; backing off", "err", acsError)
			ttime.Sleep(backoff.Duration())
		}
	}
}
// TestHandlerReconnectsOnDiscoverPollEndpointError tests if the handler
// retries to establish the session with ACS on DiscoverPollEndpoint errors
func TestHandlerReconnectsOnDiscoverPollEndpointError(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	taskEngine := engine.NewMockTaskEngine(ctrl)
	taskEngine.EXPECT().Version().Return("Docker: 1.5.0", nil).AnyTimes()

	ecsClient := mock_api.NewMockECSClient(ctrl)
	statemanager := statemanager.NewNoopStateManager()

	ctx, cancel := context.WithCancel(context.Background())

	mockWsClient := mock_wsclient.NewMockClientServer(ctrl)
	mockWsClient.EXPECT().SetAnyRequestHandler(gomock.Any()).AnyTimes()
	mockWsClient.EXPECT().AddRequestHandler(gomock.Any()).AnyTimes()
	mockWsClient.EXPECT().Connect().Return(nil).AnyTimes()
	mockWsClient.EXPECT().Close().Return(nil).AnyTimes()
	mockWsClient.EXPECT().Serve().Do(func() {
		// Serve() cancels the context
		cancel()
	}).Return(io.EOF)

	session := &mockSession{mockWsClient}

	gomock.InOrder(
		// DiscoverPollEndpoint returns an error on its first invocation
		ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return("", fmt.Errorf("oops")).Times(1),
		// Second invocation returns a success
		ecsClient.EXPECT().DiscoverPollEndpoint(gomock.Any()).Return(acsURL, nil).Times(1),
	)
	args := StartSessionArguments{
		ContainerInstanceArn: "myArn",
		CredentialProvider:   credentials.AnonymousCredentials,
		Config:               &config.Config{Cluster: "someCluster"},
		TaskEngine:           taskEngine,
		ECSClient:            ecsClient,
		StateManager:         statemanager,
		AcceptInvalidCert:    true,
		_heartbeatTimeout:    20 * time.Millisecond,
		_heartbeatJitter:     10 * time.Millisecond,
	}
	backoff := utils.NewSimpleBackoff(connectionBackoffMin, connectionBackoffMax, connectionBackoffJitter, connectionBackoffMultiplier)
	go func() {
		startSession(ctx, args, backoff, session)
	}()
	start := time.Now()

	// Wait for context to be cancelled
	<-ctx.Done()

	// Measure the duration between retries
	timeSinceStart := time.Since(start)
	if timeSinceStart < connectionBackoffMin {
		t.Errorf("Duration since start is less than minimum threshold for backoff: %s", timeSinceStart.String())
	}

	// The upper limit here should really be connectionBackoffMin + (connectionBackoffMin * jitter),
	// but it can be off by a few milliseconds to account for the execution of other instructions.
	// In any case, it should never be higher than 2*connectionBackoffMin.
	if timeSinceStart > 2*connectionBackoffMin {
		t.Errorf("Duration since start is greater than maximum anticipated wait time: %v", timeSinceStart.String())
	}
}