Example #1
func TestRetryWithBackoff(t *testing.T) {
	test_time := ttime.NewTestTime()
	test_time.LudicrousSpeed(true)
	ttime.SetTime(test_time)

	start := ttime.Now()

	counter := 3
	RetryWithBackoff(NewSimpleBackoff(100*time.Millisecond, 100*time.Millisecond, 0, 1), func() error {
		if counter == 0 {
			return nil
		}
		counter--
		return errors.New("err")
	})
	if counter != 0 {
		t.Error("Counter didn't go to 0; didn't get retried enough")
	}
	testTime := ttime.Since(start)

	if testTime.Seconds() < .29 || testTime.Seconds() > .31 {
		t.Error("Retry didn't backoff for as long as expected")
	}

	start = ttime.Now()
	RetryWithBackoff(NewSimpleBackoff(10*time.Second, 20*time.Second, 0, 2), func() error {
		return NewRetriableError(NewRetriable(false), errors.New("can't retry"))
	})

	if ttime.Since(start).Seconds() > .1 {
		t.Error("Retry for the trivial function took too long")
	}
}
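The test above pins down RetryWithBackoff only through its timing and error behavior. A minimal sketch of the loop those assertions imply is shown below; the Backoff and Retriable shapes and the use of ttime.Sleep are assumptions inferred from the test, not the package's actual definitions.

// Assumed shapes, inferred from how the test calls the helpers; not the real definitions.
type Backoff interface {
	Duration() time.Duration
}

type Retriable interface {
	Retry() bool
}

// Hypothetical sketch of the retry loop TestRetryWithBackoff exercises.
func retryWithBackoffSketch(backoff Backoff, fn func() error) error {
	for {
		err := fn()
		if err == nil {
			return nil
		}
		// Errors explicitly marked non-retriable end the loop immediately,
		// which is why the second call above returns in well under 100ms.
		if retriable, ok := err.(Retriable); ok && !retriable.Retry() {
			return err
		}
		// Sleeping through the ttime clock lets LudicrousSpeed fast-forward it.
		ttime.Sleep(backoff.Duration())
	}
}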
Example #2
func (task *managedTask) cleanupTask() {
	cleanupTime := ttime.After(task.KnownStatusTime.Add(taskStoppedDuration).Sub(ttime.Now()))
	cleanupTimeBool := make(chan bool)
	go func() {
		<-cleanupTime
		cleanupTimeBool <- true
		close(cleanupTimeBool)
	}()
	for !task.waitEvent(cleanupTimeBool) {
	}
	log.Debug("Cleaning up task's containers and data", "task", task.Task)

	// First make an attempt to cleanup resources
	task.engine.sweepTask(task.Task)
	task.engine.state.RemoveTask(task.Task)
	// Now remove ourselves from the global state and cleanup channels
	task.engine.processTasks.Lock()
	delete(task.engine.managedTasks, task.Arn)
	task.engine.processTasks.Unlock()
	task.engine.saver.Save()

	// Cleanup any leftover messages before closing their channels. No new
	// messages possible because we deleted ourselves from managedTasks, so this
	// removes all stale ones
	task.discardPendingMessages()

	close(task.dockerMessages)
	close(task.acsMessages)
}
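discardPendingMessages is called above but not shown in this excerpt. A plausible sketch, assuming it only drains whatever is already queued on the two channels before they are closed:

// Hypothetical sketch of discardPendingMessages: drain both channels
// non-blockingly and return as soon as they are empty.
func (task *managedTask) discardPendingMessagesSketch() {
	for {
		select {
		case <-task.dockerMessages:
		case <-task.acsMessages:
		default:
			return
		}
	}
}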
func TestServerExceptionRetries(t *testing.T) {
	ctrl, client, mockRoundTripper := setup(t)
	defer ctrl.Finish()

	timesCalled := 0
	// This resp.Body song and dance is because it *must* be reset between
	// retries for the SDK to behave sanely; the SDK rewinds request bodies, not
	// response bodies. A real server would, of course, send a new body each
	// time, so this is not a bad thing to do.
	resp := operationErrorResp(500, `{"__type":"BadStuffHappenedException","message":"something went wrong"}`)
	mockRoundTripper.EXPECT().RoundTrip(mock_http.NewHTTPOperationMatcher(versionedOperation("DiscoverPollEndpoint"))).AnyTimes().Do(func(_ interface{}) {
		timesCalled++
		resp.Body = operationErrorResp(500, `{"__type":"BadStuffHappenedException","message":"something went wrong"}`).Body
	}).Return(resp, nil).AnyTimes()

	start := ttime.Now()
	_, err := client.DiscoverPollEndpoint("foo")
	if err == nil {
		t.Error("Expected it to error after retrying")
	}
	duration := ttime.Since(start)
	if duration < 100*time.Millisecond {
		t.Error("Retries should have taken some time; took " + duration.String())
	}
	if timesCalled < 2 || timesCalled > 10 {
		// Actually 4 at the time of writing, but a reasonable range is fine
		t.Error("Retries should happen a reasonable number of times")
	}
}
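operationErrorResp is used by these tests but not defined in this excerpt; the sketch below assumes it simply wraps the JSON payload in an *http.Response with the given status code so the mocked RoundTripper can return it.

// Hypothetical stand-in for operationErrorResp.
func operationErrorRespSketch(status int, body string) *http.Response {
	return &http.Response{
		StatusCode: status,
		Header:     http.Header{},
		// A fresh Body on every call, since the SDK drains it on each retry.
		Body: ioutil.NopCloser(strings.NewReader(body)),
	}
}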
Example #4
func TestRetryNWithBackoff(t *testing.T) {
	test_time := ttime.NewTestTime()
	test_time.LudicrousSpeed(true)
	ttime.SetTime(test_time)

	start := ttime.Now()

	counter := 3
	err := RetryNWithBackoff(NewSimpleBackoff(100*time.Millisecond, 100*time.Millisecond, 0, 1), 2, func() error {
		counter--
		return errors.New("err")
	})
	if counter != 1 {
		t.Error("Should have stopped after two tries")
	}
	if err == nil {
		t.Error("Should have returned appropriate error")
	}
	testTime := ttime.Since(start)
	// Expect that it tried twice, sleeping once between them
	if testTime.Seconds() < 0.09 || testTime.Seconds() > 0.11 {
		t.Errorf("Retry didn't backoff for as long as expected: %v", testTime.Seconds())
	}

	start = ttime.Now()
	counter = 3
	err = RetryNWithBackoff(NewSimpleBackoff(100*time.Millisecond, 100*time.Millisecond, 0, 1), 5, func() error {
		counter--
		if counter == 0 {
			return nil
		}
		return errors.New("err")
	})
	testTime = ttime.Since(start)
	if counter != 0 {
		t.Errorf("Counter expected to be 0, was %v", counter)
	}
	if err != nil {
		t.Errorf("Expected no error, got %v", err)
	}
	// 3 tries; 2 backoffs
	if testTime.Seconds() < 0.190 || testTime.Seconds() > 0.210 {
		t.Errorf("Retry didn't backoff for as long as expected: %v", testTime.Seconds())
	}
}
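The timings asserted above (one backoff for two attempts, two backoffs for three) suggest RetryNWithBackoff sleeps between attempts but not after the last one. A sketch consistent with that behavior, reusing the assumed Backoff shape from the earlier sketch:

// Hypothetical sketch of RetryNWithBackoff: at most n attempts, backing off
// between attempts, returning nil on the first success or the last error seen.
func retryNWithBackoffSketch(backoff Backoff, n int, fn func() error) error {
	var err error
	for i := 0; i < n; i++ {
		err = fn()
		if err == nil {
			return nil
		}
		if i < n-1 {
			ttime.Sleep(backoff.Duration())
		}
	}
	return err
}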
func (task *managedTask) cleanupTask(taskStoppedDuration time.Duration) {
	cleanupTimeDuration := task.GetKnownStatusTime().Add(taskStoppedDuration).Sub(ttime.Now())
	// There is a potential deadlock here if cleanupTimeDuration is negative. Ignore the
	// computed value in this case in favor of the default config value.
	if cleanupTimeDuration < 0 {
		log.Debug("Task Cleanup Duration is too short. Resetting to " + config.DefaultTaskCleanupWaitDuration.String())
		cleanupTimeDuration = config.DefaultTaskCleanupWaitDuration
	}
	cleanupTime := task.time().After(cleanupTimeDuration)
	cleanupTimeBool := make(chan bool)
	go func() {
		<-cleanupTime
		cleanupTimeBool <- true
		close(cleanupTimeBool)
	}()
	for !task.waitEvent(cleanupTimeBool) {
	}
	log.Debug("Cleaning up task's containers and data", "task", task.Task)

	// For the duration of this, simply discard any task events; this ensures the
	// speedy processing of other events for other tasks
	handleCleanupDone := make(chan struct{})
	go func() {
		task.engine.sweepTask(task.Task)
		task.engine.state.RemoveTask(task.Task)
		handleCleanupDone <- struct{}{}
	}()
	// discard events while the task is being removed from engine state
	task.discardEventsUntil(handleCleanupDone)
	log.Debug("Finished removing task data; removing from state no longer managing", "task", task.Task)
	// Now remove ourselves from the global state and cleanup channels
	go task.discardEventsUntil(handleCleanupDone) // keep discarding events until the task is fully gone
	task.engine.processTasks.Lock()
	delete(task.engine.managedTasks, task.Arn)
	handleCleanupDone <- struct{}{}
	task.engine.processTasks.Unlock()
	task.engine.saver.Save()

	// Cleanup any leftover messages before closing their channels. No new
	// messages possible because we deleted ourselves from managedTasks, so this
	// removes all stale ones
	task.discardPendingMessages()

	close(task.dockerMessages)
	close(task.acsMessages)
}
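discardEventsUntil is likewise not shown here; a plausible sketch, assuming it keeps draining the task's channels until cleanup signals completion on the done channel:

// Hypothetical sketch of discardEventsUntil: throw away docker and ACS
// messages until the done channel fires.
func (task *managedTask) discardEventsUntilSketch(done chan struct{}) {
	for {
		select {
		case <-task.dockerMessages:
		case <-task.acsMessages:
		case <-done:
			return
		}
	}
}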
func TestSubmitRetries(t *testing.T) {
	ctrl, client, mockRoundTripper := setup(t)
	defer ctrl.Finish()

	timesCalled := 0
	resp := operationErrorResp(500, `{"__type":"SubmitContainerStateChangeException","message":"something broke horribly"}`)
	mockRoundTripper.EXPECT().RoundTrip(mock_http.NewHTTPOperationMatcher(versionedOperation("SubmitContainerStateChange"))).AnyTimes().Do(func(_ interface{}) {
		timesCalled++
		resp.Body = operationErrorResp(500, `{"__type":"SubmitContainerStateChangeException","message":"something broke horribly"}`).Body
	}).Return(resp, nil)

	start := ttime.Now()
	err := client.SubmitContainerStateChange(ContainerStateChange{ContainerName: "foo", TaskArn: "bar", Status: ContainerRunning})
	if err == nil {
		t.Fatal("Expected it to error after retrying")
	}
	duration := ttime.Since(start)
	if duration < 23*time.Hour || duration > 25*time.Hour {
		t.Fatal("Retries should have taken roughly 24 hours; took " + duration.String())
	}
	if timesCalled < 10 {
		t.Fatal("Expected to be called many times")
	}
}
func (task *Task) updateKnownStatusTime() {
	task.knownStatusTimeLock.Lock()
	defer task.knownStatusTimeLock.Unlock()

	task.KnownStatusTime = ttime.Now()
}
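The second cleanupTask above calls task.GetKnownStatusTime(); a minimal sketch of the read-side accessor, assuming it guards the same knownStatusTimeLock used by updateKnownStatusTime:

// Hypothetical read-side counterpart to updateKnownStatusTime.
func (task *Task) getKnownStatusTimeSketch() time.Time {
	task.knownStatusTimeLock.Lock()
	defer task.knownStatusTimeLock.Unlock()

	return task.KnownStatusTime
}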
Example #8
func (u *updater) stageUpdateHandler() func(req *ecsacs.StageUpdateMessage) {
	return func(req *ecsacs.StageUpdateMessage) {
		u.Lock()
		defer u.Unlock()

		if req == nil || req.MessageId == nil {
			log.Error("Nil request to stage update or missing MessageID")
			return
		}

		nack := func(reason string) {
			seelog.Errorf("Nacking StageUpdate; reason: %s", reason)
			u.acs.MakeRequest(&ecsacs.NackRequest{
				Cluster:           req.ClusterArn,
				ContainerInstance: req.ContainerInstanceArn,
				MessageId:         req.MessageId,
				Reason:            aws.String(reason),
			})
			u.reset()
		}

		if !u.config.UpdatesEnabled {
			nack("Updates are disabled")
			return
		}

		if err := validateUpdateInfo(req.UpdateInfo); err != nil {
			nack("Invalid update: " + err.Error())
			return
		}

		log.Debug("Staging update", "update", req)

		if u.stage != updateNone {
			if u.updateID != "" && u.updateID == *req.UpdateInfo.Signature {
				log.Debug("Update already in progress, acking duplicate message", "id", u.updateID)
				// Acking here is safe as any currently-downloading update will already be holding
				// the update lock.  A failed download will nack and clear state (while holding the
				// update lock) before this code is reached, meaning that the above conditional will
				// not evaluate true (no matching, in-progress update).
				u.acs.MakeRequest(&ecsacs.AckRequest{
					Cluster:           req.ClusterArn,
					ContainerInstance: req.ContainerInstanceArn,
					MessageId:         req.MessageId,
				})
				return
			} else {
				// Nack previous update
				reason := "New update arrived: " + *req.MessageId
				u.acs.MakeRequest(&ecsacs.NackRequest{
					Cluster:           req.ClusterArn,
					ContainerInstance: req.ContainerInstanceArn,
					MessageId:         &u.downloadMessageID,
					Reason:            &reason,
				})
			}
		}
		u.updateID = *req.UpdateInfo.Signature
		u.stage = updateDownloading
		u.stageTime = ttime.Now()
		u.downloadMessageID = *req.MessageId

		err := u.download(req.UpdateInfo)
		if err != nil {
			nack("Unable to download: " + err.Error())
			return
		}

		u.stage = updateDownloaded

		u.acs.MakeRequest(&ecsacs.AckRequest{
			Cluster:           req.ClusterArn,
			ContainerInstance: req.ContainerInstanceArn,
			MessageId:         req.MessageId,
		})
	}
}
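The nack closure above ends with u.reset(); a sketch of the state it presumably clears, based only on the fields this handler touches (stage, updateID, stageTime, downloadMessageID):

// Hypothetical sketch of updater.reset: return the updater to the idle state
// so a later StageUpdateMessage starts from a clean slate.
func (u *updater) resetSketch() {
	u.updateID = ""
	u.downloadMessageID = ""
	u.stage = updateNone
	u.stageTime = time.Time{}
}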
Example #9
func (t *Task) SetKnownStatus(status TaskStatus) {
	t.KnownStatus = status
	t.KnownStatusTime = ttime.Now()
}
Example #10
func TestTaskFromACS(t *testing.T) {
	test_time := ttime.Now().Truncate(1 * time.Second).Format(time.RFC3339)

	intptr := func(i int64) *int64 {
		return &i
	}
	boolptr := func(b bool) *bool {
		return &b
	}
	// Testing type conversions, bleh. At least the type conversion itself
	// doesn't look this messy.
	taskFromAcs := ecsacs.Task{
		Arn:           strptr("myArn"),
		DesiredStatus: strptr("RUNNING"),
		Family:        strptr("myFamily"),
		Version:       strptr("1"),
		Containers: []*ecsacs.Container{
			&ecsacs.Container{
				Name:        strptr("myName"),
				Cpu:         intptr(10),
				Command:     []*string{strptr("command"), strptr("command2")},
				EntryPoint:  []*string{strptr("sh"), strptr("-c")},
				Environment: map[string]*string{"key": strptr("value")},
				Essential:   boolptr(true),
				Image:       strptr("image:tag"),
				Links:       []*string{strptr("link1"), strptr("link2")},
				Memory:      intptr(100),
				MountPoints: []*ecsacs.MountPoint{
					&ecsacs.MountPoint{
						ContainerPath: strptr("/container/path"),
						ReadOnly:      boolptr(true),
						SourceVolume:  strptr("sourceVolume"),
					},
				},
				Overrides: strptr(`{"command":["a","b","c"]}`),
				PortMappings: []*ecsacs.PortMapping{
					&ecsacs.PortMapping{
						HostPort:      intptr(800),
						ContainerPort: intptr(900),
						Protocol:      strptr("udp"),
					},
				},
				VolumesFrom: []*ecsacs.VolumeFrom{
					&ecsacs.VolumeFrom{
						ReadOnly:        boolptr(true),
						SourceContainer: strptr("volumeLink"),
					},
				},
				DockerConfig: &ecsacs.DockerConfig{
					Config:     strptr("config json"),
					HostConfig: strptr("hostconfig json"),
					Version:    strptr("version string"),
				},
			},
		},
		Volumes: []*ecsacs.Volume{
			&ecsacs.Volume{
				Name: strptr("volName"),
				Host: &ecsacs.HostVolumeProperties{
					SourcePath: strptr("/host/path"),
				},
			},
		},
		RoleCredentials: &ecsacs.IAMRoleCredentials{
			CredentialsId:   strptr("credsId"),
			AccessKeyId:     strptr("keyId"),
			Expiration:      strptr(test_time),
			RoleArn:         strptr("roleArn"),
			SecretAccessKey: strptr("OhhSecret"),
			SessionToken:    strptr("sessionToken"),
		},
	}
	expectedTask := &Task{
		Arn:           "myArn",
		DesiredStatus: TaskRunning,
		Family:        "myFamily",
		Version:       "1",
		Containers: []*Container{
			&Container{
				Name:        "myName",
				Image:       "image:tag",
				Command:     []string{"command", "command2"},
				Links:       []string{"link1", "link2"},
				EntryPoint:  &[]string{"sh", "-c"},
				Essential:   true,
				Environment: map[string]string{"key": "value"},
				Cpu:         10,
				Memory:      100,
				MountPoints: []MountPoint{
					MountPoint{
						ContainerPath: "/container/path",
						ReadOnly:      true,
						SourceVolume:  "sourceVolume",
					},
				},
				Overrides: ContainerOverrides{
					Command: &[]string{"a", "b", "c"},
				},
				Ports: []PortBinding{
					PortBinding{
						HostPort:      800,
						ContainerPort: 900,
						Protocol:      TransportProtocolUDP,
					},
				},
				VolumesFrom: []VolumeFrom{
					VolumeFrom{
						ReadOnly:        true,
						SourceContainer: "volumeLink",
					},
				},
				DockerConfig: DockerConfig{
					Config:     strptr("config json"),
					HostConfig: strptr("hostconfig json"),
					Version:    strptr("version string"),
				},
			},
		},
		Volumes: []TaskVolume{
			TaskVolume{
				Name: "volName",
				Volume: &FSHostVolume{
					FSSourcePath: "/host/path",
				},
			},
		},
		StartSequenceNumber: 42,
	}

	seqNum := int64(42)
	task, err := TaskFromACS(&taskFromAcs, &ecsacs.PayloadMessage{SeqNum: &seqNum})
	if err != nil {
		t.Fatalf("Should be able to handle acs task: %v", err)
	}
	if !reflect.DeepEqual(task.Containers, expectedTask.Containers) {
		t.Fatal("Containers should be equal")
	}
	if !reflect.DeepEqual(task.Volumes, expectedTask.Volumes) {
		t.Fatal("Volumes should be equal")
	}
	if !reflect.DeepEqual(task.StartSequenceNumber, expectedTask.StartSequenceNumber) {
		t.Fatal("StartSequenceNumber should be equal")
	}
	if !reflect.DeepEqual(task.StopSequenceNumber, expectedTask.StopSequenceNumber) {
		t.Fatal("StopSequenceNumber should be equal")
	}
}
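strptr is used throughout this test but defined elsewhere in the package; it presumably mirrors the inline intptr and boolptr helpers:

// Presumed shape of the strptr helper used above.
func strptr(s string) *string {
	return &s
}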
func TestDockerStopTimeout(t *testing.T) {
	os.Setenv("ECS_CONTAINER_STOP_TIMEOUT", testDockerStopTimeout.String())
	defer os.Unsetenv("ECS_CONTAINER_STOP_TIMEOUT")
	cfg := defaultTestConfig()

	taskEngine, done, _ := setup(cfg, t)

	dockerTaskEngine := taskEngine.(*DockerTaskEngine)

	if dockerTaskEngine.cfg.DockerStopTimeout != testDockerStopTimeout {
		t.Errorf("Expect the docker stop timeout read from environment variable when ECS_CONTAINER_STOP_TIMEOUT is set, %v", dockerTaskEngine.cfg.DockerStopTimeout)
	}
	testTask := createTestTask("TestDockerStopTimeout")
	testTask.Containers = append(testTask.Containers, createTestContainer())
	testTask.Containers[0].Command = []string{"sh", "-c", "while true; do echo `date +%T`; sleep 1s; done;"}
	testTask.Containers[0].Image = testBusyboxImage
	testTask.Containers[0].Name = "test-docker-timeout"

	taskEvents, contEvents := dockerTaskEngine.TaskEvents()
	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		for {
			select {
			case <-taskEvents:
			case <-ctx.Done():
				return
			}
		}
	}()
	defer func() {
		done()
		cancel()
	}()

	go dockerTaskEngine.AddTask(testTask)

	for contEvent := range contEvents {
		if contEvent.TaskArn != testTask.Arn {
			continue
		}
		if contEvent.Status == api.ContainerRunning {
			break
		}
		if contEvent.Status > api.ContainerRunning {
			t.Error("Expect container to run not stop")
		}
	}

	startTime := ttime.Now()
	dockerTaskEngine.stopContainer(testTask, testTask.Containers[0])
	for contEvent := range contEvents {
		if contEvent.TaskArn != testTask.Arn {
			continue
		}
		if contEvent.Status == api.ContainerStopped {
			break
		}
		if contEvent.Status > api.ContainerStopped {
			t.Error("Expected container to stop")
		}
	}
	if ttime.Since(startTime) < testDockerStopTimeout {
		t.Errorf("Container stopped before the timeout: %v", ttime.Since(startTime))
	}
	if ttime.Since(startTime) > testDockerStopTimeout+1*time.Second {
		t.Errorf("Container should have stopped eariler, but stopped after %v", ttime.Since(startTime))
	}
}