func TestOldTasksBatch(t *testing.T) { gd, err := startDispatcher(DefaultConfig()) assert.NoError(t, err) defer gd.Close() var expectedSessionID string var nodeID string { stream, err := gd.Clients[0].Session(context.Background(), &api.SessionRequest{}) assert.NoError(t, err) defer stream.CloseSend() resp, err := stream.Recv() assert.NoError(t, err) assert.NotEmpty(t, resp.SessionID) expectedSessionID = resp.SessionID nodeID = resp.Node.ID } testTask1 := &api.Task{ NodeID: nodeID, ID: "testTask1", Status: api.TaskStatus{State: api.TaskStateAssigned}, } testTask2 := &api.Task{ NodeID: nodeID, ID: "testTask2", Status: api.TaskStatus{State: api.TaskStateAssigned}, } stream, err := gd.Clients[0].Tasks(context.Background(), &api.TasksRequest{SessionID: expectedSessionID}) assert.NoError(t, err) resp, err := stream.Recv() assert.NoError(t, err) // initially no tasks assert.Equal(t, 0, len(resp.Tasks)) err = gd.Store.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, testTask1)) assert.NoError(t, store.CreateTask(tx, testTask2)) return nil }) assert.NoError(t, err) err = gd.Store.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteTask(tx, testTask1.ID)) assert.NoError(t, store.DeleteTask(tx, testTask2.ID)) return nil }) assert.NoError(t, err) resp, err = stream.Recv() assert.NoError(t, err) // all tasks have been deleted assert.Equal(t, len(resp.Tasks), 0) }
func TestLogBrokerNoFollowUnscheduledTask(t *testing.T) { ctx, ca, _, serverAddr, _, done := testLogBrokerEnv(t) defer done() client, clientDone := testLogClient(t, serverAddr) defer clientDone() // Create fake environment. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "task1", ServiceID: "service", }) })) // Subscribe to logs in no follow mode logs, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: false, }, Selector: &api.LogSelector{ ServiceIDs: []string{"service"}, }, }) require.NoError(t, err) // Ensure we receive the message that we could grab _, err = logs.Recv() require.Error(t, err) require.Contains(t, err.Error(), "task1 has not been scheduled") }
func TestTaskUpdateNoCert(t *testing.T) { gd, err := startDispatcher(DefaultConfig()) assert.NoError(t, err) defer gd.Close() testTask1 := &api.Task{ ID: "testTask1", } err = gd.Store.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, testTask1)) return nil }) assert.NoError(t, err) testTask1.Status = api.TaskStatus{State: api.TaskStateAssigned} updReq := &api.UpdateTaskStatusRequest{ Updates: []*api.UpdateTaskStatusRequest_TaskStatusUpdate{ { TaskID: testTask1.ID, Status: &testTask1.Status, }, }, } // without correct SessionID should fail resp, err := gd.Clients[2].UpdateTaskStatus(context.Background(), updReq) assert.Nil(t, resp) assert.Error(t, err) assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request") }
func TestSchedulerNoReadyNodes(t *testing.T) { ctx := context.Background() initialTask := &api.Task{ ID: "id1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial task assert.NoError(t, store.CreateTask(tx, initialTask)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() failure := watchAssignmentFailure(t, watch) assert.Equal(t, "no suitable node", failure.Status.Message) err = s.Update(func(tx store.Tx) error { // Create a ready node. The task should get assigned to this // node. node := &api.Node{ ID: "newnode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "newnode", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, node)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "newnode", assignment.NodeID) }
func (u *Updater) updateTask(ctx context.Context, slot slot, updated *api.Task) error { // Kick off the watch before even creating the updated task. This is in order to avoid missing any event. taskUpdates, cancel := state.Watch(u.watchQueue, state.EventUpdateTask{ Task: &api.Task{ID: updated.ID}, Checks: []state.TaskCheckFunc{state.TaskCheckID}, }) defer cancel() var delayStartCh <-chan struct{} // Atomically create the updated task and bring down the old one. _, err := u.store.Batch(func(batch *store.Batch) error { err := batch.Update(func(tx store.Tx) error { if err := store.CreateTask(tx, updated); err != nil { return err } return nil }) if err != nil { return err } u.removeOldTasks(ctx, batch, slot) for _, t := range slot { if t.DesiredState == api.TaskStateRunning { // Wait for the old task to stop or time out, and then set the new one // to RUNNING. delayStartCh = u.restarts.DelayStart(ctx, nil, t, updated.ID, 0, true) break } } return nil }) if err != nil { return err } if delayStartCh != nil { <-delayStartCh } // Wait for the new task to come up. // TODO(aluzzardi): Consider adding a timeout here. for { select { case e := <-taskUpdates: updated = e.(state.EventUpdateTask).Task if updated.Status.State >= api.TaskStateRunning { return nil } case <-u.stopChan: return nil } } }
// AttachNetwork allows the node to request the resources // allocation needed for a network attachment on the specific node. // - Returns `InvalidArgument` if the Spec is malformed. // - Returns `NotFound` if the Network is not found. // - Returns `PermissionDenied` if the Network is not manually attachable. // - Returns an error if the creation fails. func (ra *ResourceAllocator) AttachNetwork(ctx context.Context, request *api.AttachNetworkRequest) (*api.AttachNetworkResponse, error) { nodeInfo, err := ca.RemoteNode(ctx) if err != nil { return nil, err } var network *api.Network ra.store.View(func(tx store.ReadTx) { network = store.GetNetwork(tx, request.Config.Target) if network == nil { if networks, err := store.FindNetworks(tx, store.ByName(request.Config.Target)); err == nil && len(networks) == 1 { network = networks[0] } } }) if network == nil { return nil, grpc.Errorf(codes.NotFound, "network %s not found", request.Config.Target) } if !network.Spec.Attachable { return nil, grpc.Errorf(codes.PermissionDenied, "network %s not manually attachable", request.Config.Target) } t := &api.Task{ ID: identity.NewID(), NodeID: nodeInfo.NodeID, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Attachment{ Attachment: &api.NetworkAttachmentSpec{ ContainerID: request.ContainerID, }, }, Networks: []*api.NetworkAttachmentConfig{ { Target: network.ID, Addresses: request.Config.Addresses, }, }, }, Status: api.TaskStatus{ State: api.TaskStateNew, Timestamp: ptypes.MustTimestampProto(time.Now()), Message: "created", }, DesiredState: api.TaskStateRunning, // TODO: Add Network attachment. } if err := ra.store.Update(func(tx store.Tx) error { return store.CreateTask(tx, t) }); err != nil { return nil, err } return &api.AttachNetworkResponse{AttachmentID: t.ID}, nil }
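// The handler above is consumed over gRPC. A minimal caller-side sketch follows;
// it is an illustration only and assumes a generated client constructor named
// api.NewResourceAllocatorClient, which is not shown in the code above. Only the
// request/response fields the handler actually reads (ContainerID, Config.Target,
// Config.Addresses, AttachmentID) are relied upon.
func attachNetworkExample(ctx context.Context, conn *grpc.ClientConn, containerID, networkName string) (string, error) {
    client := api.NewResourceAllocatorClient(conn) // assumed generated constructor
    resp, err := client.AttachNetwork(ctx, &api.AttachNetworkRequest{
        ContainerID: containerID,
        Config: &api.NetworkAttachmentConfig{
            // Target may be a network ID or a name; the handler above resolves
            // names via store.ByName when no ID match is found.
            Target: networkName,
        },
    })
    if err != nil {
        return "", err
    }
    // AttachmentID is the ID of the attachment task created by the handler.
    return resp.AttachmentID, nil
}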
func (g *Orchestrator) addTask(ctx context.Context, batch *store.Batch, service *api.Service, nodeID string) { task := orchestrator.NewTask(g.cluster, service, 0, nodeID) err := batch.Update(func(tx store.Tx) error { return store.CreateTask(tx, task) }) if err != nil { log.G(ctx).WithError(err).Errorf("global orchestrator: failed to create task") } }
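// For reference, a minimal sketch (not taken from the code above) of how a caller
// might obtain the *store.Batch that addTask expects, using only the
// store.Batch/batch.Update pattern that appears elsewhere in this code. The helper
// name and its arguments are hypothetical.
func createTasksInBatch(s *store.MemoryStore, tasks []*api.Task) error {
    _, err := s.Batch(func(batch *store.Batch) error {
        for _, task := range tasks {
            task := task // capture the loop variable for the closure
            if err := batch.Update(func(tx store.Tx) error {
                return store.CreateTask(tx, task)
            }); err != nil {
                return err
            }
        }
        return nil
    })
    return err
}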
func (u *Updater) updateTask(ctx context.Context, original, updated *api.Task) error { log.G(ctx).Debugf("replacing %s with %s", original.ID, updated.ID) // Kick off the watch before even creating the updated task. This is in order to avoid missing any event. taskUpdates, cancel := state.Watch(u.watchQueue, state.EventUpdateTask{ Task: &api.Task{ID: updated.ID}, Checks: []state.TaskCheckFunc{state.TaskCheckID}, }) defer cancel() var delayStartCh <-chan struct{} // Atomically create the updated task and bring down the old one. err := u.store.Update(func(tx store.Tx) error { t := store.GetTask(tx, original.ID) if t == nil { return fmt.Errorf("task %s not found while trying to update it", original.ID) } if t.DesiredState > api.TaskStateRunning { return fmt.Errorf("task %s was already shut down when reached by updater", original.ID) } t.DesiredState = api.TaskStateShutdown if err := store.UpdateTask(tx, t); err != nil { return err } if err := store.CreateTask(tx, updated); err != nil { return err } // Wait for the old task to stop or time out, and then set the new one // to RUNNING. delayStartCh = u.restarts.DelayStart(ctx, tx, original, updated.ID, 0, true) return nil }) if err != nil { return err } <-delayStartCh // Wait for the new task to come up. // TODO(aluzzardi): Consider adding a timeout here. for { select { case e := <-taskUpdates: updated = e.(state.EventUpdateTask).Task if updated.Status.State >= api.TaskStateRunning { return nil } case <-u.stopChan: return nil } } }
func createTask(t *testing.T, ts *testServer, desiredState api.TaskState) *api.Task { task := &api.Task{ ID: identity.NewID(), DesiredState: desiredState, } err := ts.Store.Update(func(tx store.Tx) error { return store.CreateTask(tx, task) }) assert.NoError(t, err) return task }
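// createTask above writes through the store's Update; a matching read-side helper
// is sketched here, built only from the s.View/store.GetTask calls used elsewhere
// in this code. The helper itself is hypothetical and not part of the test suite.
func getTask(t *testing.T, s *store.MemoryStore, id string) *api.Task {
    var task *api.Task
    s.View(func(tx store.ReadTx) {
        // GetTask returns nil when no task with this ID exists.
        task = store.GetTask(tx, id)
    })
    assert.NotNil(t, task)
    return task
}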
func (r *ReplicatedOrchestrator) addTasks(ctx context.Context, batch *store.Batch, service *api.Service, runningSlots map[uint64]slot, count int) {
    slot := uint64(0)
    for i := 0; i < count; i++ {
        // Find a slot number that is missing a running task
        for {
            slot++
            if _, ok := runningSlots[slot]; !ok {
                break
            }
        }

        err := batch.Update(func(tx store.Tx) error {
            return store.CreateTask(tx, newTask(r.cluster, service, slot))
        })
        if err != nil {
            log.G(ctx).Errorf("Failed to create task: %v", err)
        }
    }
}
func (r *ReplicatedOrchestrator) addTasks(ctx context.Context, batch *store.Batch, service *api.Service, runningInstances map[uint64]struct{}, count int) { instance := uint64(0) for i := 0; i < count; i++ { // Find an instance number that is missing a running task for { instance++ if _, ok := runningInstances[instance]; !ok { break } } err := batch.Update(func(tx store.Tx) error { return store.CreateTask(tx, newTask(service, instance)) }) if err != nil { log.G(ctx).Errorf("Failed to create task: %v", err) } } }
func TestDrain(t *testing.T) {
    ctx := context.Background()
    initialService := &api.Service{
        ID: "id1",
        Spec: api.ServiceSpec{
            Annotations: api.Annotations{
                Name: "name1",
            },
            Task: api.TaskSpec{
                Runtime: &api.TaskSpec_Container{
                    Container: &api.ContainerSpec{},
                },
                Restart: &api.RestartPolicy{
                    Condition: api.RestartOnNone,
                },
            },
            Mode: &api.ServiceSpec_Replicated{
                Replicated: &api.ReplicatedService{
                    Replicas: 6,
                },
            },
        },
    }
    initialNodeSet := []*api.Node{
        {
            ID: "id1",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name1",
                },
                Availability: api.NodeAvailabilityActive,
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id2",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name2",
                },
                Availability: api.NodeAvailabilityActive,
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_DOWN,
            },
        },
        // We should NOT kick out tasks on UNKNOWN nodes.
        {
            ID: "id3",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name3",
                },
                Availability: api.NodeAvailabilityActive,
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_UNKNOWN,
            },
        },
        {
            ID: "id4",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name4",
                },
                Availability: api.NodeAvailabilityPause,
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id5",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name5",
                },
                Availability: api.NodeAvailabilityDrain,
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
    }
    initialTaskSet := []*api.Task{
        // Task not assigned to any node
        {
            ID: "id0",
            Status: api.TaskStatus{
                State: api.TaskStateNew,
            },
            Slot: 1,
            ServiceAnnotations: api.Annotations{
                Name: "name0",
            },
            ServiceID: "id1",
        },
        // Tasks assigned to the nodes defined above
        {
            ID: "id1",
            Status: api.TaskStatus{
                State: api.TaskStateNew,
            },
            Slot: 2,
            ServiceAnnotations: api.Annotations{
                Name: "name1",
            },
            ServiceID: "id1",
            NodeID:    "id1",
        },
        {
            ID: "id2",
            Status: api.TaskStatus{
                State: api.TaskStateNew,
            },
            Slot: 3,
            ServiceAnnotations: api.Annotations{
                Name: "name2",
            },
            ServiceID: "id1",
            NodeID:    "id2",
        },
        {
            ID: "id3",
            Status: api.TaskStatus{
                State: api.TaskStateNew,
            },
            Slot: 4,
            ServiceAnnotations: api.Annotations{
                Name: "name3",
            },
            ServiceID: "id1",
            NodeID:    "id3",
        },
        {
            ID: "id4",
            Status: api.TaskStatus{
                State: api.TaskStateNew,
            },
            Slot: 5,
            ServiceAnnotations: api.Annotations{
                Name: "name4",
            },
            ServiceID: "id1",
            NodeID:    "id4",
        },
        {
            ID: "id5",
            Status: api.TaskStatus{
                State: api.TaskStateNew,
            },
            Slot: 6,
            ServiceAnnotations: api.Annotations{
                Name: "name5",
            },
            ServiceID: "id1",
            NodeID:    "id5",
        },
    }

    s := store.NewMemoryStore(nil)
    assert.NotNil(t, s)
    defer s.Close()

    err := s.Update(func(tx store.Tx) error {
        // Prepopulate service
        assert.NoError(t, store.CreateService(tx, initialService))
        // Prepopulate nodes
        for _, n := range initialNodeSet {
            assert.NoError(t, store.CreateNode(tx, n))
        }
        // Prepopulate tasks
        for _, task := range initialTaskSet {
            assert.NoError(t, store.CreateTask(tx, task))
        }
        return nil
    })
    assert.NoError(t, err)

    watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
    defer cancel()

    orchestrator := NewReplicatedOrchestrator(s)
    defer orchestrator.Stop()
    go func() {
        assert.NoError(t, orchestrator.Run(ctx))
    }()

    // id2 and id5 should be killed immediately
    deletion1 := watchShutdownTask(t, watch)
    deletion2 := watchShutdownTask(t, watch)

    assert.Regexp(t, "id(2|5)", deletion1.ID)
    assert.Regexp(t, "id(2|5)", deletion1.NodeID)
    assert.Regexp(t, "id(2|5)", deletion2.ID)
    assert.Regexp(t, "id(2|5)", deletion2.NodeID)

    // Create a new task, assigned to node id2
    err = s.Update(func(tx store.Tx) error {
        task := initialTaskSet[2].Copy()
        task.ID = "newtask"
        task.NodeID = "id2"
        assert.NoError(t, store.CreateTask(tx, task))
        return nil
    })
    assert.NoError(t, err)

    deletion3 := watchShutdownTask(t, watch)
    assert.Equal(t, "newtask", deletion3.ID)
    assert.Equal(t, "id2", deletion3.NodeID)

    // Set node id4 to the DRAINED state
    err = s.Update(func(tx store.Tx) error {
        n := initialNodeSet[3].Copy()
        n.Spec.Availability = api.NodeAvailabilityDrain
        assert.NoError(t, store.UpdateNode(tx, n))
        return nil
    })
    assert.NoError(t, err)

    deletion4 := watchShutdownTask(t, watch)
    assert.Equal(t, "id4", deletion4.ID)
    assert.Equal(t, "id4", deletion4.NodeID)

    // Delete node id1
    err = s.Update(func(tx store.Tx) error {
        assert.NoError(t, store.DeleteNode(tx, "id1"))
        return nil
    })
    assert.NoError(t, err)

    deletion5 := watchShutdownTask(t, watch)
    assert.Equal(t, "id1", deletion5.ID)
    assert.Equal(t, "id1", deletion5.NodeID)
}
func TestLogBrokerNoFollow(t *testing.T) { ctx, ca, _, serverAddr, brokerAddr, done := testLogBrokerEnv(t) defer done() client, clientDone := testLogClient(t, serverAddr) defer clientDone() agent1, agent1Security, agent1Done := testBrokerClient(t, ca, brokerAddr) defer agent1Done() agent1subscriptions := listenSubscriptions(ctx, t, agent1) agent2, agent2Security, agent2Done := testBrokerClient(t, ca, brokerAddr) defer agent2Done() agent2subscriptions := listenSubscriptions(ctx, t, agent2) // Create fake environment. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { if err := store.CreateTask(tx, &api.Task{ ID: "task1", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }); err != nil { return err } if err := store.CreateTask(tx, &api.Task{ ID: "task2", ServiceID: "service", NodeID: agent2Security.ServerTLSCreds.NodeID(), }); err != nil { return err } return nil })) // We need to sleep here to give ListenSubscriptions time to call // registerSubscription before SubscribeLogs concludes that one or both // of the agents are not connected, and prematurely calls Done for one // or both nodes. Think of these stream RPC calls as goroutines which // don't have synchronization around anything that happens in the RPC // handler before a send or receive. It would be nice if we had a way // of confirming that a node was listening for subscriptions before // calling SubscribeLogs, but the current API doesn't provide this. time.Sleep(time.Second) // Subscribe to logs in no follow mode logs, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: false, }, Selector: &api.LogSelector{ ServiceIDs: []string{"service"}, }, }) require.NoError(t, err) // Get the subscriptions from the agents. subscription1 := ensureSubscription(t, agent1subscriptions) require.Equal(t, subscription1.Selector.ServiceIDs[0], "service") subscription2 := ensureSubscription(t, agent2subscriptions) require.Equal(t, subscription2.Selector.ServiceIDs[0], "service") require.Equal(t, subscription1.ID, subscription2.ID) // Publish a log message from agent-1 and close the publisher publisher, err := agent1.PublishLogs(ctx) require.NoError(t, err) require.NoError(t, publisher.Send(&api.PublishLogsMessage{ SubscriptionID: subscription1.ID, Messages: []api.LogMessage{ newLogMessage(api.LogContext{ NodeID: agent1Security.ServerTLSCreds.NodeID(), ServiceID: "service", TaskID: "task1", }, "log message"), }, })) _, err = publisher.CloseAndRecv() require.NoError(t, err) // Ensure we get it from the other end log, err := logs.Recv() require.NoError(t, err) require.Len(t, log.Messages, 1) require.Equal(t, log.Messages[0].Context.NodeID, agent1Security.ServerTLSCreds.NodeID()) // Now publish a message from the other agent and close the subscription publisher, err = agent2.PublishLogs(ctx) require.NoError(t, err) require.NoError(t, publisher.Send(&api.PublishLogsMessage{ SubscriptionID: subscription2.ID, Messages: []api.LogMessage{ newLogMessage(api.LogContext{ NodeID: agent2Security.ServerTLSCreds.NodeID(), ServiceID: "service", TaskID: "task2", }, "log message"), }, })) _, err = publisher.CloseAndRecv() require.NoError(t, err) // Ensure we get it from the other end log, err = logs.Recv() require.NoError(t, err) require.Len(t, log.Messages, 1) require.Equal(t, log.Messages[0].Context.NodeID, agent2Security.ServerTLSCreds.NodeID()) // Since we receive both messages the log stream should end _, err = logs.Recv() require.Equal(t, err, io.EOF) }
func TestAllocator(t *testing.T) { s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() a, err := New(s) assert.NoError(t, err) assert.NotNil(t, a) // Try adding some objects to store before allocator is started assert.NoError(t, s.Update(func(tx store.Tx) error { n1 := &api.Network{ ID: "testID1", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test1", }, }, } assert.NoError(t, store.CreateNetwork(tx, n1)) s1 := &api.Service{ ID: "testServiceID1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service1", }, Task: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID1", }, }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s1)) t1 := &api.Task{ ID: "testTaskID1", Status: api.TaskStatus{ State: api.TaskStateNew, }, Networks: []*api.NetworkAttachment{ { Network: n1, }, }, } assert.NoError(t, store.CreateTask(tx, t1)) return nil })) netWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateNetwork{}, state.EventDeleteNetwork{}) defer cancel() taskWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}, state.EventDeleteTask{}) defer cancel() serviceWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateService{}, state.EventDeleteService{}) defer cancel() // Start allocator go func() { assert.NoError(t, a.Run(context.Background())) }() // Now verify if we get network and tasks updated properly watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) watchService(t, serviceWatch, false, nil) // Add new networks/tasks/services after allocator is started. assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := &api.Network{ ID: "testID2", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test2", }, }, } assert.NoError(t, store.CreateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { s2 := &api.Service{ ID: "testServiceID2", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service2", }, Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, Endpoint: &api.EndpointSpec{}, }, } assert.NoError(t, store.CreateService(tx, s2)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := &api.Task{ ID: "testTaskID2", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID2", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) // Now try adding a task which depends on a network before adding the network. 
n3 := &api.Network{ ID: "testID3", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test3", }, }, } assert.NoError(t, s.Update(func(tx store.Tx) error { t3 := &api.Task{ ID: "testTaskID3", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n3, }, }, } assert.NoError(t, store.CreateTask(tx, t3)) return nil })) // Wait for a little bit of time before adding network just to // test network is not available while task allocation is // going through time.Sleep(10 * time.Millisecond) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n3)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteTask(tx, "testTaskID3")) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { t5 := &api.Task{ ID: "testTaskID5", Spec: api.TaskSpec{ Networks: []*api.NetworkAttachmentConfig{ { Target: "testID2", }, }, }, Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, ServiceID: "testServiceID2", } assert.NoError(t, store.CreateTask(tx, t5)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, "testID3")) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, "testServiceID2")) return nil })) watchService(t, serviceWatch, false, nil) // Try to create a task with no network attachments and test // that it moves to ALLOCATED state. assert.NoError(t, s.Update(func(tx store.Tx) error { t4 := &api.Task{ ID: "testTaskID4", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t4)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) assert.NoError(t, s.Update(func(tx store.Tx) error { n2 := store.GetNetwork(tx, "testID2") require.NotEqual(t, nil, n2) assert.NoError(t, store.UpdateNetwork(tx, n2)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchNetwork(t, netWatch, true, nil) // Try updating task which is already allocated assert.NoError(t, s.Update(func(tx store.Tx) error { t2 := store.GetTask(tx, "testTaskID2") require.NotEqual(t, nil, t2) assert.NoError(t, store.UpdateTask(tx, t2)) return nil })) watchTask(t, s, taskWatch, false, isValidTask) watchTask(t, s, taskWatch, true, nil) // Try adding networks with conflicting network resources and // add task which attaches to a network which gets allocated // later and verify if task reconciles and moves to ALLOCATED. 
n4 := &api.Network{ ID: "testID4", Spec: api.NetworkSpec{ Annotations: api.Annotations{ Name: "test4", }, DriverConfig: &api.Driver{ Name: "overlay", Options: map[string]string{ "com.docker.network.driver.overlay.vxlanid_list": "328", }, }, }, } n5 := n4.Copy() n5.ID = "testID5" n5.Spec.Annotations.Name = "test5" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n4)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateNetwork(tx, n5)) return nil })) watchNetwork(t, netWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t6 := &api.Task{ ID: "testTaskID6", Status: api.TaskStatus{ State: api.TaskStateNew, }, DesiredState: api.TaskStateRunning, Networks: []*api.NetworkAttachment{ { Network: n5, }, }, } assert.NoError(t, store.CreateTask(tx, t6)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting network. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteNetwork(tx, n4.ID)) return nil })) watchNetwork(t, netWatch, false, isValidNetwork) watchTask(t, s, taskWatch, false, isValidTask) // Try adding services with conflicting port configs and add // task which is part of the service whose allocation hasn't // happened and when that happens later and verify if task // reconciles and moves to ALLOCATED. s3 := &api.Service{ ID: "testServiceID3", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "service3", }, Endpoint: &api.EndpointSpec{ Ports: []*api.PortConfig{ { Name: "http", TargetPort: 80, PublishedPort: 8080, }, }, }, }, } s4 := s3.Copy() s4.ID = "testServiceID4" s4.Spec.Annotations.Name = "service4" assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s3)) return nil })) watchService(t, serviceWatch, false, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, s4)) return nil })) watchService(t, serviceWatch, true, nil) assert.NoError(t, s.Update(func(tx store.Tx) error { t7 := &api.Task{ ID: "testTaskID7", Status: api.TaskStatus{ State: api.TaskStateNew, }, ServiceID: "testServiceID4", DesiredState: api.TaskStateRunning, } assert.NoError(t, store.CreateTask(tx, t7)) return nil })) watchTask(t, s, taskWatch, true, nil) // Now remove the conflicting service. assert.NoError(t, s.Update(func(tx store.Tx) error { assert.NoError(t, store.DeleteService(tx, s3.ID)) return nil })) watchService(t, serviceWatch, false, nil) watchTask(t, s, taskWatch, false, isValidTask) a.Stop() }
func TestUpdaterStopGracePeriod(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) // Move tasks to their desired state. watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) // Explicitly do not set task state to // DEAD to trigger StopGracePeriod if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateRunning { task.Status.State = api.TaskStateRunning return store.UpdateTask(tx, task) } return nil }) assert.NoError(t, err) } } }() var instances uint64 = 3 service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", StopGracePeriod: ptypes.DurationProto(100 * time.Millisecond), }, }, }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: instances, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateService(tx, service)) for i := uint64(0); i < instances; i++ { task := newTask(nil, service, uint64(i)) task.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) originalTasks := getRunnableServiceTasks(t, s, service) for _, task := range originalTasks { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) } before := time.Now() service.Spec.Task.GetContainer().Image = "v:2" updater := NewUpdater(s, NewRestartSupervisor(s)) // Override the default (1 minute) to speed up the test. updater.restarts.taskTimeout = 100 * time.Millisecond updater.Run(ctx, nil, service, getRunnableServiceTasks(t, s, service)) updatedTasks := getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:2", task.Spec.GetContainer().Image) } after := time.Now() // At least 100 ms should have elapsed. Only check the lower bound, // because the system may be slow and it could have taken longer. if after.Sub(before) < 100*time.Millisecond { t.Fatal("stop timeout should have elapsed") } }
func TestSchedulerResourceConstraint(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. underprovisionedNode := &api.Node{ ID: "underprovisioned", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "underprovisioned", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } // Non-ready nodes that satisfy the constraints but shouldn't be used nonready1 := &api.Node{ ID: "nonready1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "nonready1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_UNKNOWN, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 2e9, MemoryBytes: 2e9, }, }, } nonready2 := &api.Node{ ID: "nonready2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "nonready2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_UNKNOWN, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 2e9, MemoryBytes: 2e9, }, }, } initialTask := &api.Task{ ID: "id1", DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 2e9, }, }, }, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateTask(tx, initialTask)) assert.NoError(t, store.CreateNode(tx, underprovisionedNode)) assert.NoError(t, store.CreateNode(tx, nonready1)) assert.NoError(t, store.CreateNode(tx, nonready2)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() failure := watchAssignmentFailure(t, watch) assert.Equal(t, "no suitable node (2 nodes not available for new tasks; insufficient resources on 1 node)", failure.Status.Message) err = s.Update(func(tx store.Tx) error { // Create a node with enough memory. The task should get // assigned to this node. node := &api.Node{ ID: "bignode", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "bignode", }, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 4e9, MemoryBytes: 8e9, }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } assert.NoError(t, store.CreateNode(tx, node)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "bignode", assignment.NodeID) }
func TestSchedulerFaultyNode(t *testing.T) { ctx := context.Background() taskTemplate := &api.Task{ ServiceID: "service1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } node1 := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } node2 := &api.Node{ ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial nodes, and one task assigned to node id1 assert.NoError(t, store.CreateNode(tx, node1)) assert.NoError(t, store.CreateNode(tx, node2)) task1 := taskTemplate.Copy() task1.ID = "id1" task1.NodeID = "id1" task1.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() for i := 0; i != 8; i++ { // Simulate a task failure cycle newTask := taskTemplate.Copy() newTask.ID = identity.NewID() err = s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateTask(tx, newTask)) return nil }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, newTask.ID, assignment.ID) if i < 5 { // The first 5 attempts should be assigned to node id2 because // it has no replicas of the service. assert.Equal(t, "id2", assignment.NodeID) } else { // The next ones should be assigned to id1, since we'll // flag id2 as potentially faulty. assert.Equal(t, "id1", assignment.NodeID) } err = s.Update(func(tx store.Tx) error { newTask := store.GetTask(tx, newTask.ID) require.NotNil(t, newTask) newTask.Status.State = api.TaskStateFailed assert.NoError(t, store.UpdateTask(tx, newTask)) return nil }) assert.NoError(t, err) } }
func TestSchedulerResourceConstraintDeadTask(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run the task. node := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } bigTask1 := &api.Task{ DesiredState: api.TaskStateRunning, ID: "id1", Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 8e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "big", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } bigTask2 := bigTask1.Copy() bigTask2.ID = "id2" s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.CreateTask(tx, bigTask1)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() // The task fits, so it should get assigned assignment := watchAssignment(t, watch) assert.Equal(t, "id1", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) err = s.Update(func(tx store.Tx) error { // Add a second task. It shouldn't get assigned because of // resource constraints. return store.CreateTask(tx, bigTask2) }) assert.NoError(t, err) time.Sleep(100 * time.Millisecond) s.View(func(tx store.ReadTx) { tasks, err := store.FindTasks(tx, store.ByNodeID(node.ID)) assert.NoError(t, err) assert.Len(t, tasks, 1) }) err = s.Update(func(tx store.Tx) error { // The task becomes dead updatedTask := store.GetTask(tx, bigTask1.ID) updatedTask.Status.State = api.TaskStateShutdown return store.UpdateTask(tx, updatedTask) }) assert.NoError(t, err) // With the first task no longer consuming resources, the second // one can be scheduled. assignment = watchAssignment(t, watch) assert.Equal(t, "id2", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) }
func TestSchedulerResourceConstraintHA(t *testing.T) { // node 1 starts with 1 task, node 2 starts with 3 tasks. // however, node 1 only has enough memory to schedule one more task. ctx := context.Background() node1 := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ MemoryBytes: 1e9, }, }, } node2 := &api.Node{ ID: "id2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "id2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ MemoryBytes: 1e11, }, }, } taskTemplate := &api.Task{ DesiredState: api.TaskStateRunning, Spec: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{}, }, Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 5e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node1)) assert.NoError(t, store.CreateNode(tx, node2)) // preassigned tasks task1 := taskTemplate.Copy() task1.ID = "id1" task1.NodeID = "id1" task1.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task1)) task2 := taskTemplate.Copy() task2.ID = "id2" task2.NodeID = "id2" task2.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task2)) task3 := taskTemplate.Copy() task3.ID = "id3" task3.NodeID = "id2" task3.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task3)) task4 := taskTemplate.Copy() task4.ID = "id4" task4.NodeID = "id2" task4.Status.State = api.TaskStateRunning assert.NoError(t, store.CreateTask(tx, task4)) // tasks to assign task5 := taskTemplate.Copy() task5.ID = "id5" assert.NoError(t, store.CreateTask(tx, task5)) task6 := taskTemplate.Copy() task6.ID = "id6" assert.NoError(t, store.CreateTask(tx, task6)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() assignment1 := watchAssignment(t, watch) if assignment1.ID != "id5" && assignment1.ID != "id6" { t.Fatal("assignment for unexpected task") } assignment2 := watchAssignment(t, watch) if assignment1.ID == "id5" { assert.Equal(t, "id6", assignment2.ID) } else { assert.Equal(t, "id5", assignment2.ID) } if assignment1.NodeID == "id1" { assert.Equal(t, "id2", assignment2.NodeID) } else { assert.Equal(t, "id1", assignment2.NodeID) } }
func TestSchedulerPreexistingDeadTask(t *testing.T) { ctx := context.Background() // Create a ready node without enough memory to run two tasks at once. node := &api.Node{ ID: "id1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "node", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Resources: &api.Resources{ NanoCPUs: 1e9, MemoryBytes: 1e9, }, }, } deadTask := &api.Task{ DesiredState: api.TaskStateRunning, ID: "id1", NodeID: "id1", Spec: api.TaskSpec{ Resources: &api.ResourceRequirements{ Reservations: &api.Resources{ MemoryBytes: 8e8, }, }, }, ServiceAnnotations: api.Annotations{ Name: "big", }, Status: api.TaskStatus{ State: api.TaskStateShutdown, }, } bigTask2 := deadTask.Copy() bigTask2.ID = "id2" bigTask2.Status.State = api.TaskStatePending s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Add initial node and task assert.NoError(t, store.CreateNode(tx, node)) assert.NoError(t, store.CreateTask(tx, deadTask)) return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() defer scheduler.Stop() err = s.Update(func(tx store.Tx) error { // Add a second task. It should get assigned because the task // using the resources is past the running state. return store.CreateTask(tx, bigTask2) }) assert.NoError(t, err) assignment := watchAssignment(t, watch) assert.Equal(t, "id2", assignment.ID) assert.Equal(t, "id1", assignment.NodeID) }
func TestUpdater(t *testing.T) { ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) // Move tasks to their desired state. watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { for { select { case e := <-watch: task := e.(state.EventUpdateTask).Task if task.Status.State == task.DesiredState { continue } err := s.Update(func(tx store.Tx) error { task = store.GetTask(tx, task.ID) task.Status.State = task.DesiredState return store.UpdateTask(tx, task) }) assert.NoError(t, err) } } }() instances := 3 cluster := &api.Cluster{ // test cluster configuration propagation to task creation. Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: "default", }, }, } service := &api.Service{ ID: "id1", Spec: api.ServiceSpec{ Annotations: api.Annotations{ Name: "name1", }, Mode: &api.ServiceSpec_Replicated{ Replicated: &api.ReplicatedService{ Replicas: uint64(instances), }, }, Task: api.TaskSpec{ Runtime: &api.TaskSpec_Container{ Container: &api.ContainerSpec{ Image: "v:1", // This won't apply in this test because we set the old tasks to DEAD. StopGracePeriod: ptypes.DurationProto(time.Hour), }, }, }, }, } err := s.Update(func(tx store.Tx) error { assert.NoError(t, store.CreateCluster(tx, cluster)) assert.NoError(t, store.CreateService(tx, service)) for i := 0; i < instances; i++ { assert.NoError(t, store.CreateTask(tx, newTask(cluster, service, uint64(i)))) } return nil }) assert.NoError(t, err) originalTasks := getRunnableServiceTasks(t, s, service) for _, task := range originalTasks { assert.Equal(t, "v:1", task.Spec.GetContainer().Image) assert.Nil(t, task.LogDriver) // should be left alone } service.Spec.Task.GetContainer().Image = "v:2" service.Spec.Task.LogDriver = &api.Driver{Name: "tasklogdriver"} updater := NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks := getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:2", task.Spec.GetContainer().Image) assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // pick up from task } service.Spec.Task.GetContainer().Image = "v:3" cluster.Spec.DefaultLogDriver = &api.Driver{Name: "clusterlogdriver"} // make cluster default logdriver. service.Spec.Update = &api.UpdateConfig{ Parallelism: 1, } updater = NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks = getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:3", task.Spec.GetContainer().Image) assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // still pick up from task } service.Spec.Task.GetContainer().Image = "v:4" service.Spec.Task.LogDriver = nil // use cluster default now. service.Spec.Update = &api.UpdateConfig{ Parallelism: 1, Delay: *ptypes.DurationProto(10 * time.Millisecond), } updater = NewUpdater(s, NewRestartSupervisor(s)) updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service)) updatedTasks = getRunnableServiceTasks(t, s, service) for _, task := range updatedTasks { assert.Equal(t, "v:4", task.Spec.GetContainer().Image) assert.Equal(t, cluster.Spec.DefaultLogDriver, task.LogDriver) // pick up from cluster } }
func TestPreassignedTasks(t *testing.T) { ctx := context.Background() initialNodeSet := []*api.Node{ { ID: "node1", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name1", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, { ID: "node2", Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name2", }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, }, } initialTaskSet := []*api.Task{ { ID: "task1", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name1", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, }, { ID: "task2", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, NodeID: initialNodeSet[0].ID, }, { ID: "task3", DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: "name2", }, Status: api.TaskStatus{ State: api.TaskStatePending, }, NodeID: initialNodeSet[0].ID, }, } s := store.NewMemoryStore(nil) assert.NotNil(t, s) defer s.Close() err := s.Update(func(tx store.Tx) error { // Prepoulate nodes for _, n := range initialNodeSet { assert.NoError(t, store.CreateNode(tx, n)) } // Prepopulate tasks for _, task := range initialTaskSet { assert.NoError(t, store.CreateTask(tx, task)) } return nil }) assert.NoError(t, err) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) defer cancel() go func() { assert.NoError(t, scheduler.Run(ctx)) }() //preassigned tasks would be processed first assignment1 := watchAssignment(t, watch) // task2 and task3 are preassigned to node1 assert.Equal(t, assignment1.NodeID, "node1") assert.Regexp(t, assignment1.ID, "(task2|task3)") assignment2 := watchAssignment(t, watch) if assignment1.ID == "task2" { assert.Equal(t, "task3", assignment2.ID) } else { assert.Equal(t, "task2", assignment2.ID) } // task1 would be assigned to node2 because node1 has 2 tasks already assignment3 := watchAssignment(t, watch) assert.Equal(t, assignment3.ID, "task1") assert.Equal(t, assignment3.NodeID, "node2") }
func TestLogBrokerSelector(t *testing.T) { ctx, ca, _, serverAddr, brokerAddr, done := testLogBrokerEnv(t) defer done() client, clientDone := testLogClient(t, serverAddr) defer clientDone() agent1, agent1Security, agent1Done := testBrokerClient(t, ca, brokerAddr) defer agent1Done() agent1subscriptions := listenSubscriptions(ctx, t, agent1) agent2, agent2Security, agent2Done := testBrokerClient(t, ca, brokerAddr) defer agent2Done() agent2subscriptions := listenSubscriptions(ctx, t, agent2) // Subscribe to a task. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "task", }) })) _, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: true, }, Selector: &api.LogSelector{ TaskIDs: []string{"task"}, }, }) require.NoError(t, err) // Since it's not assigned to any agent, nobody should receive it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Assign the task to agent-1. Make sure it's received by agent-1 but *not* // agent-2. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { task := store.GetTask(tx, "task") require.NotNil(t, task) task.NodeID = agent1Security.ServerTLSCreds.NodeID() return store.UpdateTask(tx, task) })) ensureSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Subscribe to a service. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateService(tx, &api.Service{ ID: "service", }) })) _, err = client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: true, }, Selector: &api.LogSelector{ ServiceIDs: []string{"service"}, }, }) require.NoError(t, err) // Since there are no corresponding tasks, nobody should receive it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Create a task that does *NOT* belong to our service and assign it to node-1. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "wrong-task", ServiceID: "wrong-service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // Ensure agent-1 doesn't receive it. ensureNoSubscription(t, agent1subscriptions) // Now create another task that does belong to our service and assign it to node-1. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-1", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // Make sure agent-1 receives it... ensureSubscription(t, agent1subscriptions) // ...and agent-2 does not. ensureNoSubscription(t, agent2subscriptions) // Create another task, same as above. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-2", ServiceID: "service", NodeID: agent1Security.ServerTLSCreds.NodeID(), }) })) // agent-1 should *not* receive it anymore since the subscription was already delivered. // agent-2 should still not get it. ensureNoSubscription(t, agent1subscriptions) ensureNoSubscription(t, agent2subscriptions) // Now, create another one and assign it to agent-2. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { return store.CreateTask(tx, &api.Task{ ID: "service-task-3", ServiceID: "service", NodeID: agent2Security.ServerTLSCreds.NodeID(), }) })) // Make sure it's delivered to agent-2. 
ensureSubscription(t, agent2subscriptions) // it shouldn't do anything for agent-1. ensureNoSubscription(t, agent1subscriptions) }
func TestSchedulerPluginConstraint(t *testing.T) {
    ctx := context.Background()

    // Node1: vol plugin1
    n1 := &api.Node{
        ID: "node1_ID",
        Spec: api.NodeSpec{
            Annotations: api.Annotations{
                Name: "node1",
            },
        },
        Description: &api.NodeDescription{
            Engine: &api.EngineDescription{
                Plugins: []api.PluginDescription{
                    {
                        Type: "Volume",
                        Name: "plugin1",
                    },
                },
            },
        },
        Status: api.NodeStatus{
            State: api.NodeStatus_READY,
        },
    }

    // Node2: vol plugin1, vol plugin2
    n2 := &api.Node{
        ID: "node2_ID",
        Spec: api.NodeSpec{
            Annotations: api.Annotations{
                Name: "node2",
            },
        },
        Description: &api.NodeDescription{
            Engine: &api.EngineDescription{
                Plugins: []api.PluginDescription{
                    {
                        Type: "Volume",
                        Name: "plugin1",
                    },
                    {
                        Type: "Volume",
                        Name: "plugin2",
                    },
                },
            },
        },
        Status: api.NodeStatus{
            State: api.NodeStatus_READY,
        },
    }

    // Node3: vol plugin1, network plugin1
    n3 := &api.Node{
        ID: "node3_ID",
        Spec: api.NodeSpec{
            Annotations: api.Annotations{
                Name: "node3",
            },
        },
        Description: &api.NodeDescription{
            Engine: &api.EngineDescription{
                Plugins: []api.PluginDescription{
                    {
                        Type: "Volume",
                        Name: "plugin1",
                    },
                    {
                        Type: "Network",
                        Name: "plugin1",
                    },
                },
            },
        },
        Status: api.NodeStatus{
            State: api.NodeStatus_READY,
        },
    }

    volumeOptionsDriver := func(driver string) *api.Mount_VolumeOptions {
        return &api.Mount_VolumeOptions{
            DriverConfig: &api.Driver{
                Name: driver,
            },
        }
    }

    // Task1: vol plugin1
    t1 := &api.Task{
        ID:           "task1_ID",
        DesiredState: api.TaskStateRunning,
        Spec: api.TaskSpec{
            Runtime: &api.TaskSpec_Container{
                Container: &api.ContainerSpec{
                    Mounts: []api.Mount{
                        {
                            Source:        "testVol1",
                            Target:        "/foo",
                            Type:          api.MountTypeVolume,
                            VolumeOptions: volumeOptionsDriver("plugin1"),
                        },
                    },
                },
            },
        },
        ServiceAnnotations: api.Annotations{
            Name: "task1",
        },
        Status: api.TaskStatus{
            State: api.TaskStatePending,
        },
    }

    // Task2: vol plugin1, vol plugin2
    t2 := &api.Task{
        ID:           "task2_ID",
        DesiredState: api.TaskStateRunning,
        Spec: api.TaskSpec{
            Runtime: &api.TaskSpec_Container{
                Container: &api.ContainerSpec{
                    Mounts: []api.Mount{
                        {
                            Source:        "testVol1",
                            Target:        "/foo",
                            Type:          api.MountTypeVolume,
                            VolumeOptions: volumeOptionsDriver("plugin1"),
                        },
                        {
                            Source:        "testVol2",
                            Target:        "/foo",
                            Type:          api.MountTypeVolume,
                            VolumeOptions: volumeOptionsDriver("plugin2"),
                        },
                    },
                },
            },
        },
        ServiceAnnotations: api.Annotations{
            Name: "task2",
        },
        Status: api.TaskStatus{
            State: api.TaskStatePending,
        },
    }

    // Task3: vol plugin1, network plugin1
    t3 := &api.Task{
        ID:           "task3_ID",
        DesiredState: api.TaskStateRunning,
        Networks: []*api.NetworkAttachment{
            {
                Network: &api.Network{
                    ID: "testNwID1",
                    Spec: api.NetworkSpec{
                        Annotations: api.Annotations{
                            Name: "testVol1",
                        },
                    },
                    DriverState: &api.Driver{
                        Name: "plugin1",
                    },
                },
            },
        },
        Spec: api.TaskSpec{
            Runtime: &api.TaskSpec_Container{
                Container: &api.ContainerSpec{
                    Mounts: []api.Mount{
                        {
                            Source:        "testVol1",
                            Target:        "/foo",
                            Type:          api.MountTypeVolume,
                            VolumeOptions: volumeOptionsDriver("plugin1"),
                        },
                    },
                },
            },
        },
        ServiceAnnotations: api.Annotations{
            Name: "task2",
        },
        Status: api.TaskStatus{
            State: api.TaskStatePending,
        },
    }

    s := store.NewMemoryStore(nil)
    assert.NotNil(t, s)
    defer s.Close()

    // Add initial node and task
    err := s.Update(func(tx store.Tx) error {
        assert.NoError(t, store.CreateTask(tx, t1))
        assert.NoError(t, store.CreateNode(tx, n1))
        return nil
    })
    assert.NoError(t, err)

    scheduler := New(s)

    watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
    defer cancel()

    go func() {
        assert.NoError(t, scheduler.Run(ctx))
    }()
    defer scheduler.Stop()

    // t1 should get assigned
    assignment := watchAssignment(t, watch)
    assert.Equal(t, assignment.NodeID, "node1_ID")

    // Create t2; it should stay in the pending state because there is
    // no node with volume plugin `plugin2`
    err = s.Update(func(tx store.Tx) error {
        assert.NoError(t, store.CreateTask(tx, t2))
        return nil
    })
    assert.NoError(t, err)

    time.Sleep(100 * time.Millisecond)
    s.View(func(tx store.ReadTx) {
        task := store.GetTask(tx, "task2_ID")
        if task.Status.State >= api.TaskStateAssigned {
            t.Fatalf("task 'task2_ID' should not have been assigned to node %v", task.NodeID)
        }
    })

    // Now add the second node
    err = s.Update(func(tx store.Tx) error {
        assert.NoError(t, store.CreateNode(tx, n2))
        return nil
    })
    assert.NoError(t, err)

    // Check that t2 has been assigned
    assignment1 := watchAssignment(t, watch)
    assert.Equal(t, assignment1.ID, "task2_ID")
    assert.Equal(t, assignment1.NodeID, "node2_ID")

    // Create t3; it should stay in the pending state because there is
    // no node with network plugin `plugin1`
    err = s.Update(func(tx store.Tx) error {
        assert.NoError(t, store.CreateTask(tx, t3))
        return nil
    })
    assert.NoError(t, err)

    time.Sleep(100 * time.Millisecond)
    s.View(func(tx store.ReadTx) {
        task := store.GetTask(tx, "task3_ID")
        if task.Status.State >= api.TaskStateAssigned {
            t.Fatal("task 'task3_ID' should not have been assigned")
        }
    })

    // Now add node3
    err = s.Update(func(tx store.Tx) error {
        assert.NoError(t, store.CreateNode(tx, n3))
        return nil
    })
    assert.NoError(t, err)

    // Check that t3 has been assigned
    assignment2 := watchAssignment(t, watch)
    assert.Equal(t, assignment2.ID, "task3_ID")
    assert.Equal(t, assignment2.NodeID, "node3_ID")
}
func TestLogBrokerNoFollowMissingNode(t *testing.T) { ctx, ca, _, serverAddr, brokerAddr, done := testLogBrokerEnv(t) defer done() client, clientDone := testLogClient(t, serverAddr) defer clientDone() agent, agentSecurity, agentDone := testBrokerClient(t, ca, brokerAddr) defer agentDone() agentSubscriptions := listenSubscriptions(ctx, t, agent) // Create fake environment. // A service with one instance on a genuine node and another instance // and a node that didn't connect to the broker. require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error { if err := store.CreateTask(tx, &api.Task{ ID: "task1", ServiceID: "service", NodeID: agentSecurity.ServerTLSCreds.NodeID(), }); err != nil { return err } if err := store.CreateTask(tx, &api.Task{ ID: "task2", ServiceID: "service", NodeID: "node-2", }); err != nil { return err } return nil })) // Subscribe to logs in no follow mode logs, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{ Options: &api.LogSubscriptionOptions{ Follow: false, }, Selector: &api.LogSelector{ ServiceIDs: []string{"service"}, }, }) require.NoError(t, err) // Grab the subscription and publish a log message from the connected agent. subscription := ensureSubscription(t, agentSubscriptions) require.Equal(t, subscription.Selector.ServiceIDs[0], "service") publisher, err := agent.PublishLogs(ctx) require.NoError(t, err) require.NoError(t, publisher.Send(&api.PublishLogsMessage{ SubscriptionID: subscription.ID, Messages: []api.LogMessage{ newLogMessage(api.LogContext{ NodeID: agentSecurity.ServerTLSCreds.NodeID(), ServiceID: "service", TaskID: "task1", }, "log message"), }, })) _, err = publisher.CloseAndRecv() require.NoError(t, err) // Ensure we receive the message that we could grab log, err := logs.Recv() require.NoError(t, err) require.Len(t, log.Messages, 1) require.Equal(t, log.Messages[0].Context.NodeID, agentSecurity.ServerTLSCreds.NodeID()) // Ensure the log stream ends with an error complaining about the missing node _, err = logs.Recv() require.Error(t, err) require.Contains(t, err.Error(), "node-2 is not available") }
func benchScheduler(b *testing.B, nodes, tasks int, networkConstraints bool) { ctx := context.Background() for iters := 0; iters < b.N; iters++ { b.StopTimer() s := store.NewMemoryStore(nil) scheduler := New(s) watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}) go func() { _ = scheduler.Run(ctx) }() // Let the scheduler get started runtime.Gosched() _ = s.Update(func(tx store.Tx) error { // Create initial nodes and tasks for i := 0; i < nodes; i++ { n := &api.Node{ ID: identity.NewID(), Spec: api.NodeSpec{ Annotations: api.Annotations{ Name: "name" + strconv.Itoa(i), Labels: make(map[string]string), }, }, Status: api.NodeStatus{ State: api.NodeStatus_READY, }, Description: &api.NodeDescription{ Engine: &api.EngineDescription{}, }, } // Give every third node a special network if i%3 == 0 { n.Description.Engine.Plugins = []api.PluginDescription{ { Name: "network", Type: "Network", }, } } err := store.CreateNode(tx, n) if err != nil { panic(err) } } for i := 0; i < tasks; i++ { id := "task" + strconv.Itoa(i) t := &api.Task{ ID: id, DesiredState: api.TaskStateRunning, ServiceAnnotations: api.Annotations{ Name: id, }, Status: api.TaskStatus{ State: api.TaskStatePending, }, } if networkConstraints { t.Networks = []*api.NetworkAttachment{ { Network: &api.Network{ DriverState: &api.Driver{ Name: "network", }, }, }, } } err := store.CreateTask(tx, t) if err != nil { panic(err) } } b.StartTimer() return nil }) for i := 0; i != tasks; i++ { <-watch } scheduler.Stop() cancel() s.Close() } }
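// benchScheduler is a helper rather than a benchmark itself; Go's testing package
// only runs functions named Benchmark*. A sketch of how wrapper benchmarks could
// be declared follows. The wrapper names and the node/task counts here are
// illustrative assumptions, not taken from the code above.
func BenchmarkSchedulerSmall(b *testing.B)   { benchScheduler(b, 100, 1000, false) }
func BenchmarkSchedulerNetwork(b *testing.B) { benchScheduler(b, 100, 1000, true) }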
func TestLogBrokerNoFollowDisconnect(t *testing.T) {
    ctx, ca, _, serverAddr, brokerAddr, done := testLogBrokerEnv(t)
    defer done()

    client, clientDone := testLogClient(t, serverAddr)
    defer clientDone()

    agent1, agent1Security, agent1Done := testBrokerClient(t, ca, brokerAddr)
    defer agent1Done()
    agent1subscriptions := listenSubscriptions(ctx, t, agent1)

    agent2, agent2Security, agent2Done := testBrokerClient(t, ca, brokerAddr)
    defer agent2Done()
    agent2subscriptions := listenSubscriptions(ctx, t, agent2)

    // Create fake environment.
    require.NoError(t, ca.MemoryStore.Update(func(tx store.Tx) error {
        if err := store.CreateTask(tx, &api.Task{
            ID:        "task1",
            ServiceID: "service",
            NodeID:    agent1Security.ServerTLSCreds.NodeID(),
        }); err != nil {
            return err
        }
        if err := store.CreateTask(tx, &api.Task{
            ID:        "task2",
            ServiceID: "service",
            NodeID:    agent2Security.ServerTLSCreds.NodeID(),
        }); err != nil {
            return err
        }
        return nil
    }))

    // Subscribe to logs in no follow mode
    logs, err := client.SubscribeLogs(ctx, &api.SubscribeLogsRequest{
        Options: &api.LogSubscriptionOptions{
            Follow: false,
        },
        Selector: &api.LogSelector{
            ServiceIDs: []string{"service"},
        },
    })
    require.NoError(t, err)

    // Get the subscriptions from the agents.
    subscription1 := ensureSubscription(t, agent1subscriptions)
    require.Equal(t, subscription1.Selector.ServiceIDs[0], "service")
    subscription2 := ensureSubscription(t, agent2subscriptions)
    require.Equal(t, subscription2.Selector.ServiceIDs[0], "service")

    require.Equal(t, subscription1.ID, subscription2.ID)

    // Publish a log message from agent-1 and close the publisher
    publisher, err := agent1.PublishLogs(ctx)
    require.NoError(t, err)
    require.NoError(t, publisher.Send(&api.PublishLogsMessage{
        SubscriptionID: subscription1.ID,
        Messages: []api.LogMessage{
            newLogMessage(api.LogContext{
                NodeID:    agent1Security.ServerTLSCreds.NodeID(),
                ServiceID: "service",
                TaskID:    "task1",
            }, "log message"),
        },
    }))
    _, err = publisher.CloseAndRecv()
    require.NoError(t, err)

    // Now suddenly disconnect agent2...
    agent2Done()

    // Ensure we get the first message
    log, err := logs.Recv()
    require.NoError(t, err)
    require.Len(t, log.Messages, 1)
    require.Equal(t, log.Messages[0].Context.NodeID, agent1Security.ServerTLSCreds.NodeID())

    // ...and then an error
    _, err = logs.Recv()
    require.Error(t, err)
    require.Contains(t, err.Error(), "disconnected unexpectedly")
}
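// NOTE (editor sketch): newLogMessage, used by both log broker tests above, is
// assumed rather than shown. A stripped-down, hypothetical version that only
// fills the context and payload (the real helper may also set a stream and a
// timestamp) could be:
func newLogMessage(logCtx api.LogContext, format string, args ...interface{}) api.LogMessage {
    return api.LogMessage{
        Context: logCtx,
        Data:    []byte(fmt.Sprintf(format, args...)),
    }
}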
func TestScheduler(t *testing.T) {
    ctx := context.Background()
    initialNodeSet := []*api.Node{
        {
            ID: "id1",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name1",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id2",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name2",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id3",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name2",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
    }

    initialTaskSet := []*api.Task{
        {
            ID:           "id1",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name1",
            },
            Status: api.TaskStatus{
                State: api.TaskStateAssigned,
            },
            NodeID: initialNodeSet[0].ID,
        },
        {
            ID:           "id2",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name2",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        },
        {
            ID:           "id3",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name2",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        },
    }

    s := store.NewMemoryStore(nil)
    assert.NotNil(t, s)
    defer s.Close()

    err := s.Update(func(tx store.Tx) error {
        // Prepopulate nodes
        for _, n := range initialNodeSet {
            assert.NoError(t, store.CreateNode(tx, n))
        }

        // Prepopulate tasks
        for _, task := range initialTaskSet {
            assert.NoError(t, store.CreateTask(tx, task))
        }
        return nil
    })
    assert.NoError(t, err)

    scheduler := New(s)

    watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
    defer cancel()

    go func() {
        assert.NoError(t, scheduler.Run(ctx))
    }()
    defer scheduler.Stop()

    assignment1 := watchAssignment(t, watch)
    // must assign to id2 or id3 since id1 already has a task
    assert.Regexp(t, "^(id2|id3)$", assignment1.NodeID)

    assignment2 := watchAssignment(t, watch)
    // must assign to whichever of id2 or id3 is still empty
    if assignment1.NodeID == "id2" {
        assert.Equal(t, "id3", assignment2.NodeID)
    } else {
        assert.Equal(t, "id2", assignment2.NodeID)
    }

    err = s.Update(func(tx store.Tx) error {
        // Update each node to make sure this doesn't mess up the
        // scheduler's state.
        for _, n := range initialNodeSet {
            assert.NoError(t, store.UpdateNode(tx, n))
        }
        return nil
    })
    assert.NoError(t, err)

    err = s.Update(func(tx store.Tx) error {
        // Delete the task associated with node 1 so it's now the most lightly
        // loaded node.
        assert.NoError(t, store.DeleteTask(tx, "id1"))

        // Create a new task. It should get assigned to id1.
        t4 := &api.Task{
            ID:           "id4",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name4",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.CreateTask(tx, t4))
        return nil
    })
    assert.NoError(t, err)

    assignment3 := watchAssignment(t, watch)
    assert.Equal(t, "id1", assignment3.NodeID)

    // Update a task to make it unassigned. It should get assigned by the
    // scheduler.
    err = s.Update(func(tx store.Tx) error {
        // Remove assignment from task id4. It should get assigned
        // to node id1.
        t4 := &api.Task{
            ID:           "id4",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name4",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.UpdateTask(tx, t4))
        return nil
    })
    assert.NoError(t, err)

    assignment4 := watchAssignment(t, watch)
    assert.Equal(t, "id1", assignment4.NodeID)

    err = s.Update(func(tx store.Tx) error {
        // Create a node, then remove it. No tasks should ever
        // be assigned to it.
        node := &api.Node{
            ID: "removednode",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "removednode",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_DOWN,
            },
        }
        assert.NoError(t, store.CreateNode(tx, node))
        assert.NoError(t, store.DeleteNode(tx, node.ID))

        // Create an unassigned task.
        task := &api.Task{
            ID:           "removednode",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "removednode",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.CreateTask(tx, task))
        return nil
    })
    assert.NoError(t, err)

    assignmentRemovedNode := watchAssignment(t, watch)
    assert.NotEqual(t, "removednode", assignmentRemovedNode.NodeID)

    err = s.Update(func(tx store.Tx) error {
        // Create a ready node. It should be used for the next
        // assignment.
        n4 := &api.Node{
            ID: "id4",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name4",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        }
        assert.NoError(t, store.CreateNode(tx, n4))

        // Create an unassigned task.
        t5 := &api.Task{
            ID:           "id5",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name5",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.CreateTask(tx, t5))
        return nil
    })
    assert.NoError(t, err)

    assignment5 := watchAssignment(t, watch)
    assert.Equal(t, "id4", assignment5.NodeID)

    err = s.Update(func(tx store.Tx) error {
        // Create a non-ready node. It should NOT be used for the next
        // assignment.
        n5 := &api.Node{
            ID: "id5",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name5",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_DOWN,
            },
        }
        assert.NoError(t, store.CreateNode(tx, n5))

        // Create an unassigned task.
        t6 := &api.Task{
            ID:           "id6",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name6",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.CreateTask(tx, t6))
        return nil
    })
    assert.NoError(t, err)

    assignment6 := watchAssignment(t, watch)
    assert.NotEqual(t, "id5", assignment6.NodeID)

    err = s.Update(func(tx store.Tx) error {
        // Update node id5 to put it in the READY state.
        n5 := &api.Node{
            ID: "id5",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name5",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        }
        assert.NoError(t, store.UpdateNode(tx, n5))

        // Create an unassigned task. Should be assigned to the
        // now-ready node.
        t7 := &api.Task{
            ID:           "id7",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name7",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.CreateTask(tx, t7))
        return nil
    })
    assert.NoError(t, err)

    assignment7 := watchAssignment(t, watch)
    assert.Equal(t, "id5", assignment7.NodeID)

    err = s.Update(func(tx store.Tx) error {
        // Create a ready node, then immediately take it down. The next
        // unassigned task should NOT be assigned to it.
        n6 := &api.Node{
            ID: "id6",
            Spec: api.NodeSpec{
                Annotations: api.Annotations{
                    Name: "name6",
                },
            },
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        }
        assert.NoError(t, store.CreateNode(tx, n6))
        n6.Status.State = api.NodeStatus_DOWN
        assert.NoError(t, store.UpdateNode(tx, n6))

        // Create an unassigned task.
        t8 := &api.Task{
            ID:           "id8",
            DesiredState: api.TaskStateRunning,
            ServiceAnnotations: api.Annotations{
                Name: "name8",
            },
            Status: api.TaskStatus{
                State: api.TaskStatePending,
            },
        }
        assert.NoError(t, store.CreateTask(tx, t8))
        return nil
    })
    assert.NoError(t, err)

    assignment8 := watchAssignment(t, watch)
    assert.NotEqual(t, "id6", assignment8.NodeID)
}
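// NOTE (editor sketch): the scheduler tests drain task assignments through a
// watchAssignment helper that is not shown in this excerpt. A hypothetical
// version, assuming state.Watch returns a channel of go-events events that can
// be type-asserted to state.EventUpdateTask (the same pattern updateTask below
// relies on), could be:
func watchAssignment(t *testing.T, watch chan events.Event) *api.Task {
    for {
        select {
        case event := <-watch:
            // Return the first updated task that has been placed on a node.
            if e, ok := event.(state.EventUpdateTask); ok && e.Task.NodeID != "" {
                return e.Task
            }
        case <-time.After(time.Second):
            t.Fatal("no task assignment received")
        }
    }
}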
func (u *Updater) updateTask(ctx context.Context, slot slot, updated *api.Task) error {
    // Kick off the watch before even creating the updated task. This is in order to avoid missing any event.
    taskUpdates, cancel := state.Watch(u.watchQueue, state.EventUpdateTask{
        Task:   &api.Task{ID: updated.ID},
        Checks: []state.TaskCheckFunc{state.TaskCheckID},
    })
    defer cancel()

    // Create an empty entry for this task, so the updater knows a failure
    // should count towards the failure count. The timestamp is added
    // if/when the task reaches RUNNING.
    u.updatedTasksMu.Lock()
    u.updatedTasks[updated.ID] = time.Time{}
    u.updatedTasksMu.Unlock()

    var delayStartCh <-chan struct{}
    // Atomically create the updated task and bring down the old one.
    _, err := u.store.Batch(func(batch *store.Batch) error {
        oldTask, err := u.removeOldTasks(ctx, batch, slot)
        if err != nil {
            return err
        }

        err = batch.Update(func(tx store.Tx) error {
            if err := store.CreateTask(tx, updated); err != nil {
                return err
            }
            return nil
        })
        if err != nil {
            return err
        }

        delayStartCh = u.restarts.DelayStart(ctx, nil, oldTask, updated.ID, 0, true)
        return nil
    })
    if err != nil {
        return err
    }

    if delayStartCh != nil {
        <-delayStartCh
    }

    // Wait for the new task to come up.
    // TODO(aluzzardi): Consider adding a timeout here.
    for {
        select {
        case e := <-taskUpdates:
            updated = e.(state.EventUpdateTask).Task
            if updated.Status.State >= api.TaskStateRunning {
                u.updatedTasksMu.Lock()
                u.updatedTasks[updated.ID] = time.Now()
                u.updatedTasksMu.Unlock()
                return nil
            }
        case <-u.stopChan:
            return nil
        }
    }
}
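// The method above registers the watch before touching the store so that no
// EventUpdateTask for the new task can be emitted between the write and the
// subscription. The same watch-before-write pattern in isolation, as a sketch
// under the assumption that a *store.MemoryStore is available (error handling
// and timeouts elided; waitForTaskRunning is a hypothetical name):
func waitForTaskRunning(ctx context.Context, s *store.MemoryStore, taskID string) error {
    // Subscribe first, filtering on the task ID...
    taskUpdates, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{
        Task:   &api.Task{ID: taskID},
        Checks: []state.TaskCheckFunc{state.TaskCheckID},
    })
    defer cancel()

    // ...then perform the store update that creates or changes the task here...

    // ...and finally consume events: nothing emitted after the subscription
    // was registered can be missed.
    for {
        select {
        case e := <-taskUpdates:
            if e.(state.EventUpdateTask).Task.Status.State >= api.TaskStateRunning {
                return nil
            }
        case <-ctx.Done():
            return ctx.Err()
        }
    }
}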
func TestHA(t *testing.T) {
    ctx := context.Background()
    initialNodeSet := []*api.Node{
        {
            ID: "id1",
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id2",
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id3",
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id4",
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
        {
            ID: "id5",
            Status: api.NodeStatus{
                State: api.NodeStatus_READY,
            },
        },
    }

    taskTemplate1 := &api.Task{
        DesiredState: api.TaskStateRunning,
        ServiceID:    "service1",
        Spec: api.TaskSpec{
            Runtime: &api.TaskSpec_Container{
                Container: &api.ContainerSpec{
                    Image: "v:1",
                },
            },
        },
        Status: api.TaskStatus{
            State: api.TaskStatePending,
        },
    }

    taskTemplate2 := &api.Task{
        DesiredState: api.TaskStateRunning,
        ServiceID:    "service2",
        Spec: api.TaskSpec{
            Runtime: &api.TaskSpec_Container{
                Container: &api.ContainerSpec{
                    Image: "v:2",
                },
            },
        },
        Status: api.TaskStatus{
            State: api.TaskStatePending,
        },
    }

    s := store.NewMemoryStore(nil)
    assert.NotNil(t, s)
    defer s.Close()

    t1Instances := 18

    err := s.Update(func(tx store.Tx) error {
        // Prepopulate nodes
        for _, n := range initialNodeSet {
            assert.NoError(t, store.CreateNode(tx, n))
        }

        // Prepopulate tasks from template 1
        for i := 0; i != t1Instances; i++ {
            taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
            assert.NoError(t, store.CreateTask(tx, taskTemplate1))
        }
        return nil
    })
    assert.NoError(t, err)

    scheduler := New(s)

    watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
    defer cancel()

    go func() {
        assert.NoError(t, scheduler.Run(ctx))
    }()
    defer scheduler.Stop()

    t1Assignments := make(map[string]int)
    for i := 0; i != t1Instances; i++ {
        assignment := watchAssignment(t, watch)
        if !strings.HasPrefix(assignment.ID, "t1") {
            t.Fatal("got assignment for different kind of task")
        }
        t1Assignments[assignment.NodeID]++
    }

    assert.Len(t, t1Assignments, 5)

    nodesWith3T1Tasks := 0
    nodesWith4T1Tasks := 0
    for nodeID, taskCount := range t1Assignments {
        if taskCount == 3 {
            nodesWith3T1Tasks++
        } else if taskCount == 4 {
            nodesWith4T1Tasks++
        } else {
            t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
        }
    }

    assert.Equal(t, 3, nodesWith4T1Tasks)
    assert.Equal(t, 2, nodesWith3T1Tasks)

    t2Instances := 2

    // Add a new service with two instances. They should fill the nodes
    // that only have three tasks.
    err = s.Update(func(tx store.Tx) error {
        for i := 0; i != t2Instances; i++ {
            taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
            assert.NoError(t, store.CreateTask(tx, taskTemplate2))
        }
        return nil
    })
    assert.NoError(t, err)

    t2Assignments := make(map[string]int)
    for i := 0; i != t2Instances; i++ {
        assignment := watchAssignment(t, watch)
        if !strings.HasPrefix(assignment.ID, "t2") {
            t.Fatal("got assignment for different kind of task")
        }
        t2Assignments[assignment.NodeID]++
    }

    assert.Len(t, t2Assignments, 2)

    for nodeID := range t2Assignments {
        assert.Equal(t, 3, t1Assignments[nodeID])
    }

    // Scale up service 1 to 21 tasks. It should cover the two nodes that
    // service 2 was assigned to, and also one other node.
    err = s.Update(func(tx store.Tx) error {
        for i := t1Instances; i != t1Instances+3; i++ {
            taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
            assert.NoError(t, store.CreateTask(tx, taskTemplate1))
        }
        return nil
    })
    assert.NoError(t, err)

    var sharedNodes [2]string

    for i := 0; i != 3; i++ {
        assignment := watchAssignment(t, watch)
        if !strings.HasPrefix(assignment.ID, "t1") {
            t.Fatal("got assignment for different kind of task")
        }
        if t1Assignments[assignment.NodeID] == 5 {
            t.Fatal("more than one new task assigned to the same node")
        }
        t1Assignments[assignment.NodeID]++

        if t2Assignments[assignment.NodeID] != 0 {
            if sharedNodes[0] == "" {
                sharedNodes[0] = assignment.NodeID
            } else if sharedNodes[1] == "" {
                sharedNodes[1] = assignment.NodeID
            } else {
                t.Fatal("all three assignments went to nodes with service2 tasks")
            }
        }
    }

    assert.NotEmpty(t, sharedNodes[0])
    assert.NotEmpty(t, sharedNodes[1])
    assert.NotEqual(t, sharedNodes[0], sharedNodes[1])

    nodesWith4T1Tasks = 0
    nodesWith5T1Tasks := 0
    for nodeID, taskCount := range t1Assignments {
        if taskCount == 4 {
            nodesWith4T1Tasks++
        } else if taskCount == 5 {
            nodesWith5T1Tasks++
        } else {
            t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
        }
    }

    assert.Equal(t, 4, nodesWith4T1Tasks)
    assert.Equal(t, 1, nodesWith5T1Tasks)

    // Add another task from service2. It must not land on the node that
    // has 5 service1 tasks.
    err = s.Update(func(tx store.Tx) error {
        taskTemplate2.ID = "t2id4"
        assert.NoError(t, store.CreateTask(tx, taskTemplate2))
        return nil
    })
    assert.NoError(t, err)

    assignment := watchAssignment(t, watch)
    if assignment.ID != "t2id4" {
        t.Fatal("got assignment for different task")
    }

    if t2Assignments[assignment.NodeID] != 0 {
        t.Fatal("was scheduled on a node that already has a service2 task")
    }
    if t1Assignments[assignment.NodeID] == 5 {
        t.Fatal("was scheduled on the node that has the most service1 tasks")
    }
    t2Assignments[assignment.NodeID]++

    // Remove all tasks on node id1.
    err = s.Update(func(tx store.Tx) error {
        tasks, err := store.FindTasks(tx, store.ByNodeID("id1"))
        assert.NoError(t, err)
        for _, task := range tasks {
            assert.NoError(t, store.DeleteTask(tx, task.ID))
        }
        return nil
    })
    assert.NoError(t, err)

    t1Assignments["id1"] = 0
    t2Assignments["id1"] = 0

    // Add four instances of service1 and two instances of service2.
    // All instances of service1 should land on node "id1", and one
    // of the two service2 instances should as well.
    // Put these in a map to randomize the order in which they are
    // created.
    err = s.Update(func(tx store.Tx) error {
        tasksMap := make(map[string]*api.Task)
        for i := 22; i <= 25; i++ {
            taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
            tasksMap[taskTemplate1.ID] = taskTemplate1.Copy()
        }
        for i := 5; i <= 6; i++ {
            taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
            tasksMap[taskTemplate2.ID] = taskTemplate2.Copy()
        }
        for _, task := range tasksMap {
            assert.NoError(t, store.CreateTask(tx, task))
        }
        return nil
    })
    assert.NoError(t, err)

    for i := 0; i != 4+2; i++ {
        assignment := watchAssignment(t, watch)
        if strings.HasPrefix(assignment.ID, "t1") {
            t1Assignments[assignment.NodeID]++
        } else if strings.HasPrefix(assignment.ID, "t2") {
            t2Assignments[assignment.NodeID]++
        }
    }

    assert.Equal(t, 4, t1Assignments["id1"])
    assert.Equal(t, 1, t2Assignments["id1"])
}
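// NOTE (editor sketch): TestHA repeats the same tally loop several times: drain
// an assignment, check the task-ID prefix, and count it per node. A hypothetical
// helper factoring that pattern out, reusing the watchAssignment sketch above,
// might look like:
func collectAssignments(t *testing.T, watch chan events.Event, prefix string, n int) map[string]int {
    counts := make(map[string]int)
    for i := 0; i != n; i++ {
        assignment := watchAssignment(t, watch)
        if !strings.HasPrefix(assignment.ID, prefix) {
            t.Fatalf("got assignment for a different kind of task: %s", assignment.ID)
        }
        counts[assignment.NodeID]++
    }
    return counts
}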