func TestRemoveTask(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)
	defer store.Close()

	watch, cancel := state.Watch(store.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	SetupCluster(t, store)

	// get the task
	observedTask1 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask1.NodeID, "id1")

	// delete the task
	deleteTask(t, store, observedTask1)

	// the task should be recreated
	observedTask2 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask2.NodeID, "id1")
}
func TestNodeAvailability(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)
	defer store.Close()

	SetupCluster(t, store)

	watch, cancel := state.Watch(store.WatchQueue())
	defer cancel()
	skipEvents(t, watch)

	node1.Status.State = api.NodeStatus_READY
	node1.Spec.Availability = api.NodeAvailabilityActive

	// set node1 to drain
	updateNodeAvailability(t, store, node1, api.NodeAvailabilityDrain)

	// task should be set to dead
	observedTask1 := watchShutdownTask(t, watch)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask1.NodeID, "id1")

	// set node1 to active
	updateNodeAvailability(t, store, node1, api.NodeAvailabilityActive)

	// task should be added back
	observedTask2 := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask2.NodeID, "id1")
}
// Verify the key generation and rotation for default subsystems
func TestKeyManagerDefaultSubsystem(t *testing.T) {
	st := store.NewMemoryStore(nil)
	defer st.Close()
	createCluster(t, st, "default", "default")

	k := New(st, DefaultConfig())

	ctx := context.Background()
	go k.Run(ctx)
	time.Sleep(250 * time.Millisecond)

	// verify the number of keys allocated matches the keyring size.
	var (
		clusters []*api.Cluster
		err      error
	)
	k.store.View(func(readTx store.ReadTx) {
		clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
	})
	assert.NoError(t, err)
	assert.Equal(t, len(clusters[0].NetworkBootstrapKeys), len(k.config.Subsystems)*keyringSize)

	key1 := clusters[0].NetworkBootstrapKeys[0].Key

	k.rotateKey(ctx)

	// verify that after a rotation the oldest key has been removed from the keyring
	assert.Equal(t, len(k.keyRing.keys), len(k.config.Subsystems)*keyringSize)
	for _, key := range k.keyRing.keys {
		match := bytes.Equal(key.Key, key1)
		assert.False(t, match)
	}
}
func TestSchedulerNoReadyNodes(t *testing.T) {
	ctx := context.Background()
	initialTask := &api.Task{
		ID:                 "id1",
		DesiredState:       api.TaskStateRunning,
		ServiceAnnotations: api.Annotations{Name: "name1"},
		Status:             api.TaskStatus{State: api.TaskStatePending},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Add initial task
		assert.NoError(t, store.CreateTask(tx, initialTask))
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	failure := watchAssignmentFailure(t, watch)
	assert.Equal(t, "no suitable node", failure.Status.Message)

	err = s.Update(func(tx store.Tx) error {
		// Create a ready node. The task should get assigned to this
		// node.
		node := &api.Node{
			ID: "newnode",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{Name: "newnode"},
			},
			Status: api.NodeStatus{State: api.NodeStatus_READY},
		}
		assert.NoError(t, store.CreateNode(tx, node))
		return nil
	})
	assert.NoError(t, err)

	assignment := watchAssignment(t, watch)
	assert.Equal(t, "newnode", assignment.NodeID)
}
// NewNode generates a new Raft node
func NewNode(opts NodeOptions) *Node {
	cfg := opts.Config
	if cfg == nil {
		cfg = DefaultNodeConfig()
	}
	if opts.TickInterval == 0 {
		opts.TickInterval = time.Second
	}
	if opts.SendTimeout == 0 {
		opts.SendTimeout = 2 * time.Second
	}

	raftStore := raft.NewMemoryStorage()

	n := &Node{
		cluster:   membership.NewCluster(2 * cfg.ElectionTick),
		raftStore: raftStore,
		opts:      opts,
		Config: &raft.Config{
			ElectionTick:    cfg.ElectionTick,
			HeartbeatTick:   cfg.HeartbeatTick,
			Storage:         raftStore,
			MaxSizePerMsg:   cfg.MaxSizePerMsg,
			MaxInflightMsgs: cfg.MaxInflightMsgs,
			Logger:          cfg.Logger,
		},
		doneCh:              make(chan struct{}),
		removeRaftCh:        make(chan struct{}),
		stopped:             make(chan struct{}),
		leadershipBroadcast: watch.NewQueue(),
		lastSendToMember:    make(map[uint64]chan struct{}),
		keyRotator:          opts.KeyRotator,
	}
	n.memoryStore = store.NewMemoryStore(n)

	if opts.ClockSource == nil {
		n.ticker = clock.NewClock().NewTicker(opts.TickInterval)
	} else {
		n.ticker = opts.ClockSource.NewTicker(opts.TickInterval)
	}

	n.reqIDGen = idutil.NewGenerator(uint16(n.Config.ID), time.Now())
	n.wait = newWait()

	n.removeRaftFunc = func(n *Node) func() {
		var removeRaftOnce sync.Once
		return func() {
			removeRaftOnce.Do(func() {
				close(n.removeRaftCh)
			})
		}
	}(n)

	return n
}
// NewNode generates a new Raft node
func NewNode(ctx context.Context, opts NewNodeOptions) *Node {
	cfg := opts.Config
	if cfg == nil {
		cfg = DefaultNodeConfig()
	}
	if opts.TickInterval == 0 {
		opts.TickInterval = time.Second
	}

	raftStore := raft.NewMemoryStorage()

	ctx, cancel := context.WithCancel(ctx)

	n := &Node{
		Ctx:            ctx,
		cancel:         cancel,
		cluster:        membership.NewCluster(),
		tlsCredentials: opts.TLSCredentials,
		raftStore:      raftStore,
		Address:        opts.Addr,
		opts:           opts,
		Config: &raft.Config{
			ElectionTick:    cfg.ElectionTick,
			HeartbeatTick:   cfg.HeartbeatTick,
			Storage:         raftStore,
			MaxSizePerMsg:   cfg.MaxSizePerMsg,
			MaxInflightMsgs: cfg.MaxInflightMsgs,
			Logger:          cfg.Logger,
		},
		forceNewCluster:     opts.ForceNewCluster,
		stopCh:              make(chan struct{}),
		doneCh:              make(chan struct{}),
		removeRaftCh:        make(chan struct{}),
		StateDir:            opts.StateDir,
		joinAddr:            opts.JoinAddr,
		sendTimeout:         2 * time.Second,
		leadershipBroadcast: events.NewBroadcaster(),
	}
	n.memoryStore = store.NewMemoryStore(n)

	if opts.ClockSource == nil {
		n.ticker = clock.NewClock().NewTicker(opts.TickInterval)
	} else {
		n.ticker = opts.ClockSource.NewTicker(opts.TickInterval)
	}
	if opts.SendTimeout != 0 {
		n.sendTimeout = opts.SendTimeout
	}

	n.reqIDGen = idutil.NewGenerator(uint16(n.Config.ID), time.Now())
	n.wait = newWait()

	return n
}
// Verify that instantiating keymanager fails if an invalid subsystem is
// passed
func TestKeyManagerInvalidSubsystem(t *testing.T) {
	st := store.NewMemoryStore(nil)
	createCluster(t, st, "default", "default")

	config := &Config{
		ClusterName:      store.DefaultClusterName,
		Keylen:           DefaultKeyLen,
		RotationInterval: DefaultKeyRotationInterval,
		Subsystems:       []string{"serf"},
	}
	k := New(st, config)

	assert.Nil(t, k)
}
func TestSetup(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)
	defer store.Close()

	watch, cancel := state.Watch(store.WatchQueue() /*state.EventCreateTask{}, state.EventUpdateTask{}*/)
	defer cancel()

	observedTask1 := SetupCluster(t, store, watch)

	assert.Equal(t, observedTask1.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask1.NodeID, "id1")
}
func TestAddNode(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)
	defer store.Close()

	watch, cancel := state.Watch(store.WatchQueue())
	defer cancel()

	SetupCluster(t, store, watch)

	addNode(t, store, node2)
	observedTask2 := testutils.WatchTaskCreate(t, watch)
	assert.Equal(t, observedTask2.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask2.NodeID, "id2")
}
func TestDeleteService(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)
	defer store.Close()

	watch, cancel := state.Watch(store.WatchQueue())
	defer cancel()

	SetupCluster(t, store, watch)

	deleteService(t, store, service1)
	// task should be deleted
	observedTask := testutils.WatchTaskDelete(t, watch)
	assert.Equal(t, observedTask.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask.NodeID, "id1")
}
func TestAddService(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)

	SetupCluster(t, store)

	watch, cancel := state.Watch(store.WatchQueue())
	defer cancel()
	skipEvents(t, watch)

	addService(t, store, service2)
	observedTask := watchTaskCreate(t, watch)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.ServiceAnnotations.Name, "name2")
	assert.True(t, observedTask.NodeID == "id1")
}
func TestDeleteNode(t *testing.T) {
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)

	SetupCluster(t, store)

	watch, cancel := state.Watch(store.WatchQueue())
	defer cancel()
	skipEvents(t, watch)

	deleteNode(t, store, node1)
	// task should be set to dead
	observedTask := watchShutdownTask(t, watch)
	assert.Equal(t, observedTask.ServiceAnnotations.Name, "name1")
	assert.Equal(t, observedTask.NodeID, "id1")
}
// Verify the key generation and rotation for IPsec subsystem
func TestKeyManagerCustomSubsystem(t *testing.T) {
	st := store.NewMemoryStore(nil)
	defer st.Close()
	createCluster(t, st, "default", "default")

	config := &Config{
		ClusterName:      store.DefaultClusterName,
		Keylen:           DefaultKeyLen,
		RotationInterval: DefaultKeyRotationInterval,
		Subsystems:       []string{SubsystemIPSec},
	}
	k := New(st, config)

	ctx := context.Background()
	go k.Run(ctx)
	time.Sleep(250 * time.Millisecond)

	// verify the number of keys allocated matches the keyring size.
	var (
		clusters []*api.Cluster
		err      error
	)
	k.store.View(func(readTx store.ReadTx) {
		clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
	})
	assert.NoError(t, err)
	assert.Equal(t, len(clusters[0].NetworkBootstrapKeys), keyringSize)

	key1 := clusters[0].NetworkBootstrapKeys[0].Key

	k.rotateKey(ctx)

	// verify that after a rotation the oldest key has been removed from the keyring,
	// and also verify that all keys are for the right subsystem
	assert.Equal(t, len(k.keyRing.keys), keyringSize)
	for _, key := range k.keyRing.keys {
		match := bytes.Equal(key.Key, key1)
		assert.False(t, match)
		match = key.Subsystem == SubsystemIPSec
		assert.True(t, match)
	}
}
// Verify the key generation and rotation for IPsec subsystem
func TestKeyManagerCustomSubsystem(t *testing.T) {
	st := store.NewMemoryStore(nil)
	createCluster(t, st, "default", "default")

	config := &Config{
		ClusterName:      store.DefaultClusterName,
		Keylen:           DefaultKeyLen,
		RotationInterval: DefaultKeyRotationInterval,
		Subsystems:       []string{SubsystemIPSec},
	}
	k := New(st, config)

	ctx := context.Background()
	go k.Run(ctx)
	time.Sleep(250 * time.Millisecond)

	// verify the first key has been allocated and updated in the store
	var (
		clusters []*api.Cluster
		err      error
	)
	k.store.View(func(readTx store.ReadTx) {
		clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
	})
	assert.NoError(t, err)
	assert.Equal(t, len(clusters[0].NetworkBootstrapKeys), 1)

	key1 := clusters[0].NetworkBootstrapKeys[0].Key

	k.rotateKey(ctx)
	k.rotateKey(ctx)

	// verify that after two rotations the keyring has two keys and the very
	// first key allocated has been removed
	assert.Equal(t, len(k.keyRing.keys), 2)
	for _, key := range k.keyRing.keys {
		match := bytes.Equal(key.Key, key1)
		assert.False(t, match)
	}
}
func newTestServer(t *testing.T) *testServer {
	ts := &testServer{}

	// Create a testCA just to get a usable RootCA object
	tc := cautils.NewTestCA(nil)
	tc.Stop()

	ts.Store = store.NewMemoryStore(&mockProposer{})
	assert.NotNil(t, ts.Store)

	ts.Server = NewServer(ts.Store, nil, &tc.RootCA)
	assert.NotNil(t, ts.Server)

	temp, err := ioutil.TempFile("", "test-socket")
	assert.NoError(t, err)
	assert.NoError(t, temp.Close())
	assert.NoError(t, os.Remove(temp.Name()))

	ts.tempUnixSocket = temp.Name()

	lis, err := net.Listen("unix", temp.Name())
	assert.NoError(t, err)

	ts.grpcServer = grpc.NewServer()
	api.RegisterControlServer(ts.grpcServer, ts.Server)
	go func() {
		// Serve will always return an error (even when properly stopped).
		// Explicitly ignore it.
		_ = ts.grpcServer.Serve(lis)
	}()

	conn, err := grpc.Dial(temp.Name(), grpc.WithInsecure(), grpc.WithTimeout(10*time.Second),
		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}))
	assert.NoError(t, err)

	ts.clientConn = conn
	ts.Client = api.NewControlClient(conn)

	return ts
}
func TestAllocator(t *testing.T) {
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	a, err := New(s)
	assert.NoError(t, err)
	assert.NotNil(t, a)

	// Try adding some objects to store before allocator is started
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		n1 := &api.Network{
			ID: "testID1",
			Spec: api.NetworkSpec{
				Annotations: api.Annotations{Name: "test1"},
			},
		}
		assert.NoError(t, store.CreateNetwork(tx, n1))

		s1 := &api.Service{
			ID: "testServiceID1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{Name: "service1"},
				Task: api.TaskSpec{
					Networks: []*api.NetworkAttachmentConfig{
						{Target: "testID1"},
					},
				},
				Endpoint: &api.EndpointSpec{},
			},
		}
		assert.NoError(t, store.CreateService(tx, s1))

		t1 := &api.Task{
			ID:     "testTaskID1",
			Status: api.TaskStatus{State: api.TaskStateNew},
			Networks: []*api.NetworkAttachment{
				{Network: n1},
			},
		}
		assert.NoError(t, store.CreateTask(tx, t1))
		return nil
	}))

	netWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateNetwork{}, state.EventDeleteNetwork{})
	defer cancel()
	taskWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{}, state.EventDeleteTask{})
	defer cancel()
	serviceWatch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateService{}, state.EventDeleteService{})
	defer cancel()

	// Start allocator
	go func() {
		assert.NoError(t, a.Run(context.Background()))
	}()

	// Now verify that we get network and tasks updated properly
	watchNetwork(t, netWatch, false, isValidNetwork)
	watchTask(t, s, taskWatch, false, isValidTask)
	watchService(t, serviceWatch, false, nil)

	// Add new networks/tasks/services after allocator is started.
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		n2 := &api.Network{
			ID: "testID2",
			Spec: api.NetworkSpec{
				Annotations: api.Annotations{Name: "test2"},
			},
		}
		assert.NoError(t, store.CreateNetwork(tx, n2))
		return nil
	}))

	watchNetwork(t, netWatch, false, isValidNetwork)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		s2 := &api.Service{
			ID: "testServiceID2",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{Name: "service2"},
				Networks: []*api.NetworkAttachmentConfig{
					{Target: "testID2"},
				},
				Endpoint: &api.EndpointSpec{},
			},
		}
		assert.NoError(t, store.CreateService(tx, s2))
		return nil
	}))

	watchService(t, serviceWatch, false, nil)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t2 := &api.Task{
			ID:           "testTaskID2",
			Status:       api.TaskStatus{State: api.TaskStateNew},
			ServiceID:    "testServiceID2",
			DesiredState: api.TaskStateRunning,
		}
		assert.NoError(t, store.CreateTask(tx, t2))
		return nil
	}))

	watchTask(t, s, taskWatch, false, isValidTask)

	// Now try adding a task which depends on a network before adding the network.
	n3 := &api.Network{
		ID: "testID3",
		Spec: api.NetworkSpec{
			Annotations: api.Annotations{Name: "test3"},
		},
	}

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t3 := &api.Task{
			ID:           "testTaskID3",
			Status:       api.TaskStatus{State: api.TaskStateNew},
			DesiredState: api.TaskStateRunning,
			Networks: []*api.NetworkAttachment{
				{Network: n3},
			},
		}
		assert.NoError(t, store.CreateTask(tx, t3))
		return nil
	}))

	// Wait for a little bit of time before adding the network, just to test
	// that the network is not available while task allocation is going on
	time.Sleep(10 * time.Millisecond)
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNetwork(tx, n3))
		return nil
	}))

	watchNetwork(t, netWatch, false, isValidNetwork)
	watchTask(t, s, taskWatch, false, isValidTask)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteTask(tx, "testTaskID3"))
		return nil
	}))
	watchTask(t, s, taskWatch, false, isValidTask)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t5 := &api.Task{
			ID: "testTaskID5",
			Spec: api.TaskSpec{
				Networks: []*api.NetworkAttachmentConfig{
					{Target: "testID2"},
				},
			},
			Status:       api.TaskStatus{State: api.TaskStateNew},
			DesiredState: api.TaskStateRunning,
			ServiceID:    "testServiceID2",
		}
		assert.NoError(t, store.CreateTask(tx, t5))
		return nil
	}))
	watchTask(t, s, taskWatch, false, isValidTask)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteNetwork(tx, "testID3"))
		return nil
	}))
	watchNetwork(t, netWatch, false, isValidNetwork)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteService(tx, "testServiceID2"))
		return nil
	}))
	watchService(t, serviceWatch, false, nil)

	// Try to create a task with no network attachments and test
	// that it moves to ALLOCATED state.
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t4 := &api.Task{
			ID:           "testTaskID4",
			Status:       api.TaskStatus{State: api.TaskStateNew},
			DesiredState: api.TaskStateRunning,
		}
		assert.NoError(t, store.CreateTask(tx, t4))
		return nil
	}))
	watchTask(t, s, taskWatch, false, isValidTask)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		n2 := store.GetNetwork(tx, "testID2")
		require.NotEqual(t, nil, n2)
		assert.NoError(t, store.UpdateNetwork(tx, n2))
		return nil
	}))
	watchNetwork(t, netWatch, false, isValidNetwork)
	watchNetwork(t, netWatch, true, nil)

	// Try updating task which is already allocated
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t2 := store.GetTask(tx, "testTaskID2")
		require.NotEqual(t, nil, t2)
		assert.NoError(t, store.UpdateTask(tx, t2))
		return nil
	}))
	watchTask(t, s, taskWatch, false, isValidTask)
	watchTask(t, s, taskWatch, true, nil)

	// Try adding networks with conflicting network resources, and add a task
	// which attaches to a network which gets allocated later; verify that the
	// task reconciles and moves to ALLOCATED.
	n4 := &api.Network{
		ID: "testID4",
		Spec: api.NetworkSpec{
			Annotations: api.Annotations{Name: "test4"},
			DriverConfig: &api.Driver{
				Name: "overlay",
				Options: map[string]string{
					"com.docker.network.driver.overlay.vxlanid_list": "328",
				},
			},
		},
	}

	n5 := n4.Copy()
	n5.ID = "testID5"
	n5.Spec.Annotations.Name = "test5"

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNetwork(tx, n4))
		return nil
	}))
	watchNetwork(t, netWatch, false, isValidNetwork)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNetwork(tx, n5))
		return nil
	}))
	watchNetwork(t, netWatch, true, nil)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t6 := &api.Task{
			ID:           "testTaskID6",
			Status:       api.TaskStatus{State: api.TaskStateNew},
			DesiredState: api.TaskStateRunning,
			Networks: []*api.NetworkAttachment{
				{Network: n5},
			},
		}
		assert.NoError(t, store.CreateTask(tx, t6))
		return nil
	}))
	watchTask(t, s, taskWatch, true, nil)

	// Now remove the conflicting network.
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteNetwork(tx, n4.ID))
		return nil
	}))
	watchNetwork(t, netWatch, false, isValidNetwork)
	watchTask(t, s, taskWatch, false, isValidTask)

	// Try adding services with conflicting port configs, and add a task which
	// is part of a service whose allocation hasn't happened yet; when that
	// allocation happens later, verify that the task reconciles and moves to
	// ALLOCATED.
	s3 := &api.Service{
		ID: "testServiceID3",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{Name: "service3"},
			Endpoint: &api.EndpointSpec{
				Ports: []*api.PortConfig{
					{
						Name:          "http",
						TargetPort:    80,
						PublishedPort: 8080,
					},
				},
			},
		},
	}

	s4 := s3.Copy()
	s4.ID = "testServiceID4"
	s4.Spec.Annotations.Name = "service4"

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, s3))
		return nil
	}))
	watchService(t, serviceWatch, false, nil)
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, s4))
		return nil
	}))
	watchService(t, serviceWatch, true, nil)

	assert.NoError(t, s.Update(func(tx store.Tx) error {
		t7 := &api.Task{
			ID:           "testTaskID7",
			Status:       api.TaskStatus{State: api.TaskStateNew},
			ServiceID:    "testServiceID4",
			DesiredState: api.TaskStateRunning,
		}
		assert.NoError(t, store.CreateTask(tx, t7))
		return nil
	}))
	watchTask(t, s, taskWatch, true, nil)

	// Now remove the conflicting service.
	assert.NoError(t, s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteService(tx, s3.ID))
		return nil
	}))
	watchService(t, serviceWatch, false, nil)
	watchTask(t, s, taskWatch, false, isValidTask)

	a.Stop()
}
// NewTestCA is a helper method that creates a TestCA and a bunch of default
// connections and security configs.
func NewTestCA(t *testing.T) *TestCA {
	tempBaseDir, err := ioutil.TempDir("", "swarm-ca-test-")
	assert.NoError(t, err)

	s := store.NewMemoryStore(nil)

	paths := ca.NewConfigPaths(tempBaseDir)
	organization := identity.NewID()

	rootCA, err := createAndWriteRootCA("swarm-test-CA", paths.RootCA, ca.DefaultNodeCertExpiration)
	assert.NoError(t, err)

	var (
		externalSigningServer *ExternalSigningServer
		externalCAs           []*api.ExternalCA
	)
	if External {
		// Start the CA API server.
		externalSigningServer, err = NewExternalSigningServer(rootCA, tempBaseDir)
		assert.NoError(t, err)
		externalCAs = []*api.ExternalCA{
			{
				Protocol: api.ExternalCA_CAProtocolCFSSL,
				URL:      externalSigningServer.URL,
			},
		}
	}

	managerConfig, err := genSecurityConfig(s, rootCA, ca.ManagerRole, organization, "", External)
	assert.NoError(t, err)

	managerDiffOrgConfig, err := genSecurityConfig(s, rootCA, ca.ManagerRole, "swarm-test-org-2", "", External)
	assert.NoError(t, err)

	agentConfig, err := genSecurityConfig(s, rootCA, ca.AgentRole, organization, "", External)
	assert.NoError(t, err)

	l, err := net.Listen("tcp", "127.0.0.1:0")
	assert.NoError(t, err)

	baseOpts := []grpc.DialOption{grpc.WithTimeout(10 * time.Second)}
	insecureClientOpts := append(baseOpts, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})))
	clientOpts := append(baseOpts, grpc.WithTransportCredentials(agentConfig.ClientTLSCreds))
	managerOpts := append(baseOpts, grpc.WithTransportCredentials(managerConfig.ClientTLSCreds))
	managerDiffOrgOpts := append(baseOpts, grpc.WithTransportCredentials(managerDiffOrgConfig.ClientTLSCreds))

	conn1, err := grpc.Dial(l.Addr().String(), insecureClientOpts...)
	assert.NoError(t, err)

	conn2, err := grpc.Dial(l.Addr().String(), clientOpts...)
	assert.NoError(t, err)

	conn3, err := grpc.Dial(l.Addr().String(), managerOpts...)
	assert.NoError(t, err)

	conn4, err := grpc.Dial(l.Addr().String(), managerDiffOrgOpts...)
	assert.NoError(t, err)

	serverOpts := []grpc.ServerOption{grpc.Creds(managerConfig.ServerTLSCreds)}
	grpcServer := grpc.NewServer(serverOpts...)

	managerToken := ca.GenerateJoinToken(&rootCA)
	workerToken := ca.GenerateJoinToken(&rootCA)

	createClusterObject(t, s, organization, workerToken, managerToken, externalCAs...)
	caServer := ca.NewServer(s, managerConfig)
	api.RegisterCAServer(grpcServer, caServer)
	api.RegisterNodeCAServer(grpcServer, caServer)

	ctx := context.Background()

	go grpcServer.Serve(l)
	go caServer.Run(ctx)

	// Wait for caServer to be ready to serve
	<-caServer.Ready()
	remotes := remotes.NewRemotes(api.Peer{Addr: l.Addr().String()})

	caClients := []api.CAClient{api.NewCAClient(conn1), api.NewCAClient(conn2), api.NewCAClient(conn3)}
	nodeCAClients := []api.NodeCAClient{api.NewNodeCAClient(conn1), api.NewNodeCAClient(conn2), api.NewNodeCAClient(conn3), api.NewNodeCAClient(conn4)}
	conns := []*grpc.ClientConn{conn1, conn2, conn3, conn4}

	return &TestCA{
		RootCA:                rootCA,
		ExternalSigningServer: externalSigningServer,
		MemoryStore:           s,
		TempDir:               tempBaseDir,
		Organization:          organization,
		Paths:                 paths,
		Context:               ctx,
		CAClients:             caClients,
		NodeCAClients:         nodeCAClients,
		Conns:                 conns,
		Server:                grpcServer,
		CAServer:              caServer,
		WorkerToken:           workerToken,
		ManagerToken:          managerToken,
		Remotes:               remotes,
	}
}
func TestUpdaterStopGracePeriod(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)

	// Move tasks to their desired state.
	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()
	go func() {
		for {
			select {
			case e := <-watch:
				task := e.(state.EventUpdateTask).Task
				err := s.Update(func(tx store.Tx) error {
					task = store.GetTask(tx, task.ID)
					// Explicitly do not set task state to
					// DEAD to trigger StopGracePeriod
					if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateRunning {
						task.Status.State = api.TaskStateRunning
						return store.UpdateTask(tx, task)
					}
					return nil
				})
				assert.NoError(t, err)
			}
		}
	}()

	var instances uint64 = 3

	service := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{Name: "name1"},
			Task: api.TaskSpec{
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{
						Image:           "v:1",
						StopGracePeriod: ptypes.DurationProto(100 * time.Millisecond),
					},
				},
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{Replicas: instances},
			},
		},
	}

	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateService(tx, service))
		for i := uint64(0); i < instances; i++ {
			task := newTask(nil, service, uint64(i))
			task.Status.State = api.TaskStateRunning
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	originalTasks := getRunnableServiceTasks(t, s, service)
	for _, task := range originalTasks {
		assert.Equal(t, "v:1", task.Spec.GetContainer().Image)
	}

	before := time.Now()

	service.Spec.Task.GetContainer().Image = "v:2"
	updater := NewUpdater(s, NewRestartSupervisor(s))
	// Override the default (1 minute) to speed up the test.
	updater.restarts.taskTimeout = 100 * time.Millisecond
	updater.Run(ctx, nil, service, getRunnableServiceTasks(t, s, service))
	updatedTasks := getRunnableServiceTasks(t, s, service)
	for _, task := range updatedTasks {
		assert.Equal(t, "v:2", task.Spec.GetContainer().Image)
	}

	after := time.Now()

	// At least 100 ms should have elapsed. Only check the lower bound,
	// because the system may be slow and it could have taken longer.
	if after.Sub(before) < 100*time.Millisecond {
		t.Fatal("stop timeout should have elapsed")
	}
}
func TestUpdater(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)

	// Move tasks to their desired state.
	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()
	go func() {
		for {
			select {
			case e := <-watch:
				task := e.(state.EventUpdateTask).Task
				if task.Status.State == task.DesiredState {
					continue
				}
				err := s.Update(func(tx store.Tx) error {
					task = store.GetTask(tx, task.ID)
					task.Status.State = task.DesiredState
					return store.UpdateTask(tx, task)
				})
				assert.NoError(t, err)
			}
		}
	}()

	instances := 3
	cluster := &api.Cluster{
		// test cluster configuration propagation to task creation.
		Spec: api.ClusterSpec{
			Annotations: api.Annotations{Name: "default"},
		},
	}

	service := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{Name: "name1"},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{Replicas: uint64(instances)},
			},
			Task: api.TaskSpec{
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{
						Image: "v:1",
						// This won't apply in this test because we set the old tasks to DEAD.
						StopGracePeriod: ptypes.DurationProto(time.Hour),
					},
				},
			},
		},
	}

	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateCluster(tx, cluster))
		assert.NoError(t, store.CreateService(tx, service))
		for i := 0; i < instances; i++ {
			assert.NoError(t, store.CreateTask(tx, newTask(cluster, service, uint64(i))))
		}
		return nil
	})
	assert.NoError(t, err)

	originalTasks := getRunnableServiceTasks(t, s, service)
	for _, task := range originalTasks {
		assert.Equal(t, "v:1", task.Spec.GetContainer().Image)
		assert.Nil(t, task.LogDriver) // should be left alone
	}

	service.Spec.Task.GetContainer().Image = "v:2"
	service.Spec.Task.LogDriver = &api.Driver{Name: "tasklogdriver"}
	updater := NewUpdater(s, NewRestartSupervisor(s))
	updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service))
	updatedTasks := getRunnableServiceTasks(t, s, service)
	for _, task := range updatedTasks {
		assert.Equal(t, "v:2", task.Spec.GetContainer().Image)
		assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // pick up from task
	}

	service.Spec.Task.GetContainer().Image = "v:3"
	cluster.Spec.DefaultLogDriver = &api.Driver{Name: "clusterlogdriver"} // make cluster default logdriver.
	service.Spec.Update = &api.UpdateConfig{
		Parallelism: 1,
	}
	updater = NewUpdater(s, NewRestartSupervisor(s))
	updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service))
	updatedTasks = getRunnableServiceTasks(t, s, service)
	for _, task := range updatedTasks {
		assert.Equal(t, "v:3", task.Spec.GetContainer().Image)
		assert.Equal(t, service.Spec.Task.LogDriver, task.LogDriver) // still pick up from task
	}

	service.Spec.Task.GetContainer().Image = "v:4"
	service.Spec.Task.LogDriver = nil // use cluster default now.
	service.Spec.Update = &api.UpdateConfig{
		Parallelism: 1,
		Delay:       *ptypes.DurationProto(10 * time.Millisecond),
	}
	updater = NewUpdater(s, NewRestartSupervisor(s))
	updater.Run(ctx, cluster, service, getRunnableServiceTasks(t, s, service))
	updatedTasks = getRunnableServiceTasks(t, s, service)
	for _, task := range updatedTasks {
		assert.Equal(t, "v:4", task.Spec.GetContainer().Image)
		assert.Equal(t, cluster.Spec.DefaultLogDriver, task.LogDriver) // pick up from cluster
	}
}
func TestSchedulerResourceConstraint(t *testing.T) {
	ctx := context.Background()
	// Create a ready node without enough memory to run the task.
	underprovisionedNode := &api.Node{
		ID: "underprovisioned",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "underprovisioned"},
		},
		Status: api.NodeStatus{State: api.NodeStatus_READY},
		Description: &api.NodeDescription{
			Resources: &api.Resources{
				NanoCPUs:    1e9,
				MemoryBytes: 1e9,
			},
		},
	}

	// Non-ready nodes that satisfy the constraints but shouldn't be used
	nonready1 := &api.Node{
		ID: "nonready1",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "nonready1"},
		},
		Status: api.NodeStatus{State: api.NodeStatus_UNKNOWN},
		Description: &api.NodeDescription{
			Resources: &api.Resources{
				NanoCPUs:    2e9,
				MemoryBytes: 2e9,
			},
		},
	}
	nonready2 := &api.Node{
		ID: "nonready2",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "nonready2"},
		},
		Status: api.NodeStatus{State: api.NodeStatus_UNKNOWN},
		Description: &api.NodeDescription{
			Resources: &api.Resources{
				NanoCPUs:    2e9,
				MemoryBytes: 2e9,
			},
		},
	}

	initialTask := &api.Task{
		ID:           "id1",
		DesiredState: api.TaskStateRunning,
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{},
			},
			Resources: &api.ResourceRequirements{
				Reservations: &api.Resources{MemoryBytes: 2e9},
			},
		},
		ServiceAnnotations: api.Annotations{Name: "name1"},
		Status:             api.TaskStatus{State: api.TaskStatePending},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Add initial node and task
		assert.NoError(t, store.CreateTask(tx, initialTask))
		assert.NoError(t, store.CreateNode(tx, underprovisionedNode))
		assert.NoError(t, store.CreateNode(tx, nonready1))
		assert.NoError(t, store.CreateNode(tx, nonready2))
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	failure := watchAssignmentFailure(t, watch)
	assert.Equal(t, "no suitable node (2 nodes not available for new tasks; insufficient resources on 1 node)", failure.Status.Message)

	err = s.Update(func(tx store.Tx) error {
		// Create a node with enough memory. The task should get
		// assigned to this node.
		node := &api.Node{
			ID: "bignode",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{Name: "bignode"},
			},
			Description: &api.NodeDescription{
				Resources: &api.Resources{
					NanoCPUs:    4e9,
					MemoryBytes: 8e9,
				},
			},
			Status: api.NodeStatus{State: api.NodeStatus_READY},
		}
		assert.NoError(t, store.CreateNode(tx, node))
		return nil
	})
	assert.NoError(t, err)

	assignment := watchAssignment(t, watch)
	assert.Equal(t, "bignode", assignment.NodeID)
}
func TestSchedulerResourceConstraintHA(t *testing.T) {
	// node 1 starts with 1 task, node 2 starts with 3 tasks.
	// however, node 1 only has enough memory to schedule one more task.
	ctx := context.Background()
	node1 := &api.Node{
		ID: "id1",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "id1"},
		},
		Status: api.NodeStatus{State: api.NodeStatus_READY},
		Description: &api.NodeDescription{
			Resources: &api.Resources{MemoryBytes: 1e9},
		},
	}
	node2 := &api.Node{
		ID: "id2",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "id2"},
		},
		Status: api.NodeStatus{State: api.NodeStatus_READY},
		Description: &api.NodeDescription{
			Resources: &api.Resources{MemoryBytes: 1e11},
		},
	}

	taskTemplate := &api.Task{
		DesiredState: api.TaskStateRunning,
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{},
			},
			Resources: &api.ResourceRequirements{
				Reservations: &api.Resources{MemoryBytes: 5e8},
			},
		},
		ServiceAnnotations: api.Annotations{Name: "name1"},
		Status:             api.TaskStatus{State: api.TaskStatePending},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Add initial node and task
		assert.NoError(t, store.CreateNode(tx, node1))
		assert.NoError(t, store.CreateNode(tx, node2))

		// preassigned tasks
		task1 := taskTemplate.Copy()
		task1.ID = "id1"
		task1.NodeID = "id1"
		task1.Status.State = api.TaskStateRunning
		assert.NoError(t, store.CreateTask(tx, task1))

		task2 := taskTemplate.Copy()
		task2.ID = "id2"
		task2.NodeID = "id2"
		task2.Status.State = api.TaskStateRunning
		assert.NoError(t, store.CreateTask(tx, task2))

		task3 := taskTemplate.Copy()
		task3.ID = "id3"
		task3.NodeID = "id2"
		task3.Status.State = api.TaskStateRunning
		assert.NoError(t, store.CreateTask(tx, task3))

		task4 := taskTemplate.Copy()
		task4.ID = "id4"
		task4.NodeID = "id2"
		task4.Status.State = api.TaskStateRunning
		assert.NoError(t, store.CreateTask(tx, task4))

		// tasks to assign
		task5 := taskTemplate.Copy()
		task5.ID = "id5"
		assert.NoError(t, store.CreateTask(tx, task5))

		task6 := taskTemplate.Copy()
		task6.ID = "id6"
		assert.NoError(t, store.CreateTask(tx, task6))

		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	assignment1 := watchAssignment(t, watch)
	if assignment1.ID != "id5" && assignment1.ID != "id6" {
		t.Fatal("assignment for unexpected task")
	}
	assignment2 := watchAssignment(t, watch)
	if assignment1.ID == "id5" {
		assert.Equal(t, "id6", assignment2.ID)
	} else {
		assert.Equal(t, "id5", assignment2.ID)
	}

	if assignment1.NodeID == "id1" {
		assert.Equal(t, "id2", assignment2.NodeID)
	} else {
		assert.Equal(t, "id1", assignment2.NodeID)
	}
}
func TestPreassignedTasks(t *testing.T) {
	ctx := context.Background()
	initialNodeSet := []*api.Node{
		{
			ID: "node1",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{Name: "name1"},
			},
			Status: api.NodeStatus{State: api.NodeStatus_READY},
		},
		{
			ID: "node2",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{Name: "name2"},
			},
			Status: api.NodeStatus{State: api.NodeStatus_READY},
		},
	}

	initialTaskSet := []*api.Task{
		{
			ID:                 "task1",
			DesiredState:       api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{Name: "name1"},
			Status:             api.TaskStatus{State: api.TaskStatePending},
		},
		{
			ID:                 "task2",
			DesiredState:       api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{Name: "name2"},
			Status:             api.TaskStatus{State: api.TaskStatePending},
			NodeID:             initialNodeSet[0].ID,
		},
		{
			ID:                 "task3",
			DesiredState:       api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{Name: "name2"},
			Status:             api.TaskStatus{State: api.TaskStatePending},
			NodeID:             initialNodeSet[0].ID,
		},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Prepopulate nodes
		for _, n := range initialNodeSet {
			assert.NoError(t, store.CreateNode(tx, n))
		}

		// Prepopulate tasks
		for _, task := range initialTaskSet {
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()

	// preassigned tasks would be processed first
	assignment1 := watchAssignment(t, watch)
	// task2 and task3 are preassigned to node1
	assert.Equal(t, assignment1.NodeID, "node1")
	assert.Regexp(t, assignment1.ID, "(task2|task3)")

	assignment2 := watchAssignment(t, watch)
	if assignment1.ID == "task2" {
		assert.Equal(t, "task3", assignment2.ID)
	} else {
		assert.Equal(t, "task2", assignment2.ID)
	}

	// task1 would be assigned to node2 because node1 has 2 tasks already
	assignment3 := watchAssignment(t, watch)
	assert.Equal(t, assignment3.ID, "task1")
	assert.Equal(t, assignment3.NodeID, "node2")
}
func TestManager(t *testing.T) {
	ctx := context.TODO()

	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)

	temp, err := ioutil.TempFile("", "test-socket")
	assert.NoError(t, err)
	assert.NoError(t, temp.Close())
	assert.NoError(t, os.Remove(temp.Name()))
	defer os.RemoveAll(temp.Name())

	lunix, err := net.Listen("unix", temp.Name())
	assert.NoError(t, err)
	ltcp, err := net.Listen("tcp", "127.0.0.1:0")
	assert.NoError(t, err)

	stateDir, err := ioutil.TempDir("", "test-raft")
	assert.NoError(t, err)
	defer os.RemoveAll(stateDir)

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	agentSecurityConfig, err := tc.NewNodeConfig(ca.AgentRole)
	assert.NoError(t, err)
	agentDiffOrgSecurityConfig, err := tc.NewNodeConfigOrg(ca.AgentRole, "another-org")
	assert.NoError(t, err)
	managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	m, err := manager.New(&manager.Config{
		ProtoListener:  map[string]net.Listener{"unix": lunix, "tcp": ltcp},
		StateDir:       stateDir,
		SecurityConfig: managerSecurityConfig,
	})
	assert.NoError(t, err)
	assert.NotNil(t, m)

	done := make(chan error)
	defer close(done)
	go func() {
		done <- m.Run(ctx)
	}()

	opts := []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds),
	}

	conn, err := grpc.Dial(ltcp.Addr().String(), opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, conn.Close())
	}()

	// We have to send a dummy request to verify if the connection is actually up.
	client := api.NewDispatcherClient(conn)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.Equal(t, dispatcher.ErrNodeNotRegistered.Error(), grpc.ErrorDesc(err))

	// Try to have a client in a different org access this manager
	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(agentDiffOrgSecurityConfig.ClientTLSCreds),
	}

	conn2, err := grpc.Dial(ltcp.Addr().String(), opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, conn2.Close())
	}()

	// We have to send a dummy request to verify if the connection is actually up.
	client = api.NewDispatcherClient(conn2)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.Contains(t, grpc.ErrorDesc(err), "Permission denied: unauthorized peer role: rpc error: code = 7 desc = Permission denied: remote certificate not part of organization")

	// Verify that requests to the various GRPC services running on TCP
	// are rejected if they don't have certs.
	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})),
	}

	noCertConn, err := grpc.Dial(ltcp.Addr().String(), opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, noCertConn.Close())
	}()

	client = api.NewDispatcherClient(noCertConn)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	controlClient := api.NewControlClient(noCertConn)
	_, err = controlClient.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	raftClient := api.NewRaftMembershipClient(noCertConn)
	_, err = raftClient.Join(context.Background(), &api.JoinRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	m.Stop(ctx)

	// After stopping, we may still receive an error from ListenAndServe if
	// all this happened before WaitForLeader completed, so don't check the
	// error.
	<-done
}
func TestSchedulerPluginConstraint(t *testing.T) {
	ctx := context.Background()

	// Node1: vol plugin1
	n1 := &api.Node{
		ID: "node1_ID",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "node1"},
		},
		Description: &api.NodeDescription{
			Engine: &api.EngineDescription{
				Plugins: []api.PluginDescription{
					{Type: "Volume", Name: "plugin1"},
				},
			},
		},
		Status: api.NodeStatus{State: api.NodeStatus_READY},
	}

	// Node2: vol plugin1, vol plugin2
	n2 := &api.Node{
		ID: "node2_ID",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "node2"},
		},
		Description: &api.NodeDescription{
			Engine: &api.EngineDescription{
				Plugins: []api.PluginDescription{
					{Type: "Volume", Name: "plugin1"},
					{Type: "Volume", Name: "plugin2"},
				},
			},
		},
		Status: api.NodeStatus{State: api.NodeStatus_READY},
	}

	// Node3: vol plugin1, network plugin1
	n3 := &api.Node{
		ID: "node3_ID",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{Name: "node3"},
		},
		Description: &api.NodeDescription{
			Engine: &api.EngineDescription{
				Plugins: []api.PluginDescription{
					{Type: "Volume", Name: "plugin1"},
					{Type: "Network", Name: "plugin1"},
				},
			},
		},
		Status: api.NodeStatus{State: api.NodeStatus_READY},
	}

	volumeOptionsDriver := func(driver string) *api.Mount_VolumeOptions {
		return &api.Mount_VolumeOptions{
			DriverConfig: &api.Driver{Name: driver},
		}
	}

	// Task1: vol plugin1
	t1 := &api.Task{
		ID:           "task1_ID",
		DesiredState: api.TaskStateRunning,
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Mounts: []api.Mount{
						{
							Source:        "testVol1",
							Target:        "/foo",
							Type:          api.MountTypeVolume,
							VolumeOptions: volumeOptionsDriver("plugin1"),
						},
					},
				},
			},
		},
		ServiceAnnotations: api.Annotations{Name: "task1"},
		Status:             api.TaskStatus{State: api.TaskStatePending},
	}

	// Task2: vol plugin1, vol plugin2
	t2 := &api.Task{
		ID:           "task2_ID",
		DesiredState: api.TaskStateRunning,
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Mounts: []api.Mount{
						{
							Source:        "testVol1",
							Target:        "/foo",
							Type:          api.MountTypeVolume,
							VolumeOptions: volumeOptionsDriver("plugin1"),
						},
						{
							Source:        "testVol2",
							Target:        "/foo",
							Type:          api.MountTypeVolume,
							VolumeOptions: volumeOptionsDriver("plugin2"),
						},
					},
				},
			},
		},
		ServiceAnnotations: api.Annotations{Name: "task2"},
		Status:             api.TaskStatus{State: api.TaskStatePending},
	}

	// Task3: vol plugin1, network plugin1
	t3 := &api.Task{
		ID:           "task3_ID",
		DesiredState: api.TaskStateRunning,
		Networks: []*api.NetworkAttachment{
			{
				Network: &api.Network{
					ID: "testNwID1",
					Spec: api.NetworkSpec{
						Annotations: api.Annotations{Name: "testVol1"},
					},
					DriverState: &api.Driver{Name: "plugin1"},
				},
			},
		},
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Mounts: []api.Mount{
						{
							Source:        "testVol1",
							Target:        "/foo",
							Type:          api.MountTypeVolume,
							VolumeOptions: volumeOptionsDriver("plugin1"),
						},
					},
				},
			},
		},
		ServiceAnnotations: api.Annotations{Name: "task2"},
		Status:             api.TaskStatus{State: api.TaskStatePending},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	// Add initial node and task
	err := s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateTask(tx, t1))
		assert.NoError(t, store.CreateNode(tx, n1))
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	// t1 should get assigned
	assignment := watchAssignment(t, watch)
	assert.Equal(t, assignment.NodeID, "node1_ID")

	// Create t2; it should stay in the pending state because there is
	// no node with volume plugin `plugin2`
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateTask(tx, t2))
		return nil
	})
	assert.NoError(t, err)

	time.Sleep(100 * time.Millisecond)
	s.View(func(tx store.ReadTx) {
		task := store.GetTask(tx, "task2_ID")
		if task.Status.State >= api.TaskStateAssigned {
			t.Fatalf("task 'task2_ID' should not have been assigned to node %v", task.NodeID)
		}
	})

	// Now add the second node
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, n2))
		return nil
	})
	assert.NoError(t, err)

	// Check that t2 has been assigned
	assignment1 := watchAssignment(t, watch)
	assert.Equal(t, assignment1.ID, "task2_ID")
	assert.Equal(t, assignment1.NodeID, "node2_ID")

	// Create t3; it should stay in the pending state because there is
	// no node with network plugin `plugin1`
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateTask(tx, t3))
		return nil
	})
	assert.NoError(t, err)

	time.Sleep(100 * time.Millisecond)
	s.View(func(tx store.ReadTx) {
		task := store.GetTask(tx, "task3_ID")
		if task.Status.State >= api.TaskStateAssigned {
			t.Fatal("task 'task3_ID' should not have been assigned")
		}
	})

	// Now add node3
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, n3))
		return nil
	})
	assert.NoError(t, err)

	// Check that t3 has been assigned
	assignment2 := watchAssignment(t, watch)
	assert.Equal(t, assignment2.ID, "task3_ID")
	assert.Equal(t, assignment2.NodeID, "node3_ID")
}
func TestUpdaterRollback(t *testing.T) {
	ctx := context.Background()
	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()

	var (
		failImage1 uint32
		failImage2 uint32
	)

	watchCreate, cancelCreate := state.Watch(s.WatchQueue(), state.EventCreateTask{})
	defer cancelCreate()

	watchServiceUpdate, cancelServiceUpdate := state.Watch(s.WatchQueue(), state.EventUpdateService{})
	defer cancelServiceUpdate()

	// Fail new tasks the updater tries to run
	watchUpdate, cancelUpdate := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancelUpdate()
	go func() {
		failedLast := false
		for {
			select {
			case e := <-watchUpdate:
				task := e.(state.EventUpdateTask).Task
				if task.DesiredState == task.Status.State {
					continue
				}
				if task.DesiredState == api.TaskStateRunning && task.Status.State != api.TaskStateFailed && task.Status.State != api.TaskStateRunning {
					err := s.Update(func(tx store.Tx) error {
						task = store.GetTask(tx, task.ID)
						// Never fail two image2 tasks in a row, so there's a mix of
						// failed and successful tasks for the rollback.
						if task.Spec.GetContainer().Image == "image1" && atomic.LoadUint32(&failImage1) == 1 {
							task.Status.State = api.TaskStateFailed
							failedLast = true
						} else if task.Spec.GetContainer().Image == "image2" && atomic.LoadUint32(&failImage2) == 1 && !failedLast {
							task.Status.State = api.TaskStateFailed
							failedLast = true
						} else {
							task.Status.State = task.DesiredState
							failedLast = false
						}
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				} else if task.DesiredState > api.TaskStateRunning {
					err := s.Update(func(tx store.Tx) error {
						task = store.GetTask(tx, task.ID)
						task.Status.State = task.DesiredState
						return store.UpdateTask(tx, task)
					})
					assert.NoError(t, err)
				}
			}
		}
	}()

	// Create a service with four replicas specified before the orchestrator
	// is started. This should result in four tasks when the orchestrator
	// starts up.
	err := s.Update(func(tx store.Tx) error {
		s1 := &api.Service{
			ID: "id1",
			Spec: api.ServiceSpec{
				Annotations: api.Annotations{Name: "name1"},
				Task: api.TaskSpec{
					Runtime: &api.TaskSpec_Container{
						Container: &api.ContainerSpec{Image: "image1"},
					},
					Restart: &api.RestartPolicy{
						Condition: api.RestartOnNone,
					},
				},
				Mode: &api.ServiceSpec_Replicated{
					Replicated: &api.ReplicatedService{Replicas: 4},
				},
				Update: &api.UpdateConfig{
					FailureAction:   api.UpdateConfig_ROLLBACK,
					Parallelism:     1,
					Delay:           *ptypes.DurationProto(10 * time.Millisecond),
					Monitor:         ptypes.DurationProto(500 * time.Millisecond),
					MaxFailureRatio: 0.4,
				},
			},
		}
		assert.NoError(t, store.CreateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Start the orchestrator.
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	observedTask := testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	atomic.StoreUint32(&failImage2, 1)

	// Start a rolling update
	err = s.Update(func(tx store.Tx) error {
		s1 := store.GetService(tx, "id1")
		require.NotNil(t, s1)
		s1.PreviousSpec = s1.Spec.Copy()
		s1.UpdateStatus = nil
		s1.Spec.Task.GetContainer().Image = "image2"
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Should see three tasks started, then a rollback

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	// Should get to the ROLLBACK_STARTED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus == nil {
			continue
		}
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			break
		}
	}

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Should end up in ROLLBACK_COMPLETED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_COMPLETED {
			break
		}
	}

	atomic.StoreUint32(&failImage1, 1)

	// Repeat the rolling update but this time fail the tasks that the
	// rollback creates. It should end up in ROLLBACK_PAUSED.
	err = s.Update(func(tx store.Tx) error {
		s1 := store.GetService(tx, "id1")
		require.NotNil(t, s1)
		s1.PreviousSpec = s1.Spec.Copy()
		s1.UpdateStatus = nil
		s1.Spec.Task.GetContainer().Image = "image2"
		assert.NoError(t, store.UpdateService(tx, s1))
		return nil
	})
	assert.NoError(t, err)

	// Should see three tasks started, then a rollback

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image2")

	// Should get to the ROLLBACK_STARTED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus == nil {
			continue
		}
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_STARTED {
			break
		}
	}

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	observedTask = testutils.WatchTaskCreate(t, watchCreate)
	assert.Equal(t, observedTask.Status.State, api.TaskStateNew)
	assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1")

	// Should end up in ROLLBACK_PAUSED state
	for {
		e := <-watchServiceUpdate
		if e.(state.EventUpdateService).Service.UpdateStatus.State == api.UpdateStatus_ROLLBACK_PAUSED {
			break
		}
	}
}
func benchScheduler(b *testing.B, nodes, tasks int, networkConstraints bool) {
	ctx := context.Background()

	for iters := 0; iters < b.N; iters++ {
		b.StopTimer()
		s := store.NewMemoryStore(nil)
		scheduler := New(s)

		watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})

		go func() {
			_ = scheduler.Run(ctx)
		}()

		// Let the scheduler get started
		runtime.Gosched()

		_ = s.Update(func(tx store.Tx) error {
			// Create initial nodes and tasks
			for i := 0; i < nodes; i++ {
				n := &api.Node{
					ID: identity.NewID(),
					Spec: api.NodeSpec{
						Annotations: api.Annotations{
							Name:   "name" + strconv.Itoa(i),
							Labels: make(map[string]string),
						},
					},
					Status: api.NodeStatus{State: api.NodeStatus_READY},
					Description: &api.NodeDescription{
						Engine: &api.EngineDescription{},
					},
				}
				// Give every third node a special network
				if i%3 == 0 {
					n.Description.Engine.Plugins = []api.PluginDescription{
						{Name: "network", Type: "Network"},
					}
				}
				err := store.CreateNode(tx, n)
				if err != nil {
					panic(err)
				}
			}
			for i := 0; i < tasks; i++ {
				id := "task" + strconv.Itoa(i)
				t := &api.Task{
					ID:                 id,
					DesiredState:       api.TaskStateRunning,
					ServiceAnnotations: api.Annotations{Name: id},
					Status:             api.TaskStatus{State: api.TaskStatePending},
				}
				if networkConstraints {
					t.Networks = []*api.NetworkAttachment{
						{
							Network: &api.Network{
								DriverState: &api.Driver{Name: "network"},
							},
						},
					}
				}
				err := store.CreateTask(tx, t)
				if err != nil {
					panic(err)
				}
			}
			b.StartTimer()
			return nil
		})

		for i := 0; i != tasks; i++ {
			<-watch
		}

		scheduler.Stop()
		cancel()
		s.Close()
	}
}
func TestScheduler(t *testing.T) {
	ctx := context.Background()
	initialNodeSet := []*api.Node{
		{
			ID: "id1",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id2",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name2",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id3",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name2",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
	}

	initialTaskSet := []*api.Task{
		{
			ID: "id1",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name1",
			},
			Status: api.TaskStatus{
				State: api.TaskStateAssigned,
			},
			NodeID: initialNodeSet[0].ID,
		},
		{
			ID: "id2",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name2",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		},
		{
			ID: "id3",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name2",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Prepopulate nodes
		for _, n := range initialNodeSet {
			assert.NoError(t, store.CreateNode(tx, n))
		}

		// Prepopulate tasks
		for _, task := range initialTaskSet {
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	assignment1 := watchAssignment(t, watch)
	// must assign to id2 or id3 since id1 already has a task
	assert.Regexp(t, "(id2|id3)", assignment1.NodeID)

	assignment2 := watchAssignment(t, watch)
	// the other node of id2/id3 must get the second task
	if assignment1.NodeID == "id2" {
		assert.Equal(t, "id3", assignment2.NodeID)
	} else {
		assert.Equal(t, "id2", assignment2.NodeID)
	}

	err = s.Update(func(tx store.Tx) error {
		// Update each node to make sure this doesn't mess up the
		// scheduler's state.
		for _, n := range initialNodeSet {
			assert.NoError(t, store.UpdateNode(tx, n))
		}
		return nil
	})
	assert.NoError(t, err)

	err = s.Update(func(tx store.Tx) error {
		// Delete the task associated with node 1 so it's now the most lightly
		// loaded node.
		assert.NoError(t, store.DeleteTask(tx, "id1"))

		// Create a new task. It should get assigned to id1.
		t4 := &api.Task{
			ID: "id4",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name4",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.CreateTask(tx, t4))
		return nil
	})
	assert.NoError(t, err)

	assignment3 := watchAssignment(t, watch)
	assert.Equal(t, "id1", assignment3.NodeID)

	// Update a task to make it unassigned. It should get assigned by the
	// scheduler.
	err = s.Update(func(tx store.Tx) error {
		// Remove assignment from task id4. It should get assigned
		// to node id1.
		t4 := &api.Task{
			ID: "id4",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name4",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.UpdateTask(tx, t4))
		return nil
	})
	assert.NoError(t, err)

	assignment4 := watchAssignment(t, watch)
	assert.Equal(t, "id1", assignment4.NodeID)

	err = s.Update(func(tx store.Tx) error {
		// Create a ready node, then remove it. No tasks should ever
		// be assigned to it.
		node := &api.Node{
			ID: "removednode",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "removednode",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_DOWN,
			},
		}
		assert.NoError(t, store.CreateNode(tx, node))
		assert.NoError(t, store.DeleteNode(tx, node.ID))

		// Create an unassigned task.
		task := &api.Task{
			ID: "removednode",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "removednode",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.CreateTask(tx, task))
		return nil
	})
	assert.NoError(t, err)

	assignmentRemovedNode := watchAssignment(t, watch)
	assert.NotEqual(t, "removednode", assignmentRemovedNode.NodeID)

	err = s.Update(func(tx store.Tx) error {
		// Create a ready node. It should be used for the next
		// assignment.
		n4 := &api.Node{
			ID: "id4",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name4",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		}
		assert.NoError(t, store.CreateNode(tx, n4))

		// Create an unassigned task.
		t5 := &api.Task{
			ID: "id5",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name5",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.CreateTask(tx, t5))
		return nil
	})
	assert.NoError(t, err)

	assignment5 := watchAssignment(t, watch)
	assert.Equal(t, "id4", assignment5.NodeID)

	err = s.Update(func(tx store.Tx) error {
		// Create a non-ready node. It should NOT be used for the next
		// assignment.
		n5 := &api.Node{
			ID: "id5",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name5",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_DOWN,
			},
		}
		assert.NoError(t, store.CreateNode(tx, n5))

		// Create an unassigned task.
		t6 := &api.Task{
			ID: "id6",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name6",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.CreateTask(tx, t6))
		return nil
	})
	assert.NoError(t, err)

	assignment6 := watchAssignment(t, watch)
	assert.NotEqual(t, "id5", assignment6.NodeID)

	err = s.Update(func(tx store.Tx) error {
		// Update node id5 to put it in the READY state.
		n5 := &api.Node{
			ID: "id5",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name5",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		}
		assert.NoError(t, store.UpdateNode(tx, n5))

		// Create an unassigned task. Should be assigned to the
		// now-ready node.
		t7 := &api.Task{
			ID: "id7",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name7",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.CreateTask(tx, t7))
		return nil
	})
	assert.NoError(t, err)

	assignment7 := watchAssignment(t, watch)
	assert.Equal(t, "id5", assignment7.NodeID)

	err = s.Update(func(tx store.Tx) error {
		// Create a ready node, then immediately take it down. The next
		// unassigned task should NOT be assigned to it.
		n6 := &api.Node{
			ID: "id6",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name6",
				},
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		}
		assert.NoError(t, store.CreateNode(tx, n6))
		n6.Status.State = api.NodeStatus_DOWN
		assert.NoError(t, store.UpdateNode(tx, n6))

		// Create an unassigned task.
		t8 := &api.Task{
			ID: "id8",
			DesiredState: api.TaskStateRunning,
			ServiceAnnotations: api.Annotations{
				Name: "name8",
			},
			Status: api.TaskStatus{
				State: api.TaskStatePending,
			},
		}
		assert.NoError(t, store.CreateTask(tx, t8))
		return nil
	})
	assert.NoError(t, err)

	assignment8 := watchAssignment(t, watch)
	assert.NotEqual(t, "id6", assignment8.NodeID)
}
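// The tests above rely on a watchAssignment helper defined elsewhere in this
// package. As a rough, hypothetical sketch of what such a helper does, the
// function below drains the watch channel until it sees a task update that
// carries a node assignment, failing the test on timeout. The events.Event
// type (from github.com/docker/go-events) and the Task field on
// state.EventUpdateTask are assumptions based on how state.Watch is used in
// these tests, not a copy of the real helper.
func watchAssignmentSketch(t *testing.T, watch chan events.Event) *api.Task {
	for {
		select {
		case event := <-watch:
			if update, ok := event.(state.EventUpdateTask); ok && update.Task.NodeID != "" {
				// A task update with a non-empty NodeID means the
				// scheduler has picked a node for it.
				return update.Task
			}
		case <-time.After(time.Second):
			t.Fatal("timed out waiting for task assignment")
		}
	}
}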
func TestSchedulerFaultyNode(t *testing.T) {
	ctx := context.Background()

	taskTemplate := &api.Task{
		ServiceID: "service1",
		DesiredState: api.TaskStateRunning,
		ServiceAnnotations: api.Annotations{
			Name: "name1",
		},
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
	}

	node1 := &api.Node{
		ID: "id1",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{
				Name: "id1",
			},
		},
		Status: api.NodeStatus{
			State: api.NodeStatus_READY,
		},
	}
	node2 := &api.Node{
		ID: "id2",
		Spec: api.NodeSpec{
			Annotations: api.Annotations{
				Name: "id2",
			},
		},
		Status: api.NodeStatus{
			State: api.NodeStatus_READY,
		},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Add initial nodes, and one task assigned to node id1
		assert.NoError(t, store.CreateNode(tx, node1))
		assert.NoError(t, store.CreateNode(tx, node2))

		task1 := taskTemplate.Copy()
		task1.ID = "id1"
		task1.NodeID = "id1"
		task1.Status.State = api.TaskStateRunning
		assert.NoError(t, store.CreateTask(tx, task1))
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	for i := 0; i != 8; i++ {
		// Simulate a task failure cycle
		newTask := taskTemplate.Copy()
		newTask.ID = identity.NewID()

		err = s.Update(func(tx store.Tx) error {
			assert.NoError(t, store.CreateTask(tx, newTask))
			return nil
		})
		assert.NoError(t, err)

		assignment := watchAssignment(t, watch)
		assert.Equal(t, newTask.ID, assignment.ID)

		if i < 5 {
			// The first 5 attempts should be assigned to node id2 because
			// it has no replicas of the service.
			assert.Equal(t, "id2", assignment.NodeID)
		} else {
			// The next ones should be assigned to id1, since we'll
			// flag id2 as potentially faulty.
			assert.Equal(t, "id1", assignment.NodeID)
		}

		err = s.Update(func(tx store.Tx) error {
			newTask := store.GetTask(tx, newTask.ID)
			require.NotNil(t, newTask)
			newTask.Status.State = api.TaskStateFailed
			assert.NoError(t, store.UpdateTask(tx, newTask))
			return nil
		})
		assert.NoError(t, err)
	}
}
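// TestSchedulerFaultyNode exercises the scheduler's bias away from nodes where
// a service has recently failed. The real heuristic lives inside the
// scheduler; the toy tracker below is purely illustrative (all names and the
// threshold are hypothetical) and only shows the kind of bookkeeping the test
// expects: after several failures of service1's tasks on id2, id1 becomes the
// preferred node.
type faultyNodeTracker struct {
	// failures counts recent task failures per serviceID, then per nodeID.
	failures map[string]map[string]int
}

func (f *faultyNodeTracker) recordFailure(serviceID, nodeID string) {
	if f.failures == nil {
		f.failures = make(map[string]map[string]int)
	}
	if f.failures[serviceID] == nil {
		f.failures[serviceID] = make(map[string]int)
	}
	f.failures[serviceID][nodeID]++
}

// suspect reports whether a node has accumulated enough failures for a
// service to be considered potentially faulty (the threshold is an assumption,
// not taken from the scheduler's actual configuration).
func (f *faultyNodeTracker) suspect(serviceID, nodeID string, threshold int) bool {
	return f.failures[serviceID][nodeID] >= threshold
}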
func TestDrain(t *testing.T) {
	ctx := context.Background()
	initialService := &api.Service{
		ID: "id1",
		Spec: api.ServiceSpec{
			Annotations: api.Annotations{
				Name: "name1",
			},
			Task: api.TaskSpec{
				Runtime: &api.TaskSpec_Container{
					Container: &api.ContainerSpec{},
				},
				Restart: &api.RestartPolicy{
					Condition: api.RestartOnNone,
				},
			},
			Mode: &api.ServiceSpec_Replicated{
				Replicated: &api.ReplicatedService{
					Replicas: 6,
				},
			},
		},
	}
	initialNodeSet := []*api.Node{
		{
			ID: "id1",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name1",
				},
				Availability: api.NodeAvailabilityActive,
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id2",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name2",
				},
				Availability: api.NodeAvailabilityActive,
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_DOWN,
			},
		},
		// We should NOT kick out tasks on UNKNOWN nodes.
		{
			ID: "id3",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name3",
				},
				Availability: api.NodeAvailabilityActive,
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_UNKNOWN,
			},
		},
		{
			ID: "id4",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name4",
				},
				Availability: api.NodeAvailabilityPause,
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id5",
			Spec: api.NodeSpec{
				Annotations: api.Annotations{
					Name: "name5",
				},
				Availability: api.NodeAvailabilityDrain,
			},
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
	}
	initialTaskSet := []*api.Task{
		// Task not assigned to any node
		{
			ID: "id0",
			Status: api.TaskStatus{
				State: api.TaskStateNew,
			},
			Slot: 1,
			ServiceAnnotations: api.Annotations{
				Name: "name0",
			},
			ServiceID: "id1",
		},
		// Tasks assigned to the nodes defined above
		{
			ID: "id1",
			Status: api.TaskStatus{
				State: api.TaskStateNew,
			},
			Slot: 2,
			ServiceAnnotations: api.Annotations{
				Name: "name1",
			},
			ServiceID: "id1",
			NodeID: "id1",
		},
		{
			ID: "id2",
			Status: api.TaskStatus{
				State: api.TaskStateNew,
			},
			Slot: 3,
			ServiceAnnotations: api.Annotations{
				Name: "name2",
			},
			ServiceID: "id1",
			NodeID: "id2",
		},
		{
			ID: "id3",
			Status: api.TaskStatus{
				State: api.TaskStateNew,
			},
			Slot: 4,
			ServiceAnnotations: api.Annotations{
				Name: "name3",
			},
			ServiceID: "id1",
			NodeID: "id3",
		},
		{
			ID: "id4",
			Status: api.TaskStatus{
				State: api.TaskStateNew,
			},
			Slot: 5,
			ServiceAnnotations: api.Annotations{
				Name: "name4",
			},
			ServiceID: "id1",
			NodeID: "id4",
		},
		{
			ID: "id5",
			Status: api.TaskStatus{
				State: api.TaskStateNew,
			},
			Slot: 6,
			ServiceAnnotations: api.Annotations{
				Name: "name5",
			},
			ServiceID: "id1",
			NodeID: "id5",
		},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	err := s.Update(func(tx store.Tx) error {
		// Prepopulate service
		assert.NoError(t, store.CreateService(tx, initialService))
		// Prepopulate nodes
		for _, n := range initialNodeSet {
			assert.NoError(t, store.CreateNode(tx, n))
		}

		// Prepopulate tasks
		for _, task := range initialTaskSet {
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	orchestrator := NewReplicatedOrchestrator(s)
	defer orchestrator.Stop()
	go func() {
		assert.NoError(t, orchestrator.Run(ctx))
	}()

	// id2 and id5 should be killed immediately
	deletion1 := watchShutdownTask(t, watch)
	deletion2 := watchShutdownTask(t, watch)

	assert.Regexp(t, "id(2|5)", deletion1.ID)
	assert.Regexp(t, "id(2|5)", deletion1.NodeID)
	assert.Regexp(t, "id(2|5)", deletion2.ID)
	assert.Regexp(t, "id(2|5)", deletion2.NodeID)

	// Create a new task, assigned to node id2
	err = s.Update(func(tx store.Tx) error {
		task := initialTaskSet[2].Copy()
		task.ID = "newtask"
		task.NodeID = "id2"
		assert.NoError(t, store.CreateTask(tx, task))
		return nil
	})
	assert.NoError(t, err)

	deletion3 := watchShutdownTask(t, watch)
	assert.Equal(t, "newtask", deletion3.ID)
	assert.Equal(t, "id2", deletion3.NodeID)

	// Set node id4 to the DRAINED state
	err = s.Update(func(tx store.Tx) error {
		n := initialNodeSet[3].Copy()
		n.Spec.Availability = api.NodeAvailabilityDrain
		assert.NoError(t, store.UpdateNode(tx, n))
		return nil
	})
	assert.NoError(t, err)

	deletion4 := watchShutdownTask(t, watch)
	assert.Equal(t, "id4", deletion4.ID)
	assert.Equal(t, "id4", deletion4.NodeID)

	// Delete node id1
	err = s.Update(func(tx store.Tx) error {
		assert.NoError(t, store.DeleteNode(tx, "id1"))
		return nil
	})
	assert.NoError(t, err)

	deletion5 := watchShutdownTask(t, watch)
	assert.Equal(t, "id1", deletion5.ID)
	assert.Equal(t, "id1", deletion5.NodeID)
}
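// TestDrain encodes the rule the orchestrator applies when deciding whether a
// task's node can still host it. The hypothetical predicate below is not the
// orchestrator's actual code; it just restates what the assertions above
// expect: shut the task down if the node is gone, DOWN, or set to drain, and
// leave it alone for UNKNOWN or paused nodes (pause only blocks new
// scheduling, it does not evict running tasks).
func nodeRequiresShutdownSketch(n *api.Node) bool {
	return n == nil ||
		n.Status.State == api.NodeStatus_DOWN ||
		n.Spec.Availability == api.NodeAvailabilityDrain
}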
func TestHA(t *testing.T) {
	ctx := context.Background()
	initialNodeSet := []*api.Node{
		{
			ID: "id1",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id2",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id3",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id4",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
		{
			ID: "id5",
			Status: api.NodeStatus{
				State: api.NodeStatus_READY,
			},
		},
	}

	taskTemplate1 := &api.Task{
		DesiredState: api.TaskStateRunning,
		ServiceID: "service1",
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Image: "v:1",
				},
			},
		},
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
	}

	taskTemplate2 := &api.Task{
		DesiredState: api.TaskStateRunning,
		ServiceID: "service2",
		Spec: api.TaskSpec{
			Runtime: &api.TaskSpec_Container{
				Container: &api.ContainerSpec{
					Image: "v:2",
				},
			},
		},
		Status: api.TaskStatus{
			State: api.TaskStatePending,
		},
	}

	s := store.NewMemoryStore(nil)
	assert.NotNil(t, s)
	defer s.Close()

	t1Instances := 18

	err := s.Update(func(tx store.Tx) error {
		// Prepopulate nodes
		for _, n := range initialNodeSet {
			assert.NoError(t, store.CreateNode(tx, n))
		}

		// Prepopulate tasks from template 1
		for i := 0; i != t1Instances; i++ {
			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
		}
		return nil
	})
	assert.NoError(t, err)

	scheduler := New(s)

	watch, cancel := state.Watch(s.WatchQueue(), state.EventUpdateTask{})
	defer cancel()

	go func() {
		assert.NoError(t, scheduler.Run(ctx))
	}()
	defer scheduler.Stop()

	t1Assignments := make(map[string]int)
	for i := 0; i != t1Instances; i++ {
		assignment := watchAssignment(t, watch)
		if !strings.HasPrefix(assignment.ID, "t1") {
			t.Fatal("got assignment for different kind of task")
		}
		t1Assignments[assignment.NodeID]++
	}

	assert.Len(t, t1Assignments, 5)

	nodesWith3T1Tasks := 0
	nodesWith4T1Tasks := 0
	for nodeID, taskCount := range t1Assignments {
		if taskCount == 3 {
			nodesWith3T1Tasks++
		} else if taskCount == 4 {
			nodesWith4T1Tasks++
		} else {
			t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
		}
	}

	assert.Equal(t, 3, nodesWith4T1Tasks)
	assert.Equal(t, 2, nodesWith3T1Tasks)

	t2Instances := 2

	// Add a new service with two instances. They should fill the nodes
	// that only have three service1 tasks.
	err = s.Update(func(tx store.Tx) error {
		for i := 0; i != t2Instances; i++ {
			taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
			assert.NoError(t, store.CreateTask(tx, taskTemplate2))
		}
		return nil
	})
	assert.NoError(t, err)

	t2Assignments := make(map[string]int)
	for i := 0; i != t2Instances; i++ {
		assignment := watchAssignment(t, watch)
		if !strings.HasPrefix(assignment.ID, "t2") {
			t.Fatal("got assignment for different kind of task")
		}
		t2Assignments[assignment.NodeID]++
	}

	assert.Len(t, t2Assignments, 2)

	for nodeID := range t2Assignments {
		assert.Equal(t, 3, t1Assignments[nodeID])
	}

	// Scale up service 1 to 21 tasks. It should cover the two nodes that
	// service 2 was assigned to, and also one other node.
	err = s.Update(func(tx store.Tx) error {
		for i := t1Instances; i != t1Instances+3; i++ {
			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
			assert.NoError(t, store.CreateTask(tx, taskTemplate1))
		}
		return nil
	})
	assert.NoError(t, err)

	var sharedNodes [2]string

	for i := 0; i != 3; i++ {
		assignment := watchAssignment(t, watch)
		if !strings.HasPrefix(assignment.ID, "t1") {
			t.Fatal("got assignment for different kind of task")
		}
		if t1Assignments[assignment.NodeID] == 5 {
			t.Fatal("more than one new task assigned to the same node")
		}
		t1Assignments[assignment.NodeID]++

		if t2Assignments[assignment.NodeID] != 0 {
			if sharedNodes[0] == "" {
				sharedNodes[0] = assignment.NodeID
			} else if sharedNodes[1] == "" {
				sharedNodes[1] = assignment.NodeID
			} else {
				t.Fatal("all three assignments went to nodes with service2 tasks")
			}
		}
	}

	assert.NotEmpty(t, sharedNodes[0])
	assert.NotEmpty(t, sharedNodes[1])
	assert.NotEqual(t, sharedNodes[0], sharedNodes[1])

	nodesWith4T1Tasks = 0
	nodesWith5T1Tasks := 0
	for nodeID, taskCount := range t1Assignments {
		if taskCount == 4 {
			nodesWith4T1Tasks++
		} else if taskCount == 5 {
			nodesWith5T1Tasks++
		} else {
			t.Fatalf("unexpected number of tasks %d on node %s", taskCount, nodeID)
		}
	}

	assert.Equal(t, 4, nodesWith4T1Tasks)
	assert.Equal(t, 1, nodesWith5T1Tasks)

	// Add another task from service2. It must not land on the node that
	// has 5 service1 tasks.
	err = s.Update(func(tx store.Tx) error {
		taskTemplate2.ID = "t2id4"
		assert.NoError(t, store.CreateTask(tx, taskTemplate2))
		return nil
	})
	assert.NoError(t, err)

	assignment := watchAssignment(t, watch)
	if assignment.ID != "t2id4" {
		t.Fatal("got assignment for different task")
	}

	if t2Assignments[assignment.NodeID] != 0 {
		t.Fatal("was scheduled on a node that already has a service2 task")
	}
	if t1Assignments[assignment.NodeID] == 5 {
		t.Fatal("was scheduled on the node that has the most service1 tasks")
	}
	t2Assignments[assignment.NodeID]++

	// Remove all tasks on node id1.
	err = s.Update(func(tx store.Tx) error {
		tasks, err := store.FindTasks(tx, store.ByNodeID("id1"))
		assert.NoError(t, err)
		for _, task := range tasks {
			assert.NoError(t, store.DeleteTask(tx, task.ID))
		}
		return nil
	})
	assert.NoError(t, err)

	t1Assignments["id1"] = 0
	t2Assignments["id1"] = 0

	// Add four instances of service1 and two instances of service2.
	// All instances of service1 should land on node "id1", and one
	// of the two service2 instances should as well.
	// Put these in a map to randomize the order in which they are
	// created.
	err = s.Update(func(tx store.Tx) error {
		tasksMap := make(map[string]*api.Task)
		for i := 22; i <= 25; i++ {
			taskTemplate1.ID = fmt.Sprintf("t1id%d", i)
			tasksMap[taskTemplate1.ID] = taskTemplate1.Copy()
		}
		for i := 5; i <= 6; i++ {
			taskTemplate2.ID = fmt.Sprintf("t2id%d", i)
			tasksMap[taskTemplate2.ID] = taskTemplate2.Copy()
		}
		for _, task := range tasksMap {
			assert.NoError(t, store.CreateTask(tx, task))
		}
		return nil
	})
	assert.NoError(t, err)

	for i := 0; i != 4+2; i++ {
		assignment := watchAssignment(t, watch)
		if strings.HasPrefix(assignment.ID, "t1") {
			t1Assignments[assignment.NodeID]++
		} else if strings.HasPrefix(assignment.ID, "t2") {
			t2Assignments[assignment.NodeID]++
		}
	}

	assert.Equal(t, 4, t1Assignments["id1"])
	assert.Equal(t, 1, t2Assignments["id1"])
}
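// TestHA is ultimately asserting a spread property: for the initial
// placements, each service's tasks end up within one task of each other
// across the ready nodes. The assertions on nodesWith3T1Tasks and
// nodesWith4T1Tasks are a hand-rolled version of that check; a generic,
// hypothetical helper for the same property (not part of the original tests)
// might look like this.
func assertBalancedSketch(t *testing.T, assignments map[string]int) {
	lo, hi := -1, -1
	for _, count := range assignments {
		if lo == -1 || count < lo {
			lo = count
		}
		if count > hi {
			hi = count
		}
	}
	if hi-lo > 1 {
		t.Fatalf("task counts are not balanced: min %d, max %d", lo, hi)
	}
}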