func agentTestEnv(t *testing.T) (*Agent, func()) { var cleanup []func() tc := testutils.NewTestCA(t, testutils.AcceptancePolicy(true, true, "")) cleanup = append(cleanup, func() { tc.Stop() }) agentSecurityConfig, err := tc.NewNodeConfig(ca.AgentRole) assert.NoError(t, err) addr := "localhost:4949" remotes := picker.NewRemotes(api.Peer{Addr: addr}) conn, err := grpc.Dial(addr, grpc.WithPicker(picker.NewPicker(remotes, addr)), grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds)) assert.NoError(t, err) db, cleanupStorage := storageTestEnv(t) cleanup = append(cleanup, func() { cleanupStorage() }) agent, err := New(&Config{ Executor: &NoopExecutor{}, Managers: remotes, Conn: conn, DB: db, }) return agent, func() { for i := len(cleanup) - 1; i > 0; i-- { cleanup[i]() } } }
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error { var manager api.Peer select { case <-ctx.Done(): case manager = <-n.remotes.WaitSelect(ctx): } if ctx.Err() != nil { return ctx.Err() } picker := picker.NewPicker(n.remotes, manager.Addr) conn, err := grpc.Dial(manager.Addr, grpc.WithPicker(picker), grpc.WithTransportCredentials(creds), grpc.WithBackoffMaxDelay(maxSessionFailureBackoff)) if err != nil { return err } agent, err := New(&Config{ Hostname: n.config.Hostname, Managers: n.remotes, Executor: n.config.Executor, DB: db, Conn: conn, Picker: picker, NotifyRoleChange: n.roleChangeReq, }) if err != nil { return err } if err := agent.Start(ctx); err != nil { return err } n.Lock() n.agent = agent n.Unlock() defer func() { n.Lock() n.agent = nil n.Unlock() }() go func() { <-agent.Ready() close(ready) }() // todo: manually call stop on context cancellation? return agent.Err(context.Background()) }
func (mc *managersCluster) addAgents(count int) error { var addrs []api.Peer for _, m := range mc.ms { addrs = append(addrs, api.Peer{Addr: m.addr}) } for i := 0; i < count; i++ { asConfig, err := mc.tc.NewNodeConfig(ca.AgentRole) if err != nil { return err } managers := picker.NewRemotes(addrs...) peer, err := managers.Select() if err != nil { return err } conn, err := grpc.Dial(peer.Addr, grpc.WithPicker(picker.NewPicker(managers)), grpc.WithTransportCredentials(asConfig.ClientTLSCreds)) if err != nil { return err } id := strconv.Itoa(rand.Int()) a, err := agent.New(&agent.Config{ Hostname: "hostname_" + id, Managers: managers, Executor: &NoopExecutor{}, Conn: conn, }) if err != nil { return err } if err := a.Start(context.Background()); err != nil { return err } mc.agents = append(mc.agents, a) } return nil }
func TestAgentStartStop(t *testing.T) { tc := testutils.NewTestCA(t, testutils.AcceptancePolicy(true, true, "")) defer tc.Stop() agentSecurityConfig, err := tc.NewNodeConfig(ca.AgentRole) assert.NoError(t, err) addr := "localhost:4949" remotes := picker.NewRemotes(api.Peer{Addr: addr}) conn, err := grpc.Dial(addr, grpc.WithPicker(picker.NewPicker(remotes, addr)), grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds)) assert.NoError(t, err) db, cleanup := storageTestEnv(t) defer cleanup() agent, err := New(&Config{ Executor: &NoopExecutor{}, Managers: remotes, Conn: conn, DB: db, }) assert.NoError(t, err) assert.NotNil(t, agent) ctx, _ := context.WithTimeout(context.Background(), 5000*time.Millisecond) assert.Equal(t, errAgentNotStarted, agent.Stop(ctx)) assert.NoError(t, agent.Start(ctx)) if err := agent.Start(ctx); err != errAgentStarted { t.Fatalf("expected agent started error: %v", err) } assert.NoError(t, agent.Stop(ctx)) }
// run is the node's main lifecycle loop: it obtains TLS credentials,
// opens the local task DB, watches for role changes and certificate
// renewals, then runs the manager and agent subsystems until both exit.
// The returned error is also recorded in n.err, and n.closed is closed on
// exit so observers can wait for completion.
func (n *Node) run(ctx context.Context) (err error) {
	// Publish the terminal error and signal shutdown completion, whatever
	// path we exit by.
	defer func() {
		n.err = err
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "node"))

	// Translate an external stop request (n.stopped) into context
	// cancellation for everything started below.
	go func() {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}()

	// NOTE: When this node is created by NewNode(), our nodeID is set if
	// n.loadCertificates() succeeded in loading TLS credentials.
	if n.config.JoinAddr == "" && n.nodeID == "" {
		if err := n.bootstrapCA(); err != nil {
			return err
		}
	}

	if n.config.JoinAddr != "" || n.config.ForceNewCluster {
		n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
		if n.config.JoinAddr != "" {
			n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, 1)
		}
	}

	// Obtain new certs and setup TLS certificates renewal for this node:
	// - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued
	// - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows
	//   us to display the ID before the certificate gets issued (for potential approval).
	// - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate.
	// - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay
	//   up to date.
	issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1)
	// Record the node identity as soon as the CA responds, and unblock
	// anyone waiting on n.certificateRequested.
	go func() {
		select {
		case <-ctx.Done():
		case resp := <-issueResponseChan:
			logrus.Debugf("Requesting certificate for NodeID: %v", resp.NodeID)
			n.Lock()
			n.nodeID = resp.NodeID
			n.nodeMembership = resp.NodeMembership
			n.Unlock()
			close(n.certificateRequested)
		}
	}()

	certDir := filepath.Join(n.config.StateDir, "certificates")
	// Blocks until a valid certificate has been issued (see NOTE above).
	securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.JoinToken, ca.ManagerRole, picker.NewPicker(n.remotes), issueResponseChan)
	if err != nil {
		return err
	}

	taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}

	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	if err := n.loadCertificates(); err != nil {
		return err
	}

	// Bridge role-change requests (from the agent) into forced certificate
	// renewals; a renewed certificate is what actually flips n.role.
	forceCertRenewal := make(chan struct{})
	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case apirole := <-n.roleChangeReq:
				n.Lock()
				lastRole := n.role
				role := ca.AgentRole
				if apirole == api.NodeRoleManager {
					role = ca.ManagerRole
				}
				if lastRole == role {
					n.Unlock()
					continue
				}
				// switch role to agent immediately to shutdown manager early
				if role == ca.AgentRole {
					n.role = role
					n.roleCond.Broadcast()
				}
				n.Unlock()
				select {
				case forceCertRenewal <- struct{}{}:
				case <-ctx.Done():
					return
				}
			}
		}
	}()

	updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, picker.NewPicker(n.remotes), forceCertRenewal)
	// Apply renewed certificates: a successful renewal carries the node's
	// (possibly changed) role, which is published via n.roleCond.
	go func() {
		for {
			select {
			case certUpdate := <-updates:
				if certUpdate.Err != nil {
					logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
					continue
				}
				n.Lock()
				n.role = certUpdate.Role
				n.roleCond.Broadcast()
				n.Unlock()
			case <-ctx.Done():
				return
			}
		}
	}()

	// Snapshot the role before starting the subsystems; used below to
	// decide whether readiness also waits on the manager.
	role := n.role

	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	var managerErr error
	var agentErr error
	var wg sync.WaitGroup
	wg.Add(2)
	// Either subsystem exiting cancels the other via cancel().
	go func() {
		managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop
		wg.Done()
		cancel()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady)
		wg.Done()
		cancel()
	}()

	// n.ready is closed once the agent — and, for managers, the manager
	// subsystem too — has come up.
	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			<-managerReady
		}
		close(n.ready)
	}()

	wg.Wait()

	// Plain cancellation is a normal shutdown, not a failure.
	if managerErr != nil && managerErr != context.Canceled {
		return managerErr
	}
	if agentErr != nil && agentErr != context.Canceled {
		return agentErr
	}

	return err
}
// NewTestCA is a helper method that creates a TestCA and a bunch of default // connections and security configs func NewTestCA(t *testing.T, policy api.AcceptancePolicy) *TestCA { tempBaseDir, err := ioutil.TempDir("", "swarm-ca-test-") assert.NoError(t, err) s := store.NewMemoryStore(nil) paths := ca.NewConfigPaths(tempBaseDir) organization := identity.NewID() rootCA, err := createAndWriteRootCA("swarm-test-CA", paths.RootCA, ca.DefaultNodeCertExpiration) assert.NoError(t, err) managerConfig, err := genSecurityConfig(s, rootCA, ca.ManagerRole, organization, "") assert.NoError(t, err) managerDiffOrgConfig, err := genSecurityConfig(s, rootCA, ca.ManagerRole, "swarm-test-org-2", "") assert.NoError(t, err) agentConfig, err := genSecurityConfig(s, rootCA, ca.AgentRole, organization, "") assert.NoError(t, err) l, err := net.Listen("tcp", "127.0.0.1:0") assert.NoError(t, err) baseOpts := []grpc.DialOption{grpc.WithTimeout(10 * time.Second)} insecureClientOpts := append(baseOpts, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}))) clientOpts := append(baseOpts, grpc.WithTransportCredentials(agentConfig.ClientTLSCreds)) managerOpts := append(baseOpts, grpc.WithTransportCredentials(managerConfig.ClientTLSCreds)) managerDiffOrgOpts := append(baseOpts, grpc.WithTransportCredentials(managerDiffOrgConfig.ClientTLSCreds)) conn1, err := grpc.Dial(l.Addr().String(), insecureClientOpts...) assert.NoError(t, err) conn2, err := grpc.Dial(l.Addr().String(), clientOpts...) assert.NoError(t, err) conn3, err := grpc.Dial(l.Addr().String(), managerOpts...) assert.NoError(t, err) conn4, err := grpc.Dial(l.Addr().String(), managerDiffOrgOpts...) assert.NoError(t, err) serverOpts := []grpc.ServerOption{grpc.Creds(managerConfig.ServerTLSCreds)} grpcServer := grpc.NewServer(serverOpts...) 
createClusterObject(t, s, policy) caServer := ca.NewServer(s, managerConfig) api.RegisterCAServer(grpcServer, caServer) api.RegisterNodeCAServer(grpcServer, caServer) ctx := context.Background() go grpcServer.Serve(l) go caServer.Run(ctx) // Wait for caServer to be ready to serve <-caServer.Ready() remotes := picker.NewRemotes(api.Peer{Addr: l.Addr().String()}) picker := picker.NewPicker(remotes, l.Addr().String()) caClients := []api.CAClient{api.NewCAClient(conn1), api.NewCAClient(conn2), api.NewCAClient(conn3)} nodeCAClients := []api.NodeCAClient{api.NewNodeCAClient(conn1), api.NewNodeCAClient(conn2), api.NewNodeCAClient(conn3), api.NewNodeCAClient(conn4)} conns := []*grpc.ClientConn{conn1, conn2, conn3, conn4} return &TestCA{ RootCA: rootCA, MemoryStore: s, Picker: picker, TempDir: tempBaseDir, Organization: organization, Paths: paths, Context: ctx, CAClients: caClients, NodeCAClients: nodeCAClients, Conns: conns, CAServer: caServer, } }