func TestForceRenewTLSConfig(t *testing.T) { t.Parallel() tc := testutils.NewTestCA(t) defer tc.Stop() ctx, cancel := context.WithCancel(context.Background()) defer cancel() // Get a new managerConfig with a TLS cert that has 15 minutes to live nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole) assert.NoError(t, err) renew := make(chan struct{}, 1) updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.Remotes, renew) renew <- struct{}{} select { case <-time.After(10 * time.Second): assert.Fail(t, "TestForceRenewTLSConfig timed-out") case certUpdate := <-updates: assert.NoError(t, certUpdate.Err) assert.NotNil(t, certUpdate) assert.Equal(t, certUpdate.Role, ca.ManagerRole) } }
func TestForceRenewTLSConfig(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() ctx, cancel := context.WithCancel(context.Background()) defer cancel() // Get a new managerConfig with a TLS cert that has 15 minutes to live nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole) assert.NoError(t, err) var success, timeout bool renew := make(chan struct{}, 1) updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.TempDir, tc.Picker, renew) for { renew <- struct{}{} select { case <-time.After(2 * time.Second): timeout = true case certUpdate := <-updates: assert.NoError(t, certUpdate.Err) assert.NotNil(t, certUpdate) assert.Equal(t, certUpdate.Role, ca.ManagerRole) success = true } if timeout { assert.Fail(t, "TestForceRenewTLSConfig timed-out") break } if success { break } } }
func TestRenewTLSConfigWithNoNode(t *testing.T) { t.Parallel() tc := testutils.NewTestCA(t) defer tc.Stop() ctx, cancel := context.WithCancel(context.Background()) defer cancel() // Get a new nodeConfig with a TLS cert that has the default Cert duration nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole) assert.NoError(t, err) // Create a new RootCA, and change the policy to issue 6 minute certificates. // Because of the default backdate of 5 minutes, this issues certificates // valid for 1 minute. newRootCA, err := ca.NewRootCA(tc.RootCA.Cert, tc.RootCA.Key, ca.DefaultNodeCertExpiration) assert.NoError(t, err) newRootCA.Signer.SetPolicy(&cfconfig.Signing{ Default: &cfconfig.SigningProfile{ Usage: []string{"signing", "key encipherment", "server auth", "client auth"}, Expiry: 6 * time.Minute, }, }) // Create a new CSR and overwrite the key on disk csr, key, err := ca.GenerateNewCSR() assert.NoError(t, err) // Issue a new certificate with the same details as the current config, but with 1 min expiration time c := nodeConfig.ClientTLSCreds signedCert, err := newRootCA.ParseValidateAndSignCSR(csr, c.NodeID(), c.Role(), c.Organization()) assert.NoError(t, err) assert.NotNil(t, signedCert) // Overwrite the certificate on disk with one that expires in 1 minute err = ioutils.AtomicWriteFile(tc.Paths.Node.Cert, signedCert, 0644) assert.NoError(t, err) err = ioutils.AtomicWriteFile(tc.Paths.Node.Key, key, 0600) assert.NoError(t, err) // Delete the node from the backend store err = tc.MemoryStore.Update(func(tx store.Tx) error { node := store.GetNode(tx, nodeConfig.ClientTLSCreds.NodeID()) assert.NotNil(t, node) return store.DeleteNode(tx, nodeConfig.ClientTLSCreds.NodeID()) }) assert.NoError(t, err) renew := make(chan struct{}) updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.Remotes, renew) select { case <-time.After(10 * time.Second): assert.Fail(t, "TestRenewTLSConfig timed-out") case certUpdate := <-updates: assert.Error(t, certUpdate.Err) assert.Contains(t, certUpdate.Err.Error(), "not found when attempting to renew certificate") } }
func TestRenewTLSConfigManager(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() ctx, cancel := context.WithCancel(context.Background()) defer cancel() // Get a new nodeConfig with a TLS cert that has the default Cert duration nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole) assert.NoError(t, err) // Create a new RootCA, and change the policy to issue 6 minute certificates newRootCA, err := ca.NewRootCA(tc.RootCA.Cert, tc.RootCA.Key, ca.DefaultNodeCertExpiration) assert.NoError(t, err) newRootCA.Signer.SetPolicy(&cfconfig.Signing{ Default: &cfconfig.SigningProfile{ Usage: []string{"signing", "key encipherment", "server auth", "client auth"}, Expiry: 6 * time.Minute, }, }) // Create a new CSR and overwrite the key on disk csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node) assert.NoError(t, err) // Issue a new certificate with the same details as the current config, but with 6 min expiration time c := nodeConfig.ClientTLSCreds signedCert, err := newRootCA.ParseValidateAndSignCSR(csr, c.NodeID(), c.Role(), c.Organization()) assert.NoError(t, err) assert.NotNil(t, signedCert) // Overwrite the certificate on disk with one that expires in 1 minute err = ioutils.AtomicWriteFile(tc.Paths.Node.Cert, signedCert, 0644) assert.NoError(t, err) // Get a new nodeConfig with a TLS cert that has 6 minutes to live var success, timeout bool renew := make(chan struct{}) updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.TempDir, tc.Picker, renew) for { select { case <-time.After(2 * time.Second): timeout = true case certUpdate := <-updates: assert.NoError(t, certUpdate.Err) assert.NotNil(t, certUpdate) assert.Equal(t, ca.ManagerRole, certUpdate.Role) success = true } if timeout { assert.Fail(t, "TestRenewTLSConfig timed-out") break } if success { break } } }
func (n *Node) run(ctx context.Context) (err error) { defer func() { n.err = err close(n.closed) }() ctx, cancel := context.WithCancel(ctx) defer cancel() ctx = log.WithModule(ctx, "node") go func() { select { case <-ctx.Done(): case <-n.stopped: cancel() } }() // NOTE: When this node is created by NewNode(), our nodeID is set if // n.loadCertificates() succeeded in loading TLS credentials. if n.config.JoinAddr == "" && n.nodeID == "" { if err := n.bootstrapCA(); err != nil { return err } } if n.config.JoinAddr != "" || n.config.ForceNewCluster { n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename)) if n.config.JoinAddr != "" { n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight) } } // Obtain new certs and setup TLS certificates renewal for this node: // - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued // - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows // us to display the ID before the certificate gets issued (for potential approval). // - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate. // - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay // up to date. issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1) go func() { select { case <-ctx.Done(): case resp := <-issueResponseChan: log.G(log.WithModule(ctx, "tls")).WithFields(logrus.Fields{ "node.id": resp.NodeID, }).Debugf("requesting certificate") n.Lock() n.nodeID = resp.NodeID n.nodeMembership = resp.NodeMembership n.Unlock() close(n.certificateRequested) } }() certDir := filepath.Join(n.config.StateDir, "certificates") securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.JoinToken, ca.ManagerRole, n.remotes, issueResponseChan) if err != nil { return err } taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db") if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil { return err } db, err := bolt.Open(taskDBPath, 0666, nil) if err != nil { return err } defer db.Close() if err := n.loadCertificates(); err != nil { return err } forceCertRenewal := make(chan struct{}) renewCert := func() { select { case forceCertRenewal <- struct{}{}: case <-ctx.Done(): } } go func() { for { select { case <-ctx.Done(): return case node := <-n.notifyNodeChange: // If the server is sending us a ForceRenewal State, renew if node.Certificate.Status.State == api.IssuanceStateRotate { renewCert() continue } n.Lock() // If we got a role change, renew lastRole := n.role role := ca.WorkerRole if node.Spec.Role == api.NodeRoleManager { role = ca.ManagerRole } if lastRole == role { n.Unlock() continue } // switch role to agent immediately to shutdown manager early if role == ca.WorkerRole { n.role = role n.roleCond.Broadcast() } n.Unlock() renewCert() } } }() updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, n.remotes, forceCertRenewal) go func() { for { select { case certUpdate := <-updates: if certUpdate.Err != nil { logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err) continue } n.Lock() n.role = certUpdate.Role n.roleCond.Broadcast() n.Unlock() case <-ctx.Done(): return } } }() role := n.role managerReady := make(chan struct{}) agentReady := make(chan struct{}) var managerErr error var agentErr error var wg sync.WaitGroup wg.Add(2) go func() { managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop wg.Done() cancel() }() go func() { agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady) wg.Done() cancel() }() go func() { <-agentReady if role == ca.ManagerRole { <-managerReady } close(n.ready) }() wg.Wait() if managerErr != nil && managerErr != context.Canceled { return managerErr } if agentErr != nil && agentErr != context.Canceled { return agentErr } return err }
func (n *Node) run(ctx context.Context) (err error) { defer func() { n.err = err close(n.closed) }() ctx, cancel := context.WithCancel(ctx) defer cancel() ctx = log.WithModule(ctx, "node") go func() { select { case <-ctx.Done(): case <-n.stopped: cancel() } }() securityConfig, err := n.loadSecurityConfig(ctx) if err != nil { return err } taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db") if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil { return err } db, err := bolt.Open(taskDBPath, 0666, nil) if err != nil { return err } defer db.Close() forceCertRenewal := make(chan struct{}) renewCert := func() { select { case forceCertRenewal <- struct{}{}: case <-ctx.Done(): } } go func() { for { select { case <-ctx.Done(): return case node := <-n.notifyNodeChange: // If the server is sending us a ForceRenewal State, renew if node.Certificate.Status.State == api.IssuanceStateRotate { renewCert() continue } n.Lock() // If we got a role change, renew lastRole := n.role role := ca.WorkerRole if node.Spec.Role == api.NodeRoleManager { role = ca.ManagerRole } if lastRole == role { n.Unlock() continue } // switch role to agent immediately to shutdown manager early if role == ca.WorkerRole { n.role = role n.roleCond.Broadcast() } n.Unlock() renewCert() } } }() updates := ca.RenewTLSConfig(ctx, securityConfig, n.remotes, forceCertRenewal) go func() { for { select { case certUpdate := <-updates: if certUpdate.Err != nil { logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err) continue } n.Lock() n.role = certUpdate.Role n.roleCond.Broadcast() n.Unlock() case <-ctx.Done(): return } } }() role := n.role managerReady := make(chan struct{}) agentReady := make(chan struct{}) var managerErr error var agentErr error var wg sync.WaitGroup wg.Add(2) go func() { managerErr = n.superviseManager(ctx, securityConfig, managerReady) // store err and loop wg.Done() }() go func() { agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady) wg.Done() }() go func() { <-agentReady if role == ca.ManagerRole { <-managerReady } close(n.ready) }() wg.Wait() if managerErr != nil && managerErr != context.Canceled { return managerErr } if agentErr != nil && agentErr != context.Canceled { return agentErr } return err }
func (n *Node) run(ctx context.Context) (err error) { defer func() { n.err = err close(n.closed) }() ctx, cancel := context.WithCancel(ctx) defer cancel() ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "node")) go func() { select { case <-ctx.Done(): case <-n.stopped: cancel() } }() if n.config.JoinAddr == "" && n.nodeID == "" { if err := n.bootstrapCA(); err != nil { return err } } if n.config.JoinAddr != "" || n.config.ForceNewCluster { n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename)) if n.config.JoinAddr != "" { n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, 1) } } csrRole := n.role if n.config.IsManager { // todo: temporary csrRole = ca.ManagerRole } // Obtain new certs and setup TLS certificates renewal for this node: // - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued // - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows // us to display the ID before the certificate gets issued (for potential approval). // - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate. // - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay // up to date. issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1) go func() { select { case <-ctx.Done(): case resp := <-issueResponseChan: logrus.Debugf("Requesting certificate for NodeID: %v", resp.NodeID) n.Lock() n.nodeID = resp.NodeID n.nodeMembership = resp.NodeMembership n.Unlock() close(n.certificateRequested) } }() certDir := filepath.Join(n.config.StateDir, "certificates") securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.CAHash, n.config.Secret, csrRole, picker.NewPicker(n.remotes), issueResponseChan) if err != nil { return err } taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db") if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil { return err } db, err := bolt.Open(taskDBPath, 0666, nil) if err != nil { return err } defer db.Close() if err := n.loadCertificates(); err != nil { return err } if n.role == ca.ManagerRole { n.managerRoleCh <- struct{}{} } forceCertRenewal := make(chan struct{}) go func() { n.RLock() lastRole := n.role n.RUnlock() for { select { case <-ctx.Done(): return case apirole := <-n.roleChangeReq: role := ca.AgentRole if apirole == api.NodeRoleManager { role = ca.ManagerRole } if lastRole != role { forceCertRenewal <- struct{}{} } lastRole = role } } }() updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, picker.NewPicker(n.remotes), forceCertRenewal) go func() { for { select { case certUpdate := <-updates: if ctx.Err() != nil { return } if certUpdate.Err != nil { logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err) continue } n.Lock() n.role = certUpdate.Role if n.role == ca.ManagerRole { n.managerRoleCh <- struct{}{} } n.Unlock() case <-ctx.Done(): return } } }() role := n.role managerReady := make(chan struct{}) agentReady := make(chan struct{}) var managerErr error var agentErr error var wg sync.WaitGroup wg.Add(2) go func() { managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop wg.Done() cancel() }() go func() { agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady) wg.Done() cancel() }() go func() { <-agentReady if role == ca.ManagerRole { <-managerReady } close(n.ready) }() wg.Wait() if managerErr != nil && managerErr != context.Canceled { return managerErr } if agentErr != nil && agentErr != context.Canceled { return agentErr } return err }