func TestLoadOrCreateSecurityConfigInvalidKeyWithValidTempKey(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, tc.Picker, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.NotNil(t, nodeConfig.RootCA().Signer) // Write some garbage to the Key assert.NoError(t, os.Rename(tc.Paths.Node.Key, filepath.Dir(tc.Paths.Node.Key)+"."+filepath.Base(tc.Paths.Node.Key))) ioutil.WriteFile(tc.Paths.Node.Key, []byte(`-----BEGIN EC PRIVATE KEY-----\n some random garbage\n -----END EC PRIVATE KEY-----`), 0644) nodeConfig, err = ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, nil, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.NotNil(t, nodeConfig.RootCA().Signer) }
func TestLoadOrCreateSecurityConfigNoCerts(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() // Remove only the node certificates form the directory, and attest that we get // new certificates that are locally signed os.RemoveAll(tc.Paths.Node.Cert) nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, tc.WorkerToken, ca.AgentRole, tc.Picker, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.NotNil(t, nodeConfig.RootCA().Signer) assert.True(t, nodeConfig.RootCA().CanSign()) info := make(chan api.IssueNodeCertificateResponse, 1) // Remove only the node certificates form the directory, and attest that we get // new certificates that are issued by the remote CA os.RemoveAll(tc.Paths.RootCA.Key) os.RemoveAll(tc.Paths.Node.Cert) nodeConfig, err = ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, tc.WorkerToken, ca.AgentRole, tc.Picker, info) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.Nil(t, nodeConfig.RootCA().Signer) assert.False(t, nodeConfig.RootCA().CanSign()) assert.NotEmpty(t, <-info) }
func TestLoadOrCreateSecurityConfigInvalidCACert(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() // First load the current nodeConfig. We'll verify that after we corrupt // the certificate, another subsquent call with get us new certs nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, tc.Picker, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) // We have a valid signer because we bootstrapped with valid root key-material assert.NotNil(t, nodeConfig.RootCA().Signer) assert.True(t, nodeConfig.RootCA().CanSign()) // Write some garbage to the CA cert ioutil.WriteFile(tc.Paths.RootCA.Cert, []byte(`-----BEGIN CERTIFICATE-----\n some random garbage\n -----END CERTIFICATE-----`), 0644) // We should get an error when the CA cert is invalid. _, err = ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, tc.Picker, nil) assert.Error(t, err) // Not having a local cert should cause us to fallback to using the // picker to get a remote. assert.Nil(t, os.Remove(tc.Paths.RootCA.Cert)) // Validate we got a new valid state newNodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, tc.Picker, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.NotNil(t, nodeConfig.RootCA().Signer) assert.True(t, nodeConfig.RootCA().CanSign()) // Ensure that we have the same certificate as before assert.Equal(t, nodeConfig.RootCA().Cert, newNodeConfig.RootCA().Cert) }
func TestLoadOrCreateSecurityConfigNoCertsAndNoRemote(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() // Remove the certificate from the temp dir and try loading with a new manager os.Remove(tc.Paths.Node.Cert) os.Remove(tc.Paths.RootCA.Key) _, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, tc.WorkerToken, ca.AgentRole, nil, nil) assert.EqualError(t, err, "valid remote address picker required") }
func TestLoadOrCreateSecurityConfigNoLocalCACertNoRemote(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() // Delete the root CA file so that LoadOrCreateSecurityConfig falls // back to using the remote. assert.Nil(t, os.Remove(tc.Paths.RootCA.Cert)) nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, nil, nil) assert.EqualError(t, err, "valid remote address picker required") assert.Nil(t, nodeConfig) }
func TestLoadOrCreateSecurityConfigInvalidCAKey(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() // Write some garbage to the root key ioutil.WriteFile(tc.Paths.RootCA.Key, []byte(`-----BEGIN EC PRIVATE KEY-----\n some random garbage\n -----END EC PRIVATE KEY-----`), 0644) // We should get an error when the local ca private key is invalid. _, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, tc.Picker, nil) assert.Error(t, err) }
func TestLoadOrCreateSecurityConfigWrongCAHash(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() splitToken := strings.Split(tc.ManagerToken, "-") splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e" replacementToken := strings.Join(splitToken, "-") info := make(chan api.IssueNodeCertificateResponse, 1) // Remove only the node certificates form the directory, and attest that we get // new certificates that are issued by the remote CA os.RemoveAll(tc.Paths.RootCA.Key) os.RemoveAll(tc.Paths.RootCA.Cert) os.RemoveAll(tc.Paths.Node.Cert) _, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, replacementToken, ca.AgentRole, tc.Picker, info) assert.Error(t, err) assert.Contains(t, err.Error(), "remote CA does not match fingerprint.") }
func TestLoadOrCreateSecurityConfigEmptyDir(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() info := make(chan api.IssueNodeCertificateResponse, 1) // Remove all the contents from the temp dir and try again with a new node os.RemoveAll(tc.TempDir) nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, tc.WorkerToken, ca.AgentRole, tc.Picker, info) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.Nil(t, nodeConfig.RootCA().Signer) assert.False(t, nodeConfig.RootCA().CanSign()) assert.NotEmpty(t, <-info) }
func TestLoadOrCreateSecurityConfigInvalidCert(t *testing.T) { tc := testutils.NewTestCA(t) defer tc.Stop() // Write some garbage to the cert ioutil.WriteFile(tc.Paths.Node.Cert, []byte(`-----BEGIN CERTIFICATE-----\n some random garbage\n -----END CERTIFICATE-----`), 0644) nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", ca.AgentRole, tc.Picker, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.NotNil(t, nodeConfig.RootCA().Signer) }
func TestLoadOrCreateSecurityConfigInvalidKey(t *testing.T) { tc := testutils.NewTestCA(t, testutils.AcceptancePolicy(true, true, "")) defer tc.Stop() // Write some garbage to the Key ioutil.WriteFile(tc.Paths.Node.Key, []byte(`-----BEGIN EC PRIVATE KEY-----\n some random garbage\n -----END EC PRIVATE KEY-----`), 0644) nodeConfig, err := ca.LoadOrCreateSecurityConfig(tc.Context, tc.TempDir, "", "", ca.AgentRole, tc.Picker, nil) assert.NoError(t, err) assert.NotNil(t, nodeConfig) assert.NotNil(t, nodeConfig.ClientTLSCreds) assert.NotNil(t, nodeConfig.ServerTLSCreds) assert.NotNil(t, nodeConfig.RootCA().Pool) assert.NotNil(t, nodeConfig.RootCA().Cert) assert.NotNil(t, nodeConfig.RootCA().Signer) }
func (n *Node) run(ctx context.Context) (err error) { defer func() { n.err = err close(n.closed) }() ctx, cancel := context.WithCancel(ctx) defer cancel() ctx = log.WithModule(ctx, "node") go func() { select { case <-ctx.Done(): case <-n.stopped: cancel() } }() // NOTE: When this node is created by NewNode(), our nodeID is set if // n.loadCertificates() succeeded in loading TLS credentials. if n.config.JoinAddr == "" && n.nodeID == "" { if err := n.bootstrapCA(); err != nil { return err } } if n.config.JoinAddr != "" || n.config.ForceNewCluster { n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename)) if n.config.JoinAddr != "" { n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight) } } // Obtain new certs and setup TLS certificates renewal for this node: // - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued // - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows // us to display the ID before the certificate gets issued (for potential approval). // - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate. // - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay // up to date. issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1) go func() { select { case <-ctx.Done(): case resp := <-issueResponseChan: log.G(log.WithModule(ctx, "tls")).WithFields(logrus.Fields{ "node.id": resp.NodeID, }).Debugf("requesting certificate") n.Lock() n.nodeID = resp.NodeID n.nodeMembership = resp.NodeMembership n.Unlock() close(n.certificateRequested) } }() certDir := filepath.Join(n.config.StateDir, "certificates") securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.JoinToken, ca.ManagerRole, n.remotes, issueResponseChan) if err != nil { return err } taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db") if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil { return err } db, err := bolt.Open(taskDBPath, 0666, nil) if err != nil { return err } defer db.Close() if err := n.loadCertificates(); err != nil { return err } forceCertRenewal := make(chan struct{}) renewCert := func() { select { case forceCertRenewal <- struct{}{}: case <-ctx.Done(): } } go func() { for { select { case <-ctx.Done(): return case node := <-n.notifyNodeChange: // If the server is sending us a ForceRenewal State, renew if node.Certificate.Status.State == api.IssuanceStateRotate { renewCert() continue } n.Lock() // If we got a role change, renew lastRole := n.role role := ca.WorkerRole if node.Spec.Role == api.NodeRoleManager { role = ca.ManagerRole } if lastRole == role { n.Unlock() continue } // switch role to agent immediately to shutdown manager early if role == ca.WorkerRole { n.role = role n.roleCond.Broadcast() } n.Unlock() renewCert() } } }() updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, n.remotes, forceCertRenewal) go func() { for { select { case certUpdate := <-updates: if certUpdate.Err != nil { logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err) continue } n.Lock() n.role = certUpdate.Role n.roleCond.Broadcast() n.Unlock() case <-ctx.Done(): return } } }() role := n.role managerReady := make(chan struct{}) agentReady := make(chan struct{}) var managerErr error var agentErr error var wg sync.WaitGroup wg.Add(2) go func() { managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop wg.Done() cancel() }() go func() { agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady) wg.Done() cancel() }() go func() { <-agentReady if role == ca.ManagerRole { <-managerReady } close(n.ready) }() wg.Wait() if managerErr != nil && managerErr != context.Canceled { return managerErr } if agentErr != nil && agentErr != context.Canceled { return agentErr } return err }
func (n *Node) loadSecurityConfig(ctx context.Context) (*ca.SecurityConfig, error) { paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory)) var securityConfig *ca.SecurityConfig krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{}) if err := krw.Migrate(); err != nil { return nil, err } // Check if we already have a valid certificates on disk. rootCA, err := ca.GetLocalRootCA(paths.RootCA) if err != nil && err != ca.ErrNoLocalRootCA { return nil, err } if err == nil { clientTLSCreds, serverTLSCreds, err := ca.LoadTLSCreds(rootCA, krw) _, ok := errors.Cause(err).(ca.ErrInvalidKEK) switch { case err == nil: securityConfig = ca.NewSecurityConfig(&rootCA, krw, clientTLSCreds, serverTLSCreds) log.G(ctx).Debug("loaded CA and TLS certificates") case ok: return nil, ErrInvalidUnlockKey case os.IsNotExist(err): break default: return nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert) } } if securityConfig == nil { if n.config.JoinAddr == "" { // if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key n.unlockKey = nil if n.config.AutoLockManagers { n.unlockKey = encryption.GenerateSecretKey() } krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{}) rootCA, err = ca.CreateRootCA(ca.DefaultRootCN, paths.RootCA) if err != nil { return nil, err } log.G(ctx).Debug("generated CA key and certificate") } else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.remotes) if err != nil { return nil, err } log.G(ctx).Debug("downloaded CA certificate") } // Obtain new certs and setup TLS certificates renewal for this node: // - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued // - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows // us to display the ID before the certificate gets issued (for potential approval). // - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate. // - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay // up to date. issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1) go func() { select { case <-ctx.Done(): case resp := <-issueResponseChan: log.G(log.WithModule(ctx, "tls")).WithFields(logrus.Fields{ "node.id": resp.NodeID, }).Debugf("loaded TLS certificate") n.Lock() n.nodeID = resp.NodeID n.nodeMembership = resp.NodeMembership n.Unlock() close(n.certificateRequested) } }() // LoadOrCreateSecurityConfig is the point at which a new node joining a cluster will retrieve TLS // certificates and write them to disk securityConfig, err = ca.LoadOrCreateSecurityConfig( ctx, rootCA, n.config.JoinToken, ca.ManagerRole, n.remotes, issueResponseChan, krw) if err != nil { if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok { return nil, ErrInvalidUnlockKey } return nil, err } } n.Lock() n.role = securityConfig.ClientTLSCreds.Role() n.nodeID = securityConfig.ClientTLSCreds.NodeID() n.nodeMembership = api.NodeMembershipAccepted n.roleCond.Broadcast() n.Unlock() return securityConfig, nil }
func (n *Node) run(ctx context.Context) (err error) { defer func() { n.err = err close(n.closed) }() ctx, cancel := context.WithCancel(ctx) defer cancel() ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "node")) go func() { select { case <-ctx.Done(): case <-n.stopped: cancel() } }() if n.config.JoinAddr == "" && n.nodeID == "" { if err := n.bootstrapCA(); err != nil { return err } } if n.config.JoinAddr != "" || n.config.ForceNewCluster { n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename)) if n.config.JoinAddr != "" { n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, 1) } } csrRole := n.role if n.config.IsManager { // todo: temporary csrRole = ca.ManagerRole } // Obtain new certs and setup TLS certificates renewal for this node: // - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued // - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows // us to display the ID before the certificate gets issued (for potential approval). // - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate. // - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay // up to date. issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1) go func() { select { case <-ctx.Done(): case resp := <-issueResponseChan: logrus.Debugf("Requesting certificate for NodeID: %v", resp.NodeID) n.Lock() n.nodeID = resp.NodeID n.nodeMembership = resp.NodeMembership n.Unlock() close(n.certificateRequested) } }() certDir := filepath.Join(n.config.StateDir, "certificates") securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.CAHash, n.config.Secret, csrRole, picker.NewPicker(n.remotes), issueResponseChan) if err != nil { return err } taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db") if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil { return err } db, err := bolt.Open(taskDBPath, 0666, nil) if err != nil { return err } defer db.Close() if err := n.loadCertificates(); err != nil { return err } if n.role == ca.ManagerRole { n.managerRoleCh <- struct{}{} } forceCertRenewal := make(chan struct{}) go func() { n.RLock() lastRole := n.role n.RUnlock() for { select { case <-ctx.Done(): return case apirole := <-n.roleChangeReq: role := ca.AgentRole if apirole == api.NodeRoleManager { role = ca.ManagerRole } if lastRole != role { forceCertRenewal <- struct{}{} } lastRole = role } } }() updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, picker.NewPicker(n.remotes), forceCertRenewal) go func() { for { select { case certUpdate := <-updates: if ctx.Err() != nil { return } if certUpdate.Err != nil { logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err) continue } n.Lock() n.role = certUpdate.Role if n.role == ca.ManagerRole { n.managerRoleCh <- struct{}{} } n.Unlock() case <-ctx.Done(): return } } }() role := n.role managerReady := make(chan struct{}) agentReady := make(chan struct{}) var managerErr error var agentErr error var wg sync.WaitGroup wg.Add(2) go func() { managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop wg.Done() cancel() }() go func() { agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady) wg.Done() cancel() }() go func() { <-agentReady if role == ca.ManagerRole { <-managerReady } close(n.ready) }() wg.Wait() if managerErr != nil && managerErr != context.Canceled { return managerErr } if agentErr != nil && agentErr != context.Canceled { return agentErr } return err }