Beispiel #1
0
func TestForceRenewTLSConfig(t *testing.T) {
	t.Parallel()

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Get a new managerConfig with a TLS cert that has 15 minutes to live
	nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	renew := make(chan struct{}, 1)
	updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.Remotes, renew)
	renew <- struct{}{}
	select {
	case <-time.After(10 * time.Second):
		assert.Fail(t, "TestForceRenewTLSConfig timed-out")
	case certUpdate := <-updates:
		assert.NoError(t, certUpdate.Err)
		assert.NotNil(t, certUpdate)
		assert.Equal(t, certUpdate.Role, ca.ManagerRole)
	}
}
Beispiel #2
0
func TestForceRenewTLSConfig(t *testing.T) {
	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Get a new managerConfig with a TLS cert that has 15 minutes to live
	nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	var success, timeout bool
	renew := make(chan struct{}, 1)
	updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.TempDir, tc.Picker, renew)
	for {
		renew <- struct{}{}
		select {
		case <-time.After(2 * time.Second):
			timeout = true
		case certUpdate := <-updates:
			assert.NoError(t, certUpdate.Err)
			assert.NotNil(t, certUpdate)
			assert.Equal(t, certUpdate.Role, ca.ManagerRole)
			success = true
		}
		if timeout {
			assert.Fail(t, "TestForceRenewTLSConfig timed-out")
			break
		}
		if success {
			break
		}
	}
}
Beispiel #3
0
func TestRenewTLSConfigWithNoNode(t *testing.T) {
	t.Parallel()

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Get a new nodeConfig with a TLS cert that has the default Cert duration
	nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	// Create a new RootCA, and change the policy to issue 6 minute certificates.
	// Because of the default backdate of 5 minutes, this issues certificates
	// valid for 1 minute.
	newRootCA, err := ca.NewRootCA(tc.RootCA.Cert, tc.RootCA.Key, ca.DefaultNodeCertExpiration)
	assert.NoError(t, err)
	newRootCA.Signer.SetPolicy(&cfconfig.Signing{
		Default: &cfconfig.SigningProfile{
			Usage:  []string{"signing", "key encipherment", "server auth", "client auth"},
			Expiry: 6 * time.Minute,
		},
	})

	// Create a new CSR and overwrite the key on disk
	csr, key, err := ca.GenerateNewCSR()
	assert.NoError(t, err)

	// Issue a new certificate with the same details as the current config, but with 1 min expiration time
	c := nodeConfig.ClientTLSCreds
	signedCert, err := newRootCA.ParseValidateAndSignCSR(csr, c.NodeID(), c.Role(), c.Organization())
	assert.NoError(t, err)
	assert.NotNil(t, signedCert)

	// Overwrite the certificate on disk with one that expires in 1 minute
	err = ioutils.AtomicWriteFile(tc.Paths.Node.Cert, signedCert, 0644)
	assert.NoError(t, err)

	err = ioutils.AtomicWriteFile(tc.Paths.Node.Key, key, 0600)
	assert.NoError(t, err)

	// Delete the node from the backend store
	err = tc.MemoryStore.Update(func(tx store.Tx) error {
		node := store.GetNode(tx, nodeConfig.ClientTLSCreds.NodeID())
		assert.NotNil(t, node)
		return store.DeleteNode(tx, nodeConfig.ClientTLSCreds.NodeID())
	})
	assert.NoError(t, err)

	renew := make(chan struct{})
	updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.Remotes, renew)
	select {
	case <-time.After(10 * time.Second):
		assert.Fail(t, "TestRenewTLSConfig timed-out")
	case certUpdate := <-updates:
		assert.Error(t, certUpdate.Err)
		assert.Contains(t, certUpdate.Err.Error(), "not found when attempting to renew certificate")
	}
}
Beispiel #4
0
func TestRenewTLSConfigManager(t *testing.T) {
	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Get a new nodeConfig with a TLS cert that has the default Cert duration
	nodeConfig, err := tc.WriteNewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	// Create a new RootCA, and change the policy to issue 6 minute certificates
	newRootCA, err := ca.NewRootCA(tc.RootCA.Cert, tc.RootCA.Key, ca.DefaultNodeCertExpiration)
	assert.NoError(t, err)
	newRootCA.Signer.SetPolicy(&cfconfig.Signing{
		Default: &cfconfig.SigningProfile{
			Usage:  []string{"signing", "key encipherment", "server auth", "client auth"},
			Expiry: 6 * time.Minute,
		},
	})

	// Create a new CSR and overwrite the key on disk
	csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node)
	assert.NoError(t, err)

	// Issue a new certificate with the same details as the current config, but with 6 min expiration time
	c := nodeConfig.ClientTLSCreds
	signedCert, err := newRootCA.ParseValidateAndSignCSR(csr, c.NodeID(), c.Role(), c.Organization())
	assert.NoError(t, err)
	assert.NotNil(t, signedCert)

	// Overwrite the certificate on disk with one that expires in 1 minute
	err = ioutils.AtomicWriteFile(tc.Paths.Node.Cert, signedCert, 0644)
	assert.NoError(t, err)

	// Get a new nodeConfig with a TLS cert that has 6 minutes to live
	var success, timeout bool
	renew := make(chan struct{})

	updates := ca.RenewTLSConfig(ctx, nodeConfig, tc.TempDir, tc.Picker, renew)
	for {
		select {
		case <-time.After(2 * time.Second):
			timeout = true
		case certUpdate := <-updates:
			assert.NoError(t, certUpdate.Err)
			assert.NotNil(t, certUpdate)
			assert.Equal(t, ca.ManagerRole, certUpdate.Role)
			success = true
		}
		if timeout {
			assert.Fail(t, "TestRenewTLSConfig timed-out")
			break
		}
		if success {
			break
		}
	}
}
Beispiel #5
0
func (n *Node) run(ctx context.Context) (err error) {
	defer func() {
		n.err = err
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithModule(ctx, "node")

	go func() {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}()

	// NOTE: When this node is created by NewNode(), our nodeID is set if
	// n.loadCertificates() succeeded in loading TLS credentials.
	if n.config.JoinAddr == "" && n.nodeID == "" {
		if err := n.bootstrapCA(); err != nil {
			return err
		}
	}

	if n.config.JoinAddr != "" || n.config.ForceNewCluster {
		n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
		if n.config.JoinAddr != "" {
			n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
		}
	}

	// Obtain new certs and setup TLS certificates renewal for this node:
	// - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued
	// - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows
	// us to display the ID before the certificate gets issued (for potential approval).
	// - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate.
	// - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay
	// up to date.
	issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1)
	go func() {
		select {
		case <-ctx.Done():
		case resp := <-issueResponseChan:
			log.G(log.WithModule(ctx, "tls")).WithFields(logrus.Fields{
				"node.id": resp.NodeID,
			}).Debugf("requesting certificate")
			n.Lock()
			n.nodeID = resp.NodeID
			n.nodeMembership = resp.NodeMembership
			n.Unlock()
			close(n.certificateRequested)
		}
	}()

	certDir := filepath.Join(n.config.StateDir, "certificates")
	securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.JoinToken, ca.ManagerRole, n.remotes, issueResponseChan)
	if err != nil {
		return err
	}

	taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}

	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	if err := n.loadCertificates(); err != nil {
		return err
	}

	forceCertRenewal := make(chan struct{})
	renewCert := func() {
		select {
		case forceCertRenewal <- struct{}{}:
		case <-ctx.Done():
		}
	}

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case node := <-n.notifyNodeChange:
				// If the server is sending us a ForceRenewal State, renew
				if node.Certificate.Status.State == api.IssuanceStateRotate {
					renewCert()
					continue
				}
				n.Lock()
				// If we got a role change, renew
				lastRole := n.role
				role := ca.WorkerRole
				if node.Spec.Role == api.NodeRoleManager {
					role = ca.ManagerRole
				}
				if lastRole == role {
					n.Unlock()
					continue
				}
				// switch role to agent immediately to shutdown manager early
				if role == ca.WorkerRole {
					n.role = role
					n.roleCond.Broadcast()
				}
				n.Unlock()
				renewCert()
			}
		}
	}()

	updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, n.remotes, forceCertRenewal)
	go func() {
		for {
			select {
			case certUpdate := <-updates:
				if certUpdate.Err != nil {
					logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
					continue
				}
				n.Lock()
				n.role = certUpdate.Role
				n.roleCond.Broadcast()
				n.Unlock()
			case <-ctx.Done():
				return
			}
		}
	}()

	role := n.role

	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	var managerErr error
	var agentErr error
	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop
		wg.Done()
		cancel()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady)
		wg.Done()
		cancel()
	}()

	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			<-managerReady
		}
		close(n.ready)
	}()

	wg.Wait()
	if managerErr != nil && managerErr != context.Canceled {
		return managerErr
	}
	if agentErr != nil && agentErr != context.Canceled {
		return agentErr
	}
	return err
}
Beispiel #6
0
func (n *Node) run(ctx context.Context) (err error) {
	defer func() {
		n.err = err
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithModule(ctx, "node")

	go func() {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}()

	securityConfig, err := n.loadSecurityConfig(ctx)
	if err != nil {
		return err
	}

	taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}

	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	forceCertRenewal := make(chan struct{})
	renewCert := func() {
		select {
		case forceCertRenewal <- struct{}{}:
		case <-ctx.Done():
		}
	}

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case node := <-n.notifyNodeChange:
				// If the server is sending us a ForceRenewal State, renew
				if node.Certificate.Status.State == api.IssuanceStateRotate {
					renewCert()
					continue
				}
				n.Lock()
				// If we got a role change, renew
				lastRole := n.role
				role := ca.WorkerRole
				if node.Spec.Role == api.NodeRoleManager {
					role = ca.ManagerRole
				}
				if lastRole == role {
					n.Unlock()
					continue
				}
				// switch role to agent immediately to shutdown manager early
				if role == ca.WorkerRole {
					n.role = role
					n.roleCond.Broadcast()
				}
				n.Unlock()
				renewCert()
			}
		}
	}()

	updates := ca.RenewTLSConfig(ctx, securityConfig, n.remotes, forceCertRenewal)
	go func() {
		for {
			select {
			case certUpdate := <-updates:
				if certUpdate.Err != nil {
					logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
					continue
				}
				n.Lock()
				n.role = certUpdate.Role
				n.roleCond.Broadcast()
				n.Unlock()
			case <-ctx.Done():
				return
			}
		}
	}()

	role := n.role

	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	var managerErr error
	var agentErr error
	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		managerErr = n.superviseManager(ctx, securityConfig, managerReady) // store err and loop
		wg.Done()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady)
		wg.Done()
	}()

	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			<-managerReady
		}
		close(n.ready)
	}()

	wg.Wait()
	if managerErr != nil && managerErr != context.Canceled {
		return managerErr
	}
	if agentErr != nil && agentErr != context.Canceled {
		return agentErr
	}
	return err
}
Beispiel #7
0
func (n *Node) run(ctx context.Context) (err error) {
	defer func() {
		n.err = err
		close(n.closed)
	}()
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "node"))

	go func() {
		select {
		case <-ctx.Done():
		case <-n.stopped:
			cancel()
		}
	}()

	if n.config.JoinAddr == "" && n.nodeID == "" {
		if err := n.bootstrapCA(); err != nil {
			return err
		}
	}

	if n.config.JoinAddr != "" || n.config.ForceNewCluster {
		n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
		if n.config.JoinAddr != "" {
			n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, 1)
		}
	}

	csrRole := n.role
	if n.config.IsManager { // todo: temporary
		csrRole = ca.ManagerRole
	}

	// Obtain new certs and setup TLS certificates renewal for this node:
	// - We call LoadOrCreateSecurityConfig which blocks until a valid certificate has been issued
	// - We retrieve the nodeID from LoadOrCreateSecurityConfig through the info channel. This allows
	// us to display the ID before the certificate gets issued (for potential approval).
	// - We wait for LoadOrCreateSecurityConfig to finish since we need a certificate to operate.
	// - Given a valid certificate, spin a renewal go-routine that will ensure that certificates stay
	// up to date.
	issueResponseChan := make(chan api.IssueNodeCertificateResponse, 1)
	go func() {
		select {
		case <-ctx.Done():
		case resp := <-issueResponseChan:
			logrus.Debugf("Requesting certificate for NodeID: %v", resp.NodeID)
			n.Lock()
			n.nodeID = resp.NodeID
			n.nodeMembership = resp.NodeMembership
			n.Unlock()
			close(n.certificateRequested)
		}
	}()

	certDir := filepath.Join(n.config.StateDir, "certificates")
	securityConfig, err := ca.LoadOrCreateSecurityConfig(ctx, certDir, n.config.CAHash, n.config.Secret, csrRole, picker.NewPicker(n.remotes), issueResponseChan)
	if err != nil {
		return err
	}

	taskDBPath := filepath.Join(n.config.StateDir, "worker/tasks.db")
	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
		return err
	}

	db, err := bolt.Open(taskDBPath, 0666, nil)
	if err != nil {
		return err
	}
	defer db.Close()

	if err := n.loadCertificates(); err != nil {
		return err
	}

	if n.role == ca.ManagerRole {
		n.managerRoleCh <- struct{}{}
	}

	forceCertRenewal := make(chan struct{})
	go func() {
		n.RLock()
		lastRole := n.role
		n.RUnlock()
		for {
			select {
			case <-ctx.Done():
				return
			case apirole := <-n.roleChangeReq:
				role := ca.AgentRole
				if apirole == api.NodeRoleManager {
					role = ca.ManagerRole
				}
				if lastRole != role {
					forceCertRenewal <- struct{}{}
				}
				lastRole = role
			}
		}
	}()

	updates := ca.RenewTLSConfig(ctx, securityConfig, certDir, picker.NewPicker(n.remotes), forceCertRenewal)
	go func() {
		for {
			select {
			case certUpdate := <-updates:
				if ctx.Err() != nil {
					return
				}
				if certUpdate.Err != nil {
					logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
					continue
				}
				n.Lock()
				n.role = certUpdate.Role
				if n.role == ca.ManagerRole {
					n.managerRoleCh <- struct{}{}
				}
				n.Unlock()
			case <-ctx.Done():
				return
			}
		}
	}()

	role := n.role

	managerReady := make(chan struct{})
	agentReady := make(chan struct{})
	var managerErr error
	var agentErr error
	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		managerErr = n.runManager(ctx, securityConfig, managerReady) // store err and loop
		wg.Done()
		cancel()
	}()
	go func() {
		agentErr = n.runAgent(ctx, db, securityConfig.ClientTLSCreds, agentReady)
		wg.Done()
		cancel()
	}()

	go func() {
		<-agentReady
		if role == ca.ManagerRole {
			<-managerReady
		}
		close(n.ready)
	}()

	wg.Wait()
	if managerErr != nil && managerErr != context.Canceled {
		return managerErr
	}
	if agentErr != nil && agentErr != context.Canceled {
		return agentErr
	}
	return err
}