func (m *Manager) watchForKEKChanges(ctx context.Context) error {
	clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization()
	clusterWatch, clusterWatchCancel, err := store.ViewAndWatch(m.raftNode.MemoryStore(),
		func(tx store.ReadTx) error {
			cluster := store.GetCluster(tx, clusterID)
			if cluster == nil {
				return fmt.Errorf("unable to get current cluster")
			}
			return m.updateKEK(ctx, cluster)
		},
		state.EventUpdateCluster{
			Cluster: &api.Cluster{ID: clusterID},
			Checks:  []state.ClusterCheckFunc{state.ClusterCheckID},
		},
	)
	if err != nil {
		return err
	}
	go func() {
		for {
			select {
			case event := <-clusterWatch:
				clusterEvent := event.(state.EventUpdateCluster)
				m.updateKEK(ctx, clusterEvent.Cluster)
			case <-ctx.Done():
				clusterWatchCancel()
				return
			}
		}
	}()
	return nil
}
// UpdateCluster updates a Cluster referenced by ClusterID with the given ClusterSpec.
// - Returns `NotFound` if the Cluster is not found.
// - Returns `InvalidArgument` if the ClusterSpec is malformed.
// - Returns `Unimplemented` if the ClusterSpec references unimplemented features.
// - Returns an error if the update fails.
func (s *Server) UpdateCluster(ctx context.Context, request *api.UpdateClusterRequest) (*api.UpdateClusterResponse, error) {
	if request.ClusterID == "" || request.ClusterVersion == nil {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}
	if err := validateClusterSpec(request.Spec); err != nil {
		return nil, err
	}

	var cluster *api.Cluster
	err := s.store.Update(func(tx store.Tx) error {
		cluster = store.GetCluster(tx, request.ClusterID)
		if cluster == nil {
			return nil
		}
		cluster.Meta.Version = *request.ClusterVersion
		cluster.Spec = *request.Spec.Copy()
		return store.UpdateCluster(tx, cluster)
	})
	if err != nil {
		return nil, err
	}
	if cluster == nil {
		return nil, grpc.Errorf(codes.NotFound, "cluster %s not found", request.ClusterID)
	}

	redactedClusters := redactClusters([]*api.Cluster{cluster})

	// WARN: we should never return cluster here. We need to redact the private fields first.
	return &api.UpdateClusterResponse{
		Cluster: redactedClusters[0],
	}, nil
}
func TestGetUnlockKey(t *testing.T) {
	t.Parallel()

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	var cluster *api.Cluster
	tc.MemoryStore.View(func(tx store.ReadTx) {
		clusters, err := store.FindClusters(tx, store.ByName(store.DefaultClusterName))
		require.NoError(t, err)
		cluster = clusters[0]
	})

	resp, err := tc.CAClients[0].GetUnlockKey(context.Background(), &api.GetUnlockKeyRequest{})
	require.NoError(t, err)
	require.Nil(t, resp.UnlockKey)
	require.Equal(t, cluster.Meta.Version, resp.Version)

	// Update the unlock key
	require.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
		cluster = store.GetCluster(tx, cluster.ID)
		cluster.Spec.EncryptionConfig.AutoLockManagers = true
		cluster.UnlockKeys = []*api.EncryptionKey{{
			Subsystem: ca.ManagerRole,
			Key:       []byte("secret"),
		}}
		return store.UpdateCluster(tx, cluster)
	}))

	tc.MemoryStore.View(func(tx store.ReadTx) {
		cluster = store.GetCluster(tx, cluster.ID)
	})

	require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error {
		resp, err = tc.CAClients[0].GetUnlockKey(context.Background(), &api.GetUnlockKeyRequest{})
		if err != nil {
			return fmt.Errorf("get unlock key: %v", err)
		}
		if !bytes.Equal(resp.UnlockKey, []byte("secret")) {
			return fmt.Errorf("secret hasn't rotated yet")
		}
		if cluster.Meta.Version.Index > resp.Version.Index {
			return fmt.Errorf("hasn't updated to the right version yet")
		}
		return nil
	}, 250*time.Millisecond))
}
// updateKey persists the current network bootstrap key ring and its Lamport
// clock to the cluster object in the store.
func (k *KeyManager) updateKey(cluster *api.Cluster) error {
	return k.store.Update(func(tx store.Tx) error {
		cluster = store.GetCluster(tx, cluster.ID)
		if cluster == nil {
			return nil
		}

		cluster.EncryptionKeyLamportClock = k.keyRing.lClock
		cluster.NetworkBootstrapKeys = k.keyRing.keys
		return store.UpdateCluster(tx, cluster)
	})
}
// GetCluster returns a Cluster given a ClusterID.
// - Returns `InvalidArgument` if ClusterID is not provided.
// - Returns `NotFound` if the Cluster is not found.
func (s *Server) GetCluster(ctx context.Context, request *api.GetClusterRequest) (*api.GetClusterResponse, error) {
	if request.ClusterID == "" {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}

	var cluster *api.Cluster
	s.store.View(func(tx store.ReadTx) {
		cluster = store.GetCluster(tx, request.ClusterID)
	})
	if cluster == nil {
		return nil, grpc.Errorf(codes.NotFound, "cluster %s not found", request.ClusterID)
	}

	redactedClusters := redactClusters([]*api.Cluster{cluster})

	// WARN: we should never return cluster here. We need to redact the private fields first.
	return &api.GetClusterResponse{
		Cluster: redactedClusters[0],
	}, nil
}
// GetUnlockKey is responsible for returning the current unlock key used for encrypting TLS private keys and
// other at-rest data. Access to this RPC call should only be allowed via mutual TLS from managers.
func (s *Server) GetUnlockKey(ctx context.Context, request *api.GetUnlockKeyRequest) (*api.GetUnlockKeyResponse, error) {
	// This directly queries the store, rather than storing the unlock key and version on
	// the `Server` object and updating it when `updateCluster` is called, because we need this
	// API to return the latest version of the key.  Otherwise, there might be a slight delay
	// between when the cluster gets updated, and when this function returns the latest key.
	// This delay is currently unacceptable because this RPC call is the only way, after a
	// cluster update, to get the actual value of the unlock key, and we don't want to return
	// a cached value.
	resp := api.GetUnlockKeyResponse{}
	s.store.View(func(tx store.ReadTx) {
		cluster := store.GetCluster(tx, s.securityConfig.ClientTLSCreds.Organization())
		resp.Version = cluster.Meta.Version
		if cluster.Spec.EncryptionConfig.AutoLockManagers {
			for _, encryptionKey := range cluster.UnlockKeys {
				if encryptionKey.Subsystem == ManagerRole {
					resp.UnlockKey = encryptionKey.Key
					return
				}
			}
		}
	})

	return &resp, nil
}
// rotateRootCAKEK will attempt to rotate the key-encryption-key for root CA key-material in raft.
// If there is no passphrase set in ENV, it returns.
// If there is plain-text root key-material, and a passphrase set, it encrypts it.
// If there is encrypted root key-material and it is using the current passphrase, it returns.
// If there is encrypted root key-material, and it is using the previous passphrase, it
// re-encrypts it with the current passphrase.
func (m *Manager) rotateRootCAKEK(ctx context.Context, clusterID string) error {
	// If we don't have a KEK, we won't ever be rotating anything
	strPassphrase := os.Getenv(ca.PassphraseENVVar)
	if strPassphrase == "" {
		return nil
	}

	strPassphrasePrev := os.Getenv(ca.PassphraseENVVarPrev)
	passphrase := []byte(strPassphrase)
	passphrasePrev := []byte(strPassphrasePrev)

	s := m.RaftNode.MemoryStore()
	var (
		cluster  *api.Cluster
		err      error
		finalKey []byte
	)

	// Retrieve the cluster identified by ClusterID
	s.View(func(readTx store.ReadTx) {
		cluster = store.GetCluster(readTx, clusterID)
	})
	if cluster == nil {
		return fmt.Errorf("cluster not found: %s", clusterID)
	}

	// Try to get the private key from the cluster
	privKeyPEM := cluster.RootCA.CAKey
	if len(privKeyPEM) == 0 {
		// We have no PEM root private key in this cluster.
		log.G(ctx).Warnf("cluster %s does not have private key material", clusterID)
		return nil
	}

	// Decode the PEM private key
	keyBlock, _ := pem.Decode(privKeyPEM)
	if keyBlock == nil {
		return fmt.Errorf("invalid PEM-encoded private key inside of cluster %s", clusterID)
	}

	// If this key is not encrypted, then we have to encrypt it
	if !x509.IsEncryptedPEMBlock(keyBlock) {
		finalKey, err = ca.EncryptECPrivateKey(privKeyPEM, strPassphrase)
		if err != nil {
			return err
		}
	} else {
		// This key is already encrypted, let's try to decrypt with the current main passphrase
		_, err = x509.DecryptPEMBlock(keyBlock, passphrase)
		if err == nil {
			// The main key is the correct KEK, nothing to do here
			return nil
		}

		// This key is already encrypted, but failed with current main passphrase.
		// Let's try to decrypt with the previous passphrase
		unencryptedKey, err := x509.DecryptPEMBlock(keyBlock, passphrasePrev)
		if err != nil {
			// We were not able to decrypt either with the main or backup passphrase, error
			return err
		}
		unencryptedKeyBlock := &pem.Block{
			Type:    keyBlock.Type,
			Bytes:   unencryptedKey,
			Headers: keyBlock.Headers,
		}

		// We were able to decrypt the key, but with the previous passphrase. Let's encrypt
		// with the new one and store it in raft
		finalKey, err = ca.EncryptECPrivateKey(pem.EncodeToMemory(unencryptedKeyBlock), strPassphrase)
		if err != nil {
			log.G(ctx).Debugf("failed to rotate the key-encrypting-key for the root key material of cluster %s", clusterID)
			return err
		}
	}

	log.G(ctx).Infof("Re-encrypting the root key material of cluster %s", clusterID)

	// Let's update the key in the cluster object
	return s.Update(func(tx store.Tx) error {
		cluster = store.GetCluster(tx, clusterID)
		if cluster == nil {
			return fmt.Errorf("cluster not found: %s", clusterID)
		}
		cluster.RootCA.CAKey = finalKey
		return store.UpdateCluster(tx, cluster)
	})
}
func TestRequestAndSaveNewCertificates(t *testing.T) {
	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	// Copy the current RootCA without the signer
	rca := ca.RootCA{Cert: tc.RootCA.Cert, Pool: tc.RootCA.Pool}
	cert, err := rca.RequestAndSaveNewCertificates(tc.Context, tc.KeyReadWriter,
		ca.CertificateRequestConfig{
			Token:   tc.ManagerToken,
			Remotes: tc.Remotes,
		})
	assert.NoError(t, err)
	assert.NotNil(t, cert)
	perms, err := permbits.Stat(tc.Paths.Node.Cert)
	assert.NoError(t, err)
	assert.False(t, perms.GroupWrite())
	assert.False(t, perms.OtherWrite())

	// there was no encryption config in the remote, so the key should be unencrypted
	unencryptedKeyReader := ca.NewKeyReadWriter(tc.Paths.Node, nil, nil)
	_, _, err = unencryptedKeyReader.Read()
	require.NoError(t, err)

	// the worker token is also unencrypted
	cert, err = rca.RequestAndSaveNewCertificates(tc.Context, tc.KeyReadWriter,
		ca.CertificateRequestConfig{
			Token:   tc.WorkerToken,
			Remotes: tc.Remotes,
		})
	assert.NoError(t, err)
	assert.NotNil(t, cert)
	_, _, err = unencryptedKeyReader.Read()
	require.NoError(t, err)

	// If there is a different kek in the remote store, when TLS certs are renewed the new key will
	// be encrypted with that kek
	assert.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
		cluster := store.GetCluster(tx, tc.Organization)
		cluster.Spec.EncryptionConfig.AutoLockManagers = true
		cluster.UnlockKeys = []*api.EncryptionKey{{
			Subsystem: ca.ManagerRole,
			Key:       []byte("kek!"),
		}}
		return store.UpdateCluster(tx, cluster)
	}))
	assert.NoError(t, os.RemoveAll(tc.Paths.Node.Cert))
	assert.NoError(t, os.RemoveAll(tc.Paths.Node.Key))

	_, err = rca.RequestAndSaveNewCertificates(tc.Context, tc.KeyReadWriter,
		ca.CertificateRequestConfig{
			Token:   tc.ManagerToken,
			Remotes: tc.Remotes,
		})
	assert.NoError(t, err)

	// key can no longer be read without a kek
	_, _, err = unencryptedKeyReader.Read()
	require.Error(t, err)

	_, _, err = ca.NewKeyReadWriter(tc.Paths.Node, []byte("kek!"), nil).Read()
	require.NoError(t, err)

	// if it's a worker though, the key is always unencrypted, even though the manager key is encrypted
	_, err = rca.RequestAndSaveNewCertificates(tc.Context, tc.KeyReadWriter,
		ca.CertificateRequestConfig{
			Token:   tc.WorkerToken,
			Remotes: tc.Remotes,
		})
	assert.NoError(t, err)
	_, _, err = unencryptedKeyReader.Read()
	require.NoError(t, err)
}
// UpdateCluster updates a Cluster referenced by ClusterID with the given ClusterSpec.
// - Returns `NotFound` if the Cluster is not found.
// - Returns `InvalidArgument` if the ClusterSpec is malformed.
// - Returns `Unimplemented` if the ClusterSpec references unimplemented features.
// - Returns an error if the update fails.
func (s *Server) UpdateCluster(ctx context.Context, request *api.UpdateClusterRequest) (*api.UpdateClusterResponse, error) {
	if request.ClusterID == "" || request.ClusterVersion == nil {
		return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
	}
	if err := validateClusterSpec(request.Spec); err != nil {
		return nil, err
	}

	var cluster *api.Cluster
	err := s.store.Update(func(tx store.Tx) error {
		cluster = store.GetCluster(tx, request.ClusterID)
		if cluster == nil {
			return nil
		}
		cluster.Meta.Version = *request.ClusterVersion
		cluster.Spec = *request.Spec.Copy()

		expireBlacklistedCerts(cluster)

		if request.Rotation.WorkerJoinToken {
			cluster.RootCA.JoinTokens.Worker = ca.GenerateJoinToken(s.rootCA)
		}
		if request.Rotation.ManagerJoinToken {
			cluster.RootCA.JoinTokens.Manager = ca.GenerateJoinToken(s.rootCA)
		}

		// Carry over the existing unlock keys, dropping the manager unlock key
		// if auto-lock has been turned off.
		var unlockKeys []*api.EncryptionKey
		var managerKey *api.EncryptionKey
		for _, eKey := range cluster.UnlockKeys {
			if eKey.Subsystem == ca.ManagerRole {
				if !cluster.Spec.EncryptionConfig.AutoLockManagers {
					continue
				}
				managerKey = eKey
			}
			unlockKeys = append(unlockKeys, eKey)
		}

		switch {
		case !cluster.Spec.EncryptionConfig.AutoLockManagers:
			break
		case managerKey == nil:
			// auto-lock is on but there is no manager unlock key yet: generate one
			unlockKeys = append(unlockKeys, &api.EncryptionKey{
				Subsystem: ca.ManagerRole,
				Key:       encryption.GenerateSecretKey(),
			})
		case request.Rotation.ManagerUnlockKey:
			managerKey.Key = encryption.GenerateSecretKey()
		}
		cluster.UnlockKeys = unlockKeys

		return store.UpdateCluster(tx, cluster)
	})
	if err != nil {
		return nil, err
	}
	if cluster == nil {
		return nil, grpc.Errorf(codes.NotFound, "cluster %s not found", request.ClusterID)
	}

	redactedClusters := redactClusters([]*api.Cluster{cluster})

	// WARN: we should never return cluster here. We need to redact the private fields first.
	return &api.UpdateClusterResponse{
		Cluster: redactedClusters[0],
	}, nil
}
func TestGetRemoteCA(t *testing.T) {
	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	shaHash := sha256.New()
	shaHash.Write(tc.RootCA.Cert)
	md := shaHash.Sum(nil)
	mdStr := hex.EncodeToString(md)

	d, err := digest.Parse("sha256:" + mdStr)
	require.NoError(t, err)

	downloadedRootCA, err := ca.GetRemoteCA(tc.Context, d, tc.ConnBroker)
	require.NoError(t, err)
	require.Equal(t, downloadedRootCA.Cert, tc.RootCA.Cert)

	// update the test CA to include a multi-certificate bundle as the root - the digest
	// we use to verify with must be the digest of the whole bundle
	tmpDir, err := ioutil.TempDir("", "GetRemoteCA")
	require.NoError(t, err)
	defer os.RemoveAll(tmpDir)
	paths := ca.NewConfigPaths(tmpDir)
	otherRootCA, err := ca.CreateRootCA("other", paths.RootCA)
	require.NoError(t, err)

	comboCertBundle := append(tc.RootCA.Cert, otherRootCA.Cert...)
	require.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
		cluster := store.GetCluster(tx, tc.Organization)
		cluster.RootCA.CACert = comboCertBundle
		cluster.RootCA.CAKey = tc.RootCA.Key
		return store.UpdateCluster(tx, cluster)
	}))
	require.NoError(t, raftutils.PollFunc(nil, func() error {
		_, err := ca.GetRemoteCA(tc.Context, d, tc.ConnBroker)
		if err == nil {
			return fmt.Errorf("testca's rootca hasn't updated yet")
		}
		require.Contains(t, err.Error(), "remote CA does not match fingerprint")
		return nil
	}))

	// If we provide the right digest, the root CA is updated and we can validate
	// certs signed by either one
	d = digest.FromBytes(comboCertBundle)
	downloadedRootCA, err = ca.GetRemoteCA(tc.Context, d, tc.ConnBroker)
	require.NoError(t, err)
	require.Equal(t, comboCertBundle, downloadedRootCA.Cert)
	require.Equal(t, 2, len(downloadedRootCA.Pool.Subjects()))

	for _, rootCA := range []ca.RootCA{tc.RootCA, otherRootCA} {
		krw := ca.NewKeyReadWriter(paths.Node, nil, nil)
		_, err := rootCA.IssueAndSaveNewCertificates(krw, "cn", "ou", "org")
		require.NoError(t, err)

		certPEM, _, err := krw.Read()
		require.NoError(t, err)
		cert, err := helpers.ParseCertificatesPEM(certPEM)
		require.NoError(t, err)
		chains, err := cert[0].Verify(x509.VerifyOptions{
			Roots: downloadedRootCA.Pool,
		})
		require.NoError(t, err)
		require.Len(t, chains, 1)
	}
}
func TestUpdateClusterRotateUnlockKey(t *testing.T) {
	ts := newTestServer(t)
	defer ts.Stop()

	// create a cluster with extra encryption keys, to make sure they exist
	cluster := createClusterObj("id", "name", api.AcceptancePolicy{}, ts.Server.rootCA)
	expected := make(map[string]*api.EncryptionKey)
	for i := 1; i <= 2; i++ {
		value := fmt.Sprintf("fake%d", i)
		expected[value] = &api.EncryptionKey{Subsystem: value, Key: []byte(value)}
		cluster.UnlockKeys = append(cluster.UnlockKeys, expected[value])
	}
	require.NoError(t, ts.Store.Update(func(tx store.Tx) error {
		return store.CreateCluster(tx, cluster)
	}))

	// we have to get the key from the memory store, since the cluster returned by the API is redacted
	getManagerKey := func() (managerKey *api.EncryptionKey) {
		ts.Store.View(func(tx store.ReadTx) {
			viewCluster := store.GetCluster(tx, cluster.ID)
			// no matter whether there's a manager key or not, the other keys should not have been affected
			foundKeys := make(map[string]*api.EncryptionKey)
			for _, eKey := range viewCluster.UnlockKeys {
				foundKeys[eKey.Subsystem] = eKey
			}
			for v, key := range expected {
				foundKey, ok := foundKeys[v]
				require.True(t, ok)
				require.Equal(t, key, foundKey)
			}
			managerKey = foundKeys[ca.ManagerRole]
		})
		return
	}

	validateListResult := func(expectedLocked bool) api.Version {
		r, err := ts.Client.ListClusters(context.Background(), &api.ListClustersRequest{
			Filters: &api.ListClustersRequest_Filters{
				NamePrefixes: []string{"name"},
			},
		})
		require.NoError(t, err)
		require.Len(t, r.Clusters, 1)
		require.Equal(t, expectedLocked, r.Clusters[0].Spec.EncryptionConfig.AutoLockManagers)
		require.Nil(t, r.Clusters[0].UnlockKeys) // redacted

		return r.Clusters[0].Meta.Version
	}

	// we start off with manager autolocking turned off
	version := validateListResult(false)
	require.Nil(t, getManagerKey())

	// Rotate unlock key without turning auto-lock on - key should still be nil
	_, err := ts.Client.UpdateCluster(context.Background(), &api.UpdateClusterRequest{
		ClusterID:      cluster.ID,
		Spec:           &cluster.Spec,
		ClusterVersion: &version,
		Rotation: api.KeyRotation{
			ManagerUnlockKey: true,
		},
	})
	require.NoError(t, err)
	version = validateListResult(false)
	require.Nil(t, getManagerKey())

	// Enable auto-lock only, no rotation boolean
	spec := cluster.Spec.Copy()
	spec.EncryptionConfig.AutoLockManagers = true
	_, err = ts.Client.UpdateCluster(context.Background(), &api.UpdateClusterRequest{
		ClusterID:      cluster.ID,
		Spec:           spec,
		ClusterVersion: &version,
	})
	require.NoError(t, err)
	version = validateListResult(true)
	managerKey := getManagerKey()
	require.NotNil(t, managerKey)

	// Rotate the manager key
	_, err = ts.Client.UpdateCluster(context.Background(), &api.UpdateClusterRequest{
		ClusterID:      cluster.ID,
		Spec:           spec,
		ClusterVersion: &version,
		Rotation: api.KeyRotation{
			ManagerUnlockKey: true,
		},
	})
	require.NoError(t, err)
	version = validateListResult(true)
	newManagerKey := getManagerKey()
	require.NotNil(t, newManagerKey)
	require.NotEqual(t, managerKey, newManagerKey)
	managerKey = newManagerKey

	// Just update the cluster without modifying unlock keys
	_, err = ts.Client.UpdateCluster(context.Background(), &api.UpdateClusterRequest{
		ClusterID:      cluster.ID,
		Spec:           spec,
		ClusterVersion: &version,
	})
	require.NoError(t, err)
	version = validateListResult(true)
	newManagerKey = getManagerKey()
	require.Equal(t, managerKey, newManagerKey)

	// Disable auto lock
	_, err = ts.Client.UpdateCluster(context.Background(), &api.UpdateClusterRequest{
		ClusterID:      cluster.ID,
		Spec:           &cluster.Spec, // set back to original spec
		ClusterVersion: &version,
		Rotation: api.KeyRotation{
			ManagerUnlockKey: true, // this will be ignored because we disable the auto-lock
		},
	})
	require.NoError(t, err)
	validateListResult(false)
	require.Nil(t, getManagerKey())
}