func decryptRaftData(swarmdir, outdir, unlockKey string) error { krw, err := getKRW(swarmdir, unlockKey) if err != nil { return err } deks, err := getDEKData(krw) if err != nil { return err } _, d := encryption.Defaults(deks.CurrentDEK) if deks.PendingDEK == nil { _, d2 := encryption.Defaults(deks.PendingDEK) d = storage.MultiDecrypter{d, d2} } snapDir := filepath.Join(outdir, "snap-decrypted") if err := moveDirAside(snapDir); err != nil { return err } if err := storage.MigrateSnapshot( filepath.Join(swarmdir, "raft", "snap-v3-encrypted"), snapDir, storage.NewSnapFactory(encryption.NoopCrypter, d), storage.OriginalSnap); err != nil { return err } var walsnap walpb.Snapshot snap, err := storage.OriginalSnap.New(snapDir).Load() if err != nil && !os.IsNotExist(err) { return err } if snap != nil { walsnap.Index = snap.Metadata.Index walsnap.Term = snap.Metadata.Term } walDir := filepath.Join(outdir, "wal-decrypted") if err := moveDirAside(walDir); err != nil { return err } return storage.MigrateWALs(context.Background(), filepath.Join(swarmdir, "raft", "wal-v3-encrypted"), walDir, storage.NewWALFactory(encryption.NoopCrypter, d), storage.OriginalWAL, walsnap) }
func encodePEMHeaderValue(headerValue []byte, kek []byte) (string, error) { var encrypter encryption.Encrypter = encryption.NoopCrypter if kek != nil { encrypter, _ = encryption.Defaults(kek) } encrypted, err := encryption.Encrypt(headerValue, encrypter) if err != nil { return "", err } return base64.StdEncoding.EncodeToString(encrypted), nil }
func TestDecrypt(t *testing.T) { tempdir, err := ioutil.TempDir("", "rafttool") require.NoError(t, err) defer os.RemoveAll(tempdir) kek := []byte("kek") dek := []byte("dek") unlockKey := encryption.HumanReadableKey(kek) // write a key to disk, else we won't be able to decrypt anything paths := certPaths(tempdir) krw := ca.NewKeyReadWriter(paths.Node, kek, manager.RaftDEKData{EncryptionKeys: raft.EncryptionKeys{CurrentDEK: dek}}) cert, key, err := testutils.CreateRootCertAndKey("not really a root, just need cert and key") require.NoError(t, err) require.NoError(t, krw.Write(cert, key, nil)) // create the encrypted v3 directory origSnapshot := raftpb.Snapshot{ Data: []byte("snapshot"), Metadata: raftpb.SnapshotMetadata{ Index: 1, Term: 1, }, } e, d := encryption.Defaults(dek) writeFakeRaftData(t, tempdir, &origSnapshot, storage.NewWALFactory(e, d), storage.NewSnapFactory(e, d)) outdir := filepath.Join(tempdir, "outdir") // if we use the wrong unlock key, we can't actually decrypt anything. The output directory won't get created. err = decryptRaftData(tempdir, outdir, "") require.IsType(t, ca.ErrInvalidKEK{}, err) require.False(t, fileutil.Exist(outdir)) // Using the right unlock key, we produce data that is unencrypted require.NoError(t, decryptRaftData(tempdir, outdir, unlockKey)) require.True(t, fileutil.Exist(outdir)) // The snapshot directory is readable by the regular snapshotter snapshot, err := storage.OriginalSnap.New(filepath.Join(outdir, "snap-decrypted")).Load() require.NoError(t, err) require.NotNil(t, snapshot) require.Equal(t, origSnapshot, *snapshot) // The wals are readable by the regular wal walreader, err := storage.OriginalWAL.Open(filepath.Join(outdir, "wal-decrypted"), walpb.Snapshot{Index: 1, Term: 1}) require.NoError(t, err) metadata, _, entries, err := walreader.ReadAll() require.NoError(t, err) require.Equal(t, []byte("v3metadata"), metadata) require.Len(t, entries, 5) }
func decodePEMHeaderValue(headerValue string, kek []byte) ([]byte, error) { var decrypter encryption.Decrypter = encryption.NoopCrypter if kek != nil { _, decrypter = encryption.Defaults(kek) } valueBytes, err := base64.StdEncoding.DecodeString(headerValue) if err != nil { return nil, err } result, err := encryption.Decrypt(valueBytes, decrypter) if err != nil { return nil, ca.ErrInvalidKEK{Wrapped: err} } return result, nil }
// RotateEncryptionKey swaps out the encoders and decoders used by the wal and snapshotter func (e *EncryptedRaftLogger) RotateEncryptionKey(newKey []byte) { e.encoderMu.Lock() defer e.encoderMu.Unlock() if e.wal != nil { // if the wal exists, the snapshotter exists // We don't want to have to close the WAL, because we can't open a new one. // We need to know the previous snapshot, because when you open a WAL you // have to read out all the entries from a particular snapshot, or you can't // write. So just rotate the encoders out from under it. We already // have a lock on writing to snapshots and WALs. wrapped, ok := e.wal.(*wrappedWAL) if !ok { panic(fmt.Errorf("EncryptedRaftLogger's WAL is not a wrappedWAL")) } wrapped.encrypter, wrapped.decrypter = encryption.Defaults(newKey) e.snapshotter = NewSnapFactory(wrapped.encrypter, wrapped.decrypter).New(e.snapDir()) } e.EncryptionKey = newKey }
// BootstrapNew creates a new snapshotter and WAL writer, expecting that there is nothing on disk func (e *EncryptedRaftLogger) BootstrapNew(metadata []byte) error { e.encoderMu.Lock() defer e.encoderMu.Unlock() encrypter, decrypter := encryption.Defaults(e.EncryptionKey) walFactory := NewWALFactory(encrypter, decrypter) for _, dirpath := range []string{filepath.Dir(e.walDir()), e.snapDir()} { if err := os.MkdirAll(dirpath, 0700); err != nil { return errors.Wrapf(err, "failed to create %s", dirpath) } } var err error // the wal directory must not already exist upon creation e.wal, err = walFactory.Create(e.walDir(), metadata) if err != nil { return errors.Wrap(err, "failed to create WAL") } e.snapshotter = NewSnapFactory(encrypter, decrypter).New(e.snapDir()) return nil }
func TestMigrateToV3EncryptedForm(t *testing.T) { t.Parallel() tempdir, err := ioutil.TempDir("", "raft-storage") require.NoError(t, err) defer os.RemoveAll(tempdir) dek := []byte("key") writeDataTo := func(suffix string, snapshot raftpb.Snapshot, walFactory WALFactory, snapFactory SnapFactory) []raftpb.Entry { snapDir := filepath.Join(tempdir, "snap"+suffix) walDir := filepath.Join(tempdir, "wal"+suffix) require.NoError(t, os.MkdirAll(snapDir, 0755)) require.NoError(t, snapFactory.New(snapDir).SaveSnap(snapshot)) _, entries, _ := makeWALData(snapshot.Metadata.Index, snapshot.Metadata.Term) walWriter, err := walFactory.Create(walDir, []byte("metadata")) require.NoError(t, err) require.NoError(t, walWriter.SaveSnapshot(walpb.Snapshot{Index: snapshot.Metadata.Index, Term: snapshot.Metadata.Term})) require.NoError(t, walWriter.Save(raftpb.HardState{}, entries)) require.NoError(t, walWriter.Close()) return entries } requireLoadedData := func(expectedSnap raftpb.Snapshot, expectedEntries []raftpb.Entry) { logger := EncryptedRaftLogger{ StateDir: tempdir, EncryptionKey: dek, } readSnap, waldata, err := logger.BootstrapFromDisk(context.Background()) require.NoError(t, err) require.NotNil(t, readSnap) require.Equal(t, expectedSnap, *readSnap) require.Equal(t, expectedEntries, waldata.Entries) logger.Close(context.Background()) } v2Snapshot := fakeSnapshotData v3Snapshot := fakeSnapshotData v3Snapshot.Metadata.Index += 100 v3Snapshot.Metadata.Term += 10 v3EncryptedSnapshot := fakeSnapshotData v3EncryptedSnapshot.Metadata.Index += 200 v3EncryptedSnapshot.Metadata.Term += 20 encoder, decoders := encryption.Defaults(dek) walFactory := NewWALFactory(encoder, decoders) snapFactory := NewSnapFactory(encoder, decoders) // generate both v2 and v3 unencrypted snapshot data directories, as well as an encrypted directory v2Entries := writeDataTo("", v2Snapshot, OriginalWAL, OriginalSnap) v3Entries := writeDataTo("-v3", v3Snapshot, OriginalWAL, OriginalSnap) v3EncryptedEntries := writeDataTo("-v3-encrypted", v3EncryptedSnapshot, walFactory, snapFactory) // bootstrap from disk - the encrypted directory exists, so we should just read from // it instead of from the legacy directories requireLoadedData(v3EncryptedSnapshot, v3EncryptedEntries) // remove the newest dirs - should try to migrate from v3 require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "snap-v3-encrypted"))) require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "wal-v3-encrypted"))) requireLoadedData(v3Snapshot, v3Entries) // it can recover from partial migrations require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "snap-v3-encrypted"))) requireLoadedData(v3Snapshot, v3Entries) // v3 dirs still there _, err = os.Stat(filepath.Join(tempdir, "snap-v3")) require.NoError(t, err) _, err = os.Stat(filepath.Join(tempdir, "wal-v3")) require.NoError(t, err) // remove the v3 dirs - should try to migrate from v2 require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "snap-v3-encrypted"))) require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "wal-v3-encrypted"))) require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "snap-v3"))) require.NoError(t, os.RemoveAll(filepath.Join(tempdir, "wal-v3"))) requireLoadedData(v2Snapshot, v2Entries) }
// BootstrapFromDisk creates a new snapshotter and wal, and also reads the latest snapshot and WALs from disk func (e *EncryptedRaftLogger) BootstrapFromDisk(ctx context.Context, oldEncryptionKeys ...[]byte) (*raftpb.Snapshot, WALData, error) { e.encoderMu.Lock() defer e.encoderMu.Unlock() walDir := e.walDir() snapDir := e.snapDir() encrypter, decrypter := encryption.Defaults(e.EncryptionKey) if oldEncryptionKeys != nil { decrypters := []encryption.Decrypter{decrypter} for _, key := range oldEncryptionKeys { _, d := encryption.Defaults(key) decrypters = append(decrypters, d) } decrypter = MultiDecrypter(decrypters) } snapFactory := NewSnapFactory(encrypter, decrypter) if !fileutil.Exist(snapDir) { // If snapshots created by the etcd-v2 code exist, or by swarmkit development version, // read the latest snapshot and write it encoded to the new path. The new path // prevents etc-v2 creating snapshots that are visible to us, but not encoded and // out of sync with our WALs, after a downgrade. for _, dirs := range versionedWALSnapDirs[1:] { legacySnapDir := filepath.Join(e.StateDir, dirs.snap) if fileutil.Exist(legacySnapDir) { if err := MigrateSnapshot(legacySnapDir, snapDir, OriginalSnap, snapFactory); err != nil { return nil, WALData{}, err } break } } } // ensure the new directory exists if err := os.MkdirAll(snapDir, 0700); err != nil { return nil, WALData{}, errors.Wrap(err, "failed to create snapshot directory") } var ( snapshotter Snapshotter walObj WAL err error ) // Create a snapshotter and load snapshot data snapshotter = snapFactory.New(snapDir) snapshot, err := snapshotter.Load() if err != nil && err != snap.ErrNoSnapshot { return nil, WALData{}, err } walFactory := NewWALFactory(encrypter, decrypter) var walsnap walpb.Snapshot if snapshot != nil { walsnap.Index = snapshot.Metadata.Index walsnap.Term = snapshot.Metadata.Term } if !wal.Exist(walDir) { var walExists bool // If wals created by the etcd-v2 wal code exist, read the latest ones based // on this snapshot and encode them to wals in the new path to avoid adding // backwards-incompatible entries to those files. for _, dirs := range versionedWALSnapDirs[1:] { legacyWALDir := filepath.Join(e.StateDir, dirs.wal) if !wal.Exist(legacyWALDir) { continue } if err = MigrateWALs(ctx, legacyWALDir, walDir, OriginalWAL, walFactory, walsnap); err != nil { return nil, WALData{}, err } walExists = true break } if !walExists { return nil, WALData{}, ErrNoWAL } } walObj, waldata, err := ReadRepairWAL(ctx, walDir, walsnap, walFactory) if err != nil { return nil, WALData{}, err } e.snapshotter = snapshotter e.wal = walObj return snapshot, waldata, nil }
// Tests locking and unlocking the manager and key rotations func TestManagerLockUnlock(t *testing.T) { ctx := context.Background() temp, err := ioutil.TempFile("", "test-manager-lock") require.NoError(t, err) require.NoError(t, temp.Close()) require.NoError(t, os.Remove(temp.Name())) defer os.RemoveAll(temp.Name()) stateDir, err := ioutil.TempDir("", "test-raft") require.NoError(t, err) defer os.RemoveAll(stateDir) tc := testutils.NewTestCA(t) defer tc.Stop() managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole) require.NoError(t, err) _, _, err = managerSecurityConfig.KeyReader().Read() require.NoError(t, err) m, err := New(&Config{ RemoteAPI: RemoteAddrs{ListenAddr: "127.0.0.1:0"}, ControlAPI: temp.Name(), StateDir: stateDir, SecurityConfig: managerSecurityConfig, // start off without any encryption }) require.NoError(t, err) require.NotNil(t, m) done := make(chan error) defer close(done) go func() { done <- m.Run(ctx) }() opts := []grpc.DialOption{ grpc.WithTimeout(10 * time.Second), grpc.WithTransportCredentials(managerSecurityConfig.ClientTLSCreds), } conn, err := grpc.Dial(m.Addr(), opts...) require.NoError(t, err) defer func() { require.NoError(t, conn.Close()) }() // check that there is no kek currently - we are using the API because this // lets us wait until the manager is up and listening, as well var cluster *api.Cluster client := api.NewControlClient(conn) require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error { resp, err := client.ListClusters(ctx, &api.ListClustersRequest{}) if err != nil { return err } if len(resp.Clusters) == 0 { return fmt.Errorf("no clusters yet") } cluster = resp.Clusters[0] return nil }, 1*time.Second)) require.Nil(t, cluster.UnlockKeys) // tls key is unencrypted, but there is a DEK key, err := ioutil.ReadFile(tc.Paths.Node.Key) require.NoError(t, err) keyBlock, _ := pem.Decode(key) require.NotNil(t, keyBlock) require.False(t, x509.IsEncryptedPEMBlock(keyBlock)) require.Len(t, keyBlock.Headers, 2) currentDEK, err := decodePEMHeaderValue(keyBlock.Headers[pemHeaderRaftDEK], nil) require.NoError(t, err) require.NotEmpty(t, currentDEK) // update the lock key - this may fail due to update out of sequence errors, so try again for { getResp, err := client.GetCluster(ctx, &api.GetClusterRequest{ClusterID: cluster.ID}) require.NoError(t, err) cluster = getResp.Cluster spec := cluster.Spec.Copy() spec.EncryptionConfig.AutoLockManagers = true updateResp, err := client.UpdateCluster(ctx, &api.UpdateClusterRequest{ ClusterID: cluster.ID, ClusterVersion: &cluster.Meta.Version, Spec: spec, }) if grpc.ErrorDesc(err) == "update out of sequence" { continue } // if there is any other type of error, this should fail if err == nil { cluster = updateResp.Cluster } break } require.NoError(t, err) caConn := api.NewCAClient(conn) unlockKeyResp, err := caConn.GetUnlockKey(ctx, &api.GetUnlockKeyRequest{}) require.NoError(t, err) // this should update the TLS key, rotate the DEK, and finish snapshotting var updatedKey []byte require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error { updatedKey, err = ioutil.ReadFile(tc.Paths.Node.Key) require.NoError(t, err) // this should never error due to atomic writes if bytes.Equal(key, updatedKey) { return fmt.Errorf("TLS key should have been re-encrypted at least") } keyBlock, _ = pem.Decode(updatedKey) require.NotNil(t, keyBlock) // this should never error due to atomic writes if !x509.IsEncryptedPEMBlock(keyBlock) { return fmt.Errorf("Key not encrypted") } // we don't check that the TLS key has been rotated, because that may take // a little bit, and is best effort only currentDEKString, ok := keyBlock.Headers[pemHeaderRaftDEK] require.True(t, ok) // there should never NOT be a current header nowCurrentDEK, err := decodePEMHeaderValue(currentDEKString, unlockKeyResp.UnlockKey) require.NoError(t, err) // it should always be encrypted if bytes.Equal(currentDEK, nowCurrentDEK) { return fmt.Errorf("snapshot has not been finished yet") } currentDEK = nowCurrentDEK return nil }, 1*time.Second)) _, ok := keyBlock.Headers[pemHeaderRaftPendingDEK] require.False(t, ok) // once the snapshot is do _, ok = keyBlock.Headers[pemHeaderRaftDEKNeedsRotation] require.False(t, ok) // verify that the snapshot is readable with the new DEK encrypter, decrypter := encryption.Defaults(currentDEK) // we can't use the raftLogger, because the WALs are still locked while the raft node is up. And once we remove // the manager, they'll be deleted. snapshot, err := storage.NewSnapFactory(encrypter, decrypter).New(filepath.Join(stateDir, "raft", "snap-v3-encrypted")).Load() require.NoError(t, err) require.NotNil(t, snapshot) // update the lock key to nil for i := 0; i < 3; i++ { getResp, err := client.GetCluster(ctx, &api.GetClusterRequest{ClusterID: cluster.ID}) require.NoError(t, err) cluster = getResp.Cluster spec := cluster.Spec.Copy() spec.EncryptionConfig.AutoLockManagers = false _, err = client.UpdateCluster(ctx, &api.UpdateClusterRequest{ ClusterID: cluster.ID, ClusterVersion: &cluster.Meta.Version, Spec: spec, }) if grpc.ErrorDesc(err) == "update out of sequence" { continue } require.NoError(t, err) } // this should update the TLS key var unlockedKey []byte require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error { unlockedKey, err = ioutil.ReadFile(tc.Paths.Node.Key) if err != nil { return err } if bytes.Equal(unlockedKey, updatedKey) { return fmt.Errorf("TLS key should have been rotated") } return nil }, 1*time.Second)) // the new key should not be encrypted, and the DEK should also be unencrypted // but not rotated keyBlock, _ = pem.Decode(unlockedKey) require.NotNil(t, keyBlock) require.False(t, x509.IsEncryptedPEMBlock(keyBlock)) unencryptedDEK, err := decodePEMHeaderValue(keyBlock.Headers[pemHeaderRaftDEK], nil) require.NoError(t, err) require.NotNil(t, unencryptedDEK) require.Equal(t, currentDEK, unencryptedDEK) m.Stop(ctx) // After stopping we should MAY receive an error from ListenAndServe if // all this happened before WaitForLeader completed, so don't check the // error. <-done }