// Dial establishes a connection and creates a client.
// It infers connection parameters from CLI options.
func Dial(cmd *cobra.Command) (api.ControlClient, error) {
	conn, err := DialConn(cmd)
	if err != nil {
		return nil, err
	}

	return api.NewControlClient(conn), nil
}
func (b *Benchmark) launch() (*api.Service, error) {
	conn, err := grpc.Dial(b.cfg.Manager, grpc.WithInsecure())
	if err != nil {
		return nil, err
	}

	client := api.NewControlClient(conn)
	r, err := client.CreateService(context.Background(), &api.CreateServiceRequest{
		Spec: b.spec(),
	})
	if err != nil {
		return nil, err
	}

	return r.Service, nil
}
func initClusterSpec(node *node, spec types.Spec) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return fmt.Errorf("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit and merge
			// any non-nil, non-zero value from spec into the gRPC spec. This
			// leaves the default values alone.
			// Note that this is different from Update(), where we expect the user
			// to specify the complete spec of the cluster (as they already know
			// the existing one and which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}
func (n *nodeRunner) handleControlSocketChange(ctx context.Context, node *swarmnode.Node) {
	for conn := range node.ListenControlSocket(ctx) {
		n.mu.Lock()
		if n.grpcConn != conn {
			if conn == nil {
				n.controlClient = nil
				n.logsClient = nil
			} else {
				n.controlClient = swarmapi.NewControlClient(conn)
				n.logsClient = swarmapi.NewLogsClient(conn)
			}
		}
		n.grpcConn = conn
		n.mu.Unlock()
		n.cluster.configEvent <- struct{}{}
	}
}
// ControlClient returns a gRPC client for the node's ControlAPI. It will panic
// for non-manager nodes.
func (n *testNode) ControlClient(ctx context.Context) (api.ControlClient, error) {
	ctx, cancel := context.WithTimeout(ctx, opsTimeout)
	defer cancel()
	connChan := n.node.ListenControlSocket(ctx)
	var controlConn *grpc.ClientConn
	if err := raftutils.PollFuncWithTimeout(nil, func() error {
		select {
		case controlConn = <-connChan:
		default:
		}
		if controlConn == nil {
			return fmt.Errorf("didn't get control api connection")
		}
		return nil
	}, opsTimeout); err != nil {
		return nil, err
	}
	return api.NewControlClient(controlConn), nil
}
func initAcceptancePolicy(node *swarmagent.Node, acceptancePolicy types.AcceptancePolicy) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return fmt.Errorf("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			spec := &cluster.Spec
			if err := convert.SwarmSpecUpdateAcceptancePolicy(spec, acceptancePolicy); err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err := client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           spec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}
func newTestServer(t *testing.T) *testServer {
	ts := &testServer{}

	// Create a testCA just to get a usable RootCA object
	tc := cautils.NewTestCA(nil)
	tc.Stop()

	ts.Store = store.NewMemoryStore(&mockProposer{})
	assert.NotNil(t, ts.Store)

	ts.Server = NewServer(ts.Store, nil, &tc.RootCA)
	assert.NotNil(t, ts.Server)

	temp, err := ioutil.TempFile("", "test-socket")
	assert.NoError(t, err)
	assert.NoError(t, temp.Close())
	assert.NoError(t, os.Remove(temp.Name()))

	ts.tempUnixSocket = temp.Name()

	lis, err := net.Listen("unix", temp.Name())
	assert.NoError(t, err)

	ts.grpcServer = grpc.NewServer()
	api.RegisterControlServer(ts.grpcServer, ts.Server)
	go func() {
		// Serve will always return an error (even when properly stopped).
		// Explicitly ignore it.
		_ = ts.grpcServer.Serve(lis)
	}()

	conn, err := grpc.Dial(temp.Name(), grpc.WithInsecure(), grpc.WithTimeout(10*time.Second),
		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}))
	assert.NoError(t, err)
	ts.clientConn = conn

	ts.Client = api.NewControlClient(conn)

	return ts
}
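// Illustrative sketch (not from the source): a minimal smoke test exercising the
// in-process control API built by newTestServer. The cleanup helper ts.Stop() and
// the empty-store expectation are assumptions for illustration; adjust to whatever
// cleanup the test package actually provides.
func TestNewTestServerSmoke(t *testing.T) {
	ts := newTestServer(t)
	defer ts.Stop() // hypothetical cleanup: stop grpcServer, close clientConn, remove the socket

	// A fresh MemoryStore holds no clusters, so ListClusters over the unix-socket
	// connection should succeed and return an empty list.
	r, err := ts.Client.ListClusters(context.Background(), &api.ListClustersRequest{})
	assert.NoError(t, err)
	assert.Empty(t, r.Clusters)
}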
// Dial establishes a connection and creates a client.
// It infers connection parameters from CLI options.
func Dial(cmd *cobra.Command) (api.ControlClient, error) {
	addr, err := cmd.Flags().GetString("socket")
	if err != nil {
		return nil, err
	}

	opts := []grpc.DialOption{}
	insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
	opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
	opts = append(opts, grpc.WithDialer(
		func(addr string, timeout time.Duration) (net.Conn, error) {
			return net.DialTimeout("unix", addr, timeout)
		}))
	conn, err := grpc.Dial(addr, opts...)
	if err != nil {
		return nil, err
	}

	client := api.NewControlClient(conn)
	return client, nil
}
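// Usage sketch (illustrative, not from the source): a hypothetical cobra command
// that registers the "socket" flag read by Dial above and lists nodes through the
// resulting ControlClient. The command name, default socket path, and printed
// field are assumptions for illustration only.
var listNodesCmd = &cobra.Command{
	Use: "ls",
	RunE: func(cmd *cobra.Command, args []string) error {
		client, err := Dial(cmd)
		if err != nil {
			return err
		}
		r, err := client.ListNodes(context.Background(), &api.ListNodesRequest{})
		if err != nil {
			return err
		}
		for _, n := range r.Nodes {
			fmt.Println(n.ID)
		}
		return nil
	},
}

func init() {
	// Dial reads this flag via cmd.Flags().GetString("socket"); the default path
	// here is a placeholder.
	listNodesCmd.Flags().String("socket", "/var/run/swarmd/swarmd.sock", "path to the manager control socket")
}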
func (c *Cluster) startNewNode(forceNewCluster bool, listenAddr, joinAddr, secret, cahash string, ismanager bool) (*swarmagent.Node, context.Context, error) {
	if err := c.checkCompatibility(); err != nil {
		return nil, nil, err
	}
	c.node = nil
	c.cancelDelay = nil
	node, err := swarmagent.NewNode(&swarmagent.NodeConfig{
		Hostname:         c.config.Name,
		ForceNewCluster:  forceNewCluster,
		ListenControlAPI: filepath.Join(c.root, controlSocket),
		ListenRemoteAPI:  listenAddr,
		JoinAddr:         joinAddr,
		StateDir:         c.root,
		CAHash:           cahash,
		Secret:           secret,
		Executor:         container.NewExecutor(c.config.Backend),
		HeartbeatTick:    1,
		ElectionTick:     3,
		IsManager:        ismanager,
	})
	if err != nil {
		return nil, nil, err
	}
	ctx, cancel := context.WithCancel(context.Background())
	if err := node.Start(ctx); err != nil {
		return nil, nil, err
	}

	c.node = node
	c.listenAddr = listenAddr
	c.saveState()
	c.config.Backend.SetClusterProvider(c)
	go func() {
		err := node.Err(ctx)
		if err != nil {
			logrus.Errorf("cluster exited with error: %v", err)
		}
		c.Lock()
		c.conn = nil
		c.client = nil
		c.node = nil
		c.ready = false
		c.err = err
		c.Unlock()
		cancel()
	}()

	go func() {
		select {
		case <-node.Ready(context.Background()):
			c.Lock()
			c.reconnectDelay = initialReconnectDelay
			c.Unlock()
		case <-ctx.Done():
		}
		if ctx.Err() == nil {
			c.Lock()
			c.ready = true
			c.err = nil
			c.Unlock()
		}
		c.configEvent <- struct{}{}
	}()

	go func() {
		for conn := range node.ListenControlSocket(ctx) {
			c.Lock()
			// Only swap the client when the connection actually changed: drop it
			// when the new connection is nil, otherwise build a client for the
			// new connection.
			if c.conn != conn {
				if conn == nil {
					c.client = nil
				} else {
					c.client = swarmapi.NewControlClient(conn)
				}
			}
			c.conn = conn
			c.Unlock()
			c.configEvent <- struct{}{}
		}
	}()

	return node, ctx, nil
}
func (c *Cluster) startNewNode(forceNewCluster bool, localAddr, remoteAddr, listenAddr, advertiseAddr, joinAddr, joinToken string) (*node, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := localAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(listenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if remoteAddr == "" {
				// Should never happen except using swarms created by
				// old versions that didn't save remoteAddr.
				remoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", remoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	c.node = nil
	c.cancelDelay = nil
	c.stop = false
	n, err := swarmagent.NewNode(&swarmagent.NodeConfig{
		Hostname:           c.config.Name,
		ForceNewCluster:    forceNewCluster,
		ListenControlAPI:   filepath.Join(c.root, controlSocket),
		ListenRemoteAPI:    listenAddr,
		AdvertiseRemoteAPI: advertiseAddr,
		JoinAddr:           joinAddr,
		StateDir:           c.root,
		JoinToken:          joinToken,
		Executor:           container.NewExecutor(c.config.Backend),
		HeartbeatTick:      1,
		ElectionTick:       3,
	})
	if err != nil {
		return nil, err
	}
	ctx := context.Background()
	if err := n.Start(ctx); err != nil {
		return nil, err
	}
	node := &node{
		Node:           n,
		done:           make(chan struct{}),
		reconnectDelay: initialReconnectDelay,
	}
	c.node = node
	c.localAddr = localAddr
	c.actualLocalAddr = actualLocalAddr // not saved
	c.remoteAddr = remoteAddr
	c.listenAddr = listenAddr
	c.advertiseAddr = advertiseAddr
	c.saveState()
	c.config.Backend.SetClusterProvider(c)
	go func() {
		err := n.Err(ctx)
		if err != nil {
			logrus.Errorf("cluster exited with error: %v", err)
		}
		c.Lock()
		c.node = nil
		c.err = err
		c.Unlock()
		close(node.done)
	}()

	go func() {
		select {
		case <-n.Ready():
			c.Lock()
			node.ready = true
			c.err = nil
			c.Unlock()
		case <-ctx.Done():
		}
		c.configEvent <- struct{}{}
	}()

	go func() {
		for conn := range n.ListenControlSocket(ctx) {
			c.Lock()
			if node.conn != conn {
				if conn == nil {
					node.client = nil
				} else {
					node.client = swarmapi.NewControlClient(conn)
				}
			}
			node.conn = conn
			c.Unlock()
			c.configEvent <- struct{}{}
		}
	}()

	return node, nil
}
func TestManager(t *testing.T) {
	ctx := context.TODO()
	store := store.NewMemoryStore(nil)
	assert.NotNil(t, store)

	temp, err := ioutil.TempFile("", "test-socket")
	assert.NoError(t, err)
	assert.NoError(t, temp.Close())
	assert.NoError(t, os.Remove(temp.Name()))
	defer os.RemoveAll(temp.Name())

	lunix, err := net.Listen("unix", temp.Name())
	assert.NoError(t, err)
	ltcp, err := net.Listen("tcp", "127.0.0.1:0")
	assert.NoError(t, err)

	stateDir, err := ioutil.TempDir("", "test-raft")
	assert.NoError(t, err)
	defer os.RemoveAll(stateDir)

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	agentSecurityConfig, err := tc.NewNodeConfig(ca.AgentRole)
	assert.NoError(t, err)
	agentDiffOrgSecurityConfig, err := tc.NewNodeConfigOrg(ca.AgentRole, "another-org")
	assert.NoError(t, err)
	managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	m, err := manager.New(&manager.Config{
		ProtoListener:  map[string]net.Listener{"unix": lunix, "tcp": ltcp},
		StateDir:       stateDir,
		SecurityConfig: managerSecurityConfig,
	})
	assert.NoError(t, err)
	assert.NotNil(t, m)

	done := make(chan error)
	defer close(done)
	go func() {
		done <- m.Run(ctx)
	}()

	opts := []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds),
	}

	conn, err := grpc.Dial(ltcp.Addr().String(), opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, conn.Close())
	}()

	// We have to send a dummy request to verify that the connection is actually up.
	client := api.NewDispatcherClient(conn)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.Equal(t, dispatcher.ErrNodeNotRegistered.Error(), grpc.ErrorDesc(err))

	// Try to have a client in a different org access this manager
	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(agentDiffOrgSecurityConfig.ClientTLSCreds),
	}

	conn2, err := grpc.Dial(ltcp.Addr().String(), opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, conn2.Close())
	}()

	// We have to send a dummy request to verify that the connection is actually up.
	client = api.NewDispatcherClient(conn2)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.Contains(t, grpc.ErrorDesc(err), "Permission denied: unauthorized peer role: rpc error: code = 7 desc = Permission denied: remote certificate not part of organization")

	// Verify that requests to the various GRPC services running on TCP
	// are rejected if they don't have certs.
	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})),
	}

	noCertConn, err := grpc.Dial(ltcp.Addr().String(), opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, noCertConn.Close())
	}()

	client = api.NewDispatcherClient(noCertConn)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	controlClient := api.NewControlClient(noCertConn)
	_, err = controlClient.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	raftClient := api.NewRaftMembershipClient(noCertConn)
	_, err = raftClient.Join(context.Background(), &api.JoinRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	m.Stop(ctx)

	// After stopping we MAY receive an error from ListenAndServe if all this
	// happened before WaitForLeader completed, so don't check the error.
	<-done
}
func (c *Cluster) startNewNode(forceNewCluster bool, listenAddr, joinAddr, secret, cahash string, ismanager bool) (*node, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}
	c.node = nil
	c.cancelDelay = nil
	c.stop = false
	n, err := swarmagent.NewNode(&swarmagent.NodeConfig{
		Hostname:         c.config.Name,
		ForceNewCluster:  forceNewCluster,
		ListenControlAPI: filepath.Join(c.root, controlSocket),
		ListenRemoteAPI:  listenAddr,
		JoinAddr:         joinAddr,
		StateDir:         c.root,
		CAHash:           cahash,
		Secret:           secret,
		Executor:         container.NewExecutor(c.config.Backend),
		HeartbeatTick:    1,
		ElectionTick:     3,
		IsManager:        ismanager,
	})
	if err != nil {
		return nil, err
	}
	ctx := context.Background()
	if err := n.Start(ctx); err != nil {
		return nil, err
	}
	node := &node{
		Node:           n,
		done:           make(chan struct{}),
		reconnectDelay: initialReconnectDelay,
	}
	c.node = node
	c.listenAddr = listenAddr
	c.saveState()
	c.config.Backend.SetClusterProvider(c)
	go func() {
		err := n.Err(ctx)
		if err != nil {
			logrus.Errorf("cluster exited with error: %v", err)
		}
		c.Lock()
		c.node = nil
		c.err = err
		c.Unlock()
		close(node.done)
	}()

	go func() {
		select {
		case <-n.Ready():
			c.Lock()
			node.ready = true
			c.err = nil
			c.Unlock()
		case <-ctx.Done():
		}
		c.configEvent <- struct{}{}
	}()

	go func() {
		for conn := range n.ListenControlSocket(ctx) {
			c.Lock()
			if node.conn != conn {
				if conn == nil {
					node.client = nil
				} else {
					node.client = swarmapi.NewControlClient(conn)
				}
			}
			node.conn = conn
			c.Unlock()
			c.configEvent <- struct{}{}
		}
	}()

	return node, nil
}
func TestManager(t *testing.T) {
	ctx := context.Background()

	temp, err := ioutil.TempFile("", "test-socket")
	assert.NoError(t, err)
	assert.NoError(t, temp.Close())
	assert.NoError(t, os.Remove(temp.Name()))
	defer os.RemoveAll(temp.Name())

	stateDir, err := ioutil.TempDir("", "test-raft")
	assert.NoError(t, err)
	defer os.RemoveAll(stateDir)

	tc := testutils.NewTestCA(t, func(p ca.CertPaths) *ca.KeyReadWriter {
		return ca.NewKeyReadWriter(p, []byte("kek"), nil)
	})
	defer tc.Stop()

	agentSecurityConfig, err := tc.NewNodeConfig(ca.WorkerRole)
	assert.NoError(t, err)
	agentDiffOrgSecurityConfig, err := tc.NewNodeConfigOrg(ca.WorkerRole, "another-org")
	assert.NoError(t, err)
	managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole)
	assert.NoError(t, err)

	m, err := New(&Config{
		RemoteAPI:        RemoteAddrs{ListenAddr: "127.0.0.1:0"},
		ControlAPI:       temp.Name(),
		StateDir:         stateDir,
		SecurityConfig:   managerSecurityConfig,
		AutoLockManagers: true,
		UnlockKey:        []byte("kek"),
	})
	assert.NoError(t, err)
	assert.NotNil(t, m)

	tcpAddr := m.Addr()

	done := make(chan error)
	defer close(done)
	go func() {
		done <- m.Run(ctx)
	}()

	opts := []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds),
	}

	conn, err := grpc.Dial(tcpAddr, opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, conn.Close())
	}()

	// We have to send a dummy request to verify that the connection is actually up.
	client := api.NewDispatcherClient(conn)
	_, err = client.Heartbeat(ctx, &api.HeartbeatRequest{})
	assert.Equal(t, dispatcher.ErrNodeNotRegistered.Error(), grpc.ErrorDesc(err))
	_, err = client.Session(ctx, &api.SessionRequest{})
	assert.NoError(t, err)

	// Try to have a client in a different org access this manager
	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(agentDiffOrgSecurityConfig.ClientTLSCreds),
	}

	conn2, err := grpc.Dial(tcpAddr, opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, conn2.Close())
	}()

	client = api.NewDispatcherClient(conn2)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.Contains(t, grpc.ErrorDesc(err), "Permission denied: unauthorized peer role: rpc error: code = 7 desc = Permission denied: remote certificate not part of organization")

	// Verify that requests to the various GRPC services running on TCP
	// are rejected if they don't have certs.
	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})),
	}

	noCertConn, err := grpc.Dial(tcpAddr, opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, noCertConn.Close())
	}()

	client = api.NewDispatcherClient(noCertConn)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	controlClient := api.NewControlClient(noCertConn)
	_, err = controlClient.ListNodes(context.Background(), &api.ListNodesRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	raftClient := api.NewRaftMembershipClient(noCertConn)
	_, err = raftClient.Join(context.Background(), &api.JoinRequest{})
	assert.EqualError(t, err, "rpc error: code = 7 desc = Permission denied: unauthorized peer role: rpc error: code = 7 desc = no client certificates in request")

	opts = []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(managerSecurityConfig.ClientTLSCreds),
	}

	controlConn, err := grpc.Dial(tcpAddr, opts...)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, controlConn.Close())
	}()

	// check that the kek is added to the config
	var cluster api.Cluster
	m.raftNode.MemoryStore().View(func(tx store.ReadTx) {
		clusters, err := store.FindClusters(tx, store.All)
		require.NoError(t, err)
		require.Len(t, clusters, 1)
		cluster = *clusters[0]
	})
	require.NotNil(t, cluster)
	require.Len(t, cluster.UnlockKeys, 1)
	require.Equal(t, &api.EncryptionKey{
		Subsystem: ca.ManagerRole,
		Key:       []byte("kek"),
	}, cluster.UnlockKeys[0])

	// Test removal of the agent node
	agentID := agentSecurityConfig.ClientTLSCreds.NodeID()
	assert.NoError(t, m.raftNode.MemoryStore().Update(func(tx store.Tx) error {
		return store.CreateNode(tx,
			&api.Node{
				ID: agentID,
				Certificate: api.Certificate{
					Role: api.NodeRoleWorker,
					CN:   agentID,
				},
			},
		)
	}))
	controlClient = api.NewControlClient(controlConn)
	_, err = controlClient.RemoveNode(context.Background(),
		&api.RemoveNodeRequest{
			NodeID: agentID,
			Force:  true,
		},
	)
	assert.NoError(t, err)

	client = api.NewDispatcherClient(conn)
	_, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{})
	assert.Contains(t, grpc.ErrorDesc(err), "removed from swarm")

	m.Stop(ctx)

	// After stopping we MAY receive an error from ListenAndServe if all this
	// happened before WaitForLeader completed, so don't check the error.
	<-done
}
// Tests locking and unlocking the manager and key rotations
func TestManagerLockUnlock(t *testing.T) {
	ctx := context.Background()

	temp, err := ioutil.TempFile("", "test-manager-lock")
	require.NoError(t, err)
	require.NoError(t, temp.Close())
	require.NoError(t, os.Remove(temp.Name()))
	defer os.RemoveAll(temp.Name())

	stateDir, err := ioutil.TempDir("", "test-raft")
	require.NoError(t, err)
	defer os.RemoveAll(stateDir)

	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole)
	require.NoError(t, err)

	_, _, err = managerSecurityConfig.KeyReader().Read()
	require.NoError(t, err)

	m, err := New(&Config{
		RemoteAPI:      RemoteAddrs{ListenAddr: "127.0.0.1:0"},
		ControlAPI:     temp.Name(),
		StateDir:       stateDir,
		SecurityConfig: managerSecurityConfig,
		// start off without any encryption
	})
	require.NoError(t, err)
	require.NotNil(t, m)

	done := make(chan error)
	defer close(done)
	go func() {
		done <- m.Run(ctx)
	}()

	opts := []grpc.DialOption{
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(managerSecurityConfig.ClientTLSCreds),
	}

	conn, err := grpc.Dial(m.Addr(), opts...)
	require.NoError(t, err)
	defer func() {
		require.NoError(t, conn.Close())
	}()

	// check that there is no kek currently - we are using the API because this
	// also lets us wait until the manager is up and listening
	var cluster *api.Cluster
	client := api.NewControlClient(conn)
	require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error {
		resp, err := client.ListClusters(ctx, &api.ListClustersRequest{})
		if err != nil {
			return err
		}
		if len(resp.Clusters) == 0 {
			return fmt.Errorf("no clusters yet")
		}
		cluster = resp.Clusters[0]
		return nil
	}, 1*time.Second))

	require.Nil(t, cluster.UnlockKeys)

	// tls key is unencrypted, but there is a DEK
	key, err := ioutil.ReadFile(tc.Paths.Node.Key)
	require.NoError(t, err)
	keyBlock, _ := pem.Decode(key)
	require.NotNil(t, keyBlock)
	require.False(t, x509.IsEncryptedPEMBlock(keyBlock))
	require.Len(t, keyBlock.Headers, 2)
	currentDEK, err := decodePEMHeaderValue(keyBlock.Headers[pemHeaderRaftDEK], nil)
	require.NoError(t, err)
	require.NotEmpty(t, currentDEK)

	// update the lock key - this may fail due to update out of sequence errors, so try again
	for {
		getResp, err := client.GetCluster(ctx, &api.GetClusterRequest{ClusterID: cluster.ID})
		require.NoError(t, err)
		cluster = getResp.Cluster

		spec := cluster.Spec.Copy()
		spec.EncryptionConfig.AutoLockManagers = true
		updateResp, err := client.UpdateCluster(ctx, &api.UpdateClusterRequest{
			ClusterID:      cluster.ID,
			ClusterVersion: &cluster.Meta.Version,
			Spec:           spec,
		})
		if grpc.ErrorDesc(err) == "update out of sequence" {
			continue
		}
		// if there is any other type of error, this should fail
		if err == nil {
			cluster = updateResp.Cluster
		}
		break
	}
	require.NoError(t, err)

	caConn := api.NewCAClient(conn)
	unlockKeyResp, err := caConn.GetUnlockKey(ctx, &api.GetUnlockKeyRequest{})
	require.NoError(t, err)

	// this should update the TLS key, rotate the DEK, and finish snapshotting
	var updatedKey []byte
	require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error {
		updatedKey, err = ioutil.ReadFile(tc.Paths.Node.Key)
		require.NoError(t, err) // this should never error due to atomic writes

		if bytes.Equal(key, updatedKey) {
			return fmt.Errorf("TLS key should have been re-encrypted at least")
		}

		keyBlock, _ = pem.Decode(updatedKey)
		require.NotNil(t, keyBlock) // this should never error due to atomic writes

		if !x509.IsEncryptedPEMBlock(keyBlock) {
			return fmt.Errorf("Key not encrypted")
		}

		// we don't check that the TLS key has been rotated, because that may take
		// a little bit, and is best effort only
		currentDEKString, ok := keyBlock.Headers[pemHeaderRaftDEK]
		require.True(t, ok) // there should never NOT be a current header
		nowCurrentDEK, err := decodePEMHeaderValue(currentDEKString, unlockKeyResp.UnlockKey)
		require.NoError(t, err) // it should always be encrypted
		if bytes.Equal(currentDEK, nowCurrentDEK) {
			return fmt.Errorf("snapshot has not been finished yet")
		}
		currentDEK = nowCurrentDEK
		return nil
	}, 1*time.Second))

	_, ok := keyBlock.Headers[pemHeaderRaftPendingDEK]
	require.False(t, ok) // once the snapshot is done, the pending DEK should be gone

	_, ok = keyBlock.Headers[pemHeaderRaftDEKNeedsRotation]
	require.False(t, ok)

	// verify that the snapshot is readable with the new DEK
	encrypter, decrypter := encryption.Defaults(currentDEK)
	// we can't use the raftLogger, because the WALs are still locked while the raft node is up. And once we remove
	// the manager, they'll be deleted.
	snapshot, err := storage.NewSnapFactory(encrypter, decrypter).New(filepath.Join(stateDir, "raft", "snap-v3-encrypted")).Load()
	require.NoError(t, err)
	require.NotNil(t, snapshot)

	// update the lock key to nil
	for i := 0; i < 3; i++ {
		getResp, err := client.GetCluster(ctx, &api.GetClusterRequest{ClusterID: cluster.ID})
		require.NoError(t, err)
		cluster = getResp.Cluster

		spec := cluster.Spec.Copy()
		spec.EncryptionConfig.AutoLockManagers = false
		_, err = client.UpdateCluster(ctx, &api.UpdateClusterRequest{
			ClusterID:      cluster.ID,
			ClusterVersion: &cluster.Meta.Version,
			Spec:           spec,
		})
		if grpc.ErrorDesc(err) == "update out of sequence" {
			continue
		}
		require.NoError(t, err)
	}

	// this should update the TLS key
	var unlockedKey []byte
	require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error {
		unlockedKey, err = ioutil.ReadFile(tc.Paths.Node.Key)
		if err != nil {
			return err
		}

		if bytes.Equal(unlockedKey, updatedKey) {
			return fmt.Errorf("TLS key should have been rotated")
		}
		return nil
	}, 1*time.Second))

	// the new key should not be encrypted, and the DEK should also be unencrypted
	// but not rotated
	keyBlock, _ = pem.Decode(unlockedKey)
	require.NotNil(t, keyBlock)
	require.False(t, x509.IsEncryptedPEMBlock(keyBlock))

	unencryptedDEK, err := decodePEMHeaderValue(keyBlock.Headers[pemHeaderRaftDEK], nil)
	require.NoError(t, err)
	require.NotNil(t, unencryptedDEK)
	require.Equal(t, currentDEK, unencryptedDEK)

	m.Stop(ctx)

	// After stopping we MAY receive an error from ListenAndServe if all this
	// happened before WaitForLeader completed, so don't check the error.
	<-done
}