func TestIssueNodeCertificateAgentFromDifferentOrgRenewal(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node)
	assert.NoError(t, err)

	// Since we're using a client that has a different Organization, this request will be treated
	// as a new certificate request, not allowing auto-renewal
	role := api.NodeRoleManager
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	issueResponse, err := tc.NodeCAClients[3].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)
	assert.NotNil(t, issueResponse.NodeID)
	assert.Equal(t, api.NodeMembershipPending, issueResponse.NodeMembership)

	tc.MemoryStore.View(func(readTx store.ReadTx) {
		storeNodes, err := store.FindNodes(readTx, store.All)
		assert.NoError(t, err)
		assert.NotEmpty(t, storeNodes)
		found := false
		for _, node := range storeNodes {
			if node.ID == issueResponse.NodeID {
				found = true
				assert.Equal(t, api.IssuanceStatePending, node.Certificate.Status.State)
			}
		}
		assert.True(t, found)
	})
}

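// TestGetRemoteSignedCertificateWithPending exercises the client-side request
// path while the node's certificate is still pending: the test flips the
// issuance state to issued directly in the store and expects
// GetRemoteSignedCertificate to return without error.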
func TestGetRemoteSignedCertificateWithPending(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	// Create a new CSR to be signed
	csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node)
	assert.NoError(t, err)

	updates, cancel := state.Watch(tc.MemoryStore.WatchQueue(), state.EventCreateNode{})
	defer cancel()

	completed := make(chan error)
	go func() {
		_, err := ca.GetRemoteSignedCertificate(context.Background(), csr, ca.ManagerRole, "", tc.RootCA.Pool, tc.Picker, nil, nil)
		completed <- err
	}()

	event := <-updates
	node := event.(state.EventCreateNode).Node.Copy()

	// Directly update the status of the store
	err = tc.MemoryStore.Update(func(tx store.Tx) error {
		node.Certificate.Status.State = api.IssuanceStateIssued
		return store.UpdateNode(tx, node)
	})
	assert.NoError(t, err)

	// Make sure GetRemoteSignedCertificate didn't return an error
	assert.NoError(t, <-completed)
}

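// TestGetRootCACertificate verifies that the CA server returns a non-empty
// root CA certificate over the API.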
func TestGetRootCACertificate(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	resp, err := tc.CAClients[0].GetRootCACertificate(context.Background(), &api.GetRootCACertificateRequest{})
	assert.NoError(t, err)
	assert.NotEmpty(t, resp.Certificate)
}

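// TestRestartRootCA checks that stopping and restarting the CA server does
// not change the root CA certificate returned to clients.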
func TestRestartRootCA(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	resp1, err := tc.CAClients[0].GetRootCACertificate(context.Background(), &api.GetRootCACertificateRequest{})
	assert.NoError(t, err)
	assert.NotEmpty(t, resp1.Certificate)

	tc.CAServer.Stop()
	go tc.CAServer.Run(context.Background())

	resp2, err := tc.CAClients[0].GetRootCACertificate(context.Background(), &api.GetRootCACertificateRequest{})
	assert.NoError(t, err)
	assert.Equal(t, resp1.Certificate, resp2.Certificate)
}

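// TestIssueNodeCertificateWithInvalidCSR submits a garbage CSR and expects
// the request to be accepted into the store but to end up in the failed
// state, with a decode error and no signed certificate attached.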
func TestIssueNodeCertificateWithInvalidCSR(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	role := api.NodeRoleWorker
	issueRequest := &api.IssueNodeCertificateRequest{CSR: []byte("random garbage"), Role: role}
	issueResponse, err := tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)
	assert.NotNil(t, issueResponse.NodeID)
	assert.Equal(t, api.NodeMembershipAccepted, issueResponse.NodeMembership)

	statusRequest := &api.NodeCertificateStatusRequest{NodeID: issueResponse.NodeID}
	statusResponse, err := tc.NodeCAClients[0].NodeCertificateStatus(context.Background(), statusRequest)
	assert.NoError(t, err)
	assert.Equal(t, api.IssuanceStateFailed, statusResponse.Status.State)
	assert.Contains(t, statusResponse.Status.Err, "CSR Decode failed")
	assert.Nil(t, statusResponse.Certificate.Certificate)
}

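// TestIssueNodeCertificateManagerRenewal issues a certificate request from a
// client that already holds a manager certificate in the same organization,
// so the request is auto-accepted and the certificate is issued immediately
// with the manager role.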
func TestIssueNodeCertificateManagerRenewal(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node)
	assert.NoError(t, err)
	assert.NotNil(t, csr)

	role := api.NodeRoleManager
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	issueResponse, err := tc.NodeCAClients[2].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)
	assert.NotNil(t, issueResponse.NodeID)
	assert.Equal(t, api.NodeMembershipAccepted, issueResponse.NodeMembership)

	statusRequest := &api.NodeCertificateStatusRequest{NodeID: issueResponse.NodeID}
	statusResponse, err := tc.NodeCAClients[2].NodeCertificateStatus(context.Background(), statusRequest)
	assert.NoError(t, err)
	assert.Equal(t, api.IssuanceStateIssued, statusResponse.Status.State)
	assert.NotNil(t, statusResponse.Certificate.Certificate)
	assert.Equal(t, role, statusResponse.Certificate.Role)
}

// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
//
// TODO(aluzzardi): /!\ This function is *way* too complex. /!\
// It needs to be split into smaller manageable functions.
func (m *Manager) Run(parent context.Context) error {
	ctx, ctxCancel := context.WithCancel(parent)
	defer ctxCancel()

	// Harakiri.
	go func() {
		select {
		case <-ctx.Done():
		case <-m.stopped:
			ctxCancel()
		}
	}()

	leadershipCh, cancel := m.RaftNode.SubscribeLeadership()
	defer cancel()

	go func() {
		for leadershipEvent := range leadershipCh {
			// read out and discard all of the messages when we've stopped
			// don't acquire the mutex yet. if stopped is closed, we don't need to;
			// this stops this loop from starving Run()'s attempt to Lock
			select {
			case <-m.stopped:
				continue
			default:
				// do nothing, we're not stopped
			}
			// we're not stopping so NOW acquire the mutex
			m.mu.Lock()
			newState := leadershipEvent.(raft.LeadershipState)

			if newState == raft.IsLeader {
				s := m.RaftNode.MemoryStore()

				rootCA := m.config.SecurityConfig.RootCA()
				nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID()

				raftCfg := raft.DefaultRaftConfig()
				raftCfg.ElectionTick = uint32(m.RaftNode.Config.ElectionTick)
				raftCfg.HeartbeatTick = uint32(m.RaftNode.Config.HeartbeatTick)

				clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization()

				s.Update(func(tx store.Tx) error {
					// Add a default cluster object to the
					// store. Don't check the error because
					// we expect this to fail unless this
					// is a brand new cluster.
					store.CreateCluster(tx, &api.Cluster{
						ID: clusterID,
						Spec: api.ClusterSpec{
							Annotations: api.Annotations{
								Name: store.DefaultClusterName,
							},
							AcceptancePolicy: ca.DefaultAcceptancePolicy(),
							Orchestration: api.OrchestrationConfig{
								TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit,
							},
							Dispatcher: api.DispatcherConfig{
								HeartbeatPeriod: uint64(dispatcher.DefaultHeartBeatPeriod),
							},
							Raft:     raftCfg,
							CAConfig: ca.DefaultCAConfig(),
						},
						RootCA: api.RootCA{
							CAKey:      rootCA.Key,
							CACert:     rootCA.Cert,
							CACertHash: rootCA.Digest.String(),
						},
					})
					// Add Node entry for ourself, if one
					// doesn't exist already.
					store.CreateNode(tx, &api.Node{
						ID: nodeID,
						Certificate: api.Certificate{
							CN:   nodeID,
							Role: api.NodeRoleManager,
							Status: api.IssuanceStatus{
								State: api.IssuanceStateIssued,
							},
						},
						Spec: api.NodeSpec{
							Role:       api.NodeRoleManager,
							Membership: api.NodeMembershipAccepted,
						},
					})
					return nil
				})

				// Attempt to rotate the key-encrypting-key of the root CA key-material
				err := m.rotateRootCAKEK(ctx, clusterID)
				if err != nil {
					log.G(ctx).WithError(err).Error("root key-encrypting-key rotation failed")
				}

				m.replicatedOrchestrator = orchestrator.New(s)
				m.globalOrchestrator = orchestrator.NewGlobalOrchestrator(s)
				m.taskReaper = orchestrator.NewTaskReaper(s)
				m.scheduler = scheduler.New(s)
				m.keyManager = keymanager.New(m.RaftNode.MemoryStore(), keymanager.DefaultConfig())

				// TODO(stevvooe): Allocate a context that can be used to
				// shutdown underlying manager processes when leadership is
				// lost.

				m.allocator, err = allocator.New(s)
				if err != nil {
					log.G(ctx).WithError(err).Error("failed to create allocator")
					// TODO(stevvooe): It doesn't seem correct here to fail
					// creating the allocator but then use it anyways.
				}

				go func(keyManager *keymanager.KeyManager) {
					if err := keyManager.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("keymanager failed with an error")
					}
				}(m.keyManager)

				go func(d *dispatcher.Dispatcher) {
					if err := d.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("Dispatcher exited with an error")
					}
				}(m.Dispatcher)

				go func(server *ca.Server) {
					if err := server.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("CA signer exited with an error")
					}
				}(m.caserver)

				// Start all sub-components in separate goroutines.
				// TODO(aluzzardi): This should have some kind of error handling so that
				// any component that goes down would bring the entire manager down.
				if m.allocator != nil {
					go func(allocator *allocator.Allocator) {
						if err := allocator.Run(ctx); err != nil {
							log.G(ctx).WithError(err).Error("allocator exited with an error")
						}
					}(m.allocator)
				}

				go func(scheduler *scheduler.Scheduler) {
					if err := scheduler.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("scheduler exited with an error")
					}
				}(m.scheduler)

				go func(taskReaper *orchestrator.TaskReaper) {
					taskReaper.Run()
				}(m.taskReaper)

				go func(orchestrator *orchestrator.ReplicatedOrchestrator) {
					if err := orchestrator.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error")
					}
				}(m.replicatedOrchestrator)

				go func(globalOrchestrator *orchestrator.GlobalOrchestrator) {
					if err := globalOrchestrator.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("global orchestrator exited with an error")
					}
				}(m.globalOrchestrator)
			} else if newState == raft.IsFollower {
				m.Dispatcher.Stop()
				m.caserver.Stop()

				if m.allocator != nil {
					m.allocator.Stop()
					m.allocator = nil
				}

				m.replicatedOrchestrator.Stop()
				m.replicatedOrchestrator = nil

				m.globalOrchestrator.Stop()
				m.globalOrchestrator = nil

				m.taskReaper.Stop()
				m.taskReaper = nil

				m.scheduler.Stop()
				m.scheduler = nil

				m.keyManager.Stop()
				m.keyManager = nil
			}
			m.mu.Unlock()
		}
	}()

	go func() {
		err := m.RaftNode.Run(ctx)
		if err != nil {
			log.G(ctx).Error(err)
			m.Stop(ctx)
		}
	}()

	proxyOpts := []grpc.DialOption{
		grpc.WithBackoffMaxDelay(2 * time.Second),
		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
	}

	cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...)
	authorize := func(ctx context.Context, roles []string) error {
		// Authorize the remote roles, ensure they can only be forwarded by managers
		_, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization())
		return err
	}

	baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode)

	authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
	authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize)
	authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
	authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
	authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize)
	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize)

	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)

	// localProxyControlAPI is a special kind of proxy. It is only wired up
	// to receive requests from a trusted local socket, and these requests
	// don't use TLS, therefore the requests it handles locally should
	// bypass authorization. When it proxies, it sends them as requests from
	// this manager rather than forwarded requests (it has no TLS
	// information to put in the metadata map).
	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, cs, m.RaftNode, forwardAsOwnRequest)

	// Everything registered on m.server should be an authenticated
	// wrapper, or a proxy wrapping an authenticated wrapper!
	api.RegisterCAServer(m.server, proxyCAAPI)
	api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
	api.RegisterRaftServer(m.server, authenticatedRaftAPI)
	api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
	api.RegisterControlServer(m.localserver, localProxyControlAPI)
	api.RegisterControlServer(m.server, authenticatedControlAPI)
	api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

	errServe := make(chan error, 2)
	for proto, l := range m.listeners {
		go func(proto string, lis net.Listener) {
			ctx := log.WithLogger(ctx, log.G(ctx).WithFields(
				logrus.Fields{
					"proto": lis.Addr().Network(),
					"addr":  lis.Addr().String()}))
			if proto == "unix" {
				log.G(ctx).Info("Listening for local connections")
				errServe <- m.localserver.Serve(lis)
			} else {
				log.G(ctx).Info("Listening for connections")
				errServe <- m.server.Serve(lis)
			}
		}(proto, l)
	}

	if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil {
		m.server.Stop()
		return err
	}

	c, err := raft.WaitForCluster(ctx, m.RaftNode)
	if err != nil {
		m.server.Stop()
		return err
	}
	raftConfig := c.Spec.Raft

	if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick {
		log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick)
	}
	if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick {
		log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
	}

	// wait for an error in serving.
	err = <-errServe
	select {
	// check to see if stopped was posted to. if so, we're in the process of
	// stopping, or done and that's why we got the error. if stopping is
	// deliberate, stopped will ALWAYS be closed before the error is triggered,
	// so this path will ALWAYS be taken if the stop was deliberate
	case <-m.stopped:
		// shutdown was requested, do not return an error
		// but first, we wait to acquire a mutex to guarantee that stopping is
		// finished. as long as we acquire the mutex BEFORE we return, we know
		// that the stop has completed.
		m.mu.Lock()
		m.mu.Unlock()
		return nil
	// otherwise, we'll get something from errServe, which indicates that an
	// error in serving has actually occurred and this isn't a planned shutdown
	default:
		return err
	}
}

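// TestNodeCertificateAccept creates a node with a pending certificate
// directly in the store, waits for the CA server to issue it, and then
// repeats the flow end-to-end: a worker request is auto-accepted while a
// manager request is left pending.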
func TestNodeCertificateAccept(t *testing.T) {
	tc := testutils.NewTestCA(t, ca.DefaultAcceptancePolicy())
	defer tc.Stop()

	csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node)
	assert.NoError(t, err)

	testNode := &api.Node{
		ID: "nodeID",
		Spec: api.NodeSpec{
			Membership: api.NodeMembershipAccepted,
			Role:       api.NodeRoleWorker,
		},
		Certificate: api.Certificate{
			CN:     "nodeID",
			CSR:    csr,
			Status: api.IssuanceStatus{State: api.IssuanceStatePending},
		},
	}

	err = tc.MemoryStore.Update(func(tx store.Tx) error {
		assert.NoError(t, store.CreateNode(tx, testNode))
		return nil
	})
	assert.NoError(t, err)

	statusRequest := &api.NodeCertificateStatusRequest{NodeID: "nodeID"}
	resp, err := tc.NodeCAClients[1].NodeCertificateStatus(context.Background(), statusRequest)
	assert.NoError(t, err)
	assert.NotEmpty(t, resp.Certificate)
	assert.NotEmpty(t, resp.Status)
	assert.NotNil(t, resp.Certificate.Certificate)
	assert.Equal(t, api.IssuanceStateIssued, resp.Status.State)

	tc.MemoryStore.View(func(readTx store.ReadTx) {
		storeNodes, err := store.FindNodes(readTx, store.All)
		assert.NoError(t, err)
		assert.NotEmpty(t, storeNodes)
		var found bool
		for _, node := range storeNodes {
			if node.ID == "nodeID" {
				assert.Equal(t, api.IssuanceStateIssued, node.Certificate.Status.State)
				found = true
			}
		}
		assert.True(t, found)
	})

	// Try it one more time as a worker, this time end-to-end
	role := api.NodeRoleWorker
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	issueResponse, err := tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	require.NoError(t, err)
	assert.NotNil(t, issueResponse.NodeID)
	assert.Equal(t, api.NodeMembershipAccepted, issueResponse.NodeMembership)

	// And one more time end-to-end as a manager, which should be left pending
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	issueResponse, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	require.NoError(t, err)
	assert.NotNil(t, issueResponse.NodeID)
	assert.Equal(t, api.NodeMembershipPending, issueResponse.NodeMembership)
}