// Run starts all manager sub-systems and the gRPC server at the configured // address. // The call never returns unless an error occurs or `Stop()` is called. // // TODO(aluzzardi): /!\ This function is *way* too complex. /!\ // It needs to be split into smaller manageable functions. func (m *Manager) Run(parent context.Context) error { ctx, ctxCancel := context.WithCancel(parent) defer ctxCancel() // Harakiri. go func() { select { case <-ctx.Done(): case <-m.stopped: ctxCancel() } }() leadershipCh, cancel := m.RaftNode.SubscribeLeadership() defer cancel() go func() { for leadershipEvent := range leadershipCh { // read out and discard all of the messages when we've stopped // don't acquire the mutex yet. if stopped is closed, we don't need // this stops this loop from starving Run()'s attempt to Lock select { case <-m.stopped: continue default: // do nothing, we're not stopped } // we're not stopping so NOW acquire the mutex m.mu.Lock() newState := leadershipEvent.(raft.LeadershipState) if newState == raft.IsLeader { s := m.RaftNode.MemoryStore() rootCA := m.config.SecurityConfig.RootCA() nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID() raftCfg := raft.DefaultRaftConfig() raftCfg.ElectionTick = uint32(m.RaftNode.Config.ElectionTick) raftCfg.HeartbeatTick = uint32(m.RaftNode.Config.HeartbeatTick) clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization() initialCAConfig := ca.DefaultCAConfig() initialCAConfig.ExternalCAs = m.config.ExternalCAs s.Update(func(tx store.Tx) error { // Add a default cluster object to the // store. Don't check the error because // we expect this to fail unless this // is a brand new cluster. store.CreateCluster(tx, &api.Cluster{ ID: clusterID, Spec: api.ClusterSpec{ Annotations: api.Annotations{ Name: store.DefaultClusterName, }, Orchestration: api.OrchestrationConfig{ TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit, }, Dispatcher: api.DispatcherConfig{ HeartbeatPeriod: ptypes.DurationProto(dispatcher.DefaultHeartBeatPeriod), }, Raft: raftCfg, CAConfig: initialCAConfig, }, RootCA: api.RootCA{ CAKey: rootCA.Key, CACert: rootCA.Cert, CACertHash: rootCA.Digest.String(), JoinTokens: api.JoinTokens{ Worker: ca.GenerateJoinToken(rootCA), Manager: ca.GenerateJoinToken(rootCA), }, }, }) // Add Node entry for ourself, if one // doesn't exist already. store.CreateNode(tx, &api.Node{ ID: nodeID, Certificate: api.Certificate{ CN: nodeID, Role: api.NodeRoleManager, Status: api.IssuanceStatus{ State: api.IssuanceStateIssued, }, }, Spec: api.NodeSpec{ Role: api.NodeRoleManager, Membership: api.NodeMembershipAccepted, }, }) return nil }) // Attempt to rotate the key-encrypting-key of the root CA key-material err := m.rotateRootCAKEK(ctx, clusterID) if err != nil { log.G(ctx).WithError(err).Error("root key-encrypting-key rotation failed") } m.replicatedOrchestrator = orchestrator.NewReplicatedOrchestrator(s) m.globalOrchestrator = orchestrator.NewGlobalOrchestrator(s) m.taskReaper = orchestrator.NewTaskReaper(s) m.scheduler = scheduler.New(s) m.keyManager = keymanager.New(m.RaftNode.MemoryStore(), keymanager.DefaultConfig()) // TODO(stevvooe): Allocate a context that can be used to // shutdown underlying manager processes when leadership is // lost. m.allocator, err = allocator.New(s) if err != nil { log.G(ctx).WithError(err).Error("failed to create allocator") // TODO(stevvooe): It doesn't seem correct here to fail // creating the allocator but then use it anyway. } if m.keyManager != nil { go func(keyManager *keymanager.KeyManager) { if err := keyManager.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("keymanager failed with an error") } }(m.keyManager) } go func(d *dispatcher.Dispatcher) { if err := d.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("Dispatcher exited with an error") } }(m.Dispatcher) go func(server *ca.Server) { if err := server.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("CA signer exited with an error") } }(m.caserver) // Start all sub-components in separate goroutines. // TODO(aluzzardi): This should have some kind of error handling so that // any component that goes down would bring the entire manager down. if m.allocator != nil { go func(allocator *allocator.Allocator) { if err := allocator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("allocator exited with an error") } }(m.allocator) } go func(scheduler *scheduler.Scheduler) { if err := scheduler.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("scheduler exited with an error") } }(m.scheduler) go func(taskReaper *orchestrator.TaskReaper) { taskReaper.Run() }(m.taskReaper) go func(orchestrator *orchestrator.ReplicatedOrchestrator) { if err := orchestrator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error") } }(m.replicatedOrchestrator) go func(globalOrchestrator *orchestrator.GlobalOrchestrator) { if err := globalOrchestrator.Run(ctx); err != nil { log.G(ctx).WithError(err).Error("global orchestrator exited with an error") } }(m.globalOrchestrator) } else if newState == raft.IsFollower { m.Dispatcher.Stop() m.caserver.Stop() if m.allocator != nil { m.allocator.Stop() m.allocator = nil } m.replicatedOrchestrator.Stop() m.replicatedOrchestrator = nil m.globalOrchestrator.Stop() m.globalOrchestrator = nil m.taskReaper.Stop() m.taskReaper = nil m.scheduler.Stop() m.scheduler = nil if m.keyManager != nil { m.keyManager.Stop() m.keyManager = nil } } m.mu.Unlock() } }() proxyOpts := []grpc.DialOption{ grpc.WithTimeout(5 * time.Second), grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds), } cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...) m.connSelector = cs // We need special connSelector for controlapi because it provides automatic // leader tracking. // Other APIs are using connSelector which errors out on leader change, but // allows to react quickly to reelections. controlAPIProxyOpts := []grpc.DialOption{ grpc.WithBackoffMaxDelay(time.Second), grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds), } controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...) authorize := func(ctx context.Context, roles []string) error { // Authorize the remote roles, ensure they can only be forwarded by managers _, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization()) return err } baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA()) healthServer := health.NewHealthServer() authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize) authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize) authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize) authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize) authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize) authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize) authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize) proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) // localProxyControlAPI is a special kind of proxy. It is only wired up // to receive requests from a trusted local socket, and these requests // don't use TLS, therefore the requests it handles locally should // bypass authorization. When it proxies, it sends them as requests from // this manager rather than forwarded requests (it has no TLS // information to put in the metadata map). forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil } localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest) // Everything registered on m.server should be an authenticated // wrapper, or a proxy wrapping an authenticated wrapper! api.RegisterCAServer(m.server, proxyCAAPI) api.RegisterNodeCAServer(m.server, proxyNodeCAAPI) api.RegisterRaftServer(m.server, authenticatedRaftAPI) api.RegisterHealthServer(m.server, authenticatedHealthAPI) api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI) api.RegisterControlServer(m.localserver, localProxyControlAPI) api.RegisterControlServer(m.server, authenticatedControlAPI) api.RegisterDispatcherServer(m.server, proxyDispatcherAPI) errServe := make(chan error, 2) for proto, l := range m.listeners { go func(proto string, lis net.Listener) { ctx := log.WithLogger(ctx, log.G(ctx).WithFields( logrus.Fields{ "proto": lis.Addr().Network(), "addr": lis.Addr().String()})) if proto == "unix" { log.G(ctx).Info("Listening for local connections") // we need to disallow double closes because UnixListener.Close // can delete unix-socket file of newer listener. grpc calls // Close twice indeed: in Serve and in Stop. errServe <- m.localserver.Serve(&closeOnceListener{Listener: lis}) } else { log.G(ctx).Info("Listening for connections") errServe <- m.server.Serve(lis) } }(proto, l) } // Set the raft server as serving for the health server healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING) if err := m.RaftNode.JoinAndStart(); err != nil { for _, lis := range m.listeners { lis.Close() } return fmt.Errorf("can't initialize raft node: %v", err) } close(m.started) go func() { err := m.RaftNode.Run(ctx) if err != nil { log.G(ctx).Error(err) m.Stop(ctx) } }() if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil { m.server.Stop() return err } c, err := raft.WaitForCluster(ctx, m.RaftNode) if err != nil { m.server.Stop() return err } raftConfig := c.Spec.Raft if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick { log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick) } if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick { log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick) } // wait for an error in serving. err = <-errServe select { // check to see if stopped was posted to. if so, we're in the process of // stopping, or done and that's why we got the error. if stopping is // deliberate, stopped will ALWAYS be closed before the error is trigger, // so this path will ALWAYS be taken if the stop was deliberate case <-m.stopped: // shutdown was requested, do not return an error // but first, we wait to acquire a mutex to guarantee that stopping is // finished. as long as we acquire the mutex BEFORE we return, we know // that stopping is stopped. m.mu.Lock() m.mu.Unlock() return nil // otherwise, we'll get something from errServe, which indicates that an // error in serving has actually occurred and this isn't a planned shutdown default: return err } }
// Run starts all manager sub-systems and the gRPC server at the configured // address. // The call never returns unless an error occurs or `Stop()` is called. func (m *Manager) Run(parent context.Context) error { ctx, ctxCancel := context.WithCancel(parent) defer ctxCancel() // Harakiri. go func() { select { case <-ctx.Done(): case <-m.stopped: ctxCancel() } }() leadershipCh, cancel := m.RaftNode.SubscribeLeadership() defer cancel() go m.handleLeadershipEvents(ctx, leadershipCh) proxyOpts := []grpc.DialOption{ grpc.WithTimeout(5 * time.Second), grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds), } cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...) m.connSelector = cs // We need special connSelector for controlapi because it provides automatic // leader tracking. // Other APIs are using connSelector which errors out on leader change, but // allows to react quickly to reelections. controlAPIProxyOpts := []grpc.DialOption{ grpc.WithBackoffMaxDelay(time.Second), grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds), } controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...) authorize := func(ctx context.Context, roles []string) error { // Authorize the remote roles, ensure they can only be forwarded by managers _, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization()) return err } baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA()) baseResourceAPI := resourceapi.New(m.RaftNode.MemoryStore()) healthServer := health.NewHealthServer() localHealthServer := health.NewHealthServer() authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize) authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize) authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize) authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize) authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize) authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize) authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize) authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize) proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo) // localProxyControlAPI is a special kind of proxy. It is only wired up // to receive requests from a trusted local socket, and these requests // don't use TLS, therefore the requests it handles locally should // bypass authorization. When it proxies, it sends them as requests from // this manager rather than forwarded requests (it has no TLS // information to put in the metadata map). forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil } localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest) // Everything registered on m.server should be an authenticated // wrapper, or a proxy wrapping an authenticated wrapper! api.RegisterCAServer(m.server, proxyCAAPI) api.RegisterNodeCAServer(m.server, proxyNodeCAAPI) api.RegisterRaftServer(m.server, authenticatedRaftAPI) api.RegisterHealthServer(m.server, authenticatedHealthAPI) api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI) api.RegisterControlServer(m.server, authenticatedControlAPI) api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI) api.RegisterDispatcherServer(m.server, proxyDispatcherAPI) api.RegisterControlServer(m.localserver, localProxyControlAPI) api.RegisterHealthServer(m.localserver, localHealthServer) errServe := make(chan error, 2) for proto, l := range m.listeners { go m.serveListener(ctx, errServe, proto, l) } // Set the raft server as serving for the health server healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING) localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING) defer func() { m.server.Stop() m.localserver.Stop() }() if err := m.RaftNode.JoinAndStart(); err != nil { return fmt.Errorf("can't initialize raft node: %v", err) } close(m.started) go func() { err := m.RaftNode.Run(ctx) if err != nil { log.G(ctx).Error(err) m.Stop(ctx) } }() if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil { return err } c, err := raft.WaitForCluster(ctx, m.RaftNode) if err != nil { return err } raftConfig := c.Spec.Raft if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick { log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick) } if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick { log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick) } // wait for an error in serving. err = <-errServe select { // check to see if stopped was posted to. if so, we're in the process of // stopping, or done and that's why we got the error. if stopping is // deliberate, stopped will ALWAYS be closed before the error is trigger, // so this path will ALWAYS be taken if the stop was deliberate case <-m.stopped: // shutdown was requested, do not return an error // but first, we wait to acquire a mutex to guarantee that stopping is // finished. as long as we acquire the mutex BEFORE we return, we know // that stopping is stopped. m.mu.Lock() m.mu.Unlock() return nil // otherwise, we'll get something from errServe, which indicates that an // error in serving has actually occurred and this isn't a planned shutdown default: return err } }
// Run starts all manager sub-systems and the gRPC server at the configured // address. // The call never returns unless an error occurs or `Stop()` is called. func (m *Manager) Run(parent context.Context) error { ctx, ctxCancel := context.WithCancel(parent) defer ctxCancel() // Harakiri. go func() { select { case <-ctx.Done(): case <-m.stopped: ctxCancel() } }() leadershipCh, cancel := m.raftNode.SubscribeLeadership() defer cancel() go m.handleLeadershipEvents(ctx, leadershipCh) authorize := func(ctx context.Context, roles []string) error { var ( blacklistedCerts map[string]*api.BlacklistedCertificate clusters []*api.Cluster err error ) m.raftNode.MemoryStore().View(func(readTx store.ReadTx) { clusters, err = store.FindClusters(readTx, store.ByName("default")) }) // Not having a cluster object yet means we can't check // the blacklist. if err == nil && len(clusters) == 1 { blacklistedCerts = clusters[0].BlacklistedCertificates } // Authorize the remote roles, ensure they can only be forwarded by managers _, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts) return err } baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig.RootCA()) baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore()) healthServer := health.NewHealthServer() localHealthServer := health.NewHealthServer() authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize) authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize) authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize) authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize) authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize) authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize) authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize) authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize) proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) // localProxyControlAPI is a special kind of proxy. It is only wired up // to receive requests from a trusted local socket, and these requests // don't use TLS, therefore the requests it handles locally should // bypass authorization. When it proxies, it sends them as requests from // this manager rather than forwarded requests (it has no TLS // information to put in the metadata map). forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil } localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, forwardAsOwnRequest) // Everything registered on m.server should be an authenticated // wrapper, or a proxy wrapping an authenticated wrapper! api.RegisterCAServer(m.server, proxyCAAPI) api.RegisterNodeCAServer(m.server, proxyNodeCAAPI) api.RegisterRaftServer(m.server, authenticatedRaftAPI) api.RegisterHealthServer(m.server, authenticatedHealthAPI) api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI) api.RegisterControlServer(m.server, authenticatedControlAPI) api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI) api.RegisterDispatcherServer(m.server, proxyDispatcherAPI) api.RegisterControlServer(m.localserver, localProxyControlAPI) api.RegisterHealthServer(m.localserver, localHealthServer) healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING) localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING) errServe := make(chan error, len(m.listeners)) for proto, l := range m.listeners { go m.serveListener(ctx, errServe, proto, l) } defer func() { m.server.Stop() m.localserver.Stop() }() // Set the raft server as serving for the health server healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING) if err := m.raftNode.JoinAndStart(ctx); err != nil { return errors.Wrap(err, "can't initialize raft node") } localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING) close(m.started) go func() { err := m.raftNode.Run(ctx) if err != nil { log.G(ctx).Error(err) m.Stop(ctx) } }() if err := raft.WaitForLeader(ctx, m.raftNode); err != nil { return err } c, err := raft.WaitForCluster(ctx, m.raftNode) if err != nil { return err } raftConfig := c.Spec.Raft if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick { log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick) } if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick { log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick) } // wait for an error in serving. err = <-errServe select { // check to see if stopped was posted to. if so, we're in the process of // stopping, or done and that's why we got the error. if stopping is // deliberate, stopped will ALWAYS be closed before the error is trigger, // so this path will ALWAYS be taken if the stop was deliberate case <-m.stopped: // shutdown was requested, do not return an error // but first, we wait to acquire a mutex to guarantee that stopping is // finished. as long as we acquire the mutex BEFORE we return, we know // that stopping is stopped. m.mu.Lock() m.mu.Unlock() return nil // otherwise, we'll get something from errServe, which indicates that an // error in serving has actually occurred and this isn't a planned shutdown default: return err } }
// Run starts all manager sub-systems and the gRPC server at the configured // address. // The call never returns unless an error occurs or `Stop()` is called. func (m *Manager) Run(parent context.Context) error { ctx, ctxCancel := context.WithCancel(parent) defer ctxCancel() m.cancelFunc = ctxCancel leadershipCh, cancel := m.raftNode.SubscribeLeadership() defer cancel() go m.handleLeadershipEvents(ctx, leadershipCh) authorize := func(ctx context.Context, roles []string) error { var ( blacklistedCerts map[string]*api.BlacklistedCertificate clusters []*api.Cluster err error ) m.raftNode.MemoryStore().View(func(readTx store.ReadTx) { clusters, err = store.FindClusters(readTx, store.ByName("default")) }) // Not having a cluster object yet means we can't check // the blacklist. if err == nil && len(clusters) == 1 { blacklistedCerts = clusters[0].BlacklistedCertificates } // Authorize the remote roles, ensure they can only be forwarded by managers _, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts) return err } baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig.RootCA()) baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore()) healthServer := health.NewHealthServer() localHealthServer := health.NewHealthServer() authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize) authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize) authenticatedLogsServerAPI := api.NewAuthenticatedWrapperLogsServer(m.logbroker, authorize) authenticatedLogBrokerAPI := api.NewAuthenticatedWrapperLogBrokerServer(m.logbroker, authorize) authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize) authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize) authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize) authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize) authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize) authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize) proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) proxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(authenticatedLogBrokerAPI, m.raftNode, ca.WithMetadataForwardTLSInfo) // localProxyControlAPI is a special kind of proxy. It is only wired up // to receive requests from a trusted local socket, and these requests // don't use TLS, therefore the requests it handles locally should // bypass authorization. When it proxies, it sends them as requests from // this manager rather than forwarded requests (it has no TLS // information to put in the metadata map). forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil } localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, forwardAsOwnRequest) localProxyLogsAPI := api.NewRaftProxyLogsServer(m.logbroker, m.raftNode, forwardAsOwnRequest) // Everything registered on m.server should be an authenticated // wrapper, or a proxy wrapping an authenticated wrapper! api.RegisterCAServer(m.server, proxyCAAPI) api.RegisterNodeCAServer(m.server, proxyNodeCAAPI) api.RegisterRaftServer(m.server, authenticatedRaftAPI) api.RegisterHealthServer(m.server, authenticatedHealthAPI) api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI) api.RegisterControlServer(m.server, authenticatedControlAPI) api.RegisterLogsServer(m.server, authenticatedLogsServerAPI) api.RegisterLogBrokerServer(m.server, proxyLogBrokerAPI) api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI) api.RegisterDispatcherServer(m.server, proxyDispatcherAPI) api.RegisterControlServer(m.localserver, localProxyControlAPI) api.RegisterLogsServer(m.localserver, localProxyLogsAPI) api.RegisterHealthServer(m.localserver, localHealthServer) healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING) localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING) errServe := make(chan error, len(m.listeners)) for _, lis := range m.listeners { go m.serveListener(ctx, errServe, lis) } defer func() { m.server.Stop() m.localserver.Stop() }() // Set the raft server as serving for the health server healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING) if err := m.raftNode.JoinAndStart(ctx); err != nil { return errors.Wrap(err, "can't initialize raft node") } localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING) close(m.started) go func() { err := m.raftNode.Run(ctx) if err != nil { log.G(ctx).Error(err) m.Stop(ctx) } }() if err := raft.WaitForLeader(ctx, m.raftNode); err != nil { return err } c, err := raft.WaitForCluster(ctx, m.raftNode) if err != nil { return err } raftConfig := c.Spec.Raft if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick { log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick) } if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick { log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick) } // wait for an error in serving. err = <-errServe m.mu.Lock() if m.stopped { m.mu.Unlock() return nil } m.mu.Unlock() m.Stop(ctx) return err }
// Run starts all manager sub-systems and the gRPC server at the configured // address. // The call never returns unless an error occurs or `Stop()` is called. func (m *Manager) Run(parent context.Context) error { ctx, ctxCancel := context.WithCancel(parent) defer ctxCancel() m.cancelFunc = ctxCancel leadershipCh, cancel := m.raftNode.SubscribeLeadership() defer cancel() go m.handleLeadershipEvents(ctx, leadershipCh) authorize := func(ctx context.Context, roles []string) error { var ( blacklistedCerts map[string]*api.BlacklistedCertificate clusters []*api.Cluster err error ) m.raftNode.MemoryStore().View(func(readTx store.ReadTx) { clusters, err = store.FindClusters(readTx, store.ByName("default")) }) // Not having a cluster object yet means we can't check // the blacklist. if err == nil && len(clusters) == 1 { blacklistedCerts = clusters[0].BlacklistedCertificates } // Authorize the remote roles, ensure they can only be forwarded by managers _, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts) return err } baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig.RootCA(), m.config.PluginGetter) baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore()) healthServer := health.NewHealthServer() localHealthServer := health.NewHealthServer() authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize) authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize) authenticatedLogsServerAPI := api.NewAuthenticatedWrapperLogsServer(m.logbroker, authorize) authenticatedLogBrokerAPI := api.NewAuthenticatedWrapperLogBrokerServer(m.logbroker, authorize) authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize) authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize) authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize) authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize) authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize) authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize) proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo) proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo) proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo) proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo) proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo) proxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(authenticatedLogBrokerAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo) // The following local proxies are only wired up to receive requests // from a trusted local socket, and these requests don't use TLS, // therefore the requests they handle locally should bypass // authorization. When requests are proxied from these servers, they // are sent as requests from this manager rather than forwarded // requests (it has no TLS information to put in the metadata map). forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil } handleRequestLocally := func(ctx context.Context) (context.Context, error) { remoteAddr := "127.0.0.1:0" m.addrMu.Lock() if m.config.RemoteAPI != nil { if m.config.RemoteAPI.AdvertiseAddr != "" { remoteAddr = m.config.RemoteAPI.AdvertiseAddr } else { remoteAddr = m.config.RemoteAPI.ListenAddr } } m.addrMu.Unlock() creds := m.config.SecurityConfig.ClientTLSCreds nodeInfo := ca.RemoteNodeInfo{ Roles: []string{creds.Role()}, Organization: creds.Organization(), NodeID: creds.NodeID(), RemoteAddr: remoteAddr, } return context.WithValue(ctx, ca.LocalRequestKey, nodeInfo), nil } localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, handleRequestLocally, forwardAsOwnRequest) localProxyLogsAPI := api.NewRaftProxyLogsServer(m.logbroker, m.raftNode, handleRequestLocally, forwardAsOwnRequest) localProxyDispatcherAPI := api.NewRaftProxyDispatcherServer(m.dispatcher, m.raftNode, handleRequestLocally, forwardAsOwnRequest) localProxyCAAPI := api.NewRaftProxyCAServer(m.caserver, m.raftNode, handleRequestLocally, forwardAsOwnRequest) localProxyNodeCAAPI := api.NewRaftProxyNodeCAServer(m.caserver, m.raftNode, handleRequestLocally, forwardAsOwnRequest) localProxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(baseResourceAPI, m.raftNode, handleRequestLocally, forwardAsOwnRequest) localProxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(m.logbroker, m.raftNode, handleRequestLocally, forwardAsOwnRequest) // Everything registered on m.server should be an authenticated // wrapper, or a proxy wrapping an authenticated wrapper! api.RegisterCAServer(m.server, proxyCAAPI) api.RegisterNodeCAServer(m.server, proxyNodeCAAPI) api.RegisterRaftServer(m.server, authenticatedRaftAPI) api.RegisterHealthServer(m.server, authenticatedHealthAPI) api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI) api.RegisterControlServer(m.server, authenticatedControlAPI) api.RegisterLogsServer(m.server, authenticatedLogsServerAPI) api.RegisterLogBrokerServer(m.server, proxyLogBrokerAPI) api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI) api.RegisterDispatcherServer(m.server, proxyDispatcherAPI) api.RegisterControlServer(m.localserver, localProxyControlAPI) api.RegisterLogsServer(m.localserver, localProxyLogsAPI) api.RegisterHealthServer(m.localserver, localHealthServer) api.RegisterDispatcherServer(m.localserver, localProxyDispatcherAPI) api.RegisterCAServer(m.localserver, localProxyCAAPI) api.RegisterNodeCAServer(m.localserver, localProxyNodeCAAPI) api.RegisterResourceAllocatorServer(m.localserver, localProxyResourceAPI) api.RegisterLogBrokerServer(m.localserver, localProxyLogBrokerAPI) healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING) localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING) go m.serveListener(ctx, m.remoteListener) go m.serveListener(ctx, m.controlListener) defer func() { m.server.Stop() m.localserver.Stop() }() // Set the raft server as serving for the health server healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING) if err := m.raftNode.JoinAndStart(ctx); err != nil { return errors.Wrap(err, "can't initialize raft node") } localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING) close(m.started) go func() { err := m.raftNode.Run(ctx) if err != nil { log.G(ctx).WithError(err).Error("raft node stopped") m.Stop(ctx, false) } }() if err := raft.WaitForLeader(ctx, m.raftNode); err != nil { return err } c, err := raft.WaitForCluster(ctx, m.raftNode) if err != nil { return err } raftConfig := c.Spec.Raft if err := m.watchForKEKChanges(ctx); err != nil { return err } if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick { log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick) } if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick { log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick) } // wait for an error in serving. err = <-m.errServe m.mu.Lock() if m.stopped { m.mu.Unlock() return nil } m.mu.Unlock() m.Stop(ctx, false) return err }