// GetRemoteCA returns the remote endpoint's CA certificate
func GetRemoteCA(ctx context.Context, d digest.Digest, picker *picker.Picker) (RootCA, error) {
    // We need a valid picker to be able to Dial to a remote CA
    if picker == nil {
        return RootCA{}, fmt.Errorf("valid remote address picker required")
    }

    // This TLS Config is intentionally using InsecureSkipVerify. Either we're
    // doing TOFU, in which case we don't validate the remote CA, or we're using
    // a user supplied hash to check the integrity of the CA certificate.
    insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
    opts := []grpc.DialOption{
        grpc.WithTransportCredentials(insecureCreds),
        grpc.WithBackoffMaxDelay(10 * time.Second),
        grpc.WithPicker(picker),
    }

    firstAddr, err := picker.PickAddr()
    if err != nil {
        return RootCA{}, err
    }

    conn, err := grpc.Dial(firstAddr, opts...)
    if err != nil {
        return RootCA{}, err
    }
    defer conn.Close()

    client := api.NewCAClient(conn)
    response, err := client.GetRootCACertificate(ctx, &api.GetRootCACertificateRequest{})
    if err != nil {
        return RootCA{}, err
    }

    if d != "" {
        verifier, err := digest.NewDigestVerifier(d)
        if err != nil {
            return RootCA{}, fmt.Errorf("unexpected error getting digest verifier: %v", err)
        }

        io.Copy(verifier, bytes.NewReader(response.Certificate))

        if !verifier.Verified() {
            return RootCA{}, fmt.Errorf("remote CA does not match fingerprint. Expected: %s", d.Hex())
        }
    }

    // Check the validity of the remote Cert
    _, err = helpers.ParseCertificatePEM(response.Certificate)
    if err != nil {
        return RootCA{}, err
    }

    // Create a Pool with our RootCACertificate
    pool := x509.NewCertPool()
    if !pool.AppendCertsFromPEM(response.Certificate) {
        return RootCA{}, fmt.Errorf("failed to append certificate to cert pool")
    }

    return RootCA{Cert: response.Certificate, Pool: pool}, nil
}
func main() {
    flag.Parse()

    if err := loadConfig(); err != nil {
        log.Fatal("loadConfig:", err)
    }

    // Init RemotePtt
    ptt = pttbbs.NewRemotePtt(config.BoarddAddress, config.BoarddMaxConn)

    // Init mand connection
    if conn, err := grpc.Dial(config.MandAddress, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(time.Second*5)); err != nil {
        log.Fatal("cannot connect to mand:", config.MandAddress, err)
    } else {
        mand = bbspb.NewManServiceClient(conn)
    }

    // Init cache manager
    cacheMgr = cache.NewCacheManager(config.MemcachedAddress, config.MemcachedMaxConn)

    // Load templates
    if err := page.LoadTemplates(config.TemplateDirectory, templateFuncMap()); err != nil {
        log.Fatal("cannot load templates:", err)
    }

    // Init router
    router = createRouter()
    http.Handle("/", router)

    if len(config.Bind) == 0 {
        log.Fatal("No bind addresses specified in config")
    }
    for _, addr := range config.Bind {
        part := strings.SplitN(addr, ":", 2)
        if len(part) != 2 {
            log.Fatal("Invalid bind address: ", addr)
        }
        if listener, err := net.Listen(part[0], part[1]); err != nil {
            log.Fatal("Listen failed for address: ", addr, " error: ", err)
        } else {
            if part[0] == "unix" {
                os.Chmod(part[1], 0777) // Ignores errors, we can't do anything to those.
            }
            svr := &http.Server{
                MaxHeaderBytes: 64 * 1024,
            }
            go svr.Serve(listener)
        }
    }

    // Buffered so the interrupt notification isn't dropped; signal.Notify does
    // not block when sending to this channel.
    progExit := make(chan os.Signal, 1)
    signal.Notify(progExit, os.Interrupt)
    <-progExit
}
func getGRPCConnection(creds credentials.TransportCredentials, connBroker *connectionbroker.Broker, forceRemote bool) (*connectionbroker.Conn, error) {
    dialOpts := []grpc.DialOption{
        grpc.WithTransportCredentials(creds),
        grpc.WithTimeout(5 * time.Second),
        grpc.WithBackoffMaxDelay(5 * time.Second),
    }
    if forceRemote {
        return connBroker.SelectRemote(dialOpts...)
    }
    return connBroker.Select(dialOpts...)
}
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
    ctx.conns.Lock()
    meta, ok := ctx.conns.cache[target]
    if !ok {
        meta = &connMeta{}
        ctx.conns.cache[target] = meta
    }
    ctx.conns.Unlock()

    meta.Do(func() {
        var dialOpt grpc.DialOption
        if ctx.Insecure {
            dialOpt = grpc.WithInsecure()
        } else {
            tlsConfig, err := ctx.GetClientTLSConfig()
            if err != nil {
                meta.err = err
                return
            }
            dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
        }

        dialOpts := make([]grpc.DialOption, 0, 2+len(opts))
        dialOpts = append(dialOpts, dialOpt)
        dialOpts = append(dialOpts, grpc.WithBackoffMaxDelay(maxBackoff))
        dialOpts = append(dialOpts, opts...)

        if log.V(1) {
            log.Infof(ctx.masterCtx, "dialing %s", target)
        }
        meta.conn, meta.err = grpc.DialContext(ctx.masterCtx, target, dialOpts...)
        if meta.err == nil {
            if err := ctx.Stopper.RunTask(func() {
                ctx.Stopper.RunWorker(func() {
                    err := ctx.runHeartbeat(meta.conn, target)
                    if err != nil && !grpcutil.IsClosedConnection(err) {
                        log.Error(ctx.masterCtx, err)
                    }
                    ctx.removeConn(target, meta)
                })
            }); err != nil {
                meta.err = err
                // removeConn and ctx's cleanup worker both lock ctx.conns. However,
                // to avoid racing with meta's initialization, the cleanup worker
                // blocks on meta.Do while holding ctx.conns. Invoke removeConn
                // asynchronously to avoid deadlock.
                go ctx.removeConn(target, meta)
            }
        }
    })

    return meta.conn, meta.err
}
// dial returns a grpc client connection
func dial(addr string, protocol string, creds credentials.TransportAuthenticator, timeout time.Duration) (*grpc.ClientConn, error) {
    grpcOptions := []grpc.DialOption{
        grpc.WithBackoffMaxDelay(2 * time.Second),
        grpc.WithTransportCredentials(creds),
    }
    if timeout != 0 {
        grpcOptions = append(grpcOptions, grpc.WithTimeout(timeout))
    }
    return grpc.Dial(addr, grpcOptions...)
}
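These helpers all follow the same shape: build a []grpc.DialOption that caps the reconnect backoff, then hand the slice to grpc.Dial. For reference, a minimal self-contained sketch of that pattern outside any of these codebases; the address, the insecure transport, and the 2s/10s values are placeholders rather than anything taken from the snippets above.

package main

import (
    "log"
    "time"

    "google.golang.org/grpc"
)

func main() {
    // Cap gRPC's internal reconnect backoff at 2s so a flapping endpoint is
    // retried quickly instead of backing off toward the default 120s ceiling.
    // WithBlock + WithTimeout make the dial itself fail fast if the endpoint
    // never becomes reachable.
    conn, err := grpc.Dial(
        "127.0.0.1:4242", // placeholder address
        grpc.WithInsecure(),
        grpc.WithBackoffMaxDelay(2*time.Second),
        grpc.WithBlock(),
        grpc.WithTimeout(10*time.Second),
    )
    if err != nil {
        log.Fatalf("dial: %v", err)
    }
    defer conn.Close()
}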
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {
    var manager api.Peer
    select {
    case <-ctx.Done():
    case manager = <-n.remotes.WaitSelect(ctx):
    }
    if ctx.Err() != nil {
        return ctx.Err()
    }

    picker := picker.NewPicker(n.remotes, manager.Addr)
    conn, err := grpc.Dial(manager.Addr,
        grpc.WithPicker(picker),
        grpc.WithTransportCredentials(creds),
        grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
    if err != nil {
        return err
    }

    agent, err := New(&Config{
        Hostname:         n.config.Hostname,
        Managers:         n.remotes,
        Executor:         n.config.Executor,
        DB:               db,
        Conn:             conn,
        Picker:           picker,
        NotifyRoleChange: n.roleChangeReq,
    })
    if err != nil {
        return err
    }
    if err := agent.Start(ctx); err != nil {
        return err
    }

    n.Lock()
    n.agent = agent
    n.Unlock()

    defer func() {
        n.Lock()
        n.agent = nil
        n.Unlock()
    }()

    go func() {
        <-agent.Ready()
        close(ready)
    }()

    // todo: manually call stop on context cancellation?
    return agent.Err(context.Background())
}
func dial(n *raftutils.TestNode, addr string) (*grpc.ClientConn, error) {
    grpcOptions := []grpc.DialOption{
        grpc.WithBackoffMaxDelay(2 * time.Second),
        grpc.WithBlock(),
    }
    grpcOptions = append(grpcOptions, grpc.WithTransportCredentials(n.SecurityConfig.ClientTLSCreds))
    grpcOptions = append(grpcOptions, grpc.WithTimeout(10*time.Second))

    cc, err := grpc.Dial(addr, grpcOptions...)
    if err != nil {
        return nil, err
    }
    return cc, nil
}
func getGRPCConnection(creds credentials.TransportCredentials, r remotes.Remotes) (*grpc.ClientConn, api.Peer, error) {
    peer, err := r.Select()
    if err != nil {
        return nil, api.Peer{}, err
    }

    opts := []grpc.DialOption{
        grpc.WithTransportCredentials(creds),
        grpc.WithTimeout(5 * time.Second),
        grpc.WithBackoffMaxDelay(5 * time.Second),
    }

    conn, err := grpc.Dial(peer.Addr, opts...)
    if err != nil {
        return nil, api.Peer{}, err
    }
    return conn, peer, nil
}
func (lc *leaseChecker) Check() error {
    // Note: the bare 1 below is a time.Duration of 1ns, so the reconnect
    // backoff is effectively capped at no delay at all.
    conn, err := grpc.Dial(lc.ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1))
    if err != nil {
        return fmt.Errorf("%v (%s)", err, lc.ls.endpoint)
    }
    defer func() {
        if conn != nil {
            conn.Close()
        }
    }()
    lc.kvc = pb.NewKVClient(conn)
    lc.leaseClient = pb.NewLeaseClient(conn)
    if err := lc.check(true, lc.ls.revokedLeases.leases); err != nil {
        return err
    }
    if err := lc.check(false, lc.ls.aliveLeases.leases); err != nil {
        return err
    }
    return lc.checkShortLivedLeases()
}
func (ls *leaseStresser) Stress() error {
    plog.Infof("lease Stresser %v starting ...", ls.endpoint)
    if err := ls.setupOnce(); err != nil {
        return err
    }

    conn, err := grpc.Dial(ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1*time.Second))
    if err != nil {
        return fmt.Errorf("%v (%s)", err, ls.endpoint)
    }
    ls.conn = conn
    ls.kvc = pb.NewKVClient(conn)
    ls.lc = pb.NewLeaseClient(conn)

    ls.revokedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
    ls.shortLivedLeases = &atomicLeases{leases: make(map[int64]time.Time)}

    ctx, cancel := context.WithCancel(context.Background())
    ls.cancel = cancel
    ls.ctx = ctx

    ls.runWg.Add(1)
    go ls.run()
    return nil
}
// GetRemoteSignedCertificate submits a CSR together with the intended role to a remote CA server address
// available through a picker, and that is part of a CA identified by a specific certificate pool.
func GetRemoteSignedCertificate(ctx context.Context, csr []byte, role, secret string, rootCAPool *x509.CertPool, picker *picker.Picker, creds credentials.TransportAuthenticator, nodeInfo chan<- api.IssueNodeCertificateResponse) ([]byte, error) {
    if rootCAPool == nil {
        return nil, fmt.Errorf("valid root CA pool required")
    }
    if picker == nil {
        return nil, fmt.Errorf("valid remote address picker required")
    }

    if creds == nil {
        // This is our only non-MTLS request, and it happens when we are bootstrapping our TLS certs
        // We're using CARole as server name, so an external CA doesn't also have to have ManagerRole in the cert SANs
        creds = credentials.NewTLS(&tls.Config{ServerName: CARole, RootCAs: rootCAPool})
    }

    opts := []grpc.DialOption{
        grpc.WithTransportCredentials(creds),
        grpc.WithBackoffMaxDelay(10 * time.Second),
        grpc.WithPicker(picker),
    }

    firstAddr, err := picker.PickAddr()
    if err != nil {
        return nil, err
    }

    conn, err := grpc.Dial(firstAddr, opts...)
    if err != nil {
        return nil, err
    }
    defer conn.Close()

    // Create a CAClient to retrieve a new Certificate
    caClient := api.NewNodeCAClient(conn)

    // Convert our internal string roles into an API role
    apiRole, err := FormatRole(role)
    if err != nil {
        return nil, err
    }

    // Send the Request and retrieve the request token
    issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: apiRole, Secret: secret}
    issueResponse, err := caClient.IssueNodeCertificate(ctx, issueRequest)
    if err != nil {
        return nil, err
    }

    // Send back the NodeID on the nodeInfo, so the caller can know what ID was assigned by the CA
    if nodeInfo != nil {
        nodeInfo <- *issueResponse
    }

    statusRequest := &api.NodeCertificateStatusRequest{NodeID: issueResponse.NodeID}
    expBackoff := events.NewExponentialBackoff(events.ExponentialBackoffConfig{
        Base:   time.Second,
        Factor: time.Second,
        Max:    30 * time.Second,
    })

    log.Infof("Waiting for TLS certificate to be issued...")
    // Exponential backoff with Max of 30 seconds to wait for a new retry
    for {
        // Send the Request and retrieve the certificate
        statusResponse, err := caClient.NodeCertificateStatus(ctx, statusRequest)
        if err != nil {
            return nil, err
        }

        // If the certificate was issued, return
        if statusResponse.Status.State == api.IssuanceStateIssued {
            if statusResponse.Certificate == nil {
                return nil, fmt.Errorf("no certificate in CertificateStatus response")
            }

            // The certificate in the response must match the CSR
            // we submitted. If we are getting a response for a
            // certificate that was previously issued, we need to
            // retry until the certificate gets updated per our
            // current request.
            if bytes.Equal(statusResponse.Certificate.CSR, csr) {
                return statusResponse.Certificate.Certificate, nil
            }
        }

        // If we're still pending, the issuance failed, or the state is unknown
        // let's continue trying.
        expBackoff.Failure(nil, nil)
        time.Sleep(expBackoff.Proceed(nil))
    }
}
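The status-polling loop above uses an ExponentialBackoff from the events package (assumed here to be github.com/docker/go-events). A minimal sketch of that wait-and-retry pattern in isolation; waitWithBackoff and checkOnce are hypothetical names introduced for illustration, not part of the original API.

import (
    "time"

    events "github.com/docker/go-events" // assumed import path
)

// waitWithBackoff polls checkOnce until it reports done (or errors), sleeping
// an exponentially increasing amount of time between attempts.
func waitWithBackoff(checkOnce func() (bool, error)) error {
    expBackoff := events.NewExponentialBackoff(events.ExponentialBackoffConfig{
        Base:   time.Second,
        Factor: time.Second,
        Max:    30 * time.Second,
    })
    for {
        done, err := checkOnce()
        if err != nil {
            return err
        }
        if done {
            return nil
        }
        // Record a failure so the next Proceed returns a longer delay,
        // capped at Max (30s here).
        expBackoff.Failure(nil, nil)
        time.Sleep(expBackoff.Proceed(nil))
    }
}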
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
//
// TODO(aluzzardi): /!\ This function is *way* too complex. /!\
// It needs to be split into smaller manageable functions.
func (m *Manager) Run(parent context.Context) error {
    ctx, ctxCancel := context.WithCancel(parent)
    defer ctxCancel()

    // Harakiri.
    go func() {
        select {
        case <-ctx.Done():
        case <-m.stopped:
            ctxCancel()
        }
    }()

    leadershipCh, cancel := m.RaftNode.SubscribeLeadership()
    defer cancel()

    go func() {
        for leadershipEvent := range leadershipCh {
            // read out and discard all of the messages when we've stopped
            // don't acquire the mutex yet. if stopped is closed, we don't need
            // this stops this loop from starving Run()'s attempt to Lock
            select {
            case <-m.stopped:
                continue
            default:
                // do nothing, we're not stopped
            }
            // we're not stopping so NOW acquire the mutex
            m.mu.Lock()
            newState := leadershipEvent.(raft.LeadershipState)

            if newState == raft.IsLeader {
                s := m.RaftNode.MemoryStore()

                rootCA := m.config.SecurityConfig.RootCA()
                nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID()

                raftCfg := raft.DefaultRaftConfig()
                raftCfg.ElectionTick = uint32(m.RaftNode.Config.ElectionTick)
                raftCfg.HeartbeatTick = uint32(m.RaftNode.Config.HeartbeatTick)

                clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization()

                initialCAConfig := ca.DefaultCAConfig()
                initialCAConfig.ExternalCAs = m.config.ExternalCAs

                s.Update(func(tx store.Tx) error {
                    // Add a default cluster object to the
                    // store. Don't check the error because
                    // we expect this to fail unless this
                    // is a brand new cluster.
                    store.CreateCluster(tx, &api.Cluster{
                        ID: clusterID,
                        Spec: api.ClusterSpec{
                            Annotations: api.Annotations{
                                Name: store.DefaultClusterName,
                            },
                            Orchestration: api.OrchestrationConfig{
                                TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit,
                            },
                            Dispatcher: api.DispatcherConfig{
                                HeartbeatPeriod: ptypes.DurationProto(dispatcher.DefaultHeartBeatPeriod),
                            },
                            Raft:     raftCfg,
                            CAConfig: initialCAConfig,
                        },
                        RootCA: api.RootCA{
                            CAKey:      rootCA.Key,
                            CACert:     rootCA.Cert,
                            CACertHash: rootCA.Digest.String(),
                            JoinTokens: api.JoinTokens{
                                Worker:  ca.GenerateJoinToken(rootCA),
                                Manager: ca.GenerateJoinToken(rootCA),
                            },
                        },
                    })
                    // Add Node entry for ourself, if one
                    // doesn't exist already.
                    store.CreateNode(tx, &api.Node{
                        ID: nodeID,
                        Certificate: api.Certificate{
                            CN:   nodeID,
                            Role: api.NodeRoleManager,
                            Status: api.IssuanceStatus{
                                State: api.IssuanceStateIssued,
                            },
                        },
                        Spec: api.NodeSpec{
                            Role:       api.NodeRoleManager,
                            Membership: api.NodeMembershipAccepted,
                        },
                    })
                    return nil
                })

                // Attempt to rotate the key-encrypting-key of the root CA key-material
                err := m.rotateRootCAKEK(ctx, clusterID)
                if err != nil {
                    log.G(ctx).WithError(err).Error("root key-encrypting-key rotation failed")
                }

                m.replicatedOrchestrator = orchestrator.NewReplicatedOrchestrator(s)
                m.globalOrchestrator = orchestrator.NewGlobalOrchestrator(s)
                m.taskReaper = orchestrator.NewTaskReaper(s)
                m.scheduler = scheduler.New(s)
                m.keyManager = keymanager.New(m.RaftNode.MemoryStore(), keymanager.DefaultConfig())

                // TODO(stevvooe): Allocate a context that can be used to
                // shutdown underlying manager processes when leadership is
                // lost.

                m.allocator, err = allocator.New(s)
                if err != nil {
                    log.G(ctx).WithError(err).Error("failed to create allocator")
                    // TODO(stevvooe): It doesn't seem correct here to fail
                    // creating the allocator but then use it anyway.
                }

                if m.keyManager != nil {
                    go func(keyManager *keymanager.KeyManager) {
                        if err := keyManager.Run(ctx); err != nil {
                            log.G(ctx).WithError(err).Error("keymanager failed with an error")
                        }
                    }(m.keyManager)
                }

                go func(d *dispatcher.Dispatcher) {
                    if err := d.Run(ctx); err != nil {
                        log.G(ctx).WithError(err).Error("Dispatcher exited with an error")
                    }
                }(m.Dispatcher)

                go func(server *ca.Server) {
                    if err := server.Run(ctx); err != nil {
                        log.G(ctx).WithError(err).Error("CA signer exited with an error")
                    }
                }(m.caserver)

                // Start all sub-components in separate goroutines.
                // TODO(aluzzardi): This should have some kind of error handling so that
                // any component that goes down would bring the entire manager down.
                if m.allocator != nil {
                    go func(allocator *allocator.Allocator) {
                        if err := allocator.Run(ctx); err != nil {
                            log.G(ctx).WithError(err).Error("allocator exited with an error")
                        }
                    }(m.allocator)
                }

                go func(scheduler *scheduler.Scheduler) {
                    if err := scheduler.Run(ctx); err != nil {
                        log.G(ctx).WithError(err).Error("scheduler exited with an error")
                    }
                }(m.scheduler)

                go func(taskReaper *orchestrator.TaskReaper) {
                    taskReaper.Run()
                }(m.taskReaper)

                go func(orchestrator *orchestrator.ReplicatedOrchestrator) {
                    if err := orchestrator.Run(ctx); err != nil {
                        log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error")
                    }
                }(m.replicatedOrchestrator)

                go func(globalOrchestrator *orchestrator.GlobalOrchestrator) {
                    if err := globalOrchestrator.Run(ctx); err != nil {
                        log.G(ctx).WithError(err).Error("global orchestrator exited with an error")
                    }
                }(m.globalOrchestrator)
            } else if newState == raft.IsFollower {
                m.Dispatcher.Stop()
                m.caserver.Stop()

                if m.allocator != nil {
                    m.allocator.Stop()
                    m.allocator = nil
                }

                m.replicatedOrchestrator.Stop()
                m.replicatedOrchestrator = nil

                m.globalOrchestrator.Stop()
                m.globalOrchestrator = nil

                m.taskReaper.Stop()
                m.taskReaper = nil

                m.scheduler.Stop()
                m.scheduler = nil

                if m.keyManager != nil {
                    m.keyManager.Stop()
                    m.keyManager = nil
                }
            }
            m.mu.Unlock()
        }
    }()

    proxyOpts := []grpc.DialOption{
        grpc.WithTimeout(5 * time.Second),
        grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
    }

    cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...)
    m.connSelector = cs

    // We need special connSelector for controlapi because it provides automatic
    // leader tracking.
    // Other APIs are using connSelector which errors out on leader change, but
    // allows to react quickly to reelections.
    controlAPIProxyOpts := []grpc.DialOption{
        grpc.WithBackoffMaxDelay(time.Second),
        grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
    }

    controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...)

    authorize := func(ctx context.Context, roles []string) error {
        // Authorize the remote roles, ensure they can only be forwarded by managers
        _, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization())
        return err
    }

    baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA())
    healthServer := health.NewHealthServer()

    authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
    authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize)
    authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
    authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
    authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize)
    authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
    authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize)

    proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)

    // localProxyControlAPI is a special kind of proxy. It is only wired up
    // to receive requests from a trusted local socket, and these requests
    // don't use TLS, therefore the requests it handles locally should
    // bypass authorization. When it proxies, it sends them as requests from
    // this manager rather than forwarded requests (it has no TLS
    // information to put in the metadata map).
    forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
    localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest)

    // Everything registered on m.server should be an authenticated
    // wrapper, or a proxy wrapping an authenticated wrapper!
    api.RegisterCAServer(m.server, proxyCAAPI)
    api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
    api.RegisterRaftServer(m.server, authenticatedRaftAPI)
    api.RegisterHealthServer(m.server, authenticatedHealthAPI)
    api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
    api.RegisterControlServer(m.localserver, localProxyControlAPI)
    api.RegisterControlServer(m.server, authenticatedControlAPI)
    api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

    errServe := make(chan error, 2)
    for proto, l := range m.listeners {
        go func(proto string, lis net.Listener) {
            ctx := log.WithLogger(ctx, log.G(ctx).WithFields(
                logrus.Fields{
                    "proto": lis.Addr().Network(),
                    "addr":  lis.Addr().String()}))
            if proto == "unix" {
                log.G(ctx).Info("Listening for local connections")
                // we need to disallow double closes because UnixListener.Close
                // can delete unix-socket file of newer listener. grpc calls
                // Close twice indeed: in Serve and in Stop.
                errServe <- m.localserver.Serve(&closeOnceListener{Listener: lis})
            } else {
                log.G(ctx).Info("Listening for connections")
                errServe <- m.server.Serve(lis)
            }
        }(proto, l)
    }

    // Set the raft server as serving for the health server
    healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)

    if err := m.RaftNode.JoinAndStart(); err != nil {
        for _, lis := range m.listeners {
            lis.Close()
        }
        return fmt.Errorf("can't initialize raft node: %v", err)
    }

    close(m.started)

    go func() {
        err := m.RaftNode.Run(ctx)
        if err != nil {
            log.G(ctx).Error(err)
            m.Stop(ctx)
        }
    }()

    if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil {
        m.server.Stop()
        return err
    }

    c, err := raft.WaitForCluster(ctx, m.RaftNode)
    if err != nil {
        m.server.Stop()
        return err
    }
    raftConfig := c.Spec.Raft

    if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick {
        log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick)
    }
    if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick {
        log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
    }

    // wait for an error in serving.
    err = <-errServe
    select {
    // check to see if stopped was posted to. if so, we're in the process of
    // stopping, or done and that's why we got the error. if stopping is
    // deliberate, stopped will ALWAYS be closed before the error is triggered,
    // so this path will ALWAYS be taken if the stop was deliberate
    case <-m.stopped:
        // shutdown was requested, do not return an error
        // but first, we wait to acquire a mutex to guarantee that stopping is
        // finished. as long as we acquire the mutex BEFORE we return, we know
        // that stopping is stopped.
        m.mu.Lock()
        m.mu.Unlock()
        return nil

    // otherwise, we'll get something from errServe, which indicates that an
    // error in serving has actually occurred and this isn't a planned shutdown
    default:
        return err
    }
}
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
func (m *Manager) Run(parent context.Context) error {
    ctx, ctxCancel := context.WithCancel(parent)
    defer ctxCancel()

    // Harakiri.
    go func() {
        select {
        case <-ctx.Done():
        case <-m.stopped:
            ctxCancel()
        }
    }()

    leadershipCh, cancel := m.RaftNode.SubscribeLeadership()
    defer cancel()

    go m.handleLeadershipEvents(ctx, leadershipCh)

    proxyOpts := []grpc.DialOption{
        grpc.WithTimeout(5 * time.Second),
        grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
    }

    cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...)
    m.connSelector = cs

    // We need special connSelector for controlapi because it provides automatic
    // leader tracking.
    // Other APIs are using connSelector which errors out on leader change, but
    // allows to react quickly to reelections.
    controlAPIProxyOpts := []grpc.DialOption{
        grpc.WithBackoffMaxDelay(time.Second),
        grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
    }

    controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...)

    authorize := func(ctx context.Context, roles []string) error {
        // Authorize the remote roles, ensure they can only be forwarded by managers
        _, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization())
        return err
    }

    baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA())
    baseResourceAPI := resourceapi.New(m.RaftNode.MemoryStore())
    healthServer := health.NewHealthServer()
    localHealthServer := health.NewHealthServer()

    authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
    authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize)
    authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize)
    authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
    authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
    authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize)
    authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
    authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize)

    proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
    proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)

    // localProxyControlAPI is a special kind of proxy. It is only wired up
    // to receive requests from a trusted local socket, and these requests
    // don't use TLS, therefore the requests it handles locally should
    // bypass authorization. When it proxies, it sends them as requests from
    // this manager rather than forwarded requests (it has no TLS
    // information to put in the metadata map).
    forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
    localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest)

    // Everything registered on m.server should be an authenticated
    // wrapper, or a proxy wrapping an authenticated wrapper!
    api.RegisterCAServer(m.server, proxyCAAPI)
    api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
    api.RegisterRaftServer(m.server, authenticatedRaftAPI)
    api.RegisterHealthServer(m.server, authenticatedHealthAPI)
    api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
    api.RegisterControlServer(m.server, authenticatedControlAPI)
    api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI)
    api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

    api.RegisterControlServer(m.localserver, localProxyControlAPI)
    api.RegisterHealthServer(m.localserver, localHealthServer)

    errServe := make(chan error, 2)
    for proto, l := range m.listeners {
        go m.serveListener(ctx, errServe, proto, l)
    }

    // Set the raft server as serving for the health server
    healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)
    localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING)

    defer func() {
        m.server.Stop()
        m.localserver.Stop()
    }()

    if err := m.RaftNode.JoinAndStart(); err != nil {
        return fmt.Errorf("can't initialize raft node: %v", err)
    }

    close(m.started)

    go func() {
        err := m.RaftNode.Run(ctx)
        if err != nil {
            log.G(ctx).Error(err)
            m.Stop(ctx)
        }
    }()

    if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil {
        return err
    }

    c, err := raft.WaitForCluster(ctx, m.RaftNode)
    if err != nil {
        return err
    }
    raftConfig := c.Spec.Raft

    if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick {
        log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick)
    }
    if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick {
        log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
    }

    // wait for an error in serving.
    err = <-errServe
    select {
    // check to see if stopped was posted to. if so, we're in the process of
    // stopping, or done and that's why we got the error. if stopping is
    // deliberate, stopped will ALWAYS be closed before the error is triggered,
    // so this path will ALWAYS be taken if the stop was deliberate
    case <-m.stopped:
        // shutdown was requested, do not return an error
        // but first, we wait to acquire a mutex to guarantee that stopping is
        // finished. as long as we acquire the mutex BEFORE we return, we know
        // that stopping is stopped.
        m.mu.Lock()
        m.mu.Unlock()
        return nil

    // otherwise, we'll get something from errServe, which indicates that an
    // error in serving has actually occurred and this isn't a planned shutdown
    default:
        return err
    }
}
// GetRemoteCA returns the remote endpoint's CA certificate
func GetRemoteCA(ctx context.Context, d digest.Digest, r remotes.Remotes) (RootCA, error) {
    // This TLS Config is intentionally using InsecureSkipVerify. Either we're
    // doing TOFU, in which case we don't validate the remote CA, or we're using
    // a user supplied hash to check the integrity of the CA certificate.
    insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
    opts := []grpc.DialOption{
        grpc.WithTransportCredentials(insecureCreds),
        grpc.WithTimeout(5 * time.Second),
        grpc.WithBackoffMaxDelay(5 * time.Second),
    }

    peer, err := r.Select()
    if err != nil {
        return RootCA{}, err
    }

    conn, err := grpc.Dial(peer.Addr, opts...)
    if err != nil {
        return RootCA{}, err
    }
    defer conn.Close()

    client := api.NewCAClient(conn)
    ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
    defer cancel()
    defer func() {
        if err != nil {
            r.Observe(peer, -remotes.DefaultObservationWeight)
            return
        }
        r.Observe(peer, remotes.DefaultObservationWeight)
    }()
    response, err := client.GetRootCACertificate(ctx, &api.GetRootCACertificateRequest{})
    if err != nil {
        return RootCA{}, err
    }

    if d != "" {
        verifier, err := digest.NewDigestVerifier(d)
        if err != nil {
            return RootCA{}, errors.Wrap(err, "unexpected error getting digest verifier")
        }

        io.Copy(verifier, bytes.NewReader(response.Certificate))

        if !verifier.Verified() {
            return RootCA{}, errors.Errorf("remote CA does not match fingerprint. Expected: %s", d.Hex())
        }
    }

    // Check the validity of the remote Cert
    _, err = helpers.ParseCertificatePEM(response.Certificate)
    if err != nil {
        return RootCA{}, err
    }

    // Create a Pool with our RootCACertificate
    pool := x509.NewCertPool()
    if !pool.AppendCertsFromPEM(response.Certificate) {
        return RootCA{}, errors.New("failed to append certificate to cert pool")
    }

    return RootCA{Cert: response.Certificate, Digest: digest.FromBytes(response.Certificate), Pool: pool}, nil
}
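Both GetRemoteCA variants reduce the trust-on-first-use risk by checking the fetched certificate against an operator-supplied fingerprint. A minimal sketch of that check on its own, using digest.FromBytes as the function above does; verifyCAFingerprint is a hypothetical helper, and the digest import path is an assumption rather than something confirmed by the snippets.

import (
    "fmt"

    "github.com/docker/distribution/digest" // assumed import path for the digest package used above
)

// verifyCAFingerprint mirrors the fingerprint check in GetRemoteCA: certPEM
// stands in for the PEM bytes returned by the remote CA, and expected for the
// operator-supplied digest (e.g. obtained out of band from a join token).
func verifyCAFingerprint(certPEM []byte, expected digest.Digest) error {
    if actual := digest.FromBytes(certPEM); actual != expected {
        return fmt.Errorf("remote CA does not match fingerprint. Expected: %s, got: %s", expected.Hex(), actual.Hex())
    }
    return nil
}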