Beispiel #1
0
// GetRemoteCA returns the remote endpoint's CA certificate
func GetRemoteCA(ctx context.Context, d digest.Digest, picker *picker.Picker) (RootCA, error) {
	// We need a valid picker to be able to Dial to a remote CA
	if picker == nil {
		return RootCA{}, fmt.Errorf("valid remote address picker required")
	}

	// This TLS Config is intentionally using InsecureSkipVerify. Either we're
	// doing TOFU, in which case we don't validate the remote CA, or we're using
	// a user supplied hash to check the integrity of the CA certificate.
	insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecureCreds),
		grpc.WithBackoffMaxDelay(10 * time.Second),
		grpc.WithPicker(picker)}

	firstAddr, err := picker.PickAddr()
	if err != nil {
		return RootCA{}, err
	}

	conn, err := grpc.Dial(firstAddr, opts...)
	if err != nil {
		return RootCA{}, err
	}
	defer conn.Close()

	client := api.NewCAClient(conn)
	response, err := client.GetRootCACertificate(ctx, &api.GetRootCACertificateRequest{})
	if err != nil {
		return RootCA{}, err
	}

	if d != "" {
		verifier, err := digest.NewDigestVerifier(d)
		if err != nil {
			return RootCA{}, fmt.Errorf("unexpected error getting digest verifier: %v", err)
		}

		io.Copy(verifier, bytes.NewReader(response.Certificate))

		if !verifier.Verified() {
			return RootCA{}, fmt.Errorf("remote CA does not match fingerprint. Expected: %s", d.Hex())

		}
	}

	// Check the validity of the remote Cert
	_, err = helpers.ParseCertificatePEM(response.Certificate)
	if err != nil {
		return RootCA{}, err
	}

	// Create a Pool with our RootCACertificate
	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM(response.Certificate) {
		return RootCA{}, fmt.Errorf("failed to append certificate to cert pool")
	}

	return RootCA{Cert: response.Certificate, Pool: pool}, nil
}
Beispiel #2
0
func main() {
	flag.Parse()

	if err := loadConfig(); err != nil {
		log.Fatal("loadConfig:", err)
	}

	// Init RemotePtt
	ptt = pttbbs.NewRemotePtt(config.BoarddAddress, config.BoarddMaxConn)

	// Init mand connection
	if conn, err := grpc.Dial(config.MandAddress, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(time.Second*5)); err != nil {
		log.Fatal("cannot connect to mand:", config.MandAddress, err)
	} else {
		mand = bbspb.NewManServiceClient(conn)
	}

	// Init cache manager
	cacheMgr = cache.NewCacheManager(config.MemcachedAddress, config.MemcachedMaxConn)

	// Load templates
	if err := page.LoadTemplates(config.TemplateDirectory, templateFuncMap()); err != nil {
		log.Fatal("cannot load templates:", err)
	}

	// Init router
	router = createRouter()
	http.Handle("/", router)

	if len(config.Bind) == 0 {
		log.Fatal("No bind addresses specified in config")
	}
	for _, addr := range config.Bind {
		part := strings.SplitN(addr, ":", 2)
		if len(part) != 2 {
			log.Fatal("Invalid bind address: ", addr)
		}
		if listener, err := net.Listen(part[0], part[1]); err != nil {
			log.Fatal("Listen failed for address: ", addr, " error: ", err)
		} else {
			if part[0] == "unix" {
				os.Chmod(part[1], 0777)
				// Ignores errors, we can't do anything to those.
			}
			svr := &http.Server{
				MaxHeaderBytes: 64 * 1024,
			}
			go svr.Serve(listener)
		}
	}

	progExit := make(chan os.Signal)
	signal.Notify(progExit, os.Interrupt)
	<-progExit
}
Beispiel #3
0
func getGRPCConnection(creds credentials.TransportCredentials, connBroker *connectionbroker.Broker, forceRemote bool) (*connectionbroker.Conn, error) {
	dialOpts := []grpc.DialOption{
		grpc.WithTransportCredentials(creds),
		grpc.WithTimeout(5 * time.Second),
		grpc.WithBackoffMaxDelay(5 * time.Second),
	}
	if forceRemote {
		return connBroker.SelectRemote(dialOpts...)
	}
	return connBroker.Select(dialOpts...)
}
Beispiel #4
0
// GRPCDial calls grpc.Dial with the options appropriate for the context.
func (ctx *Context) GRPCDial(target string, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
	ctx.conns.Lock()
	meta, ok := ctx.conns.cache[target]
	if !ok {
		meta = &connMeta{}
		ctx.conns.cache[target] = meta
	}
	ctx.conns.Unlock()

	meta.Do(func() {
		var dialOpt grpc.DialOption
		if ctx.Insecure {
			dialOpt = grpc.WithInsecure()
		} else {
			tlsConfig, err := ctx.GetClientTLSConfig()
			if err != nil {
				meta.err = err
				return
			}
			dialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
		}

		dialOpts := make([]grpc.DialOption, 0, 2+len(opts))
		dialOpts = append(dialOpts, dialOpt)
		dialOpts = append(dialOpts, grpc.WithBackoffMaxDelay(maxBackoff))
		dialOpts = append(dialOpts, opts...)

		if log.V(1) {
			log.Infof(ctx.masterCtx, "dialing %s", target)
		}
		meta.conn, meta.err = grpc.DialContext(ctx.masterCtx, target, dialOpts...)
		if meta.err == nil {
			if err := ctx.Stopper.RunTask(func() {
				ctx.Stopper.RunWorker(func() {
					err := ctx.runHeartbeat(meta.conn, target)
					if err != nil && !grpcutil.IsClosedConnection(err) {
						log.Error(ctx.masterCtx, err)
					}
					ctx.removeConn(target, meta)
				})
			}); err != nil {
				meta.err = err
				// removeConn and ctx's cleanup worker both lock ctx.conns. However,
				// to avoid racing with meta's initialization, the cleanup worker
				// blocks on meta.Do while holding ctx.conns. Invoke removeConn
				// asynchronously to avoid deadlock.
				go ctx.removeConn(target, meta)
			}
		}
	})

	return meta.conn, meta.err
}
Beispiel #5
0
// dial returns a grpc client connection
func dial(addr string, protocol string, creds credentials.TransportAuthenticator, timeout time.Duration) (*grpc.ClientConn, error) {
	grpcOptions := []grpc.DialOption{
		grpc.WithBackoffMaxDelay(2 * time.Second),
		grpc.WithTransportCredentials(creds),
	}

	if timeout != 0 {
		grpcOptions = append(grpcOptions, grpc.WithTimeout(timeout))
	}

	return grpc.Dial(addr, grpcOptions...)
}
Beispiel #6
0
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {
	var manager api.Peer
	select {
	case <-ctx.Done():
	case manager = <-n.remotes.WaitSelect(ctx):
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}
	picker := picker.NewPicker(n.remotes, manager.Addr)
	conn, err := grpc.Dial(manager.Addr,
		grpc.WithPicker(picker),
		grpc.WithTransportCredentials(creds),
		grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
	if err != nil {
		return err
	}

	agent, err := New(&Config{
		Hostname:         n.config.Hostname,
		Managers:         n.remotes,
		Executor:         n.config.Executor,
		DB:               db,
		Conn:             conn,
		Picker:           picker,
		NotifyRoleChange: n.roleChangeReq,
	})
	if err != nil {
		return err
	}
	if err := agent.Start(ctx); err != nil {
		return err
	}

	n.Lock()
	n.agent = agent
	n.Unlock()

	defer func() {
		n.Lock()
		n.agent = nil
		n.Unlock()
	}()

	go func() {
		<-agent.Ready()
		close(ready)
	}()

	// todo: manually call stop on context cancellation?

	return agent.Err(context.Background())
}
Beispiel #7
0
func dial(n *raftutils.TestNode, addr string) (*grpc.ClientConn, error) {
	grpcOptions := []grpc.DialOption{
		grpc.WithBackoffMaxDelay(2 * time.Second),
		grpc.WithBlock(),
	}
	grpcOptions = append(grpcOptions, grpc.WithTransportCredentials(n.SecurityConfig.ClientTLSCreds))

	grpcOptions = append(grpcOptions, grpc.WithTimeout(10*time.Second))

	cc, err := grpc.Dial(addr, grpcOptions...)
	if err != nil {
		return nil, err
	}
	return cc, nil
}
Beispiel #8
0
func getGRPCConnection(creds credentials.TransportCredentials, r remotes.Remotes) (*grpc.ClientConn, api.Peer, error) {
	peer, err := r.Select()
	if err != nil {
		return nil, api.Peer{}, err
	}

	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(creds),
		grpc.WithTimeout(5 * time.Second),
		grpc.WithBackoffMaxDelay(5 * time.Second),
	}

	conn, err := grpc.Dial(peer.Addr, opts...)
	if err != nil {
		return nil, api.Peer{}, err
	}
	return conn, peer, nil
}
Beispiel #9
0
func (lc *leaseChecker) Check() error {
	conn, err := grpc.Dial(lc.ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1))
	if err != nil {
		return fmt.Errorf("%v (%s)", err, lc.ls.endpoint)
	}
	defer func() {
		if conn != nil {
			conn.Close()
		}
	}()
	lc.kvc = pb.NewKVClient(conn)
	lc.leaseClient = pb.NewLeaseClient(conn)
	if err := lc.check(true, lc.ls.revokedLeases.leases); err != nil {
		return err
	}
	if err := lc.check(false, lc.ls.aliveLeases.leases); err != nil {
		return err
	}
	return lc.checkShortLivedLeases()
}
Beispiel #10
0
func (ls *leaseStresser) Stress() error {
	plog.Infof("lease Stresser %v starting ...", ls.endpoint)
	if err := ls.setupOnce(); err != nil {
		return err
	}

	conn, err := grpc.Dial(ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1*time.Second))
	if err != nil {
		return fmt.Errorf("%v (%s)", err, ls.endpoint)
	}
	ls.conn = conn
	ls.kvc = pb.NewKVClient(conn)
	ls.lc = pb.NewLeaseClient(conn)
	ls.revokedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
	ls.shortLivedLeases = &atomicLeases{leases: make(map[int64]time.Time)}

	ctx, cancel := context.WithCancel(context.Background())
	ls.cancel = cancel
	ls.ctx = ctx

	ls.runWg.Add(1)
	go ls.run()
	return nil
}
Beispiel #11
0
// GetRemoteSignedCertificate submits a CSR together with the intended role to a remote CA server address
// available through a picker, and that is part of a CA identified by a specific certificate pool.
func GetRemoteSignedCertificate(ctx context.Context, csr []byte, role, secret string, rootCAPool *x509.CertPool, picker *picker.Picker, creds credentials.TransportAuthenticator, nodeInfo chan<- api.IssueNodeCertificateResponse) ([]byte, error) {
	if rootCAPool == nil {
		return nil, fmt.Errorf("valid root CA pool required")
	}
	if picker == nil {
		return nil, fmt.Errorf("valid remote address picker required")
	}

	if creds == nil {
		// This is our only non-MTLS request, and it happens when we are boostraping our TLS certs
		// We're using CARole as server name, so an external CA doesn't also have to have ManagerRole in the cert SANs
		creds = credentials.NewTLS(&tls.Config{ServerName: CARole, RootCAs: rootCAPool})
	}

	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(creds),
		grpc.WithBackoffMaxDelay(10 * time.Second),
		grpc.WithPicker(picker)}

	firstAddr, err := picker.PickAddr()
	if err != nil {
		return nil, err
	}

	conn, err := grpc.Dial(firstAddr, opts...)
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	// Create a CAClient to retrieve a new Certificate
	caClient := api.NewNodeCAClient(conn)

	// Convert our internal string roles into an API role
	apiRole, err := FormatRole(role)
	if err != nil {
		return nil, err
	}

	// Send the Request and retrieve the request token
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: apiRole, Secret: secret}
	issueResponse, err := caClient.IssueNodeCertificate(ctx, issueRequest)
	if err != nil {
		return nil, err
	}

	// Send back the NodeID on the nodeInfo, so the caller can know what ID was assigned by the CA
	if nodeInfo != nil {
		nodeInfo <- *issueResponse
	}

	statusRequest := &api.NodeCertificateStatusRequest{NodeID: issueResponse.NodeID}
	expBackoff := events.NewExponentialBackoff(events.ExponentialBackoffConfig{
		Base:   time.Second,
		Factor: time.Second,
		Max:    30 * time.Second,
	})

	log.Infof("Waiting for TLS certificate to be issued...")
	// Exponential backoff with Max of 30 seconds to wait for a new retry
	for {
		// Send the Request and retrieve the certificate
		statusResponse, err := caClient.NodeCertificateStatus(ctx, statusRequest)
		if err != nil {
			return nil, err
		}

		// If the certificate was issued, return
		if statusResponse.Status.State == api.IssuanceStateIssued {
			if statusResponse.Certificate == nil {
				return nil, fmt.Errorf("no certificate in CertificateStatus response")
			}

			// The certificate in the response must match the CSR
			// we submitted. If we are getting a response for a
			// certificate that was previously issued, we need to
			// retry until the certificate gets updated per our
			// current request.
			if bytes.Equal(statusResponse.Certificate.CSR, csr) {
				return statusResponse.Certificate.Certificate, nil
			}
		}

		// If we're still pending, the issuance failed, or the state is unknown
		// let's continue trying.
		expBackoff.Failure(nil, nil)
		time.Sleep(expBackoff.Proceed(nil))
	}
}
Beispiel #12
0
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
//
// TODO(aluzzardi): /!\ This function is *way* too complex. /!\
// It needs to be split into smaller manageable functions.
func (m *Manager) Run(parent context.Context) error {
	ctx, ctxCancel := context.WithCancel(parent)
	defer ctxCancel()

	// Harakiri.
	go func() {
		select {
		case <-ctx.Done():
		case <-m.stopped:
			ctxCancel()
		}
	}()

	leadershipCh, cancel := m.RaftNode.SubscribeLeadership()
	defer cancel()

	go func() {
		for leadershipEvent := range leadershipCh {
			// read out and discard all of the messages when we've stopped
			// don't acquire the mutex yet. if stopped is closed, we don't need
			// this stops this loop from starving Run()'s attempt to Lock
			select {
			case <-m.stopped:
				continue
			default:
				// do nothing, we're not stopped
			}
			// we're not stopping so NOW acquire the mutex
			m.mu.Lock()
			newState := leadershipEvent.(raft.LeadershipState)

			if newState == raft.IsLeader {
				s := m.RaftNode.MemoryStore()

				rootCA := m.config.SecurityConfig.RootCA()
				nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID()

				raftCfg := raft.DefaultRaftConfig()
				raftCfg.ElectionTick = uint32(m.RaftNode.Config.ElectionTick)
				raftCfg.HeartbeatTick = uint32(m.RaftNode.Config.HeartbeatTick)

				clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization()

				initialCAConfig := ca.DefaultCAConfig()
				initialCAConfig.ExternalCAs = m.config.ExternalCAs

				s.Update(func(tx store.Tx) error {
					// Add a default cluster object to the
					// store. Don't check the error because
					// we expect this to fail unless this
					// is a brand new cluster.
					store.CreateCluster(tx, &api.Cluster{
						ID: clusterID,
						Spec: api.ClusterSpec{
							Annotations: api.Annotations{
								Name: store.DefaultClusterName,
							},
							Orchestration: api.OrchestrationConfig{
								TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit,
							},
							Dispatcher: api.DispatcherConfig{
								HeartbeatPeriod: ptypes.DurationProto(dispatcher.DefaultHeartBeatPeriod),
							},
							Raft:     raftCfg,
							CAConfig: initialCAConfig,
						},
						RootCA: api.RootCA{
							CAKey:      rootCA.Key,
							CACert:     rootCA.Cert,
							CACertHash: rootCA.Digest.String(),
							JoinTokens: api.JoinTokens{
								Worker:  ca.GenerateJoinToken(rootCA),
								Manager: ca.GenerateJoinToken(rootCA),
							},
						},
					})
					// Add Node entry for ourself, if one
					// doesn't exist already.
					store.CreateNode(tx, &api.Node{
						ID: nodeID,
						Certificate: api.Certificate{
							CN:   nodeID,
							Role: api.NodeRoleManager,
							Status: api.IssuanceStatus{
								State: api.IssuanceStateIssued,
							},
						},
						Spec: api.NodeSpec{
							Role:       api.NodeRoleManager,
							Membership: api.NodeMembershipAccepted,
						},
					})
					return nil
				})

				// Attempt to rotate the key-encrypting-key of the root CA key-material
				err := m.rotateRootCAKEK(ctx, clusterID)
				if err != nil {
					log.G(ctx).WithError(err).Error("root key-encrypting-key rotation failed")
				}

				m.replicatedOrchestrator = orchestrator.NewReplicatedOrchestrator(s)
				m.globalOrchestrator = orchestrator.NewGlobalOrchestrator(s)
				m.taskReaper = orchestrator.NewTaskReaper(s)
				m.scheduler = scheduler.New(s)
				m.keyManager = keymanager.New(m.RaftNode.MemoryStore(), keymanager.DefaultConfig())

				// TODO(stevvooe): Allocate a context that can be used to
				// shutdown underlying manager processes when leadership is
				// lost.

				m.allocator, err = allocator.New(s)
				if err != nil {
					log.G(ctx).WithError(err).Error("failed to create allocator")
					// TODO(stevvooe): It doesn't seem correct here to fail
					// creating the allocator but then use it anyway.
				}

				if m.keyManager != nil {
					go func(keyManager *keymanager.KeyManager) {
						if err := keyManager.Run(ctx); err != nil {
							log.G(ctx).WithError(err).Error("keymanager failed with an error")
						}
					}(m.keyManager)
				}

				go func(d *dispatcher.Dispatcher) {
					if err := d.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("Dispatcher exited with an error")
					}
				}(m.Dispatcher)

				go func(server *ca.Server) {
					if err := server.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("CA signer exited with an error")
					}
				}(m.caserver)

				// Start all sub-components in separate goroutines.
				// TODO(aluzzardi): This should have some kind of error handling so that
				// any component that goes down would bring the entire manager down.

				if m.allocator != nil {
					go func(allocator *allocator.Allocator) {
						if err := allocator.Run(ctx); err != nil {
							log.G(ctx).WithError(err).Error("allocator exited with an error")
						}
					}(m.allocator)
				}

				go func(scheduler *scheduler.Scheduler) {
					if err := scheduler.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("scheduler exited with an error")
					}
				}(m.scheduler)

				go func(taskReaper *orchestrator.TaskReaper) {
					taskReaper.Run()
				}(m.taskReaper)

				go func(orchestrator *orchestrator.ReplicatedOrchestrator) {
					if err := orchestrator.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error")
					}
				}(m.replicatedOrchestrator)

				go func(globalOrchestrator *orchestrator.GlobalOrchestrator) {
					if err := globalOrchestrator.Run(ctx); err != nil {
						log.G(ctx).WithError(err).Error("global orchestrator exited with an error")
					}
				}(m.globalOrchestrator)

			} else if newState == raft.IsFollower {
				m.Dispatcher.Stop()
				m.caserver.Stop()

				if m.allocator != nil {
					m.allocator.Stop()
					m.allocator = nil
				}

				m.replicatedOrchestrator.Stop()
				m.replicatedOrchestrator = nil

				m.globalOrchestrator.Stop()
				m.globalOrchestrator = nil

				m.taskReaper.Stop()
				m.taskReaper = nil

				m.scheduler.Stop()
				m.scheduler = nil

				if m.keyManager != nil {
					m.keyManager.Stop()
					m.keyManager = nil
				}
			}
			m.mu.Unlock()
		}
	}()

	proxyOpts := []grpc.DialOption{
		grpc.WithTimeout(5 * time.Second),
		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
	}

	cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...)
	m.connSelector = cs

	// We need special connSelector for controlapi because it provides automatic
	// leader tracking.
	// Other APIs are using connSelector which errors out on leader change, but
	// allows to react quickly to reelections.
	controlAPIProxyOpts := []grpc.DialOption{
		grpc.WithBackoffMaxDelay(time.Second),
		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
	}

	controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...)

	authorize := func(ctx context.Context, roles []string) error {
		// Authorize the remote roles, ensure they can only be forwarded by managers
		_, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization())
		return err
	}

	baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA())
	healthServer := health.NewHealthServer()

	authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
	authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize)
	authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
	authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
	authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize)
	authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize)

	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)

	// localProxyControlAPI is a special kind of proxy. It is only wired up
	// to receive requests from a trusted local socket, and these requests
	// don't use TLS, therefore the requests it handles locally should
	// bypass authorization. When it proxies, it sends them as requests from
	// this manager rather than forwarded requests (it has no TLS
	// information to put in the metadata map).
	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest)

	// Everything registered on m.server should be an authenticated
	// wrapper, or a proxy wrapping an authenticated wrapper!
	api.RegisterCAServer(m.server, proxyCAAPI)
	api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
	api.RegisterRaftServer(m.server, authenticatedRaftAPI)
	api.RegisterHealthServer(m.server, authenticatedHealthAPI)
	api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
	api.RegisterControlServer(m.localserver, localProxyControlAPI)
	api.RegisterControlServer(m.server, authenticatedControlAPI)
	api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

	errServe := make(chan error, 2)
	for proto, l := range m.listeners {
		go func(proto string, lis net.Listener) {
			ctx := log.WithLogger(ctx, log.G(ctx).WithFields(
				logrus.Fields{
					"proto": lis.Addr().Network(),
					"addr":  lis.Addr().String()}))
			if proto == "unix" {
				log.G(ctx).Info("Listening for local connections")
				// we need to disallow double closes because UnixListener.Close
				// can delete unix-socket file of newer listener. grpc calls
				// Close twice indeed: in Serve and in Stop.
				errServe <- m.localserver.Serve(&closeOnceListener{Listener: lis})
			} else {
				log.G(ctx).Info("Listening for connections")
				errServe <- m.server.Serve(lis)
			}
		}(proto, l)
	}

	// Set the raft server as serving for the health server
	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)

	if err := m.RaftNode.JoinAndStart(); err != nil {
		for _, lis := range m.listeners {
			lis.Close()
		}
		return fmt.Errorf("can't initialize raft node: %v", err)
	}

	close(m.started)

	go func() {
		err := m.RaftNode.Run(ctx)
		if err != nil {
			log.G(ctx).Error(err)
			m.Stop(ctx)
		}
	}()

	if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil {
		m.server.Stop()
		return err
	}

	c, err := raft.WaitForCluster(ctx, m.RaftNode)
	if err != nil {
		m.server.Stop()
		return err
	}
	raftConfig := c.Spec.Raft

	if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick {
		log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick)
	}
	if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick {
		log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
	}

	// wait for an error in serving.
	err = <-errServe
	select {
	// check to see if stopped was posted to. if so, we're in the process of
	// stopping, or done and that's why we got the error. if stopping is
	// deliberate, stopped will ALWAYS be closed before the error is trigger,
	// so this path will ALWAYS be taken if the stop was deliberate
	case <-m.stopped:
		// shutdown was requested, do not return an error
		// but first, we wait to acquire a mutex to guarantee that stopping is
		// finished. as long as we acquire the mutex BEFORE we return, we know
		// that stopping is stopped.
		m.mu.Lock()
		m.mu.Unlock()
		return nil
	// otherwise, we'll get something from errServe, which indicates that an
	// error in serving has actually occurred and this isn't a planned shutdown
	default:
		return err
	}
}
Beispiel #13
0
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
func (m *Manager) Run(parent context.Context) error {
	ctx, ctxCancel := context.WithCancel(parent)
	defer ctxCancel()

	// Harakiri.
	go func() {
		select {
		case <-ctx.Done():
		case <-m.stopped:
			ctxCancel()
		}
	}()

	leadershipCh, cancel := m.RaftNode.SubscribeLeadership()
	defer cancel()

	go m.handleLeadershipEvents(ctx, leadershipCh)

	proxyOpts := []grpc.DialOption{
		grpc.WithTimeout(5 * time.Second),
		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
	}

	cs := raftpicker.NewConnSelector(m.RaftNode, proxyOpts...)
	m.connSelector = cs

	// We need special connSelector for controlapi because it provides automatic
	// leader tracking.
	// Other APIs are using connSelector which errors out on leader change, but
	// allows to react quickly to reelections.
	controlAPIProxyOpts := []grpc.DialOption{
		grpc.WithBackoffMaxDelay(time.Second),
		grpc.WithTransportCredentials(m.config.SecurityConfig.ClientTLSCreds),
	}

	controlAPIConnSelector := hackpicker.NewConnSelector(m.RaftNode, controlAPIProxyOpts...)

	authorize := func(ctx context.Context, roles []string) error {
		// Authorize the remote roles, ensure they can only be forwarded by managers
		_, err := ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization())
		return err
	}

	baseControlAPI := controlapi.NewServer(m.RaftNode.MemoryStore(), m.RaftNode, m.config.SecurityConfig.RootCA())
	baseResourceAPI := resourceapi.New(m.RaftNode.MemoryStore())
	healthServer := health.NewHealthServer()
	localHealthServer := health.NewHealthServer()

	authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
	authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize)
	authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.Dispatcher, authorize)
	authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
	authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
	authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.RaftNode, authorize)
	authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.RaftNode, authorize)

	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)
	proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, cs, m.RaftNode, ca.WithMetadataForwardTLSInfo)

	// localProxyControlAPI is a special kind of proxy. It is only wired up
	// to receive requests from a trusted local socket, and these requests
	// don't use TLS, therefore the requests it handles locally should
	// bypass authorization. When it proxies, it sends them as requests from
	// this manager rather than forwarded requests (it has no TLS
	// information to put in the metadata map).
	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, controlAPIConnSelector, m.RaftNode, forwardAsOwnRequest)

	// Everything registered on m.server should be an authenticated
	// wrapper, or a proxy wrapping an authenticated wrapper!
	api.RegisterCAServer(m.server, proxyCAAPI)
	api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
	api.RegisterRaftServer(m.server, authenticatedRaftAPI)
	api.RegisterHealthServer(m.server, authenticatedHealthAPI)
	api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
	api.RegisterControlServer(m.server, authenticatedControlAPI)
	api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI)
	api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

	api.RegisterControlServer(m.localserver, localProxyControlAPI)
	api.RegisterHealthServer(m.localserver, localHealthServer)

	errServe := make(chan error, 2)
	for proto, l := range m.listeners {
		go m.serveListener(ctx, errServe, proto, l)
	}

	// Set the raft server as serving for the health server
	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)
	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING)

	defer func() {
		m.server.Stop()
		m.localserver.Stop()
	}()

	if err := m.RaftNode.JoinAndStart(); err != nil {
		return fmt.Errorf("can't initialize raft node: %v", err)
	}

	close(m.started)

	go func() {
		err := m.RaftNode.Run(ctx)
		if err != nil {
			log.G(ctx).Error(err)
			m.Stop(ctx)
		}
	}()

	if err := raft.WaitForLeader(ctx, m.RaftNode); err != nil {
		return err
	}

	c, err := raft.WaitForCluster(ctx, m.RaftNode)
	if err != nil {
		return err
	}
	raftConfig := c.Spec.Raft

	if int(raftConfig.ElectionTick) != m.RaftNode.Config.ElectionTick {
		log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.ElectionTick, raftConfig.ElectionTick)
	}
	if int(raftConfig.HeartbeatTick) != m.RaftNode.Config.HeartbeatTick {
		log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.RaftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
	}

	// wait for an error in serving.
	err = <-errServe
	select {
	// check to see if stopped was posted to. if so, we're in the process of
	// stopping, or done and that's why we got the error. if stopping is
	// deliberate, stopped will ALWAYS be closed before the error is trigger,
	// so this path will ALWAYS be taken if the stop was deliberate
	case <-m.stopped:
		// shutdown was requested, do not return an error
		// but first, we wait to acquire a mutex to guarantee that stopping is
		// finished. as long as we acquire the mutex BEFORE we return, we know
		// that stopping is stopped.
		m.mu.Lock()
		m.mu.Unlock()
		return nil
	// otherwise, we'll get something from errServe, which indicates that an
	// error in serving has actually occurred and this isn't a planned shutdown
	default:
		return err
	}
}
Beispiel #14
0
// GetRemoteCA returns the remote endpoint's CA certificate
func GetRemoteCA(ctx context.Context, d digest.Digest, r remotes.Remotes) (RootCA, error) {
	// This TLS Config is intentionally using InsecureSkipVerify. Either we're
	// doing TOFU, in which case we don't validate the remote CA, or we're using
	// a user supplied hash to check the integrity of the CA certificate.
	insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecureCreds),
		grpc.WithTimeout(5 * time.Second),
		grpc.WithBackoffMaxDelay(5 * time.Second),
	}

	peer, err := r.Select()
	if err != nil {
		return RootCA{}, err
	}

	conn, err := grpc.Dial(peer.Addr, opts...)
	if err != nil {
		return RootCA{}, err
	}
	defer conn.Close()

	client := api.NewCAClient(conn)
	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	defer func() {
		if err != nil {
			r.Observe(peer, -remotes.DefaultObservationWeight)
			return
		}
		r.Observe(peer, remotes.DefaultObservationWeight)
	}()
	response, err := client.GetRootCACertificate(ctx, &api.GetRootCACertificateRequest{})
	if err != nil {
		return RootCA{}, err
	}

	if d != "" {
		verifier, err := digest.NewDigestVerifier(d)
		if err != nil {
			return RootCA{}, errors.Wrap(err, "unexpected error getting digest verifier")
		}

		io.Copy(verifier, bytes.NewReader(response.Certificate))

		if !verifier.Verified() {
			return RootCA{}, errors.Errorf("remote CA does not match fingerprint. Expected: %s", d.Hex())

		}
	}

	// Check the validity of the remote Cert
	_, err = helpers.ParseCertificatePEM(response.Certificate)
	if err != nil {
		return RootCA{}, err
	}

	// Create a Pool with our RootCACertificate
	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM(response.Certificate) {
		return RootCA{}, errors.New("failed to append certificate to cert pool")
	}

	return RootCA{Cert: response.Certificate, Digest: digest.FromBytes(response.Certificate), Pool: pool}, nil
}