func agentTestEnv(t *testing.T) (*Agent, func()) {
	var cleanup []func()
	tc := testutils.NewTestCA(t, testutils.AcceptancePolicy(true, true, ""))
	cleanup = append(cleanup, func() { tc.Stop() })

	agentSecurityConfig, err := tc.NewNodeConfig(ca.AgentRole)
	assert.NoError(t, err)

	addr := "localhost:4949"
	remotes := picker.NewRemotes(api.Peer{Addr: addr})

	conn, err := grpc.Dial(addr,
		grpc.WithPicker(picker.NewPicker(remotes, addr)),
		grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds))
	assert.NoError(t, err)

	db, cleanupStorage := storageTestEnv(t)
	cleanup = append(cleanup, func() { cleanupStorage() })

	agent, err := New(&Config{
		Executor: &NoopExecutor{},
		Managers: remotes,
		Conn:     conn,
		DB:       db,
	})
	assert.NoError(t, err)

	return agent, func() {
		// Run cleanups in reverse registration order, including index 0
		// (the original loop used i > 0 and leaked the test CA).
		for i := len(cleanup) - 1; i >= 0; i-- {
			cleanup[i]()
		}
	}
}
// GetRemoteCA returns the remote endpoint's CA certificate
func GetRemoteCA(ctx context.Context, d digest.Digest, picker *picker.Picker) (RootCA, error) {
	// We need a valid picker to be able to Dial to a remote CA
	if picker == nil {
		return RootCA{}, fmt.Errorf("valid remote address picker required")
	}

	// This TLS Config is intentionally using InsecureSkipVerify. Either we're
	// doing TOFU, in which case we don't validate the remote CA, or we're using
	// a user-supplied hash to check the integrity of the CA certificate.
	insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecureCreds),
		grpc.WithBackoffMaxDelay(10 * time.Second),
		grpc.WithPicker(picker),
	}

	firstAddr, err := picker.PickAddr()
	if err != nil {
		return RootCA{}, err
	}

	conn, err := grpc.Dial(firstAddr, opts...)
	if err != nil {
		return RootCA{}, err
	}
	defer conn.Close()

	client := api.NewCAClient(conn)
	response, err := client.GetRootCACertificate(ctx, &api.GetRootCACertificateRequest{})
	if err != nil {
		return RootCA{}, err
	}

	if d != "" {
		verifier, err := digest.NewDigestVerifier(d)
		if err != nil {
			return RootCA{}, fmt.Errorf("unexpected error getting digest verifier: %v", err)
		}

		if _, err := io.Copy(verifier, bytes.NewReader(response.Certificate)); err != nil {
			return RootCA{}, err
		}

		if !verifier.Verified() {
			return RootCA{}, fmt.Errorf("remote CA does not match fingerprint. Expected: %s", d.Hex())
		}
	}

	// Check the validity of the remote Cert
	_, err = helpers.ParseCertificatePEM(response.Certificate)
	if err != nil {
		return RootCA{}, err
	}

	// Create a Pool with our RootCACertificate
	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM(response.Certificate) {
		return RootCA{}, fmt.Errorf("failed to append certificate to cert pool")
	}

	return RootCA{Cert: response.Certificate, Pool: pool}, nil
}
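// A minimal usage sketch for GetRemoteCA, showing the two verification modes
// it supports: TOFU with an empty digest, and pinning against a known
// fingerprint. The manager address and the digest value are hypothetical
// placeholders, not values from the surrounding code; NewRemotes/NewPicker are
// used exactly as in the snippets above.
func exampleFetchRemoteCA(ctx context.Context) (RootCA, error) {
	addr := "manager.example.com:4242" // hypothetical manager address
	remotes := picker.NewRemotes(api.Peer{Addr: addr})
	p := picker.NewPicker(remotes, addr)

	// TOFU: an empty digest skips fingerprint verification entirely.
	if _, err := GetRemoteCA(ctx, "", p); err != nil {
		return RootCA{}, err
	}

	// Pinned: a caller that already knows the CA fingerprint passes it as a
	// digest, and GetRemoteCA rejects any certificate that doesn't match.
	expected := digest.Digest("sha256:" + strings.Repeat("0", 64)) // placeholder fingerprint
	return GetRemoteCA(ctx, expected, p)
}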
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {
	var manager api.Peer
	select {
	case <-ctx.Done():
	case manager = <-n.remotes.WaitSelect(ctx):
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}

	picker := picker.NewPicker(n.remotes, manager.Addr)
	conn, err := grpc.Dial(manager.Addr,
		grpc.WithPicker(picker),
		grpc.WithTransportCredentials(creds),
		grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
	if err != nil {
		return err
	}

	agent, err := New(&Config{
		Hostname:         n.config.Hostname,
		Managers:         n.remotes,
		Executor:         n.config.Executor,
		DB:               db,
		Conn:             conn,
		Picker:           picker,
		NotifyRoleChange: n.roleChangeReq,
	})
	if err != nil {
		return err
	}
	if err := agent.Start(ctx); err != nil {
		return err
	}

	n.Lock()
	n.agent = agent
	n.Unlock()

	defer func() {
		n.Lock()
		n.agent = nil
		n.Unlock()
	}()

	go func() {
		<-agent.Ready()
		close(ready)
	}()

	// TODO: manually call stop on context cancellation?
	return agent.Err(context.Background())
}
// Conn returns a *grpc.ClientConn with a picker which picks the raft cluster leader.
// The internal connection is established lazily on this call.
// It can return an error if the cluster wasn't ready at the moment of the initial call.
func (c *ConnSelector) Conn() (*grpc.ClientConn, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.cc != nil {
		return c.cc, nil
	}

	addr, err := c.cluster.LeaderAddr()
	if err != nil {
		return nil, err
	}

	picker := &picker{raft: c.cluster, addr: addr}
	opts := append(c.opts, grpc.WithPicker(picker))
	cc, err := grpc.Dial(addr, opts...)
	if err != nil {
		return nil, err
	}

	c.cc = cc
	return c.cc, nil
}
// Conn returns a *grpc.ClientConn with a picker which picks the raft cluster leader.
// The internal connection is established lazily on this call.
// It can return an error if the cluster wasn't ready at the moment of the initial call.
func (c *ConnSelector) Conn() (*grpc.ClientConn, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.cc != nil {
		return c.cc, nil
	}

	addr, err := c.cluster.LeaderAddr()
	if err != nil {
		return nil, err
	}

	c.picker = newPicker(c.cluster, addr)
	go c.picker.updateLoop()

	opts := append(c.opts, grpc.WithPicker(c.picker))
	cc, err := grpc.Dial(addr, opts...)
	if err != nil {
		return nil, err
	}

	c.cc = cc
	return c.cc, nil
}
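// The two Conn variants above differ in one respect: the first pins the picker
// to the leader address observed at dial time, while the second hands the
// picker its own updateLoop goroutine so it can follow leadership changes.
// Below is a minimal sketch of how a caller might use the selector, assuming a
// hypothetical *ConnSelector already wired to a raft cluster; the CA client is
// used purely for illustration, with the same api calls as the snippets above.
func exampleLeaderCall(ctx context.Context, cs *ConnSelector) error {
	// Lazily dials the current leader; subsequent calls return the cached conn.
	conn, err := cs.Conn()
	if err != nil {
		return err
	}

	// Any client built on conn targets the leader transparently via the picker.
	client := api.NewCAClient(conn)
	_, err = client.GetRootCACertificate(ctx, &api.GetRootCACertificateRequest{})
	return err
}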
func (mc *managersCluster) addAgents(count int) error {
	var addrs []api.Peer
	for _, m := range mc.ms {
		addrs = append(addrs, api.Peer{Addr: m.addr})
	}

	for i := 0; i < count; i++ {
		asConfig, err := mc.tc.NewNodeConfig(ca.AgentRole)
		if err != nil {
			return err
		}

		managers := picker.NewRemotes(addrs...)
		peer, err := managers.Select()
		if err != nil {
			return err
		}

		conn, err := grpc.Dial(peer.Addr,
			grpc.WithPicker(picker.NewPicker(managers)),
			grpc.WithTransportCredentials(asConfig.ClientTLSCreds))
		if err != nil {
			return err
		}

		id := strconv.Itoa(rand.Int())
		a, err := agent.New(&agent.Config{
			Hostname: "hostname_" + id,
			Managers: managers,
			Executor: &NoopExecutor{},
			Conn:     conn,
		})
		if err != nil {
			return err
		}
		if err := a.Start(context.Background()); err != nil {
			return err
		}
		mc.agents = append(mc.agents, a)
	}
	return nil
}
func TestAgentStartStop(t *testing.T) {
	tc := testutils.NewTestCA(t, testutils.AcceptancePolicy(true, true, ""))
	defer tc.Stop()

	agentSecurityConfig, err := tc.NewNodeConfig(ca.AgentRole)
	assert.NoError(t, err)

	addr := "localhost:4949"
	remotes := picker.NewRemotes(api.Peer{Addr: addr})

	conn, err := grpc.Dial(addr,
		grpc.WithPicker(picker.NewPicker(remotes, addr)),
		grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds))
	assert.NoError(t, err)

	db, cleanup := storageTestEnv(t)
	defer cleanup()

	agent, err := New(&Config{
		Executor: &NoopExecutor{},
		Managers: remotes,
		Conn:     conn,
		DB:       db,
	})
	assert.NoError(t, err)
	assert.NotNil(t, agent)

	// Use a cancelable timeout context and release it when the test ends,
	// instead of discarding the cancel func.
	ctx, cancel := context.WithTimeout(context.Background(), 5000*time.Millisecond)
	defer cancel()

	assert.Equal(t, errAgentNotStarted, agent.Stop(ctx))
	assert.NoError(t, agent.Start(ctx))

	if err := agent.Start(ctx); err != errAgentStarted {
		t.Fatalf("expected agent started error: %v", err)
	}

	assert.NoError(t, agent.Stop(ctx))
}
// GetRemoteSignedCertificate submits a CSR together with the intended role to a remote CA server address
// available through a picker, and that is part of a CA identified by a specific certificate pool.
func GetRemoteSignedCertificate(ctx context.Context, csr []byte, role, secret string, rootCAPool *x509.CertPool, picker *picker.Picker, creds credentials.TransportAuthenticator, nodeInfo chan<- api.IssueNodeCertificateResponse) ([]byte, error) {
	if rootCAPool == nil {
		return nil, fmt.Errorf("valid root CA pool required")
	}
	if picker == nil {
		return nil, fmt.Errorf("valid remote address picker required")
	}

	if creds == nil {
		// This is our only non-MTLS request, and it happens when we are bootstrapping our TLS certs.
		// We're using CARole as the server name, so an external CA doesn't also have to have ManagerRole in the cert SANs.
		creds = credentials.NewTLS(&tls.Config{ServerName: CARole, RootCAs: rootCAPool})
	}

	opts := []grpc.DialOption{
		grpc.WithTransportCredentials(creds),
		grpc.WithBackoffMaxDelay(10 * time.Second),
		grpc.WithPicker(picker),
	}

	firstAddr, err := picker.PickAddr()
	if err != nil {
		return nil, err
	}

	conn, err := grpc.Dial(firstAddr, opts...)
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	// Create a CAClient to retrieve a new Certificate
	caClient := api.NewNodeCAClient(conn)

	// Convert our internal string roles into an API role
	apiRole, err := FormatRole(role)
	if err != nil {
		return nil, err
	}

	// Send the Request and retrieve the request token
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: apiRole, Secret: secret}
	issueResponse, err := caClient.IssueNodeCertificate(ctx, issueRequest)
	if err != nil {
		return nil, err
	}

	// Send back the NodeID on nodeInfo, so the caller can know what ID was assigned by the CA
	if nodeInfo != nil {
		nodeInfo <- *issueResponse
	}

	statusRequest := &api.NodeCertificateStatusRequest{NodeID: issueResponse.NodeID}
	expBackoff := events.NewExponentialBackoff(events.ExponentialBackoffConfig{
		Base:   time.Second,
		Factor: time.Second,
		Max:    30 * time.Second,
	})

	log.Infof("Waiting for TLS certificate to be issued...")
	// Exponential backoff, with a max of 30 seconds between retries
	for {
		// Send the Request and retrieve the certificate
		statusResponse, err := caClient.NodeCertificateStatus(ctx, statusRequest)
		if err != nil {
			return nil, err
		}

		// If the certificate was issued, return
		if statusResponse.Status.State == api.IssuanceStateIssued {
			if statusResponse.Certificate == nil {
				return nil, fmt.Errorf("no certificate in CertificateStatus response")
			}

			// The certificate in the response must match the CSR
			// we submitted. If we are getting a response for a
			// certificate that was previously issued, we need to
			// retry until the certificate gets updated per our
			// current request.
			if bytes.Equal(statusResponse.Certificate.CSR, csr) {
				return statusResponse.Certificate.Certificate, nil
			}
		}

		// If we're still pending, the issuance failed, or the state is unknown,
		// keep trying.
		expBackoff.Failure(nil, nil)
		time.Sleep(expBackoff.Proceed(nil))
	}
}
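// A minimal sketch of the bootstrap path through GetRemoteSignedCertificate,
// assuming the caller has already generated a CSR and fetched the remote
// RootCA (e.g. via GetRemoteCA above). The address and join secret are
// hypothetical placeholders. Passing nil creds exercises the non-MTLS
// bootstrap branch, and the buffered nodeInfo channel captures the node ID
// the CA assigned.
func exampleRequestCertificate(ctx context.Context, csr []byte, rootCA RootCA) ([]byte, error) {
	addr := "manager.example.com:4242" // hypothetical manager address
	remotes := picker.NewRemotes(api.Peer{Addr: addr})
	p := picker.NewPicker(remotes, addr)

	// Buffered so the single send inside GetRemoteSignedCertificate never blocks.
	nodeInfo := make(chan api.IssueNodeCertificateResponse, 1)

	cert, err := GetRemoteSignedCertificate(ctx, csr, AgentRole, "join-secret", rootCA.Pool, p, nil, nodeInfo)
	if err != nil {
		return nil, err
	}

	log.Infof("certificate issued for node %s", (<-nodeInfo).NodeID)
	return cert, nil
}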