// Run starts the keymanager, it doesn't return func (k *KeyManager) Run(ctx context.Context) error { k.mu.Lock() log := log.G(ctx).WithField("module", "keymanager") var ( clusters []*api.Cluster err error ) k.store.View(func(readTx store.ReadTx) { clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName)) }) if err != nil { log.Errorf("reading cluster config failed, %v", err) k.mu.Unlock() return err } cluster := clusters[0] if len(cluster.NetworkBootstrapKeys) == 0 { for _, subsys := range k.config.Subsystems { for i := 0; i < keyringSize; i++ { k.keyRing.keys = append(k.keyRing.keys, k.allocateKey(ctx, subsys)) } } if err := k.updateKey(cluster); err != nil { log.Errorf("store update failed %v", err) } } else { k.keyRing.lClock = cluster.EncryptionKeyLamportClock k.keyRing.keys = cluster.NetworkBootstrapKeys k.rotateKey(ctx) } ticker := time.NewTicker(k.config.RotationInterval) defer ticker.Stop() k.ctx, k.cancel = context.WithCancel(ctx) k.mu.Unlock() for { select { case <-ticker.C: k.rotateKey(ctx) case <-k.ctx.Done(): return nil } } }
// RenewTLSConfig will continuously monitor for the necessity of renewing the local certificates, either by // issuing them locally if key-material is available, or requesting them from a remote CA. func RenewTLSConfig(ctx context.Context, s *SecurityConfig, remotes remotes.Remotes, renew <-chan struct{}) <-chan CertificateUpdate { updates := make(chan CertificateUpdate) go func() { var retry time.Duration defer close(updates) for { ctx = log.WithModule(ctx, "tls") log := log.G(ctx).WithFields(logrus.Fields{ "node.id": s.ClientTLSCreds.NodeID(), "node.role": s.ClientTLSCreds.Role(), }) // Our starting default will be 5 minutes retry = 5 * time.Minute // Since the expiration of the certificate is managed remotely we should update our // retry timer on every iteration of this loop. // Retrieve the current certificate expiration information. validFrom, validUntil, err := readCertValidity(s.KeyReader()) if err != nil { // We failed to read the expiration, let's stick with the starting default log.Errorf("failed to read the expiration of the TLS certificate in: %s", s.KeyReader().Target()) updates <- CertificateUpdate{Err: errors.New("failed to read certificate expiration")} } else { // If we have an expired certificate, we let's stick with the starting default in // the hope that this is a temporary clock skew. if validUntil.Before(time.Now()) { log.WithError(err).Errorf("failed to create a new client TLS config") updates <- CertificateUpdate{Err: errors.New("TLS certificate is expired")} } else { // Random retry time between 50% and 80% of the total time to expiration retry = calculateRandomExpiry(validFrom, validUntil) } } log.WithFields(logrus.Fields{ "time": time.Now().Add(retry), }).Debugf("next certificate renewal scheduled") select { case <-time.After(retry): log.Infof("renewing certificate") case <-renew: log.Infof("forced certificate renewal") case <-ctx.Done(): log.Infof("shuting down certificate renewal routine") return } // ignore errors - it will just try again laster if err := RenewTLSConfigNow(ctx, s, remotes); err != nil { updates <- CertificateUpdate{Err: err} } else { updates <- CertificateUpdate{Role: s.ClientTLSCreds.Role()} } } }() return updates }
func (k *KeyManager) rotateKey(ctx context.Context) error { log := log.G(ctx).WithField("module", "keymanager") var ( clusters []*api.Cluster err error ) k.store.View(func(readTx store.ReadTx) { clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName)) }) if err != nil { log.Errorf("reading cluster config failed, %v", err) return err } cluster := clusters[0] if len(cluster.NetworkBootstrapKeys) == 0 { panic(fmt.Errorf("no key in the cluster config")) } subsysKeys := map[string][]*api.EncryptionKey{} for _, key := range k.keyRing.keys { subsysKeys[key.Subsystem] = append(subsysKeys[key.Subsystem], key) } k.keyRing.keys = []*api.EncryptionKey{} // We maintain the latest key and the one before in the key ring to allow // agents to communicate without disruption on key change. for subsys, keys := range subsysKeys { if len(keys) == keyringSize { min := 0 for i, key := range keys[1:] { if key.LamportTime < keys[min].LamportTime { min = i } } keys = append(keys[0:min], keys[min+1:]...) } keys = append(keys, k.allocateKey(ctx, subsys)) subsysKeys[subsys] = keys } for _, keys := range subsysKeys { k.keyRing.keys = append(k.keyRing.keys, keys...) } return k.updateKey(cluster) }
// RenewTLSConfig will continuously monitor for the necessity of renewing the local certificates, either by // issuing them locally if key-material is available, or requesting them from a remote CA. func RenewTLSConfig(ctx context.Context, s *SecurityConfig, baseCertDir string, remotes remotes.Remotes, renew <-chan struct{}) <-chan CertificateUpdate { paths := NewConfigPaths(baseCertDir) updates := make(chan CertificateUpdate) go func() { var retry time.Duration defer close(updates) for { ctx = log.WithModule(ctx, "tls") log := log.G(ctx).WithFields(logrus.Fields{ "node.id": s.ClientTLSCreds.NodeID(), "node.role": s.ClientTLSCreds.Role(), }) // Our starting default will be 5 minutes retry = 5 * time.Minute // Since the expiration of the certificate is managed remotely we should update our // retry timer on every iteration of this loop. // Retrieve the time until the certificate expires. expiresIn, err := readCertExpiration(paths.Node) if err != nil { // We failed to read the expiration, let's stick with the starting default log.Errorf("failed to read the expiration of the TLS certificate in: %s", paths.Node.Cert) updates <- CertificateUpdate{Err: fmt.Errorf("failed to read certificate expiration")} } else { // If we have an expired certificate, we let's stick with the starting default in // the hope that this is a temporary clock skew. if expiresIn.Minutes() < 0 { log.WithError(err).Errorf("failed to create a new client TLS config") updates <- CertificateUpdate{Err: fmt.Errorf("TLS certificate is expired")} } else { // Random retry time between 50% and 80% of the total time to expiration retry = calculateRandomExpiry(expiresIn) } } log.WithFields(logrus.Fields{ "time": time.Now().Add(retry), }).Debugf("next certificate renewal scheduled") select { case <-time.After(retry): log.Infof("renewing certificate") case <-renew: log.Infof("forced certificate renewal") case <-ctx.Done(): log.Infof("shuting down certificate renewal routine") return } // Let's request new certs. Renewals don't require a token. rootCA := s.RootCA() tlsKeyPair, err := rootCA.RequestAndSaveNewCertificates(ctx, paths.Node, "", remotes, s.ClientTLSCreds, nil) if err != nil { log.WithError(err).Errorf("failed to renew the certificate") updates <- CertificateUpdate{Err: err} continue } clientTLSConfig, err := NewClientTLSConfig(tlsKeyPair, rootCA.Pool, CARole) if err != nil { log.WithError(err).Errorf("failed to create a new client config") updates <- CertificateUpdate{Err: err} } serverTLSConfig, err := NewServerTLSConfig(tlsKeyPair, rootCA.Pool) if err != nil { log.WithError(err).Errorf("failed to create a new server config") updates <- CertificateUpdate{Err: err} } err = s.ClientTLSCreds.LoadNewTLSConfig(clientTLSConfig) if err != nil { log.WithError(err).Errorf("failed to update the client credentials") updates <- CertificateUpdate{Err: err} } // Update the external CA to use the new client TLS // config using a copy without a serverName specified. s.externalCA.UpdateTLSConfig(&tls.Config{ Certificates: clientTLSConfig.Certificates, RootCAs: clientTLSConfig.RootCAs, MinVersion: tls.VersionTLS12, }) err = s.ServerTLSCreds.LoadNewTLSConfig(serverTLSConfig) if err != nil { log.WithError(err).Errorf("failed to update the server TLS credentials") updates <- CertificateUpdate{Err: err} } updates <- CertificateUpdate{Role: s.ClientTLSCreds.Role()} } }() return updates }
// RenewTLSConfig will continuously monitor for the necessity of renewing the local certificates, either by // issuing them locally if key-material is available, or requesting them from a remote CA. func RenewTLSConfig(ctx context.Context, s *SecurityConfig, connBroker *connectionbroker.Broker, renew <-chan struct{}) <-chan CertificateUpdate { updates := make(chan CertificateUpdate) go func() { var retry time.Duration expBackoff := events.NewExponentialBackoff(RenewTLSExponentialBackoff) defer close(updates) for { ctx = log.WithModule(ctx, "tls") log := log.G(ctx).WithFields(logrus.Fields{ "node.id": s.ClientTLSCreds.NodeID(), "node.role": s.ClientTLSCreds.Role(), }) // Our starting default will be 5 minutes retry = 5 * time.Minute // Since the expiration of the certificate is managed remotely we should update our // retry timer on every iteration of this loop. // Retrieve the current certificate expiration information. validFrom, validUntil, err := readCertValidity(s.KeyReader()) if err != nil { // We failed to read the expiration, let's stick with the starting default log.Errorf("failed to read the expiration of the TLS certificate in: %s", s.KeyReader().Target()) select { case updates <- CertificateUpdate{Err: errors.New("failed to read certificate expiration")}: case <-ctx.Done(): log.Info("shutting down certificate renewal routine") return } } else { // If we have an expired certificate, try to renew immediately: the hope that this is a temporary clock skew, or // we can issue our own TLS certs. if validUntil.Before(time.Now()) { log.Warn("the current TLS certificate is expired, so an attempt to renew it will be made immediately") // retry immediately(ish) with exponential backoff retry = expBackoff.Proceed(nil) } else { // Random retry time between 50% and 80% of the total time to expiration retry = calculateRandomExpiry(validFrom, validUntil) } } log.WithFields(logrus.Fields{ "time": time.Now().Add(retry), }).Debugf("next certificate renewal scheduled for %v from now", retry) select { case <-time.After(retry): log.Info("renewing certificate") case <-renew: log.Info("forced certificate renewal") case <-ctx.Done(): log.Info("shutting down certificate renewal routine") return } // ignore errors - it will just try again later var certUpdate CertificateUpdate if err := RenewTLSConfigNow(ctx, s, connBroker); err != nil { certUpdate.Err = err expBackoff.Failure(nil, nil) } else { certUpdate.Role = s.ClientTLSCreds.Role() expBackoff = events.NewExponentialBackoff(RenewTLSExponentialBackoff) } select { case updates <- certUpdate: case <-ctx.Done(): log.Info("shutting down certificate renewal routine") return } } }() return updates }