// WaitForCluster waits until the node observes that the cluster-wide config
// is committed to raft. This ensures that we can see and serve information
// related to the cluster.
func WaitForCluster(ctx context.Context, n *Node) (cluster *api.Cluster, err error) {
    watch, cancel := state.Watch(n.MemoryStore().WatchQueue(), state.EventCreateCluster{})
    defer cancel()

    var clusters []*api.Cluster
    n.MemoryStore().View(func(readTx store.ReadTx) {
        clusters, err = store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
    })
    if err != nil {
        return nil, err
    }

    if len(clusters) == 1 {
        cluster = clusters[0]
    } else {
        select {
        case e := <-watch:
            cluster = e.(state.EventCreateCluster).Cluster
        case <-ctx.Done():
            return nil, ctx.Err()
        }
    }

    return cluster, nil
}
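// Illustrative sketch (not part of the package): a minimal caller for
// WaitForCluster with a bounded wait. The timeout value is an assumption
// made for the example; n is assumed to be an already-started *Node.
func exampleWaitForCluster(n *Node) (*api.Cluster, error) {
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
    defer cancel()

    // Blocks until the cluster object is committed to raft, the watch
    // fires, or the timeout expires (in which case ctx.Err() is returned).
    return WaitForCluster(ctx, n)
}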
// Verify the key generation and rotation for default subsystems
func TestKeyManagerDefaultSubsystem(t *testing.T) {
    st := store.NewMemoryStore(nil)
    defer st.Close()
    createCluster(t, st, "default", "default")

    k := New(st, DefaultConfig())

    ctx := context.Background()
    go k.Run(ctx)
    time.Sleep(250 * time.Millisecond)

    // verify the number of keys allocated matches the keyring size.
    var (
        clusters []*api.Cluster
        err      error
    )
    k.store.View(func(readTx store.ReadTx) {
        clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
    })
    assert.NoError(t, err)
    assert.Equal(t, len(clusters[0].NetworkBootstrapKeys), len(k.config.Subsystems)*keyringSize)

    key1 := clusters[0].NetworkBootstrapKeys[0].Key

    k.rotateKey(ctx)

    // verify that after a rotation the oldest key has been removed from the keyring
    assert.Equal(t, len(k.keyRing.keys), len(k.config.Subsystems)*keyringSize)
    for _, key := range k.keyRing.keys {
        match := bytes.Equal(key.Key, key1)
        assert.False(t, match)
    }
}
// AttachNetwork allows the node to request the resource allocation
// needed for a network attachment on the specific node.
// - Returns `InvalidArgument` if the Spec is malformed.
// - Returns `NotFound` if the Network is not found.
// - Returns `PermissionDenied` if the Network is not manually attachable.
// - Returns an error if the creation fails.
func (ra *ResourceAllocator) AttachNetwork(ctx context.Context, request *api.AttachNetworkRequest) (*api.AttachNetworkResponse, error) {
    nodeInfo, err := ca.RemoteNode(ctx)
    if err != nil {
        return nil, err
    }

    var network *api.Network
    ra.store.View(func(tx store.ReadTx) {
        network = store.GetNetwork(tx, request.Config.Target)
        if network == nil {
            if networks, err := store.FindNetworks(tx, store.ByName(request.Config.Target)); err == nil && len(networks) == 1 {
                network = networks[0]
            }
        }
    })
    if network == nil {
        return nil, grpc.Errorf(codes.NotFound, "network %s not found", request.Config.Target)
    }

    if !network.Spec.Attachable {
        return nil, grpc.Errorf(codes.PermissionDenied, "network %s not manually attachable", request.Config.Target)
    }

    t := &api.Task{
        ID:     identity.NewID(),
        NodeID: nodeInfo.NodeID,
        Spec: api.TaskSpec{
            Runtime: &api.TaskSpec_Attachment{
                Attachment: &api.NetworkAttachmentSpec{
                    ContainerID: request.ContainerID,
                },
            },
            Networks: []*api.NetworkAttachmentConfig{
                {
                    Target:    network.ID,
                    Addresses: request.Config.Addresses,
                },
            },
        },
        Status: api.TaskStatus{
            State:     api.TaskStateNew,
            Timestamp: ptypes.MustTimestampProto(time.Now()),
            Message:   "created",
        },
        DesiredState: api.TaskStateRunning,
        // TODO: Add Network attachment.
    }

    if err := ra.store.Update(func(tx store.Tx) error {
        return store.CreateTask(tx, t)
    }); err != nil {
        return nil, err
    }

    return &api.AttachNetworkResponse{AttachmentID: t.ID}, nil
}
func (n *Node) getCurrentRaftConfig() api.RaftConfig {
    raftConfig := DefaultRaftConfig()
    n.memoryStore.View(func(readTx store.ReadTx) {
        clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
        if err == nil && len(clusters) == 1 {
            raftConfig = clusters[0].Spec.Raft
        }
    })
    return raftConfig
}
// ListSecrets returns a `ListSecretResponse` with a list of all non-internal `Secret`s being
// managed, or all secrets matching any name in `ListSecretsRequest.Names`, any
// name prefix in `ListSecretsRequest.NamePrefixes`, any id in
// `ListSecretsRequest.SecretIDs`, or any id prefix in `ListSecretsRequest.IDPrefixes`.
// - Returns an error if listing fails.
func (s *Server) ListSecrets(ctx context.Context, request *api.ListSecretsRequest) (*api.ListSecretsResponse, error) {
    var (
        secrets     []*api.Secret
        respSecrets []*api.Secret
        err         error
        byFilters   []store.By
        by          store.By
        labels      map[string]string
    )

    // return all secrets that match either any of the names or any of the name prefixes (why would you give both?)
    if request.Filters != nil {
        for _, name := range request.Filters.Names {
            byFilters = append(byFilters, store.ByName(name))
        }
        for _, prefix := range request.Filters.NamePrefixes {
            byFilters = append(byFilters, store.ByNamePrefix(prefix))
        }
        for _, prefix := range request.Filters.IDPrefixes {
            byFilters = append(byFilters, store.ByIDPrefix(prefix))
        }
        labels = request.Filters.Labels
    }

    switch len(byFilters) {
    case 0:
        by = store.All
    case 1:
        by = byFilters[0]
    default:
        by = store.Or(byFilters...)
    }

    s.store.View(func(tx store.ReadTx) {
        secrets, err = store.FindSecrets(tx, by)
    })
    if err != nil {
        return nil, err
    }

    // strip secret data from the secret, filter by label, and filter out all internal secrets
    for _, secret := range secrets {
        if secret.Internal || !filterMatchLabels(secret.Spec.Annotations.Labels, labels) {
            continue
        }
        secret.Spec.Data = nil // clean the actual secret data so it's never returned
        respSecrets = append(respSecrets, secret)
    }

    return &api.ListSecretsResponse{Secrets: respSecrets}, nil
}
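// Illustrative sketch (standard library only): the filter-combination
// switch above, in isolation. pred stands in for store.By so the snippet
// is runnable on its own; all names here are assumptions for the example.
// anyOf mirrors ListSecrets: zero filters match everything (store.All),
// one filter is used directly, several are OR-ed together (store.Or).
package main

import (
    "fmt"
    "strings"
)

type pred func(name string) bool

func anyOf(filters ...pred) pred {
    switch len(filters) {
    case 0:
        return func(string) bool { return true } // store.All
    case 1:
        return filters[0]
    default:
        return func(name string) bool { // store.Or
            for _, f := range filters {
                if f(name) {
                    return true
                }
            }
            return false
        }
    }
}

func main() {
    match := anyOf(
        func(n string) bool { return n == "db-password" },            // store.ByName
        func(n string) bool { return strings.HasPrefix(n, "tls-") }, // store.ByNamePrefix
    )
    fmt.Println(match("tls-cert"), match("other")) // true false
}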
func (r *ReplicatedOrchestrator) initCluster(readTx store.ReadTx) error {
    clusters, err := store.FindClusters(readTx, store.ByName("default"))
    if err != nil {
        return err
    }

    if len(clusters) != 1 {
        // we'll just pick it up when it is created.
        return nil
    }

    r.cluster = clusters[0]
    return nil
}
// Run starts the keymanager; it doesn't return until the context is cancelled.
func (k *KeyManager) Run(ctx context.Context) error {
    k.mu.Lock()
    log := log.G(ctx).WithField("module", "keymanager")
    var (
        clusters []*api.Cluster
        err      error
    )
    k.store.View(func(readTx store.ReadTx) {
        clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
    })

    if err != nil {
        log.Errorf("reading cluster config failed, %v", err)
        k.mu.Unlock()
        return err
    }

    cluster := clusters[0]
    if len(cluster.NetworkBootstrapKeys) == 0 {
        for _, subsys := range k.config.Subsystems {
            for i := 0; i < keyringSize; i++ {
                k.keyRing.keys = append(k.keyRing.keys, k.allocateKey(ctx, subsys))
            }
        }
        if err := k.updateKey(cluster); err != nil {
            log.Errorf("store update failed %v", err)
        }
    } else {
        k.keyRing.lClock = cluster.EncryptionKeyLamportClock
        k.keyRing.keys = cluster.NetworkBootstrapKeys

        k.rotateKey(ctx)
    }

    ticker := time.NewTicker(k.config.RotationInterval)
    defer ticker.Stop()

    k.ctx, k.cancel = context.WithCancel(ctx)
    k.mu.Unlock()

    for {
        select {
        case <-ticker.C:
            k.rotateKey(ctx)
        case <-k.ctx.Done():
            return nil
        }
    }
}
func (k *KeyManager) rotateKey(ctx context.Context) error {
    log := log.G(ctx).WithField("module", "keymanager")
    var (
        clusters []*api.Cluster
        err      error
    )
    k.store.View(func(readTx store.ReadTx) {
        clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
    })

    if err != nil {
        log.Errorf("reading cluster config failed, %v", err)
        return err
    }

    cluster := clusters[0]
    if len(cluster.NetworkBootstrapKeys) == 0 {
        panic(fmt.Errorf("no key in the cluster config"))
    }

    subsysKeys := map[string][]*api.EncryptionKey{}
    for _, key := range k.keyRing.keys {
        subsysKeys[key.Subsystem] = append(subsysKeys[key.Subsystem], key)
    }
    k.keyRing.keys = []*api.EncryptionKey{}

    // We maintain the latest key and the one before in the key ring to allow
    // agents to communicate without disruption on key change.
    for subsys, keys := range subsysKeys {
        if len(keys) == keyringSize {
            min := 0
            for i, key := range keys[1:] {
                if key.LamportTime < keys[min].LamportTime {
                    min = i + 1 // i ranges over keys[1:], so offset by one
                }
            }
            keys = append(keys[0:min], keys[min+1:]...)
        }
        keys = append(keys, k.allocateKey(ctx, subsys))
        subsysKeys[subsys] = keys
    }

    for _, keys := range subsysKeys {
        k.keyRing.keys = append(k.keyRing.keys, keys...)
    }

    return k.updateKey(cluster)
}
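// Illustrative sketch (standard library only): the oldest-key pruning step
// above, in isolation. key stands in for api.EncryptionKey with just the
// field the pruning cares about; values are made up for the example.
package main

import "fmt"

type key struct {
    ID          int
    LamportTime uint64
}

// dropOldest removes the entry with the smallest LamportTime. Note the
// i+1 offset: ranging over keys[1:] yields indices shifted by one
// relative to the full slice.
func dropOldest(keys []key) []key {
    min := 0
    for i, k := range keys[1:] {
        if k.LamportTime < keys[min].LamportTime {
            min = i + 1
        }
    }
    return append(keys[:min], keys[min+1:]...)
}

func main() {
    ring := []key{{1, 7}, {2, 3}, {3, 9}}
    fmt.Println(dropOldest(ring)) // [{1 7} {3 9}]
}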
func TestGetUnlockKey(t *testing.T) {
    t.Parallel()

    tc := testutils.NewTestCA(t)
    defer tc.Stop()

    var cluster *api.Cluster
    tc.MemoryStore.View(func(tx store.ReadTx) {
        clusters, err := store.FindClusters(tx, store.ByName(store.DefaultClusterName))
        require.NoError(t, err)
        cluster = clusters[0]
    })

    resp, err := tc.CAClients[0].GetUnlockKey(context.Background(), &api.GetUnlockKeyRequest{})
    require.NoError(t, err)
    require.Nil(t, resp.UnlockKey)
    require.Equal(t, cluster.Meta.Version, resp.Version)

    // Update the unlock key
    require.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
        cluster = store.GetCluster(tx, cluster.ID)
        cluster.Spec.EncryptionConfig.AutoLockManagers = true
        cluster.UnlockKeys = []*api.EncryptionKey{{
            Subsystem: ca.ManagerRole,
            Key:       []byte("secret"),
        }}
        return store.UpdateCluster(tx, cluster)
    }))

    tc.MemoryStore.View(func(tx store.ReadTx) {
        cluster = store.GetCluster(tx, cluster.ID)
    })

    require.NoError(t, raftutils.PollFuncWithTimeout(nil, func() error {
        resp, err = tc.CAClients[0].GetUnlockKey(context.Background(), &api.GetUnlockKeyRequest{})
        if err != nil {
            return fmt.Errorf("get unlock key: %v", err)
        }
        if !bytes.Equal(resp.UnlockKey, []byte("secret")) {
            return fmt.Errorf("secret hasn't rotated yet")
        }
        if cluster.Meta.Version.Index > resp.Version.Index {
            return fmt.Errorf("hasn't updated to the right version yet")
        }
        return nil
    }, 250*time.Millisecond))
}
// Verify the key generation and rotation for IPsec subsystem
func TestKeyManagerCustomSubsystem(t *testing.T) {
    st := store.NewMemoryStore(nil)
    defer st.Close()
    createCluster(t, st, "default", "default")

    config := &Config{
        ClusterName:      store.DefaultClusterName,
        Keylen:           DefaultKeyLen,
        RotationInterval: DefaultKeyRotationInterval,
        Subsystems:       []string{SubsystemIPSec},
    }
    k := New(st, config)

    ctx := context.Background()
    go k.Run(ctx)
    time.Sleep(250 * time.Millisecond)

    // verify the number of keys allocated matches the keyring size.
    var (
        clusters []*api.Cluster
        err      error
    )
    k.store.View(func(readTx store.ReadTx) {
        clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
    })
    assert.NoError(t, err)
    assert.Equal(t, len(clusters[0].NetworkBootstrapKeys), keyringSize)

    key1 := clusters[0].NetworkBootstrapKeys[0].Key

    k.rotateKey(ctx)

    // verify that after a rotation the oldest key has been removed from the keyring
    // also verify that all keys are for the right subsystem
    assert.Equal(t, len(k.keyRing.keys), keyringSize)
    for _, key := range k.keyRing.keys {
        match := bytes.Equal(key.Key, key1)
        assert.False(t, match)
        match = key.Subsystem == SubsystemIPSec
        assert.True(t, match)
    }
}
// Verify the key generation and rotation for IPsec subsystem
func TestKeyManagerCustomSubsystem(t *testing.T) {
    st := store.NewMemoryStore(nil)
    createCluster(t, st, "default", "default")

    config := &Config{
        ClusterName:      store.DefaultClusterName,
        Keylen:           DefaultKeyLen,
        RotationInterval: DefaultKeyRotationInterval,
        Subsystems:       []string{SubsystemIPSec},
    }
    k := New(st, config)

    ctx := context.Background()
    go k.Run(ctx)
    time.Sleep(250 * time.Millisecond)

    // verify the first key has been allocated and updated in the
    // store
    var (
        clusters []*api.Cluster
        err      error
    )
    k.store.View(func(readTx store.ReadTx) {
        clusters, err = store.FindClusters(readTx, store.ByName(k.config.ClusterName))
    })
    assert.NoError(t, err)
    assert.Equal(t, len(clusters[0].NetworkBootstrapKeys), 1)

    key1 := clusters[0].NetworkBootstrapKeys[0].Key

    k.rotateKey(ctx)
    k.rotateKey(ctx)

    // verify that after two rotations the keyring has two keys and the very
    // first key allocated has been removed
    assert.Equal(t, len(k.keyRing.keys), 2)
    for _, key := range k.keyRing.keys {
        match := bytes.Equal(key.Key, key1)
        assert.False(t, match)
    }
}
// Run is the TaskReaper's main loop.
func (tr *TaskReaper) Run() {
    defer close(tr.doneChan)

    tr.store.View(func(readTx store.ReadTx) {
        clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
        if err == nil && len(clusters) == 1 {
            tr.taskHistory = clusters[0].Spec.Orchestration.TaskHistoryRetentionLimit
        }
    })

    timer := time.NewTimer(reaperBatchingInterval)

    for {
        select {
        case event := <-tr.watcher:
            switch v := event.(type) {
            case state.EventCreateTask:
                t := v.Task
                tr.dirty[instanceTuple{
                    instance:  t.Slot,
                    serviceID: t.ServiceID,
                    nodeID:    t.NodeID,
                }] = struct{}{}
                if len(tr.dirty) > maxDirty {
                    timer.Stop()
                    tr.tick()
                } else {
                    timer.Reset(reaperBatchingInterval)
                }
            case state.EventUpdateCluster:
                tr.taskHistory = v.Cluster.Spec.Orchestration.TaskHistoryRetentionLimit
            }
        case <-timer.C:
            timer.Stop()
            tr.tick()
        case <-tr.stopChan:
            timer.Stop()
            return
        }
    }
}
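// Illustrative sketch (standard library only): the flush policy Run
// implements, in isolation: batch events for a quiet interval, but flush
// immediately when the batch outgrows maxDirty. Channel type and the
// values in main are assumptions for the example.
package main

import (
    "fmt"
    "time"
)

func batcher(events <-chan int, maxDirty int, interval time.Duration, flush func([]int)) {
    var batch []int
    timer := time.NewTimer(interval)
    defer timer.Stop()
    for {
        select {
        case e, ok := <-events:
            if !ok {
                flush(batch) // source closed: flush whatever is left
                return
            }
            batch = append(batch, e)
            if len(batch) > maxDirty {
                timer.Stop()
                flush(batch)
                batch = nil
            } else {
                timer.Reset(interval)
            }
        case <-timer.C:
            flush(batch)
            batch = nil
            timer.Reset(interval)
        }
    }
}

func main() {
    ch := make(chan int, 8)
    for i := 0; i < 5; i++ {
        ch <- i
    }
    close(ch)
    batcher(ch, 3, 10*time.Millisecond, func(b []int) { fmt.Println("flush", b) })
    // Output: flush [0 1 2 3]
    //         flush [4]
}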
func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
    na, err := networkallocator.New()
    if err != nil {
        return err
    }

    nc := &networkContext{
        nwkAllocator:        na,
        unallocatedTasks:    make(map[string]*api.Task),
        unallocatedServices: make(map[string]*api.Service),
        unallocatedNetworks: make(map[string]*api.Network),
        ingressNetwork:      newIngressNetwork(),
    }
    a.netCtx = nc
    defer func() {
        // Clear a.netCtx if initialization was unsuccessful.
        if err != nil {
            a.netCtx = nil
        }
    }()

    // Check if we have the ingress network. If not found create
    // it before reading all network objects for allocation.
    var networks []*api.Network
    a.store.View(func(tx store.ReadTx) {
        networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
        if len(networks) > 0 {
            nc.ingressNetwork = networks[0]
        }
    })
    if err != nil {
        return errors.Wrap(err, "failed to find ingress network during init")
    }

    // If ingress network is not found, create one right away
    // using the predefined template.
    if len(networks) == 0 {
        if err := a.store.Update(func(tx store.Tx) error {
            nc.ingressNetwork.ID = identity.NewID()
            if err := store.CreateNetwork(tx, nc.ingressNetwork); err != nil {
                return err
            }
            return nil
        }); err != nil {
            return errors.Wrap(err, "failed to create ingress network")
        }

        a.store.View(func(tx store.ReadTx) {
            networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
            if len(networks) > 0 {
                nc.ingressNetwork = networks[0]
            }
        })
        if err != nil {
            return errors.Wrap(err, "failed to find ingress network after creating it")
        }
    }

    // Try to complete ingress network allocation before anything else so
    // that we can get the preferred subnet for the ingress network.
    if !na.IsAllocated(nc.ingressNetwork) {
        if err := a.allocateNetwork(ctx, nc.ingressNetwork); err != nil {
            log.G(ctx).WithError(err).Error("failed allocating ingress network during init")
        } else if _, err := a.store.Batch(func(batch *store.Batch) error {
            if err := a.commitAllocatedNetwork(ctx, batch, nc.ingressNetwork); err != nil {
                log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
            }
            return nil
        }); err != nil {
            log.G(ctx).WithError(err).Error("failed committing allocation of ingress network during init")
        }
    }

    // Allocate networks in the store so far before we started
    // watching.
    a.store.View(func(tx store.ReadTx) {
        networks, err = store.FindNetworks(tx, store.All)
    })
    if err != nil {
        return errors.Wrap(err, "error listing all networks in store while trying to allocate during init")
    }

    var allocatedNetworks []*api.Network
    for _, n := range networks {
        if na.IsAllocated(n) {
            continue
        }

        if err := a.allocateNetwork(ctx, n); err != nil {
            log.G(ctx).WithError(err).Errorf("failed allocating network %s during init", n.ID)
            continue
        }
        allocatedNetworks = append(allocatedNetworks, n)
    }

    if _, err := a.store.Batch(func(batch *store.Batch) error {
        for _, n := range allocatedNetworks {
            if err := a.commitAllocatedNetwork(ctx, batch, n); err != nil {
                log.G(ctx).WithError(err).Errorf("failed committing allocation of network %s during init", n.ID)
            }
        }
        return nil
    }); err != nil {
        log.G(ctx).WithError(err).Error("failed committing allocation of networks during init")
    }

    // Allocate nodes in the store so far before we process watched events.
    var nodes []*api.Node
    a.store.View(func(tx store.ReadTx) {
        nodes, err = store.FindNodes(tx, store.All)
    })
    if err != nil {
        return errors.Wrap(err, "error listing all nodes in store while trying to allocate during init")
    }

    var allocatedNodes []*api.Node
    for _, node := range nodes {
        if na.IsNodeAllocated(node) {
            continue
        }

        if node.Attachment == nil {
            node.Attachment = &api.NetworkAttachment{}
        }

        node.Attachment.Network = nc.ingressNetwork.Copy()
        if err := a.allocateNode(ctx, node); err != nil {
            log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s during init", node.ID)
            continue
        }
        allocatedNodes = append(allocatedNodes, node)
    }

    if _, err := a.store.Batch(func(batch *store.Batch) error {
        for _, node := range allocatedNodes {
            if err := a.commitAllocatedNode(ctx, batch, node); err != nil {
                log.G(ctx).WithError(err).Errorf("Failed to commit allocation of network resources for node %s during init", node.ID)
            }
        }
        return nil
    }); err != nil {
        log.G(ctx).WithError(err).Error("Failed to commit allocation of network resources for nodes during init")
    }

    // Allocate services in the store so far before we process watched events.
    var services []*api.Service
    a.store.View(func(tx store.ReadTx) {
        services, err = store.FindServices(tx, store.All)
    })
    if err != nil {
        return errors.Wrap(err, "error listing all services in store while trying to allocate during init")
    }

    var allocatedServices []*api.Service
    for _, s := range services {
        if nc.nwkAllocator.IsServiceAllocated(s) {
            continue
        }

        if err := a.allocateService(ctx, s); err != nil {
            log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID)
            continue
        }
        allocatedServices = append(allocatedServices, s)
    }

    if _, err := a.store.Batch(func(batch *store.Batch) error {
        for _, s := range allocatedServices {
            if err := a.commitAllocatedService(ctx, batch, s); err != nil {
                log.G(ctx).WithError(err).Errorf("failed committing allocation of service %s during init", s.ID)
            }
        }
        return nil
    }); err != nil {
        log.G(ctx).WithError(err).Error("failed committing allocation of services during init")
    }

    // Allocate tasks in the store so far before we started watching.
    var (
        tasks          []*api.Task
        allocatedTasks []*api.Task
    )
    a.store.View(func(tx store.ReadTx) {
        tasks, err = store.FindTasks(tx, store.All)
    })
    if err != nil {
        return errors.Wrap(err, "error listing all tasks in store while trying to allocate during init")
    }

    for _, t := range tasks {
        if taskDead(t) {
            continue
        }

        var s *api.Service
        if t.ServiceID != "" {
            a.store.View(func(tx store.ReadTx) {
                s = store.GetService(tx, t.ServiceID)
            })
        }

        // Populate network attachments in the task
        // based on service spec.
        a.taskCreateNetworkAttachments(t, s)

        if taskReadyForNetworkVote(t, s, nc) {
            if t.Status.State >= api.TaskStatePending {
                continue
            }

            if a.taskAllocateVote(networkVoter, t.ID) {
                // If the task is not attached to any network, the network
                // allocator's job is done. Immediately cast a vote so
                // that the task can be moved to the ALLOCATED state as
                // soon as possible.
                allocatedTasks = append(allocatedTasks, t)
            }
            continue
        }

        err := a.allocateTask(ctx, t)
        if err == nil {
            allocatedTasks = append(allocatedTasks, t)
        } else if err != errNoChanges {
            log.G(ctx).WithError(err).Errorf("failed allocating task %s during init", t.ID)
            nc.unallocatedTasks[t.ID] = t
        }
    }

    if _, err := a.store.Batch(func(batch *store.Batch) error {
        for _, t := range allocatedTasks {
            if err := a.commitAllocatedTask(ctx, batch, t); err != nil {
                log.G(ctx).WithError(err).Errorf("failed committing allocation of task %s during init", t.ID)
            }
        }
        return nil
    }); err != nil {
        log.G(ctx).WithError(err).Error("failed committing allocation of tasks during init")
    }

    return nil
}
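// Illustrative sketch (standard library only): the allocate-then-commit
// shape used repeatedly in doNetworkInit. Failures are logged and skipped
// rather than aborting the whole pass, and only successful allocations
// are committed in the second phase (the counterpart of store.Batch in
// the code above). All names here are assumptions for the example.
package main

import "fmt"

func allocateAndCommit(ids []string, allocate, commit func(string) error) {
    var allocated []string
    for _, id := range ids {
        if err := allocate(id); err != nil {
            fmt.Printf("failed allocating %s during init: %v\n", id, err)
            continue
        }
        allocated = append(allocated, id)
    }
    for _, id := range allocated {
        if err := commit(id); err != nil {
            fmt.Printf("failed committing %s during init: %v\n", id, err)
        }
    }
}

func main() {
    allocateAndCommit(
        []string{"net-a", "net-b"},
        func(id string) error { return nil }, // stand-in for allocateNetwork
        func(id string) error { return nil }, // stand-in for commitAllocatedNetwork
    )
}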
// Run is the main loop for a Raft node. It drives the state machine,
// acting on the messages received from other Raft nodes in the cluster.
//
// Before running the main loop, it first starts the raft node based on saved
// cluster state. If no saved state exists, it starts a single-node cluster.
func (n *Node) Run(ctx context.Context) error {
    ctx = log.WithLogger(ctx, logrus.WithField("raft_id", fmt.Sprintf("%x", n.Config.ID)))
    ctx, cancel := context.WithCancel(ctx)

    // nodeRemoved indicates that the node was stopped due to its removal.
    nodeRemoved := false

    defer func() {
        cancel()
        n.stop(ctx)
        if nodeRemoved {
            // Move WAL and snapshot out of the way, since
            // they are no longer usable.
            if err := n.moveWALAndSnap(); err != nil {
                log.G(ctx).WithError(err).Error("failed to move wal after node removal")
            }
        }
        n.done()
    }()

    wasLeader := false

    for {
        select {
        case <-n.ticker.C():
            n.raftNode.Tick()
            n.cluster.Tick()
        case rd := <-n.raftNode.Ready():
            raftConfig := DefaultRaftConfig()
            n.memoryStore.View(func(readTx store.ReadTx) {
                clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
                if err == nil && len(clusters) == 1 {
                    raftConfig = clusters[0].Spec.Raft
                }
            })

            // Save entries to storage
            if err := n.saveToStorage(&raftConfig, rd.HardState, rd.Entries, rd.Snapshot); err != nil {
                log.G(ctx).WithError(err).Error("failed to save entries to storage")
            }

            if len(rd.Messages) != 0 {
                // Send raft messages to peers
                if err := n.send(ctx, rd.Messages); err != nil {
                    log.G(ctx).WithError(err).Error("failed to send message to members")
                }
            }

            // Apply snapshot to memory store. The snapshot
            // was applied to the raft store in
            // saveToStorage.
            if !raft.IsEmptySnap(rd.Snapshot) {
                // Load the snapshot data into the store
                if err := n.restoreFromSnapshot(rd.Snapshot.Data, false); err != nil {
                    log.G(ctx).WithError(err).Error("failed to restore from snapshot")
                }
                n.appliedIndex = rd.Snapshot.Metadata.Index
                n.snapshotIndex = rd.Snapshot.Metadata.Index
                n.confState = rd.Snapshot.Metadata.ConfState
            }

            // If we cease to be the leader, we must cancel any
            // proposals that are currently waiting for a quorum to
            // acknowledge them. It is still possible for these to
            // become committed, but if that happens we will apply
            // them as any follower would.
            //
            // It is important that we cancel these proposals before
            // calling processCommitted, so processCommitted does
            // not deadlock.
            if rd.SoftState != nil {
                if wasLeader && rd.SoftState.RaftState != raft.StateLeader {
                    wasLeader = false

                    if atomic.LoadUint32(&n.signalledLeadership) == 1 {
                        atomic.StoreUint32(&n.signalledLeadership, 0)
                        n.leadershipBroadcast.Publish(IsFollower)
                    }

                    // It is important that we set n.signalledLeadership to 0
                    // before calling n.wait.cancelAll. When a new raft
                    // request is registered, it checks n.signalledLeadership
                    // afterwards, and cancels the registration if it is 0.
                    // If cancelAll was called first, this call might run
                    // before the new request registers, but
                    // signalledLeadership would be set after the check.
                    // Setting signalledLeadership before calling cancelAll
                    // ensures that if a new request is registered during
                    // this transition, it will either be cancelled by
                    // cancelAll, or by its own check of signalledLeadership.
                    n.wait.cancelAll()
                } else if !wasLeader && rd.SoftState.RaftState == raft.StateLeader {
                    wasLeader = true
                }
            }

            // Process committed entries
            for _, entry := range rd.CommittedEntries {
                if err := n.processCommitted(ctx, entry); err != nil {
                    log.G(ctx).WithError(err).Error("failed to process committed entries")
                }
            }

            // Trigger a snapshot every once in a while
            if n.snapshotInProgress == nil &&
                raftConfig.SnapshotInterval > 0 &&
                n.appliedIndex-n.snapshotIndex >= raftConfig.SnapshotInterval {
                n.doSnapshot(ctx, raftConfig)
            }

            if wasLeader && atomic.LoadUint32(&n.signalledLeadership) != 1 {
                // If all the entries in the log have become
                // committed, broadcast our leadership status.
                if n.caughtUp() {
                    atomic.StoreUint32(&n.signalledLeadership, 1)
                    n.leadershipBroadcast.Publish(IsLeader)
                }
            }

            // Advance the state machine
            n.raftNode.Advance()

            // On the first startup, or if we are the only
            // registered member after restoring from the state,
            // campaign to be the leader.
            if n.campaignWhenAble {
                members := n.cluster.Members()
                if len(members) >= 1 {
                    n.campaignWhenAble = false
                }
                if len(members) == 1 && members[n.Config.ID] != nil {
                    if err := n.raftNode.Campaign(ctx); err != nil {
                        panic("raft: cannot campaign to be the leader on node restore")
                    }
                }
            }

        case snapshotIndex := <-n.snapshotInProgress:
            if snapshotIndex > n.snapshotIndex {
                n.snapshotIndex = snapshotIndex
            }
            n.snapshotInProgress = nil
        case <-n.removeRaftCh:
            nodeRemoved = true
            // If the node was removed from other members,
            // send back an error to the caller to start
            // the shutdown process.
            return ErrMemberRemoved
        case <-ctx.Done():
            return nil
        }
    }
}
// Run runs dispatcher tasks which should be run on the leader dispatcher.
// The dispatcher can be stopped by cancelling ctx or calling Stop().
func (d *Dispatcher) Run(ctx context.Context) error {
    d.mu.Lock()
    if d.isRunning() {
        d.mu.Unlock()
        return fmt.Errorf("dispatcher is already running")
    }
    logger := log.G(ctx).WithField("module", "dispatcher")
    ctx = log.WithLogger(ctx, logger)
    if err := d.markNodesUnknown(ctx); err != nil {
        logger.Errorf(`failed to move all nodes to "unknown" state: %v`, err)
    }
    configWatcher, cancel, err := store.ViewAndWatch(
        d.store,
        func(readTx store.ReadTx) error {
            clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
            if err != nil {
                return err
            }
            if len(clusters) == 1 {
                heartbeatPeriod, err := ptypes.Duration(clusters[0].Spec.Dispatcher.HeartbeatPeriod)
                if err == nil && heartbeatPeriod > 0 {
                    d.config.HeartbeatPeriod = heartbeatPeriod
                }
                if clusters[0].NetworkBootstrapKeys != nil {
                    d.networkBootstrapKeys = clusters[0].NetworkBootstrapKeys
                }
            }
            return nil
        },
        state.EventUpdateCluster{},
    )
    if err != nil {
        d.mu.Unlock()
        return err
    }
    defer cancel()

    d.ctx, d.cancel = context.WithCancel(ctx)
    d.mu.Unlock()

    publishManagers := func() {
        mgrs := getWeightedPeers(d.cluster)
        sort.Sort(weightedPeerByNodeID(mgrs))
        d.mu.Lock()
        if reflect.DeepEqual(mgrs, d.lastSeenManagers) {
            d.mu.Unlock()
            return
        }
        d.lastSeenManagers = mgrs
        d.mu.Unlock()
        d.mgrQueue.Publish(mgrs)
    }

    publishManagers()
    publishTicker := time.NewTicker(1 * time.Second)
    defer publishTicker.Stop()

    batchTimer := time.NewTimer(maxBatchInterval)
    defer batchTimer.Stop()

    for {
        select {
        case <-publishTicker.C:
            publishManagers()
        case <-d.processTaskUpdatesTrigger:
            d.processTaskUpdates()
            batchTimer.Reset(maxBatchInterval)
        case <-batchTimer.C:
            d.processTaskUpdates()
            batchTimer.Reset(maxBatchInterval)
        case v := <-configWatcher:
            cluster := v.(state.EventUpdateCluster)
            d.mu.Lock()
            if cluster.Cluster.Spec.Dispatcher.HeartbeatPeriod != nil {
                // ignore error, since Spec has passed validation before
                heartbeatPeriod, _ := ptypes.Duration(cluster.Cluster.Spec.Dispatcher.HeartbeatPeriod)
                if heartbeatPeriod != d.config.HeartbeatPeriod {
                    // only call d.nodes.updatePeriod when heartbeatPeriod changes
                    d.config.HeartbeatPeriod = heartbeatPeriod
                    d.nodes.updatePeriod(d.config.HeartbeatPeriod, d.config.HeartbeatEpsilon, d.config.GracePeriodMultiplier)
                }
            }
            d.networkBootstrapKeys = cluster.Cluster.NetworkBootstrapKeys
            d.mu.Unlock()
            d.keyMgrQueue.Publish(struct{}{})
        case <-d.ctx.Done():
            return nil
        }
    }
}
// Run is the main loop for a Raft node. It drives the state machine,
// acting on the messages received from other Raft nodes in the cluster.
//
// Before running the main loop, it first starts the raft node based on saved
// cluster state. If no saved state exists, it starts a single-node cluster.
func (n *Node) Run(ctx context.Context) error {
    defer func() {
        close(n.doneCh)
    }()

    for {
        select {
        case <-n.ticker.C():
            n.Tick()

        case rd := <-n.Ready():
            raftConfig := DefaultRaftConfig()
            n.memoryStore.View(func(readTx store.ReadTx) {
                clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
                if err == nil && len(clusters) == 1 {
                    raftConfig = clusters[0].Spec.Raft
                }
            })

            // Save entries to storage
            if err := n.saveToStorage(&raftConfig, rd.HardState, rd.Entries, rd.Snapshot); err != nil {
                n.Config.Logger.Error(err)
            }

            // Send raft messages to peers
            if err := n.send(rd.Messages); err != nil {
                n.Config.Logger.Error(err)
            }

            // Apply snapshot to memory store. The snapshot
            // was applied to the raft store in
            // saveToStorage.
            if !raft.IsEmptySnap(rd.Snapshot) {
                // Load the snapshot data into the store
                if err := n.restoreFromSnapshot(rd.Snapshot.Data, n.forceNewCluster); err != nil {
                    n.Config.Logger.Error(err)
                }
                n.appliedIndex = rd.Snapshot.Metadata.Index
                n.snapshotIndex = rd.Snapshot.Metadata.Index
                n.confState = rd.Snapshot.Metadata.ConfState
            }

            // Process committed entries
            for _, entry := range rd.CommittedEntries {
                if err := n.processCommitted(entry); err != nil {
                    n.Config.Logger.Error(err)
                }
            }

            // Trigger a snapshot every once in a while
            if n.snapshotInProgress == nil &&
                raftConfig.SnapshotInterval > 0 &&
                n.appliedIndex-n.snapshotIndex >= raftConfig.SnapshotInterval {
                n.doSnapshot(&raftConfig)
            }

            // If we cease to be the leader, we must cancel
            // any proposals that are currently waiting for
            // a quorum to acknowledge them. It is still
            // possible for these to become committed, but
            // if that happens we will apply them as any
            // follower would.
            if rd.SoftState != nil {
                if n.wasLeader && rd.SoftState.RaftState != raft.StateLeader {
                    n.wasLeader = false
                    n.wait.cancelAll()
                    n.leadershipBroadcast.Write(IsFollower)
                } else if !n.wasLeader && rd.SoftState.RaftState == raft.StateLeader {
                    n.wasLeader = true
                    n.leadershipBroadcast.Write(IsLeader)
                }
            }

            // If we are the only registered member after
            // restoring from the state, campaign to be the
            // leader.
            if !n.restored {
                if len(n.cluster.Members()) <= 1 {
                    if err := n.Campaign(n.Ctx); err != nil {
                        panic("raft: cannot campaign to be the leader on node restore")
                    }
                }
                n.restored = true
            }

            // Advance the state machine
            n.Advance()

        case snapshotIndex := <-n.snapshotInProgress:
            if snapshotIndex > n.snapshotIndex {
                n.snapshotIndex = snapshotIndex
            }
            n.snapshotInProgress = nil

        case <-n.removeRaftCh:
            // If the node was removed from other members,
            // send back an error to the caller to start
            // the shutdown process.
            n.stop()

            // Move WAL and snapshot out of the way, since
            // they are no longer usable.
            if err := n.moveWALAndSnap(); err != nil {
                n.Config.Logger.Error(err)
            }

            return ErrMemberRemoved
        case <-n.stopCh:
            n.stop()
            return nil
        }
    }
}
// Run runs dispatcher tasks which should be run on the leader dispatcher.
// The dispatcher can be stopped by cancelling ctx or calling Stop().
func (d *Dispatcher) Run(ctx context.Context) error {
    d.mu.Lock()
    if d.isRunning() {
        d.mu.Unlock()
        return errors.New("dispatcher is already running")
    }
    ctx = log.WithModule(ctx, "dispatcher")
    if err := d.markNodesUnknown(ctx); err != nil {
        log.G(ctx).Errorf(`failed to move all nodes to "unknown" state: %v`, err)
    }
    configWatcher, cancel, err := store.ViewAndWatch(
        d.store,
        func(readTx store.ReadTx) error {
            clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
            if err != nil {
                return err
            }
            if len(clusters) == 1 {
                heartbeatPeriod, err := gogotypes.DurationFromProto(clusters[0].Spec.Dispatcher.HeartbeatPeriod)
                if err == nil && heartbeatPeriod > 0 {
                    d.config.HeartbeatPeriod = heartbeatPeriod
                }
                if clusters[0].NetworkBootstrapKeys != nil {
                    d.networkBootstrapKeys = clusters[0].NetworkBootstrapKeys
                }
            }
            return nil
        },
        state.EventUpdateCluster{},
    )
    if err != nil {
        d.mu.Unlock()
        return err
    }

    // set queues here to guarantee that Close will close them
    d.mgrQueue = watch.NewQueue()
    d.keyMgrQueue = watch.NewQueue()

    peerWatcher, peerCancel := d.cluster.SubscribePeers()
    defer peerCancel()
    d.lastSeenManagers = getWeightedPeers(d.cluster)

    defer cancel()
    d.ctx, d.cancel = context.WithCancel(ctx)
    ctx = d.ctx
    d.wg.Add(1)
    defer d.wg.Done()
    d.mu.Unlock()

    publishManagers := func(peers []*api.Peer) {
        var mgrs []*api.WeightedPeer
        for _, p := range peers {
            mgrs = append(mgrs, &api.WeightedPeer{
                Peer:   p,
                Weight: remotes.DefaultObservationWeight,
            })
        }
        d.mu.Lock()
        d.lastSeenManagers = mgrs
        d.mu.Unlock()
        d.mgrQueue.Publish(mgrs)
    }

    batchTimer := time.NewTimer(maxBatchInterval)
    defer batchTimer.Stop()

    for {
        select {
        case ev := <-peerWatcher:
            publishManagers(ev.([]*api.Peer))
        case <-d.processUpdatesTrigger:
            d.processUpdates(ctx)
            batchTimer.Reset(maxBatchInterval)
        case <-batchTimer.C:
            d.processUpdates(ctx)
            batchTimer.Reset(maxBatchInterval)
        case v := <-configWatcher:
            cluster := v.(state.EventUpdateCluster)
            d.mu.Lock()
            if cluster.Cluster.Spec.Dispatcher.HeartbeatPeriod != nil {
                // ignore error, since Spec has passed validation before
                heartbeatPeriod, _ := gogotypes.DurationFromProto(cluster.Cluster.Spec.Dispatcher.HeartbeatPeriod)
                if heartbeatPeriod != d.config.HeartbeatPeriod {
                    // only call d.nodes.updatePeriod when heartbeatPeriod changes
                    d.config.HeartbeatPeriod = heartbeatPeriod
                    d.nodes.updatePeriod(d.config.HeartbeatPeriod, d.config.HeartbeatEpsilon, d.config.GracePeriodMultiplier)
                }
            }
            d.networkBootstrapKeys = cluster.Cluster.NetworkBootstrapKeys
            d.mu.Unlock()
            d.keyMgrQueue.Publish(cluster.Cluster.NetworkBootstrapKeys)
        case <-ctx.Done():
            return nil
        }
    }
}
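// Illustrative sketch: the heartbeat-period handling above, extracted. A
// proto Duration from the cluster spec is converted with
// gogotypes.DurationFromProto and only applied when valid and positive;
// otherwise the current value is kept. The function name is an assumption
// for the example.
package main

import (
    "fmt"
    "time"

    gogotypes "github.com/gogo/protobuf/types"
)

func effectiveHeartbeat(spec *gogotypes.Duration, current time.Duration) time.Duration {
    if spec == nil {
        return current
    }
    if d, err := gogotypes.DurationFromProto(spec); err == nil && d > 0 {
        return d
    }
    return current
}

func main() {
    fmt.Println(effectiveHeartbeat(nil, time.Second))                                    // 1s
    fmt.Println(effectiveHeartbeat(gogotypes.DurationProto(5*time.Second), time.Second)) // 5s
}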
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
func (m *Manager) Run(parent context.Context) error {
    ctx, ctxCancel := context.WithCancel(parent)
    defer ctxCancel()

    // Harakiri.
    go func() {
        select {
        case <-ctx.Done():
        case <-m.stopped:
            ctxCancel()
        }
    }()

    leadershipCh, cancel := m.raftNode.SubscribeLeadership()
    defer cancel()

    go m.handleLeadershipEvents(ctx, leadershipCh)

    authorize := func(ctx context.Context, roles []string) error {
        var (
            blacklistedCerts map[string]*api.BlacklistedCertificate
            clusters         []*api.Cluster
            err              error
        )

        m.raftNode.MemoryStore().View(func(readTx store.ReadTx) {
            clusters, err = store.FindClusters(readTx, store.ByName("default"))
        })

        // Not having a cluster object yet means we can't check
        // the blacklist.
        if err == nil && len(clusters) == 1 {
            blacklistedCerts = clusters[0].BlacklistedCertificates
        }

        // Authorize the remote roles, ensure they can only be forwarded by managers
        _, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
        return err
    }

    baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig.RootCA())
    baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore())
    healthServer := health.NewHealthServer()
    localHealthServer := health.NewHealthServer()

    authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
    authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize)
    authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize)
    authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
    authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
    authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize)
    authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
    authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize)

    proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
    proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
    proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
    proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
    proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)

    // localProxyControlAPI is a special kind of proxy. It is only wired up
    // to receive requests from a trusted local socket, and these requests
    // don't use TLS, therefore the requests it handles locally should
    // bypass authorization. When it proxies, it sends them as requests from
    // this manager rather than forwarded requests (it has no TLS
    // information to put in the metadata map).
    forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
    localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, forwardAsOwnRequest)

    // Everything registered on m.server should be an authenticated
    // wrapper, or a proxy wrapping an authenticated wrapper!
    api.RegisterCAServer(m.server, proxyCAAPI)
    api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
    api.RegisterRaftServer(m.server, authenticatedRaftAPI)
    api.RegisterHealthServer(m.server, authenticatedHealthAPI)
    api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
    api.RegisterControlServer(m.server, authenticatedControlAPI)
    api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI)
    api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

    api.RegisterControlServer(m.localserver, localProxyControlAPI)
    api.RegisterHealthServer(m.localserver, localHealthServer)

    healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING)
    localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING)

    errServe := make(chan error, len(m.listeners))
    for proto, l := range m.listeners {
        go m.serveListener(ctx, errServe, proto, l)
    }

    defer func() {
        m.server.Stop()
        m.localserver.Stop()
    }()

    // Set the raft server as serving for the health server
    healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)

    if err := m.raftNode.JoinAndStart(ctx); err != nil {
        return errors.Wrap(err, "can't initialize raft node")
    }

    localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING)
    close(m.started)

    go func() {
        err := m.raftNode.Run(ctx)
        if err != nil {
            log.G(ctx).Error(err)
            m.Stop(ctx)
        }
    }()

    if err := raft.WaitForLeader(ctx, m.raftNode); err != nil {
        return err
    }

    c, err := raft.WaitForCluster(ctx, m.raftNode)
    if err != nil {
        return err
    }
    raftConfig := c.Spec.Raft

    if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick {
        log.G(ctx).Warningf("election tick value (%d ticks) is different from the one defined in the cluster config (%v ticks), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick)
    }
    if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick {
        log.G(ctx).Warningf("heartbeat tick value (%d ticks) is different from the one defined in the cluster config (%v ticks), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
    }

    // wait for an error in serving.
    err = <-errServe
    select {
    // check to see if stopped was posted to. if so, we're in the process of
    // stopping, or done and that's why we got the error. if stopping is
    // deliberate, stopped will ALWAYS be closed before the error is triggered,
    // so this path will ALWAYS be taken if the stop was deliberate
    case <-m.stopped:
        // shutdown was requested, do not return an error
        // but first, we wait to acquire a mutex to guarantee that stopping is
        // finished. as long as we acquire the mutex BEFORE we return, we know
        // that stopping is stopped.
        m.mu.Lock()
        m.mu.Unlock()
        return nil

    // otherwise, we'll get something from errServe, which indicates that an
    // error in serving has actually occurred and this isn't a planned shutdown
    default:
        return err
    }
}
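// Illustrative sketch (standard library only): the shutdown-vs-error
// disambiguation at the end of Run, reduced to channels. A closed
// "stopped" channel marks a deliberate shutdown, so a serve error observed
// afterwards is swallowed. Names are assumptions for the example.
package main

import (
    "errors"
    "fmt"
)

func exitError(errServe <-chan error, stopped <-chan struct{}) error {
    err := <-errServe
    select {
    case <-stopped:
        return nil // deliberate shutdown; the serve error is expected
    default:
        return err // real serving failure
    }
}

func main() {
    errCh := make(chan error, 1)
    stopCh := make(chan struct{})
    errCh <- errors.New("listener closed")
    close(stopCh) // stop was requested before the error surfaced
    fmt.Println(exitError(errCh, stopCh)) // <nil>
}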
func (a *Allocator) doNetworkInit(ctx context.Context) error {
    na, err := networkallocator.New()
    if err != nil {
        return err
    }

    nc := &networkContext{
        nwkAllocator:        na,
        unallocatedTasks:    make(map[string]*api.Task),
        unallocatedServices: make(map[string]*api.Service),
        unallocatedNetworks: make(map[string]*api.Network),
    }

    // Check if we have the ingress network. If not found create
    // it before reading all network objects for allocation.
    var networks []*api.Network
    a.store.View(func(tx store.ReadTx) {
        networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
        if len(networks) > 0 {
            ingressNetwork = networks[0]
        }
    })
    if err != nil {
        return fmt.Errorf("failed to find ingress network during init: %v", err)
    }

    // If ingress network is not found, create one right away
    // using the predefined template.
    if len(networks) == 0 {
        if err := a.store.Update(func(tx store.Tx) error {
            ingressNetwork.ID = identity.NewID()
            if err := store.CreateNetwork(tx, ingressNetwork); err != nil {
                return err
            }
            return nil
        }); err != nil {
            return fmt.Errorf("failed to create ingress network: %v", err)
        }

        a.store.View(func(tx store.ReadTx) {
            networks, err = store.FindNetworks(tx, store.ByName(ingressNetworkName))
            if len(networks) > 0 {
                ingressNetwork = networks[0]
            }
        })
        if err != nil {
            return fmt.Errorf("failed to find ingress network after creating it: %v", err)
        }
    }

    // Try to complete ingress network allocation before anything else so
    // that we can get the preferred subnet for the ingress network.
    if !na.IsAllocated(ingressNetwork) {
        if err := a.allocateNetwork(ctx, nc, ingressNetwork); err != nil {
            log.G(ctx).Errorf("failed allocating ingress network during init: %v", err)
        }

        // Update store after allocation
        if err := a.store.Update(func(tx store.Tx) error {
            if err := store.UpdateNetwork(tx, ingressNetwork); err != nil {
                return err
            }
            return nil
        }); err != nil {
            return fmt.Errorf("failed to update ingress network: %v", err)
        }
    }

    // Allocate networks in the store so far before we started
    // watching.
    a.store.View(func(tx store.ReadTx) {
        networks, err = store.FindNetworks(tx, store.All)
    })
    if err != nil {
        return fmt.Errorf("error listing all networks in store while trying to allocate during init: %v", err)
    }

    for _, n := range networks {
        if na.IsAllocated(n) {
            continue
        }

        if err := a.allocateNetwork(ctx, nc, n); err != nil {
            log.G(ctx).Errorf("failed allocating network %s during init: %v", n.ID, err)
        }
    }

    // Allocate nodes in the store so far before we process watched events.
    var nodes []*api.Node
    a.store.View(func(tx store.ReadTx) {
        nodes, err = store.FindNodes(tx, store.All)
    })
    if err != nil {
        return fmt.Errorf("error listing all nodes in store while trying to allocate during init: %v", err)
    }

    for _, node := range nodes {
        if na.IsNodeAllocated(node) {
            continue
        }

        if node.Attachment == nil {
            node.Attachment = &api.NetworkAttachment{}
        }

        node.Attachment.Network = ingressNetwork.Copy()
        if err := a.allocateNode(ctx, nc, node); err != nil {
            log.G(ctx).Errorf("Failed to allocate network resources for node %s during init: %v", node.ID, err)
        }
    }

    // Allocate services in the store so far before we process watched events.
    var services []*api.Service
    a.store.View(func(tx store.ReadTx) {
        services, err = store.FindServices(tx, store.All)
    })
    if err != nil {
        return fmt.Errorf("error listing all services in store while trying to allocate during init: %v", err)
    }

    for _, s := range services {
        if s.Spec.Endpoint == nil {
            continue
        }

        if na.IsServiceAllocated(s) {
            continue
        }

        if err := a.allocateService(ctx, nc, s); err != nil {
            log.G(ctx).Errorf("failed allocating service %s during init: %v", s.ID, err)
        }
    }

    // Allocate tasks in the store so far before we started watching.
    var tasks []*api.Task
    a.store.View(func(tx store.ReadTx) {
        tasks, err = store.FindTasks(tx, store.All)
    })
    if err != nil {
        return fmt.Errorf("error listing all tasks in store while trying to allocate during init: %v", err)
    }

    if _, err := a.store.Batch(func(batch *store.Batch) error {
        for _, t := range tasks {
            if taskDead(t) {
                continue
            }

            var s *api.Service
            if t.ServiceID != "" {
                a.store.View(func(tx store.ReadTx) {
                    s = store.GetService(tx, t.ServiceID)
                })
            }

            // Populate network attachments in the task
            // based on service spec.
            a.taskCreateNetworkAttachments(t, s)

            if taskReadyForNetworkVote(t, s, nc) {
                if t.Status.State >= api.TaskStateAllocated {
                    continue
                }

                if a.taskAllocateVote(networkVoter, t.ID) {
                    // If the task is not attached to any network, the network
                    // allocator's job is done. Immediately cast a vote so
                    // that the task can be moved to the ALLOCATED state as
                    // soon as possible.
                    if err := batch.Update(func(tx store.Tx) error {
                        storeT := store.GetTask(tx, t.ID)
                        if storeT == nil {
                            return fmt.Errorf("task %s not found while trying to update state", t.ID)
                        }

                        updateTaskStatus(storeT, api.TaskStateAllocated, "allocated")

                        if err := store.UpdateTask(tx, storeT); err != nil {
                            return fmt.Errorf("failed updating state in store transaction for task %s: %v", storeT.ID, err)
                        }

                        return nil
                    }); err != nil {
                        log.G(ctx).WithError(err).Error("error updating task network")
                    }
                }
                continue
            }

            err := batch.Update(func(tx store.Tx) error {
                _, err := a.allocateTask(ctx, nc, tx, t)
                return err
            })
            if err != nil {
                log.G(ctx).Errorf("failed allocating task %s during init: %v", t.ID, err)
                nc.unallocatedTasks[t.ID] = t
            }
        }

        return nil
    }); err != nil {
        return err
    }

    a.netCtx = nc
    return nil
}
func TestNewNodeCertificateRequiresToken(t *testing.T) {
    t.Parallel()

    tc := testutils.NewTestCA(t)
    defer tc.Stop()

    csr, _, err := ca.GenerateNewCSR()
    assert.NoError(t, err)

    // Issuance fails if no secret is provided
    role := api.NodeRoleManager
    issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

    role = api.NodeRoleWorker
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

    // Issuance fails if wrong secret is provided
    role = api.NodeRoleManager
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: "invalid-secret"}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

    role = api.NodeRoleWorker
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: "invalid-secret"}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

    // Issuance succeeds if correct token is provided
    role = api.NodeRoleManager
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.ManagerToken}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.NoError(t, err)

    role = api.NodeRoleWorker
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.WorkerToken}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.NoError(t, err)

    // Rotate manager and worker tokens
    var (
        newManagerToken string
        newWorkerToken  string
    )
    assert.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
        clusters, _ := store.FindClusters(tx, store.ByName(store.DefaultClusterName))
        newWorkerToken = ca.GenerateJoinToken(&tc.RootCA)
        clusters[0].RootCA.JoinTokens.Worker = newWorkerToken
        newManagerToken = ca.GenerateJoinToken(&tc.RootCA)
        clusters[0].RootCA.JoinTokens.Manager = newManagerToken
        return store.UpdateCluster(tx, clusters[0])
    }))

    // updating the join token may take a little bit in order to register on the CA server, so poll
    assert.NoError(t, raftutils.PollFunc(nil, func() error {
        // Old token should fail
        role = api.NodeRoleManager
        issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.ManagerToken}
        _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
        if err == nil {
            return fmt.Errorf("join token not updated yet")
        }
        return nil
    }))

    // Old token should fail
    assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

    role = api.NodeRoleWorker
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.WorkerToken}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

    // New token should succeed
    role = api.NodeRoleManager
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: newManagerToken}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.NoError(t, err)

    role = api.NodeRoleWorker
    issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: newWorkerToken}
    _, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
    assert.NoError(t, err)
}
// RemoveNode removes a Node referenced by NodeID with the given NodeSpec.
// - Returns NotFound if the Node is not found.
// - Returns FailedPrecondition if the Node has the manager role (and is part of the memberlist) or is not shut down.
// - Returns InvalidArgument if NodeID or NodeVersion is not valid.
// - Returns an error if the delete fails.
func (s *Server) RemoveNode(ctx context.Context, request *api.RemoveNodeRequest) (*api.RemoveNodeResponse, error) {
    if request.NodeID == "" {
        return nil, grpc.Errorf(codes.InvalidArgument, errInvalidArgument.Error())
    }

    err := s.store.Update(func(tx store.Tx) error {
        node := store.GetNode(tx, request.NodeID)
        if node == nil {
            return grpc.Errorf(codes.NotFound, "node %s not found", request.NodeID)
        }
        if node.Spec.Role == api.NodeRoleManager {
            if s.raft == nil {
                return grpc.Errorf(codes.FailedPrecondition, "node %s is a manager but cannot access node information from the raft memberlist", request.NodeID)
            }
            if member := s.raft.GetMemberByNodeID(request.NodeID); member != nil {
                return grpc.Errorf(codes.FailedPrecondition, "node %s is a cluster manager and is a member of the raft cluster. It must be demoted to worker before removal", request.NodeID)
            }
        }
        if !request.Force && node.Status.State == api.NodeStatus_READY {
            return grpc.Errorf(codes.FailedPrecondition, "node %s is not down and can't be removed", request.NodeID)
        }

        // lookup the cluster
        clusters, err := store.FindClusters(tx, store.ByName("default"))
        if err != nil {
            return err
        }
        if len(clusters) != 1 {
            return grpc.Errorf(codes.Internal, "could not fetch cluster object")
        }
        cluster := clusters[0]

        removedNode := &api.RemovedNode{ID: node.ID}

        // Set an expiry time for this RemovedNode if a certificate
        // exists and can be parsed.
        if len(node.Certificate.Certificate) != 0 {
            certBlock, _ := pem.Decode(node.Certificate.Certificate)
            if certBlock != nil {
                X509Cert, err := x509.ParseCertificate(certBlock.Bytes)
                if err == nil && !X509Cert.NotAfter.IsZero() {
                    expiry, err := ptypes.TimestampProto(X509Cert.NotAfter)
                    if err == nil {
                        removedNode.Expiry = expiry
                    }
                }
            }
        }

        cluster.RemovedNodes = append(cluster.RemovedNodes, removedNode)
        if err := store.UpdateCluster(tx, cluster); err != nil {
            return err
        }

        return store.DeleteNode(tx, request.NodeID)
    })
    if err != nil {
        return nil, err
    }
    return &api.RemoveNodeResponse{}, nil
}
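// Illustrative sketch (standard library only): the best-effort certificate
// expiry parse used for RemovedNode above. Any failure yields the zero
// time, so node removal still proceeds, just without an expiry.
package main

import (
    "crypto/x509"
    "encoding/pem"
    "fmt"
    "time"
)

func certExpiry(certPEM []byte) time.Time {
    block, _ := pem.Decode(certPEM)
    if block == nil {
        return time.Time{} // not PEM at all
    }
    cert, err := x509.ParseCertificate(block.Bytes)
    if err != nil {
        return time.Time{} // PEM wrapped something that isn't a certificate
    }
    return cert.NotAfter
}

func main() {
    fmt.Println(certExpiry([]byte("not a certificate")).IsZero()) // true
}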
// Run runs the CA signer main loop.
// The CA signer can be stopped by cancelling ctx or calling Stop().
func (s *Server) Run(ctx context.Context) error {
    s.mu.Lock()
    if s.isRunning() {
        s.mu.Unlock()
        return errors.New("CA signer is already running")
    }
    s.wg.Add(1)
    s.mu.Unlock()

    defer s.wg.Done()
    ctx = log.WithModule(ctx, "ca")

    // Retrieve the channels to keep track of changes in the cluster
    // Retrieve all the currently registered nodes
    var nodes []*api.Node
    updates, cancel, err := store.ViewAndWatch(
        s.store,
        func(readTx store.ReadTx) error {
            clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
            if err != nil {
                return err
            }
            if len(clusters) != 1 {
                return errors.New("could not find cluster object")
            }
            s.updateCluster(ctx, clusters[0])

            nodes, err = store.FindNodes(readTx, store.All)
            return err
        },
        state.EventCreateNode{},
        state.EventUpdateNode{},
        state.EventUpdateCluster{},
    )

    // Do this after updateCluster has been called, so isRunning never
    // returns true without joinTokens being set correctly.
    s.mu.Lock()
    s.ctx, s.cancel = context.WithCancel(ctx)
    s.mu.Unlock()
    close(s.started)

    if err != nil {
        log.G(ctx).WithFields(logrus.Fields{
            "method": "(*Server).Run",
        }).WithError(err).Errorf("snapshot store view failed")
        return err
    }
    defer cancel()

    // We might have missed some updates if there was a leader election,
    // so let's pick up the slack.
    if err := s.reconcileNodeCertificates(ctx, nodes); err != nil {
        // We don't return here because that means the Run loop would
        // never run. Log an error instead.
        log.G(ctx).WithFields(logrus.Fields{
            "method": "(*Server).Run",
        }).WithError(err).Errorf("error attempting to reconcile certificates")
    }

    ticker := time.NewTicker(s.reconciliationRetryInterval)
    defer ticker.Stop()

    // Watch for new nodes being created, new nodes being updated, and changes
    // to the cluster
    for {
        select {
        case event := <-updates:
            switch v := event.(type) {
            case state.EventCreateNode:
                s.evaluateAndSignNodeCert(ctx, v.Node)
            case state.EventUpdateNode:
                // If this certificate is already at a final state
                // no need to evaluate and sign it.
                if !isFinalState(v.Node.Certificate.Status) {
                    s.evaluateAndSignNodeCert(ctx, v.Node)
                }
            case state.EventUpdateCluster:
                s.updateCluster(ctx, v.Cluster)
            }
        case <-ticker.C:
            for _, node := range s.pending {
                if err := s.evaluateAndSignNodeCert(ctx, node); err != nil {
                    // If this sign operation did not succeed, the rest are
                    // unlikely to. Yield so that we don't hammer an external CA.
                    // Since the map iteration order is randomized, there is no
                    // risk of getting stuck on a problematic CSR.
                    break
                }
            }
        case <-ctx.Done():
            return ctx.Err()
        case <-s.ctx.Done():
            return nil
        }
    }
}
// IssueNodeCertificate is responsible for gatekeeping both certificate
// requests from new nodes in the swarm, and authorizing certificate renewals.
// If a node presented a valid certificate, the corresponding certificate is
// set in a RENEW state.
// If a node failed to present a valid certificate, we check for a valid join
// token and set the role accordingly. A new random node ID is generated, and
// the corresponding node entry is created.
// IssueNodeCertificate is the only place where new node entries to raft
// should be created.
func (s *Server) IssueNodeCertificate(ctx context.Context, request *api.IssueNodeCertificateRequest) (*api.IssueNodeCertificateResponse, error) {
	// First, let's see if the remote node is presenting a non-empty CSR.
	if len(request.CSR) == 0 {
		return nil, grpc.Errorf(codes.InvalidArgument, codes.InvalidArgument.String())
	}

	if err := s.addTask(); err != nil {
		return nil, err
	}
	defer s.doneTask()

	var (
		blacklistedCerts map[string]*api.BlacklistedCertificate
		clusters         []*api.Cluster
		err              error
	)

	s.store.View(func(readTx store.ReadTx) {
		clusters, err = store.FindClusters(readTx, store.ByName("default"))
	})

	// Not having a cluster object yet means we can't check the blacklist.
	if err == nil && len(clusters) == 1 {
		blacklistedCerts = clusters[0].BlacklistedCertificates
	}

	// If the remote node is a worker (either forwarded by a manager, or
	// calling directly), issue a renew worker certificate entry with the
	// correct ID.
	nodeID, err := AuthorizeForwardedRoleAndOrg(ctx, []string{WorkerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
	if err == nil {
		return s.issueRenewCertificate(ctx, nodeID, request.CSR)
	}

	// If the remote node is a manager (either forwarded by another manager,
	// or calling directly), issue a renew certificate entry with the
	// correct ID.
	nodeID, err = AuthorizeForwardedRoleAndOrg(ctx, []string{ManagerRole}, []string{ManagerRole}, s.securityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
	if err == nil {
		return s.issueRenewCertificate(ctx, nodeID, request.CSR)
	}

	// The remote node didn't successfully present a valid mTLS certificate;
	// let's issue a certificate with a new random ID.
	role := api.NodeRole(-1)

	s.mu.Lock()
	if subtle.ConstantTimeCompare([]byte(s.joinTokens.Manager), []byte(request.Token)) == 1 {
		role = api.NodeRoleManager
	} else if subtle.ConstantTimeCompare([]byte(s.joinTokens.Worker), []byte(request.Token)) == 1 {
		role = api.NodeRoleWorker
	}
	s.mu.Unlock()

	if role < 0 {
		return nil, grpc.Errorf(codes.InvalidArgument, "A valid join token is necessary to join this cluster")
	}

	// Max number of collisions of ID or CN to tolerate before giving up.
	maxRetries := 3

	// Generate a random ID for this new node.
	for i := 0; ; i++ {
		nodeID = identity.NewID()

		// Create a new node.
		err := s.store.Update(func(tx store.Tx) error {
			node := &api.Node{
				ID: nodeID,
				Certificate: api.Certificate{
					CSR:  request.CSR,
					CN:   nodeID,
					Role: role,
					Status: api.IssuanceStatus{
						State: api.IssuanceStatePending,
					},
				},
				Spec: api.NodeSpec{
					Role:       role,
					Membership: api.NodeMembershipAccepted,
				},
			}
			return store.CreateNode(tx, node)
		})
		if err == nil {
			log.G(ctx).WithFields(logrus.Fields{
				"node.id":   nodeID,
				"node.role": role,
				"method":    "IssueNodeCertificate",
			}).Debugf("new certificate entry added")
			break
		}
		if err != store.ErrExist {
			return nil, err
		}
		if i == maxRetries {
			return nil, err
		}
		log.G(ctx).WithFields(logrus.Fields{
			"node.id":   nodeID,
			"node.role": role,
			"method":    "IssueNodeCertificate",
		}).Errorf("randomly generated node ID collided with an existing one - retrying")
	}

	return &api.IssueNodeCertificateResponse{
		NodeID:         nodeID,
		NodeMembership: api.NodeMembershipAccepted,
	}, nil
}
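// A minimal, runnable sketch of the constant-time token check used in
// IssueNodeCertificate above. crypto/subtle.ConstantTimeCompare takes the
// same amount of time regardless of where the inputs differ, so the
// comparison itself leaks no timing information about the real tokens.
// The function and token values below are invented for illustration.
package main

import (
	"crypto/subtle"
	"fmt"
)

// checkJoinToken compares the presented token against both role tokens in
// constant time and reports which role, if any, it grants.
func checkJoinToken(managerToken, workerToken, presented string) (string, bool) {
	if subtle.ConstantTimeCompare([]byte(managerToken), []byte(presented)) == 1 {
		return "manager", true
	}
	if subtle.ConstantTimeCompare([]byte(workerToken), []byte(presented)) == 1 {
		return "worker", true
	}
	return "", false
}

func main() {
	role, ok := checkJoinToken("SWMTKN-mgr-example", "SWMTKN-wrk-example", "SWMTKN-wrk-example")
	fmt.Println(role, ok) // worker true
}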
// Run contains the GlobalOrchestrator event loop.
func (g *GlobalOrchestrator) Run(ctx context.Context) error {
	defer close(g.doneChan)

	// Watch changes to services and tasks.
	queue := g.store.WatchQueue()
	watcher, cancel := queue.Watch()
	defer cancel()

	// Look up the cluster.
	var err error
	g.store.View(func(readTx store.ReadTx) {
		var clusters []*api.Cluster
		clusters, err = store.FindClusters(readTx, store.ByName("default"))
		if len(clusters) != 1 {
			return // just pick up the cluster when it is created
		}
		g.cluster = clusters[0]
	})
	if err != nil {
		return err
	}

	// Get the list of nodes.
	var nodes []*api.Node
	g.store.View(func(readTx store.ReadTx) {
		nodes, err = store.FindNodes(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, n := range nodes {
		// If a node is in drain state, do not add it.
		if isValidNode(n) {
			g.nodes[n.ID] = struct{}{}
		}
	}

	// Look up global services.
	var existingServices []*api.Service
	g.store.View(func(readTx store.ReadTx) {
		existingServices, err = store.FindServices(readTx, store.All)
	})
	if err != nil {
		return err
	}
	for _, s := range existingServices {
		if isGlobalService(s) {
			g.globalServices[s.ID] = s
			g.reconcileOneService(ctx, s)
		}
	}

	for {
		select {
		case event := <-watcher:
			// TODO(stevvooe): Use ctx to limit running time of operation.
			switch v := event.(type) {
			case state.EventUpdateCluster:
				g.cluster = v.Cluster
			case state.EventCreateService:
				if !isGlobalService(v.Service) {
					continue
				}
				g.globalServices[v.Service.ID] = v.Service
				g.reconcileOneService(ctx, v.Service)
			case state.EventUpdateService:
				if !isGlobalService(v.Service) {
					continue
				}
				g.globalServices[v.Service.ID] = v.Service
				g.reconcileOneService(ctx, v.Service)
			case state.EventDeleteService:
				if !isGlobalService(v.Service) {
					continue
				}
				deleteServiceTasks(ctx, g.store, v.Service)
				// Delete the service from the service map.
				delete(g.globalServices, v.Service.ID)
				g.restarts.ClearServiceHistory(v.Service.ID)
			case state.EventCreateNode:
				g.reconcileOneNode(ctx, v.Node)
			case state.EventUpdateNode:
				switch v.Node.Status.State {
				// NodeStatus_DISCONNECTED is a transient state; no need
				// to make any change.
				case api.NodeStatus_DOWN:
					g.removeTasksFromNode(ctx, v.Node)
				case api.NodeStatus_READY:
					// The node could come back to READY from DOWN or
					// DISCONNECTED.
					g.reconcileOneNode(ctx, v.Node)
				}
			case state.EventDeleteNode:
				g.removeTasksFromNode(ctx, v.Node)
				delete(g.nodes, v.Node.ID)
			case state.EventUpdateTask:
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				// The global orchestrator needs to inspect when a task has
				// terminated. It should ignore tasks whose DesiredState is
				// past running, which means the task has already been
				// processed.
				if isTaskTerminated(v.Task) {
					g.restartTask(ctx, v.Task.ID, v.Task.ServiceID)
				}
			case state.EventDeleteTask:
				// The CLI allows deleting a task.
				if _, exists := g.globalServices[v.Task.ServiceID]; !exists {
					continue
				}
				g.reconcileServiceOneNode(ctx, v.Task.ServiceID, v.Task.NodeID)
			}
		case <-g.stopChan:
			return nil
		}
	}
}
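// A minimal sketch (invented event types, not swarmkit code) of the event
// dispatch style used in the orchestrator loop above: one channel of
// heterogeneous events is demultiplexed with a type switch, so each event
// kind gets its own reconciliation path.
package main

import "fmt"

type eventCreateService struct{ id string }
type eventDeleteService struct{ id string }

func main() {
	events := make(chan interface{}, 2)
	events <- eventCreateService{id: "svc1"}
	events <- eventDeleteService{id: "svc1"}
	close(events)

	services := map[string]struct{}{}
	for ev := range events {
		switch v := ev.(type) {
		case eventCreateService:
			services[v.id] = struct{}{} // reconcileOneService would run here
		case eventDeleteService:
			delete(services, v.id) // deleteServiceTasks would run here
		}
	}
	fmt.Println("services tracked:", len(services))
}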
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
func (m *Manager) Run(parent context.Context) error {
	ctx, ctxCancel := context.WithCancel(parent)
	defer ctxCancel()

	m.cancelFunc = ctxCancel

	leadershipCh, cancel := m.raftNode.SubscribeLeadership()
	defer cancel()

	go m.handleLeadershipEvents(ctx, leadershipCh)

	authorize := func(ctx context.Context, roles []string) error {
		var (
			blacklistedCerts map[string]*api.BlacklistedCertificate
			clusters         []*api.Cluster
			err              error
		)

		m.raftNode.MemoryStore().View(func(readTx store.ReadTx) {
			clusters, err = store.FindClusters(readTx, store.ByName("default"))
		})

		// Not having a cluster object yet means we can't check the
		// blacklist.
		if err == nil && len(clusters) == 1 {
			blacklistedCerts = clusters[0].BlacklistedCertificates
		}

		// Authorize the remote roles; ensure they can only be forwarded
		// by managers.
		_, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
		return err
	}

	baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig.RootCA())
	baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore())
	healthServer := health.NewHealthServer()
	localHealthServer := health.NewHealthServer()

	authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
	authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize)
	authenticatedLogsServerAPI := api.NewAuthenticatedWrapperLogsServer(m.logbroker, authorize)
	authenticatedLogBrokerAPI := api.NewAuthenticatedWrapperLogBrokerServer(m.logbroker, authorize)
	authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize)
	authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
	authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
	authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize)
	authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize)

	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
	proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)
	proxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(authenticatedLogBrokerAPI, m.raftNode, ca.WithMetadataForwardTLSInfo)

	// localProxyControlAPI is a special kind of proxy. It is only wired up
	// to receive requests from a trusted local socket, and these requests
	// don't use TLS, therefore the requests it handles locally should
	// bypass authorization. When it proxies, it sends them as requests from
	// this manager rather than forwarded requests (it has no TLS
	// information to put in the metadata map).
	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, forwardAsOwnRequest)
	localProxyLogsAPI := api.NewRaftProxyLogsServer(m.logbroker, m.raftNode, forwardAsOwnRequest)

	// Everything registered on m.server should be an authenticated
	// wrapper, or a proxy wrapping an authenticated wrapper!
	api.RegisterCAServer(m.server, proxyCAAPI)
	api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
	api.RegisterRaftServer(m.server, authenticatedRaftAPI)
	api.RegisterHealthServer(m.server, authenticatedHealthAPI)
	api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
	api.RegisterControlServer(m.server, authenticatedControlAPI)
	api.RegisterLogsServer(m.server, authenticatedLogsServerAPI)
	api.RegisterLogBrokerServer(m.server, proxyLogBrokerAPI)
	api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI)
	api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

	api.RegisterControlServer(m.localserver, localProxyControlAPI)
	api.RegisterLogsServer(m.localserver, localProxyLogsAPI)
	api.RegisterHealthServer(m.localserver, localHealthServer)

	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING)
	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING)

	errServe := make(chan error, len(m.listeners))
	for _, lis := range m.listeners {
		go m.serveListener(ctx, errServe, lis)
	}

	defer func() {
		m.server.Stop()
		m.localserver.Stop()
	}()

	// Set the raft server as serving for the health server.
	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)

	if err := m.raftNode.JoinAndStart(ctx); err != nil {
		return errors.Wrap(err, "can't initialize raft node")
	}

	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING)
	close(m.started)

	go func() {
		err := m.raftNode.Run(ctx)
		if err != nil {
			log.G(ctx).Error(err)
			m.Stop(ctx)
		}
	}()

	if err := raft.WaitForLeader(ctx, m.raftNode); err != nil {
		return err
	}

	c, err := raft.WaitForCluster(ctx, m.raftNode)
	if err != nil {
		return err
	}
	raftConfig := c.Spec.Raft

	if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick {
		log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick)
	}
	if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick {
		log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
	}

	// Wait for an error in serving.
	err = <-errServe
	m.mu.Lock()
	if m.stopped {
		m.mu.Unlock()
		return nil
	}
	m.mu.Unlock()
	m.Stop(ctx)

	return err
}
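// A minimal sketch (invented names, not the generated swarmkit wrappers) of
// the layering used above: every public gRPC handler is wrapped so that an
// authorize callback runs before the real handler, and only the wrapped form
// is registered on the public server.
package main

import (
	"context"
	"errors"
	"fmt"
)

type handler func(ctx context.Context) (string, error)

// withAuthorization mirrors the role of api.NewAuthenticatedWrapper*Server:
// it returns a handler that rejects the call unless authorize succeeds.
func withAuthorization(h handler, authorize func(ctx context.Context, roles []string) error) handler {
	return func(ctx context.Context) (string, error) {
		if err := authorize(ctx, []string{"swarm-manager"}); err != nil {
			return "", err
		}
		return h(ctx)
	}
}

func main() {
	base := func(ctx context.Context) (string, error) { return "cluster info", nil }

	allow := func(ctx context.Context, roles []string) error { return nil }
	deny := func(ctx context.Context, roles []string) error { return errors.New("permission denied") }

	fmt.Println(withAuthorization(base, allow)(context.Background())) // cluster info <nil>
	fmt.Println(withAuthorization(base, deny)(context.Background()))  //  permission denied
}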
// Run starts all manager sub-systems and the gRPC server at the configured
// address.
// The call never returns unless an error occurs or `Stop()` is called.
func (m *Manager) Run(parent context.Context) error {
	ctx, ctxCancel := context.WithCancel(parent)
	defer ctxCancel()

	m.cancelFunc = ctxCancel

	leadershipCh, cancel := m.raftNode.SubscribeLeadership()
	defer cancel()

	go m.handleLeadershipEvents(ctx, leadershipCh)

	authorize := func(ctx context.Context, roles []string) error {
		var (
			blacklistedCerts map[string]*api.BlacklistedCertificate
			clusters         []*api.Cluster
			err              error
		)

		m.raftNode.MemoryStore().View(func(readTx store.ReadTx) {
			clusters, err = store.FindClusters(readTx, store.ByName("default"))
		})

		// Not having a cluster object yet means we can't check the
		// blacklist.
		if err == nil && len(clusters) == 1 {
			blacklistedCerts = clusters[0].BlacklistedCertificates
		}

		// Authorize the remote roles; ensure they can only be forwarded
		// by managers.
		_, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
		return err
	}

	baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig.RootCA(), m.config.PluginGetter)
	baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore())
	healthServer := health.NewHealthServer()
	localHealthServer := health.NewHealthServer()

	authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
	authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize)
	authenticatedLogsServerAPI := api.NewAuthenticatedWrapperLogsServer(m.logbroker, authorize)
	authenticatedLogBrokerAPI := api.NewAuthenticatedWrapperLogBrokerServer(m.logbroker, authorize)
	authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize)
	authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
	authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
	authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize)
	authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize)

	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
	proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
	proxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(authenticatedLogBrokerAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)

	// The following local proxies are only wired up to receive requests
	// from a trusted local socket, and these requests don't use TLS,
	// therefore the requests they handle locally should bypass
	// authorization. When requests are proxied from these servers, they
	// are sent as requests from this manager rather than forwarded
	// requests (it has no TLS information to put in the metadata map).
	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
	handleRequestLocally := func(ctx context.Context) (context.Context, error) {
		remoteAddr := "127.0.0.1:0"

		m.addrMu.Lock()
		if m.config.RemoteAPI != nil {
			if m.config.RemoteAPI.AdvertiseAddr != "" {
				remoteAddr = m.config.RemoteAPI.AdvertiseAddr
			} else {
				remoteAddr = m.config.RemoteAPI.ListenAddr
			}
		}
		m.addrMu.Unlock()

		creds := m.config.SecurityConfig.ClientTLSCreds

		nodeInfo := ca.RemoteNodeInfo{
			Roles:        []string{creds.Role()},
			Organization: creds.Organization(),
			NodeID:       creds.NodeID(),
			RemoteAddr:   remoteAddr,
		}

		return context.WithValue(ctx, ca.LocalRequestKey, nodeInfo), nil
	}
	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
	localProxyLogsAPI := api.NewRaftProxyLogsServer(m.logbroker, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
	localProxyDispatcherAPI := api.NewRaftProxyDispatcherServer(m.dispatcher, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
	localProxyCAAPI := api.NewRaftProxyCAServer(m.caserver, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
	localProxyNodeCAAPI := api.NewRaftProxyNodeCAServer(m.caserver, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
	localProxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(baseResourceAPI, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
	localProxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(m.logbroker, m.raftNode, handleRequestLocally, forwardAsOwnRequest)

	// Everything registered on m.server should be an authenticated
	// wrapper, or a proxy wrapping an authenticated wrapper!
	api.RegisterCAServer(m.server, proxyCAAPI)
	api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
	api.RegisterRaftServer(m.server, authenticatedRaftAPI)
	api.RegisterHealthServer(m.server, authenticatedHealthAPI)
	api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
	api.RegisterControlServer(m.server, authenticatedControlAPI)
	api.RegisterLogsServer(m.server, authenticatedLogsServerAPI)
	api.RegisterLogBrokerServer(m.server, proxyLogBrokerAPI)
	api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI)
	api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)

	api.RegisterControlServer(m.localserver, localProxyControlAPI)
	api.RegisterLogsServer(m.localserver, localProxyLogsAPI)
	api.RegisterHealthServer(m.localserver, localHealthServer)
	api.RegisterDispatcherServer(m.localserver, localProxyDispatcherAPI)
	api.RegisterCAServer(m.localserver, localProxyCAAPI)
	api.RegisterNodeCAServer(m.localserver, localProxyNodeCAAPI)
	api.RegisterResourceAllocatorServer(m.localserver, localProxyResourceAPI)
	api.RegisterLogBrokerServer(m.localserver, localProxyLogBrokerAPI)

	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING)
	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING)

	go m.serveListener(ctx, m.remoteListener)
	go m.serveListener(ctx, m.controlListener)

	defer func() {
		m.server.Stop()
		m.localserver.Stop()
	}()

	// Set the raft server as serving for the health server.
	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)

	if err := m.raftNode.JoinAndStart(ctx); err != nil {
		return errors.Wrap(err, "can't initialize raft node")
	}

	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING)
	close(m.started)

	go func() {
		err := m.raftNode.Run(ctx)
		if err != nil {
			log.G(ctx).WithError(err).Error("raft node stopped")
			m.Stop(ctx, false)
		}
	}()

	if err := raft.WaitForLeader(ctx, m.raftNode); err != nil {
		return err
	}

	c, err := raft.WaitForCluster(ctx, m.raftNode)
	if err != nil {
		return err
	}
	raftConfig := c.Spec.Raft

	if err := m.watchForKEKChanges(ctx); err != nil {
		return err
	}

	if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick {
		log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick)
	}
	if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick {
		log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
	}

	// Wait for an error in serving.
	err = <-m.errServe
	m.mu.Lock()
	if m.stopped {
		m.mu.Unlock()
		return nil
	}
	m.mu.Unlock()
	m.Stop(ctx, false)

	return err
}
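// A minimal sketch (invented key and struct, not the real ca package) of
// what handleRequestLocally does above: since local socket requests carry no
// TLS identity, the manager attaches its own identity to the request context
// so downstream handlers can still answer "who is calling?".
package main

import (
	"context"
	"fmt"
)

type localRequestKeyType struct{}

// localRequestKey plays the role of ca.LocalRequestKey.
var localRequestKey = localRequestKeyType{}

type remoteNodeInfo struct {
	Roles      []string
	NodeID     string
	RemoteAddr string
}

func handleRequestLocally(ctx context.Context) context.Context {
	info := remoteNodeInfo{
		Roles:      []string{"swarm-manager"},
		NodeID:     "this-manager",
		RemoteAddr: "127.0.0.1:0", // fallback used when no advertise address is known
	}
	return context.WithValue(ctx, localRequestKey, info)
}

func main() {
	ctx := handleRequestLocally(context.Background())
	if info, ok := ctx.Value(localRequestKey).(remoteNodeInfo); ok {
		fmt.Printf("local caller: %s (%v) from %s\n", info.NodeID, info.Roles, info.RemoteAddr)
	}
}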
// Run runs the CA signer main loop.
// The CA signer can be stopped by cancelling ctx or by calling Stop().
func (s *Server) Run(ctx context.Context) error {
	s.mu.Lock()
	if s.isRunning() {
		s.mu.Unlock()
		return fmt.Errorf("CA signer is already running")
	}
	s.wg.Add(1)
	s.mu.Unlock()

	defer s.wg.Done()
	logger := log.G(ctx).WithField("module", "ca")
	ctx = log.WithLogger(ctx, logger)

	// Run() should never be called twice, but just in case, we're
	// attempting to close the started channel in a safe way.
	select {
	case <-s.started:
		return fmt.Errorf("CA server cannot be started more than once")
	default:
		close(s.started)
	}

	// Retrieve the channels to keep track of changes in the cluster.
	// Retrieve all the currently registered nodes.
	var nodes []*api.Node
	updates, cancel, err := store.ViewAndWatch(
		s.store,
		func(readTx store.ReadTx) error {
			clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
			if err != nil {
				return err
			}
			if len(clusters) != 1 {
				return fmt.Errorf("could not find cluster object")
			}
			s.updateCluster(ctx, clusters[0])
			nodes, err = store.FindNodes(readTx, store.All)
			return err
		},
		state.EventCreateNode{},
		state.EventUpdateNode{},
		state.EventUpdateCluster{},
	)

	// Do this after updateCluster has been called, so isRunning never
	// returns true without joinTokens being set correctly.
	s.mu.Lock()
	s.ctx, s.cancel = context.WithCancel(ctx)
	s.mu.Unlock()

	if err != nil {
		log.G(ctx).WithFields(logrus.Fields{
			"method": "(*Server).Run",
		}).WithError(err).Errorf("snapshot store view failed")
		return err
	}
	defer cancel()

	// We might have missed some updates if there was a leader election,
	// so let's pick up the slack.
	if err := s.reconcileNodeCertificates(ctx, nodes); err != nil {
		// We don't return here because that means the Run loop would
		// never run. Log an error instead.
		log.G(ctx).WithFields(logrus.Fields{
			"method": "(*Server).Run",
		}).WithError(err).Errorf("error attempting to reconcile certificates")
	}

	// Watch for new nodes being created, new nodes being updated, and
	// changes to the cluster.
	for {
		select {
		case event := <-updates:
			switch v := event.(type) {
			case state.EventCreateNode:
				s.evaluateAndSignNodeCert(ctx, v.Node)
			case state.EventUpdateNode:
				// If this certificate is already at a final state,
				// there is no need to evaluate and sign it.
				if !isFinalState(v.Node.Certificate.Status) {
					s.evaluateAndSignNodeCert(ctx, v.Node)
				}
			case state.EventUpdateCluster:
				s.updateCluster(ctx, v.Cluster)
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-s.ctx.Done():
			return nil
		}
	}
}
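// A minimal sketch of the close-once guard used above: selecting on the
// channel with a default case distinguishes "already closed" from "not yet
// closed" without panicking on a double close. As in the server above, this
// assumes callers are serialized; fully concurrent callers would need a
// mutex or sync.Once instead.
package main

import "fmt"

func startOnce(started chan struct{}) error {
	select {
	case <-started:
		return fmt.Errorf("already started")
	default:
		close(started)
		return nil
	}
}

func main() {
	started := make(chan struct{})
	fmt.Println(startOnce(started)) // <nil>
	fmt.Println(startOnce(started)) // already started
}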
// Run is the TaskReaper's main loop.
func (tr *TaskReaper) Run() {
	defer close(tr.doneChan)

	var tasks []*api.Task
	tr.store.View(func(readTx store.ReadTx) {
		var err error

		clusters, err := store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
		if err == nil && len(clusters) == 1 {
			tr.taskHistory = clusters[0].Spec.Orchestration.TaskHistoryRetentionLimit
		}

		tasks, err = store.FindTasks(readTx, store.ByTaskState(api.TaskStateOrphaned))
		if err != nil {
			log.G(context.TODO()).WithError(err).Error("failed to find Orphaned tasks in task reaper init")
		}
	})

	if len(tasks) > 0 {
		for _, t := range tasks {
			// Do not reap service tasks immediately.
			if t.ServiceID != "" {
				continue
			}
			tr.orphaned = append(tr.orphaned, t.ID)
		}

		if len(tr.orphaned) > 0 {
			tr.tick()
		}
	}

	timer := time.NewTimer(reaperBatchingInterval)

	for {
		select {
		case event := <-tr.watcher:
			switch v := event.(type) {
			case state.EventCreateTask:
				t := v.Task
				tr.dirty[instanceTuple{
					instance:  t.Slot,
					serviceID: t.ServiceID,
					nodeID:    t.NodeID,
				}] = struct{}{}
			case state.EventUpdateTask:
				t := v.Task
				if t.Status.State >= api.TaskStateOrphaned && t.ServiceID == "" {
					tr.orphaned = append(tr.orphaned, t.ID)
				}
			case state.EventUpdateCluster:
				tr.taskHistory = v.Cluster.Spec.Orchestration.TaskHistoryRetentionLimit
			}

			if len(tr.dirty)+len(tr.orphaned) > maxDirty {
				timer.Stop()
				tr.tick()
			} else {
				timer.Reset(reaperBatchingInterval)
			}
		case <-timer.C:
			timer.Stop()
			tr.tick()
		case <-tr.stopChan:
			timer.Stop()
			return
		}
	}
}
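// A minimal, self-contained sketch (invented names) of the batching strategy
// in the reaper loop above: work items accumulate in a set and are flushed
// either when the set grows past a threshold or when a timer expires, so
// bursts of events are coalesced into a single pass.
package main

import (
	"fmt"
	"time"
)

const (
	maxDirty         = 3
	batchingInterval = 50 * time.Millisecond
)

func main() {
	dirty := map[int]struct{}{}
	items := make(chan int, 10)
	for i := 0; i < 5; i++ {
		items <- i
	}
	close(items)

	flush := func() {
		if len(dirty) == 0 {
			return
		}
		fmt.Println("flushing", len(dirty), "items") // tr.tick() plays this role above
		dirty = map[int]struct{}{}
	}

	timer := time.NewTimer(batchingInterval)
	defer timer.Stop()

	for {
		select {
		case it, ok := <-items:
			if !ok {
				flush() // drain whatever is left, then exit
				return
			}
			dirty[it] = struct{}{}
			if len(dirty) > maxDirty {
				timer.Stop()
				flush()
			} else {
				timer.Reset(batchingInterval)
			}
		case <-timer.C:
			flush()
			timer.Reset(batchingInterval)
		}
	}
}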
func TestNewNodeCertificateRequiresToken(t *testing.T) {
	tc := testutils.NewTestCA(t)
	defer tc.Stop()

	csr, _, err := ca.GenerateAndWriteNewKey(tc.Paths.Node)
	assert.NoError(t, err)

	// Issuance fails if no secret is provided.
	role := api.NodeRoleManager
	issueRequest := &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	// Issuance fails if the wrong secret is provided.
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: "invalid-secret"}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: "invalid-secret"}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	// Issuance succeeds if the correct token is provided.
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.ManagerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.WorkerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)

	// Rotate the manager and worker tokens.
	var (
		newManagerToken string
		newWorkerToken  string
	)
	assert.NoError(t, tc.MemoryStore.Update(func(tx store.Tx) error {
		clusters, _ := store.FindClusters(tx, store.ByName(store.DefaultClusterName))
		newWorkerToken = ca.GenerateJoinToken(&tc.RootCA)
		clusters[0].RootCA.JoinTokens.Worker = newWorkerToken
		newManagerToken = ca.GenerateJoinToken(&tc.RootCA)
		clusters[0].RootCA.JoinTokens.Manager = newManagerToken
		return store.UpdateCluster(tx, clusters[0])
	}))

	// Give the CA server time to pick up the cluster update.
	time.Sleep(500 * time.Millisecond)

	// The old tokens should fail.
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.ManagerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: tc.WorkerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.EqualError(t, err, "rpc error: code = 3 desc = A valid join token is necessary to join this cluster")

	// The new tokens should succeed.
	role = api.NodeRoleManager
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: newManagerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)

	role = api.NodeRoleWorker
	issueRequest = &api.IssueNodeCertificateRequest{CSR: csr, Role: role, Token: newWorkerToken}
	_, err = tc.NodeCAClients[0].IssueNodeCertificate(context.Background(), issueRequest)
	assert.NoError(t, err)
}
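// The 500ms sleep in the test above gives the CA server time to observe the
// cluster update before the old tokens are retried. A polling helper such as
// the stdlib-only sketch below (invented name, not part of the swarmkit test
// utilities) is usually more robust than a fixed sleep in tests like this.
package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntil retries check until it returns nil or the timeout elapses.
func pollUntil(timeout, interval time.Duration, check func() error) error {
	deadline := time.Now().Add(timeout)
	for {
		err := check()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("condition not met after %v: %w", timeout, err)
		}
		time.Sleep(interval)
	}
}

func main() {
	start := time.Now()
	err := pollUntil(time.Second, 10*time.Millisecond, func() error {
		// Stand-in for "the CA server has picked up the rotated tokens".
		if time.Since(start) < 50*time.Millisecond {
			return errors.New("not ready yet")
		}
		return nil
	})
	fmt.Println(err) // <nil>
}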