// When a pod is deleted, enqueue the controller that manages the pod and update its expectations. // obj could be an *api.Pod, or a DeletionFinalStateUnknown marker item. func (rm *ReplicationManager) deletePod(obj interface{}) { pod, ok := obj.(*api.Pod) // When a delete is dropped, the relist will notice a pod in the store not // in the list, leading to the insertion of a tombstone object which contains // the deleted key/value. Note that this value might be stale. If the pod // changed labels the new rc will not be woken up till the periodic resync. if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { glog.Errorf("Couldn't get object from tombstone %+v, could take up to %v before a controller recreates a replica", obj, controller.ExpectationsTimeout) return } pod, ok = tombstone.Obj.(*api.Pod) if !ok { glog.Errorf("Tombstone contained object that is not a pod %+v, could take up to %v before controller recreates a replica", obj, controller.ExpectationsTimeout) return } } if rc := rm.getPodController(pod); rc != nil { rcKey, err := controller.KeyFunc(rc) if err != nil { glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err) return } rm.expectations.DeletionObserved(rcKey) rm.enqueueController(rc) } }
func getKey(rc *api.ReplicationController, t *testing.T) string { if key, err := controller.KeyFunc(rc); err != nil { t.Errorf("Unexpected error getting key for rc %v: %v", rc.Name, err) return "" } else { return key } }
// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning // it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked // concurrently with the same key. func (rm *ReplicationManager) syncReplicationController(key string) error { startTime := time.Now() defer func() { glog.V(4).Infof("Finished syncing controller %q (%v)", key, time.Now().Sub(startTime)) }() obj, exists, err := rm.rcStore.Store.GetByKey(key) if !exists { glog.Infof("Replication Controller has been deleted %v", key) rm.expectations.DeleteExpectations(key) return nil } if err != nil { glog.Infof("Unable to retrieve rc %v from store: %v", key, err) rm.queue.Add(key) return err } rc := *obj.(*api.ReplicationController) if !rm.podStoreSynced() { // Sleep so we give the pod reflector goroutine a chance to run. time.Sleep(PodStoreSyncedPollPeriod) glog.Infof("Waiting for pods controller to sync, requeuing rc %v", rc.Name) rm.enqueueController(&rc) return nil } // Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in // and update the expectations after we've retrieved active pods from the store. If a new pod enters // the store after we've checked the expectation, the rc sync is just deferred till the next relist. rcKey, err := controller.KeyFunc(&rc) if err != nil { glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err) return err } rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey) podList, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelector()) if err != nil { glog.Errorf("Error getting pods for rc %q: %v", key, err) rm.queue.Add(key) return err } // TODO: Do this in a single pass, or use an index. filteredPods := controller.FilterActivePods(podList.Items) if rcNeedsSync { rm.manageReplicas(filteredPods, &rc) } // Always updates status as pods come up or die. if err := updateReplicaCount(rm.kubeClient.ReplicationControllers(rc.Namespace), rc, len(filteredPods)); err != nil { // Multiple things could lead to this update failing. Requeuing the controller ensures // we retry with some fairness. glog.V(2).Infof("Failed to update replica count for controller %v, requeuing", rc.Name) rm.enqueueController(&rc) } return nil }
// manageReplicas checks and updates replicas for the given replication controller. func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, rc *api.ReplicationController) { diff := len(filteredPods) - rc.Spec.Replicas rcKey, err := controller.KeyFunc(rc) if err != nil { glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err) return } if diff < 0 { diff *= -1 if diff > rm.burstReplicas { diff = rm.burstReplicas } rm.expectations.ExpectCreations(rcKey, diff) wait := sync.WaitGroup{} wait.Add(diff) glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", rc.Namespace, rc.Name, rc.Spec.Replicas, diff) for i := 0; i < diff; i++ { go func() { defer wait.Done() if err := rm.podControl.CreateReplica(rc.Namespace, rc); err != nil { // Decrement the expected number of creates because the informer won't observe this pod glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name) rm.expectations.CreationObserved(rcKey) util.HandleError(err) } }() } wait.Wait() } else if diff > 0 { if diff > rm.burstReplicas { diff = rm.burstReplicas } rm.expectations.ExpectDeletions(rcKey, diff) glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", rc.Namespace, rc.Name, rc.Spec.Replicas, diff) // No need to sort pods if we are about to delete all of them if rc.Spec.Replicas != 0 { // Sort the pods in the order such that not-ready < ready, unscheduled // < scheduled, and pending < running. This ensures that we delete pods // in the earlier stages whenever possible. sort.Sort(controller.ActivePods(filteredPods)) } wait := sync.WaitGroup{} wait.Add(diff) for i := 0; i < diff; i++ { go func(ix int) { defer wait.Done() if err := rm.podControl.DeletePod(rc.Namespace, filteredPods[ix].Name); err != nil { // Decrement the expected number of deletes because the informer won't observe this deletion glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name) rm.expectations.DeletionObserved(rcKey) } }(i) } wait.Wait() } }
func TestSyncReplicationControllerDormancy(t *testing.T) { // Setup a test server so we can lie about the current state of pods fakeHandler := util.FakeHandler{ StatusCode: 200, ResponseBody: "", } testServer := httptest.NewServer(&fakeHandler) defer testServer.Close() client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()}) fakePodControl := FakePodControl{} manager := NewReplicationManager(client, BurstReplicas) manager.podStoreSynced = alwaysReady manager.podControl = &fakePodControl controllerSpec := newReplicationController(2) manager.rcStore.Store.Add(controllerSpec) newPodList(manager.podStore.Store, 1, api.PodRunning, controllerSpec) // Creates a replica and sets expectations controllerSpec.Status.Replicas = 1 manager.syncReplicationController(getKey(controllerSpec, t)) validateSyncReplication(t, &fakePodControl, 1, 0) // Expectations prevents replicas but not an update on status controllerSpec.Status.Replicas = 0 fakePodControl.clear() manager.syncReplicationController(getKey(controllerSpec, t)) validateSyncReplication(t, &fakePodControl, 0, 0) // Get the key for the controller rcKey, err := controller.KeyFunc(controllerSpec) if err != nil { t.Errorf("Couldn't get key for object %+v: %v", controllerSpec, err) } // Lowering expectations should lead to a sync that creates a replica, however the // fakePodControl error will prevent this, leaving expectations at 0, 0 manager.expectations.CreationObserved(rcKey) controllerSpec.Status.Replicas = 1 fakePodControl.clear() fakePodControl.err = fmt.Errorf("Fake Error") manager.syncReplicationController(getKey(controllerSpec, t)) validateSyncReplication(t, &fakePodControl, 0, 0) // This replica should not need a Lowering of expectations, since the previous create failed fakePodControl.err = nil manager.syncReplicationController(getKey(controllerSpec, t)) validateSyncReplication(t, &fakePodControl, 1, 0) // 1 PUT for the rc status during dormancy window. // Note that the pod creates go through pod control so they're not recorded. fakeHandler.ValidateRequestCount(t, 1) }
// When a pod is created, enqueue the controller that manages it and update it's expectations. func (rm *ReplicationManager) addPod(obj interface{}) { pod := obj.(*api.Pod) if rc := rm.getPodController(pod); rc != nil { rcKey, err := controller.KeyFunc(rc) if err != nil { glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err) return } rm.expectations.CreationObserved(rcKey) rm.enqueueController(rc) } }
// obj could be an *api.ReplicationController, or a DeletionFinalStateUnknown marker item. func (rm *ReplicationManager) enqueueController(obj interface{}) { key, err := controller.KeyFunc(obj) if err != nil { glog.Errorf("Couldn't get key for object %+v: %v", obj, err) return } // TODO: Handle overlapping controllers better. Either disallow them at admission time or // deterministically avoid syncing controllers that fight over pods. Currently, we only // ensure that the same controller is synced for a given pod. When we periodically relist // all controllers there will still be some replica instability. One way to handle this is // by querying the store for all controllers that this rc overlaps, as well as all // controllers that overlap this rc, and sorting them. rm.queue.Add(key) }
func TestDeleteControllerAndExpectations(t *testing.T) { client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}) manager := NewReplicationManager(client, 10) manager.podStoreSynced = alwaysReady rc := newReplicationController(1) manager.rcStore.Store.Add(rc) fakePodControl := FakePodControl{} manager.podControl = &fakePodControl // This should set expectations for the rc manager.syncReplicationController(getKey(rc, t)) validateSyncReplication(t, &fakePodControl, 1, 0) fakePodControl.clear() // Get the RC key rcKey, err := controller.KeyFunc(rc) if err != nil { t.Errorf("Couldn't get key for object %+v: %v", rc, err) } // This is to simulate a concurrent addPod, that has a handle on the expectations // as the controller deletes it. podExp, exists, err := manager.expectations.GetExpectations(rcKey) if !exists || err != nil { t.Errorf("No expectations found for rc") } manager.rcStore.Delete(rc) manager.syncReplicationController(getKey(rc, t)) if _, exists, err = manager.expectations.GetExpectations(rcKey); exists { t.Errorf("Found expectaions, expected none since the rc has been deleted.") } // This should have no effect, since we've deleted the rc. podExp.Seen(1, 0) manager.podStore.Store.Replace(make([]interface{}, 0)) manager.syncReplicationController(getKey(rc, t)) validateSyncReplication(t, &fakePodControl, 0, 0) }
func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int) { client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}) fakePodControl := FakePodControl{} manager := NewReplicationManager(client, burstReplicas) manager.podStoreSynced = alwaysReady manager.podControl = &fakePodControl controllerSpec := newReplicationController(numReplicas) manager.rcStore.Store.Add(controllerSpec) expectedPods := 0 pods := newPodList(nil, numReplicas, api.PodPending, controllerSpec) rcKey, err := controller.KeyFunc(controllerSpec) if err != nil { t.Errorf("Couldn't get key for object %+v: %v", controllerSpec, err) } // Size up the controller, then size it down, and confirm the expected create/delete pattern for _, replicas := range []int{numReplicas, 0} { controllerSpec.Spec.Replicas = replicas manager.rcStore.Store.Add(controllerSpec) for i := 0; i < numReplicas; i += burstReplicas { manager.syncReplicationController(getKey(controllerSpec, t)) // The store accrues active pods. It's also used by the rc to determine how many // replicas to create. activePods := len(manager.podStore.Store.List()) if replicas != 0 { // This is the number of pods currently "in flight". They were created by the rc manager above, // which then puts the rc to sleep till all of them have been observed. expectedPods = replicas - activePods if expectedPods > burstReplicas { expectedPods = burstReplicas } // This validates the rc manager sync actually created pods validateSyncReplication(t, &fakePodControl, expectedPods, 0) // This simulates the watch events for all but 1 of the expected pods. // None of these should wake the controller because it has expectations==BurstReplicas. for i := 0; i < expectedPods-1; i++ { manager.podStore.Store.Add(&pods.Items[i]) manager.addPod(&pods.Items[i]) } podExp, exists, err := manager.expectations.GetExpectations(rcKey) if !exists || err != nil { t.Fatalf("Did not find expectations for rc.") } if add, _ := podExp.GetExpectations(); add != 1 { t.Fatalf("Expectations are wrong %v", podExp) } } else { expectedPods = (replicas - activePods) * -1 if expectedPods > burstReplicas { expectedPods = burstReplicas } validateSyncReplication(t, &fakePodControl, 0, expectedPods) for i := 0; i < expectedPods-1; i++ { manager.podStore.Store.Delete(&pods.Items[i]) manager.deletePod(&pods.Items[i]) } podExp, exists, err := manager.expectations.GetExpectations(rcKey) if !exists || err != nil { t.Fatalf("Did not find expectations for rc.") } if _, del := podExp.GetExpectations(); del != 1 { t.Fatalf("Expectations are wrong %v", podExp) } } // Check that the rc didn't take any action for all the above pods fakePodControl.clear() manager.syncReplicationController(getKey(controllerSpec, t)) validateSyncReplication(t, &fakePodControl, 0, 0) // Create/Delete the last pod // The last add pod will decrease the expectation of the rc to 0, // which will cause it to create/delete the remaining replicas upto burstReplicas. if replicas != 0 { manager.podStore.Store.Add(&pods.Items[expectedPods-1]) manager.addPod(&pods.Items[expectedPods-1]) } else { manager.podStore.Store.Delete(&pods.Items[expectedPods-1]) manager.deletePod(&pods.Items[expectedPods-1]) } pods.Items = pods.Items[expectedPods:] } // Confirm that we've created the right number of replicas activePods := len(manager.podStore.Store.List()) if activePods != controllerSpec.Spec.Replicas { t.Fatalf("Unexpected number of active pods, expected %d, got %d", controllerSpec.Spec.Replicas, activePods) } // Replenish the pod list, since we cut it down sizing up pods = newPodList(nil, replicas, api.PodRunning, controllerSpec) } }