// updateCIDRAllocation assigns a CIDR to the node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
	var err error
	var node *api.Node
	defer r.removeNodeFromProcessing(data.nodeName)
	for rep := 0; rep < podCIDRUpdateRetry; rep++ {
		// TODO: change it to using PATCH instead of full Node updates.
		node, err = r.client.Core().Nodes().Get(data.nodeName)
		if err != nil {
			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
			continue
		}
		glog.Infof("Got Node: %v", node)
		node.Spec.PodCIDR = data.cidr.String()
		if _, err := r.client.Core().Nodes().Update(node); err != nil {
			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err)
		} else {
			break
		}
	}
	if err != nil {
		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
		// We accept the fact that we may leak CIDRs here. This is safer than releasing
		// them in the case where we don't know whether the request went through.
		// A NodeController restart will return all falsely allocated CIDRs to the pool.
		if !apierrors.IsServerTimeout(err) {
			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil {
				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
			}
		}
	}
	return err
}
func updateIngressOrFail(clientset *federation_release_1_4.Clientset, namespace string) (newIng *v1beta1.Ingress) {
	var err error
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateIngressOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}
	ingress := &v1beta1.Ingress{
		ObjectMeta: v1.ObjectMeta{
			Name: FederatedIngressName,
		},
		Spec: v1beta1.IngressSpec{
			Backend: &v1beta1.IngressBackend{
				ServiceName: "updated-testingress-service",
				ServicePort: intstr.FromInt(80),
			},
		},
	}

	for i := 0; i < 3; i++ {
		_, err = clientset.Extensions().Ingresses(namespace).Get(FederatedIngressName)
		if err != nil {
			framework.Failf("failed to get ingress %q: %v", FederatedIngressName, err)
		}
		newIng, err = clientset.Extensions().Ingresses(namespace).Update(ingress)
		if err == nil {
			describeIng(namespace)
			return
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update ingress %q: %v", FederatedIngressName, err)
		}
	}
	framework.Failf("too many retries updating ingress %q", FederatedIngressName)
	return newIng
}
func updateSecretOrFail(clientset *fedclientset.Clientset, namespace string) *v1.Secret {
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateSecretOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}

	var newSecret *v1.Secret
	for retryCount := 0; retryCount < MaxRetries; retryCount++ {
		secret, err := clientset.Core().Secrets(namespace).Get(FederatedSecretName)
		if err != nil {
			framework.Failf("failed to get secret %q: %v", FederatedSecretName, err)
		}

		// Update one of the data entries in the secret.
		secret.Data = map[string][]byte{
			"key": []byte("value"),
		}
		newSecret, err = clientset.Core().Secrets(namespace).Update(secret)
		if err == nil {
			return newSecret
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update secret %q: %v", FederatedSecretName, err)
		}
	}
	framework.Failf("too many retries updating secret %q", FederatedSecretName)
	return newSecret
}
func updateSecretOrFail(clientset *federation_release_1_4.Clientset, namespace string) *v1.Secret {
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateSecretOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}

	var err error
	var newSecret *v1.Secret
	secret := &v1.Secret{
		ObjectMeta: v1.ObjectMeta{
			Name: UpdatedFederatedSecretName,
		},
	}
	for retryCount := 0; retryCount < MaxRetries; retryCount++ {
		_, err = clientset.Core().Secrets(namespace).Get(FederatedSecretName)
		if err != nil {
			framework.Failf("failed to get secret %q: %v", FederatedSecretName, err)
		}
		newSecret, err = clientset.Core().Secrets(namespace).Update(secret)
		if err == nil {
			return newSecret
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update secret %q: %v", FederatedSecretName, err)
		}
	}
	framework.Failf("too many retries updating secret %q", FederatedSecretName)
	return newSecret
}
func updateDaemonSetOrFail(clientset *fedclientset.Clientset, namespace string) *v1beta1.DaemonSet {
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateDaemonSetOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}

	var newDaemonSet *v1beta1.DaemonSet
	for retryCount := 0; retryCount < FederatedDaemonSetMaxRetries; retryCount++ {
		daemonset, err := clientset.Extensions().DaemonSets(namespace).Get(FederatedDaemonSetName)
		if err != nil {
			framework.Failf("failed to get daemonset %q: %v", FederatedDaemonSetName, err)
		}

		// Update one of the annotations on the daemonset.
		daemonset.Annotations = map[string]string{"ccc": "ddd"}
		newDaemonSet, err = clientset.Extensions().DaemonSets(namespace).Update(daemonset)
		if err == nil {
			return newDaemonSet
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update daemonset %q: %v", FederatedDaemonSetName, err)
		}
	}
	framework.Failf("too many retries updating daemonset %q", FederatedDaemonSetName)
	return newDaemonSet
}
func (t *Tester) testCreateGeneratesNameReturnsServerTimeout(valid runtime.Object) {
	objectMeta := t.getObjectMetaOrFail(valid)
	objectMeta.Name = ""
	objectMeta.GenerateName = "test-"
	t.withStorageError(errors.NewAlreadyExists("kind", "thing"), func() {
		_, err := t.storage.(rest.Creater).Create(t.TestContext(), valid)
		if err == nil || !errors.IsServerTimeout(err) {
			t.Fatalf("Unexpected error: %v", err)
		}
	})
}
func (t *Tester) TestCreateGeneratesNameReturnsServerTimeout(valid runtime.Object) {
	objectMeta, err := api.ObjectMetaFor(valid)
	if err != nil {
		t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, valid)
	}

	objectMeta.Name = ""
	objectMeta.GenerateName = "test-"
	t.withStorageError(errors.NewAlreadyExists("kind", "thing"), func() {
		_, err := t.storage.(rest.Creater).Create(t.TestContext(), valid)
		if err == nil || !errors.IsServerTimeout(err) {
			t.Fatalf("Unexpected error: %v", err)
		}
	})
}
func (p *statefulSetTester) update(ns, name string, update func(ps *apps.StatefulSet)) {
	for i := 0; i < 3; i++ {
		ps, err := p.c.Apps().StatefulSets(ns).Get(name, metav1.GetOptions{})
		if err != nil {
			framework.Failf("failed to get statefulset %q: %v", name, err)
		}
		update(ps)
		ps, err = p.c.Apps().StatefulSets(ns).Update(ps)
		if err == nil {
			return
		}
		if !apierrs.IsConflict(err) && !apierrs.IsServerTimeout(err) {
			framework.Failf("failed to update statefulset %q: %v", name, err)
		}
	}
	framework.Failf("too many retries updating statefulset %q", name)
}
// DefaultRetriable adds retry information to the provided error, and will refresh the
// info if the client info is stale. If the refresh fails, the error is made fatal.
// All other errors are left in their natural state - they will not be retried unless
// they define a Temporary() method that returns true.
func DefaultRetriable(info *resource.Info, err error) error {
	if err == nil {
		return nil
	}
	switch {
	case errors.IsMethodNotSupported(err):
		return ErrNotRetriable{err}
	case errors.IsConflict(err):
		if refreshErr := info.Get(); refreshErr != nil {
			return ErrNotRetriable{err}
		}
		return ErrRetriable{err}
	case errors.IsServerTimeout(err):
		return ErrRetriable{err}
	}
	return err
}
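// A minimal usage sketch (not from the original source): per the doc comment on
// DefaultRetriable, callers should retry only errors that report Temporary() == true.
// The names below (temporaryError, isTemporary, retryOnTemporary) are hypothetical.

// temporaryError is the interface an error must satisfy to be considered retriable.
type temporaryError interface {
	Temporary() bool
}

// isTemporary reports whether err advertises itself as retriable.
func isTemporary(err error) bool {
	t, ok := err.(temporaryError)
	return ok && t.Temporary()
}

// retryOnTemporary runs op up to attempts times, stopping early on success or
// on the first error that is not marked temporary.
func retryOnTemporary(attempts int, op func() error) error {
	var err error
	for i := 0; i < attempts; i++ {
		if err = op(); err == nil || !isTemporary(err) {
			return err
		}
	}
	return err
}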
// updateService fetches a service, calls the update function on it,
// and then attempts to send the updated service. It retries up to 2
// times in the face of timeouts and conflicts.
func updateService(c *client.Client, namespace, serviceName string, update func(*api.Service)) (*api.Service, error) {
	var service *api.Service
	var err error
	for i := 0; i < 3; i++ {
		service, err = c.Services(namespace).Get(serviceName)
		if err != nil {
			return service, err
		}

		update(service)

		service, err = c.Services(namespace).Update(service)
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			return service, err
		}
	}
	return service, err
}
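// A hypothetical usage sketch (not from the original source): drive updateService
// with a mutation closure and rely on its conflict/timeout retry behavior. The
// helper name is illustrative only; the client and api packages are the same ones
// updateService itself uses.
func setServiceAnnotation(c *client.Client, namespace, serviceName, key, value string) (*api.Service, error) {
	return updateService(c, namespace, serviceName, func(s *api.Service) {
		if s.Annotations == nil {
			s.Annotations = map[string]string{}
		}
		s.Annotations[key] = value
	})
}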
func (j *testJig) update(update func(ing *extensions.Ingress)) {
	var err error
	ns, name := j.ing.Namespace, j.ing.Name
	for i := 0; i < 3; i++ {
		j.ing, err = j.client.Extensions().Ingresses(ns).Get(name, metav1.GetOptions{})
		if err != nil {
			framework.Failf("failed to get ingress %q: %v", name, err)
		}
		update(j.ing)
		j.ing, err = j.client.Extensions().Ingresses(ns).Update(j.ing)
		if err == nil {
			describeIng(j.ing.Namespace)
			return
		}
		if !apierrs.IsConflict(err) && !apierrs.IsServerTimeout(err) {
			framework.Failf("failed to update ingress %q: %v", name, err)
		}
	}
	framework.Failf("too many retries updating ingress %q", name)
}
func TestAssignsDefaultServiceAccountAndRejectsMissingAPIToken(t *testing.T) {
	ns := "myns"

	admit := NewServiceAccount(nil)
	admit.MountServiceAccountToken = true
	admit.RequireAPIToken = true

	// Add the default service account for the ns into the cache
	admit.serviceAccounts.Add(&api.ServiceAccount{
		ObjectMeta: api.ObjectMeta{
			Name:      DefaultServiceAccountName,
			Namespace: ns,
		},
	})

	pod := &api.Pod{}
	attrs := admission.NewAttributesRecord(pod, api.Kind("Pod").WithVersion("version"), ns, "myname", api.Resource("pods").WithVersion("version"), "", admission.Create, nil)
	err := admit.Admit(attrs)
	if err == nil || !errors.IsServerTimeout(err) {
		t.Errorf("Expected server timeout error for missing API token: %v", err)
	}
}
// reconcileNonTerminalTask reconciles an unknown (from the perspective of our registry) non-terminal task.
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	// attempt to recover the task from pod info:
	// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
	// - the Name can be parsed by container.ParseFullName() to yield a pod Name and Namespace
	// - pull the pod metadata down from the api server
	// - perform task recovery based on pod metadata
	taskId := taskStatus.TaskId.GetValue()
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there will be no data in the task status that we can use to determine the associated pod
		switch taskStatus.GetState() {
		case mesos.TaskState_TASK_STAGING:
			// there is still hope for this task, don't kill it just yet
			// TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
			return
		default:
			// for TASK_{STARTING,RUNNING} we should already have attempted recoverTasks().
			// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
			// be processing this reconciliation update before we process the one from the executor.
			// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
			// so it gets killed.
			log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
		}
	} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
		// a possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
	} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
		// a possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v",
			podStatus.Name, taskId, err)
	} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
		if t, ok, err := podtask.RecoverFrom(*pod); ok {
			log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
			_, err := k.taskRegistry.Register(t, nil)
			if err != nil {
				// someone beat us to it?!
				log.Warningf("failed to register recovered task: %v", err)
				return
			} else {
				k.taskRegistry.UpdateStatus(taskStatus)
			}
			return
		} else if err != nil {
			// should kill the pod and the task
			log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
			if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
				log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
			}
		} else {
			// this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod
			// metadata is not appropriate for task reconstruction -- which should almost certainly never
			// be the case unless someone swapped out the pod on us (and kept the same namespace/name) while
			// we were failed over.
			// kill this task, and allow the newly launched scheduler to schedule the new pod
			log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
		}
	} else if errors.IsNotFound(err) {
		// pod lookup failed, so delete the task since the pod is no longer valid; may be redundant, that's ok
		log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
	} else if errors.IsServerTimeout(err) {
		log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
		return
	} else {
		log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
		return
	}
	if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
		log.Errorf("failed to kill task %v: %v", taskId, err)
	}
}
func TestCheckGeneratedNameError(t *testing.T) {
	expect := errors.NewNotFound("foo", "bar")
	if err := rest.CheckGeneratedNameError(Strategy, expect, &api.Pod{}); err != expect {
		t.Errorf("NotFoundError should be ignored: %v", err)
	}

	expect = errors.NewAlreadyExists("foo", "bar")
	if err := rest.CheckGeneratedNameError(Strategy, expect, &api.Pod{}); err != expect {
		t.Errorf("AlreadyExists should be returned when no GenerateName field: %v", err)
	}

	expect = errors.NewAlreadyExists("foo", "bar")
	if err := rest.CheckGeneratedNameError(Strategy, expect, &api.Pod{ObjectMeta: api.ObjectMeta{GenerateName: "foo"}}); err == nil || !errors.IsServerTimeout(err) {
		t.Errorf("expected try again later error: %v", err)
	}
}