Example 1
// Assigns CIDR to Node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
	var err error
	var node *api.Node
	defer r.removeNodeFromProcessing(data.nodeName)
	for rep := 0; rep < podCIDRUpdateRetry; rep++ {
		// TODO: change it to using PATCH instead of full Node updates.
		node, err = r.client.Core().Nodes().Get(data.nodeName)
		if err != nil {
			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
			continue
		}
		glog.Infof("Got Node: %v", node)
		node.Spec.PodCIDR = data.cidr.String()
		if _, err = r.client.Core().Nodes().Update(node); err != nil {
			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err)
		} else {
			break
		}
	}
	if err != nil {
		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
		// We accept the fact that we may leak CIDRs here. This is safer than releasing
		// them in a case where we don't know whether the request went through.
		// NodeController restart will return all falsely allocated CIDRs to the pool.
		if !apierrors.IsServerTimeout(err) {
			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil {
				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
			}
		}
	}
	return err
}
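A hedged sketch of the PATCH-based variant mentioned in the TODO above, assuming the typed client exposes a Patch method with the usual (name, patch type, body) shape and that api.StrategicMergePatchType and fmt are available in this file; the helper name and call shape are assumptions, not code from this tree.

// patchCIDRAllocation sends only Spec.PodCIDR instead of the full Node,
// shrinking the read-modify-write conflict window of the retry loop above.
func (r *rangeAllocator) patchCIDRAllocation(data nodeAndCIDR) error {
	// Strategic-merge patch touching only spec.podCIDR (assumed call shape).
	patch := []byte(fmt.Sprintf(`{"spec":{"podCIDR":%q}}`, data.cidr.String()))
	if _, err := r.client.Core().Nodes().Patch(data.nodeName, api.StrategicMergePatchType, patch); err != nil {
		glog.Errorf("Failed to patch Node.Spec.PodCIDR for node %v: %v", data.nodeName, err)
		return err
	}
	return nil
}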
Example 2
func updateIngressOrFail(clientset *federation_release_1_4.Clientset, namespace string) (newIng *v1beta1.Ingress) {
	var err error
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateIngressOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}
	ingress := &v1beta1.Ingress{
		ObjectMeta: v1.ObjectMeta{
			Name: FederatedIngressName,
		},
		Spec: v1beta1.IngressSpec{
			Backend: &v1beta1.IngressBackend{
				ServiceName: "updated-testingress-service",
				ServicePort: intstr.FromInt(80),
			},
		},
	}

	for retryCount := 0; retryCount < 3; retryCount++ {
		_, err = clientset.Extensions().Ingresses(namespace).Get(FederatedIngressName)
		if err != nil {
			framework.Failf("failed to get ingress %q: %v", FederatedIngressName, err)
		}
		newIng, err = clientset.Extensions().Ingresses(namespace).Update(ingress)
		if err == nil {
			describeIng(namespace)
			return
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update ingress %q: %v", FederatedIngressName, err)
		}
	}
	framework.Failf("too many retries updating ingress %q", FederatedIngressName)
	return newIng
}
Example 3
func updateSecretOrFail(clientset *fedclientset.Clientset, namespace string) *v1.Secret {
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateSecretOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}

	var newSecret *v1.Secret
	for retryCount := 0; retryCount < MaxRetries; retryCount++ {
		secret, err := clientset.Core().Secrets(namespace).Get(FederatedSecretName)
		if err != nil {
			framework.Failf("failed to get secret %q: %v", FederatedSecretName, err)
		}

		// Update one of the fields in the secret.
		secret.Data = map[string][]byte{
			"key": []byte("value"),
		}
		newSecret, err = clientset.Core().Secrets(namespace).Update(secret)
		if err == nil {
			return newSecret
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update secret %q: %v", FederatedSecretName, err)
		}
	}
	framework.Failf("too many retries updating secret %q", FederatedSecretName)
	return newSecret
}
Example 4
func updateSecretOrFail(clientset *federation_release_1_4.Clientset, namespace string) *v1.Secret {
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateSecretOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}

	var err error
	var newSecret *v1.Secret
	secret := &v1.Secret{
		ObjectMeta: v1.ObjectMeta{
			Name: UpdatedFederatedSecretName,
		},
	}

	for retryCount := 0; retryCount < MaxRetries; retryCount++ {
		_, err = clientset.Core().Secrets(namespace).Get(FederatedSecretName)
		if err != nil {
			framework.Failf("failed to get secret %q: %v", FederatedSecretName, err)
		}
		newSecret, err = clientset.Core().Secrets(namespace).Update(secret)
		if err == nil {
			return newSecret
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update secret %q: %v", FederatedSecretName, err)
		}
	}
	framework.Failf("too many retries updating secret %q", FederatedSecretName)
	return newSecret
}
Example 5
func updateDaemonSetOrFail(clientset *fedclientset.Clientset, namespace string) *v1beta1.DaemonSet {
	if clientset == nil || len(namespace) == 0 {
		Fail(fmt.Sprintf("Internal error: invalid parameters passed to updateDaemonSetOrFail: clientset: %v, namespace: %v", clientset, namespace))
	}

	var newDaemonSet *v1beta1.DaemonSet
	for retryCount := 0; retryCount < FederatedDaemonSetMaxRetries; retryCount++ {
		daemonset, err := clientset.Extensions().DaemonSets(namespace).Get(FederatedDaemonSetName)
		if err != nil {
			framework.Failf("failed to get daemonset %q: %v", FederatedDaemonSetName, err)
		}

		// Update one of the fields in the daemonset.
		daemonset.Annotations = map[string]string{"ccc": "ddd"}
		newDaemonSet, err = clientset.Extensions().DaemonSets(namespace).Update(daemonset)
		if err == nil {
			return newDaemonSet
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			framework.Failf("failed to update daemonset %q: %v", FederatedDaemonSetName, err)
		}
	}
	framework.Failf("too many retries updating daemonset %q", FederatedDaemonSetName)
	return newDaemonSet
}
Example 6
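// A generated-name Create that collides (AlreadyExists) is surfaced to the
// client as a ServerTimeout ("try again later"), so callers can simply retry
// and receive a fresh generated name.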
func (t *Tester) testCreateGeneratesNameReturnsServerTimeout(valid runtime.Object) {
	objectMeta := t.getObjectMetaOrFail(valid)
	objectMeta.Name = ""
	objectMeta.GenerateName = "test-"
	t.withStorageError(errors.NewAlreadyExists("kind", "thing"), func() {
		_, err := t.storage.(rest.Creater).Create(t.TestContext(), valid)
		if err == nil || !errors.IsServerTimeout(err) {
			t.Fatalf("Unexpected error: %v", err)
		}
	})
}
Example 7
func (t *Tester) TestCreateGeneratesNameReturnsServerTimeout(valid runtime.Object) {
	objectMeta, err := api.ObjectMetaFor(valid)
	if err != nil {
		t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, valid)
	}

	objectMeta.Name = ""
	objectMeta.GenerateName = "test-"
	t.withStorageError(errors.NewAlreadyExists("kind", "thing"), func() {
		_, err := t.storage.(rest.Creater).Create(t.TestContext(), valid)
		if err == nil || !errors.IsServerTimeout(err) {
			t.Fatalf("Unexpected error: %v", err)
		}
	})
}
Example 8
func (p *statefulSetTester) update(ns, name string, update func(ps *apps.StatefulSet)) {
	for i := 0; i < 3; i++ {
		ps, err := p.c.Apps().StatefulSets(ns).Get(name, metav1.GetOptions{})
		if err != nil {
			framework.Failf("failed to get statefulset %q: %v", name, err)
		}
		update(ps)
		ps, err = p.c.Apps().StatefulSets(ns).Update(ps)
		if err == nil {
			return
		}
		if !apierrs.IsConflict(err) && !apierrs.IsServerTimeout(err) {
			framework.Failf("failed to update statefulset %q: %v", name, err)
		}
	}
	framework.Failf("too many retries draining statefulset %q", name)
}
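A brief usage sketch for update(), assuming a populated statefulSetTester p; the namespace, name, and annotation key are illustrative only.

p.update("default", "web", func(ps *apps.StatefulSet) {
	// Mutate in place; update() re-fetches and retries on conflict/timeout.
	if ps.Annotations == nil {
		ps.Annotations = map[string]string{}
	}
	ps.Annotations["example"] = "true"
})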
Example 9
// DefaultRetriable adds retry information to the provided error, and will refresh the
// info if the client info is stale. If the refresh fails the error is made fatal.
// All other errors are left in their natural state - they will not be retried unless
// they define a Temporary() method that returns true.
func DefaultRetriable(info *resource.Info, err error) error {
	if err == nil {
		return nil
	}
	switch {
	case errors.IsMethodNotSupported(err):
		return ErrNotRetriable{err}
	case errors.IsConflict(err):
		if refreshErr := info.Get(); refreshErr != nil {
			return ErrNotRetriable{err}
		}
		return ErrRetriable{err}
	case errors.IsServerTimeout(err):
		return ErrRetriable{err}
	}
	return err
}
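A hedged usage sketch for DefaultRetriable: loop while the classified error is ErrRetriable. The helper name, retry bound, and mutate callback are assumptions for illustration; errors exposing only Temporary() are not retried here.

func applyWithRetries(info *resource.Info, mutate func(*resource.Info) error) error {
	var err error
	for attempt := 0; attempt < 3; attempt++ {
		// Classify the raw error; on Conflict, DefaultRetriable has already
		// refreshed info, so the next attempt sees the current object.
		err = DefaultRetriable(info, mutate(info))
		if err == nil {
			return nil
		}
		if _, ok := err.(ErrRetriable); !ok {
			return err
		}
	}
	return err
}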
Example 10
// updateService fetches a service, calls the update function on it,
// and then attempts to send the updated service. It makes up to 3
// attempts, retrying in the face of timeouts and conflicts.
func updateService(c *client.Client, namespace, serviceName string, update func(*api.Service)) (*api.Service, error) {
	var service *api.Service
	var err error
	for i := 0; i < 3; i++ {
		service, err = c.Services(namespace).Get(serviceName)
		if err != nil {
			return service, err
		}

		update(service)

		service, err = c.Services(namespace).Update(service)

		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			return service, err
		}
	}
	return service, err
}
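A short usage sketch, assuming an existing *client.Client named c; addExampleLabel and the label key are hypothetical.

func addExampleLabel(c *client.Client, namespace, name string) (*api.Service, error) {
	return updateService(c, namespace, name, func(s *api.Service) {
		// Mutate in place; updateService re-fetches and retries on
		// conflict or server timeout.
		if s.Labels == nil {
			s.Labels = map[string]string{}
		}
		s.Labels["example"] = "true"
	})
}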
Example 11
func (j *testJig) update(update func(ing *extensions.Ingress)) {
	var err error
	ns, name := j.ing.Namespace, j.ing.Name
	for i := 0; i < 3; i++ {
		j.ing, err = j.client.Extensions().Ingresses(ns).Get(name, metav1.GetOptions{})
		if err != nil {
			framework.Failf("failed to get ingress %q: %v", name, err)
		}
		update(j.ing)
		j.ing, err = j.client.Extensions().Ingresses(ns).Update(j.ing)
		if err == nil {
			describeIng(j.ing.Namespace)
			return
		}
		if !apierrs.IsConflict(err) && !apierrs.IsServerTimeout(err) {
			framework.Failf("failed to update ingress %q: %v", name, err)
		}
	}
	framework.Failf("too many retries updating ingress %q", name)
}
Example 12
func TestAssignsDefaultServiceAccountAndRejectsMissingAPIToken(t *testing.T) {
	ns := "myns"

	admit := NewServiceAccount(nil)
	admit.MountServiceAccountToken = true
	admit.RequireAPIToken = true

	// Add the default service account for the ns into the cache
	admit.serviceAccounts.Add(&api.ServiceAccount{
		ObjectMeta: api.ObjectMeta{
			Name:      DefaultServiceAccountName,
			Namespace: ns,
		},
	})

	pod := &api.Pod{}
	attrs := admission.NewAttributesRecord(pod, api.Kind("Pod").WithVersion("version"), ns, "myname", api.Resource("pods").WithVersion("version"), "", admission.Create, nil)
	err := admit.Admit(attrs)
	if err == nil || !errors.IsServerTimeout(err) {
		t.Errorf("Expected server timeout error for missing API token: %v", err)
	}
}
Example 13
// reconcile an unknown (from the perspective of our registry) non-terminal task
func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	// attempt to recover task from pod info:
	// - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
	// - the Name can be parsed by container.ParseFullName() to yield a pod Name and Namespace
	// - pull the pod metadata down from the api server
	// - perform task recovery based on pod metadata
	taskId := taskStatus.TaskId.GetValue()
	if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
		// there will be no data in the task status that we can use to determine the associated pod
		switch taskStatus.GetState() {
		case mesos.TaskState_TASK_STAGING:
			// there is still hope for this task, don't kill it just yet
			//TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
			return
		default:
			// for TASK_{STARTING,RUNNING} we should have already attempted recoverTasks().
			// if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
			// be processing this reconciliation update before we process the one from the executor.
			// point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
			// so it gets killed.
			log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
		}
	} else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
	} else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
		// possible rogue pod exists at this point because we can't identify it; should kill the task
		log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v",
			podStatus.Name, taskId, err)
	} else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
		if t, ok, err := podtask.RecoverFrom(*pod); ok {
			log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
			_, err := k.taskRegistry.Register(t, nil)
			if err != nil {
				// someone beat us to it?!
				log.Warningf("failed to register recovered task: %v", err)
				return
			}
			k.taskRegistry.UpdateStatus(taskStatus)
			return
		} else if err != nil {
			//should kill the pod and the task
			log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err)
			if err := k.client.Pods(namespace).Delete(name, nil); err != nil {
				log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err)
			}
		} else {
			//this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod
			//metadata is not appropriate for task reconstruction -- which should almost certainly never
			//be the case unless someone swapped out the pod on us (and kept the same namespace/name) while
			//we were failed over.

			//kill this task, allow the newly launched scheduler to schedule the new pod
			log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod)
		}
	} else if errors.IsNotFound(err) {
		// pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok
		log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name)
	} else if errors.IsServerTimeout(err) {
		log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err)
		return
	} else {
		log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err)
		return
	}
	if _, err := driver.KillTask(taskStatus.TaskId); err != nil {
		log.Errorf("failed to kill task %v: %v", taskId, err)
	}
}
Example 14
func TestCheckGeneratedNameError(t *testing.T) {
	expect := errors.NewNotFound("foo", "bar")
	if err := rest.CheckGeneratedNameError(Strategy, expect, &api.Pod{}); err != expect {
		t.Errorf("NotFoundError should be ignored: %v", err)
	}

	expect = errors.NewAlreadyExists("foo", "bar")
	if err := rest.CheckGeneratedNameError(Strategy, expect, &api.Pod{}); err != expect {
		t.Errorf("AlreadyExists should be returned when no GenerateName field: %v", err)
	}

	expect = errors.NewAlreadyExists("foo", "bar")
	if err := rest.CheckGeneratedNameError(Strategy, expect, &api.Pod{ObjectMeta: api.ObjectMeta{GenerateName: "foo"}}); err == nil || !errors.IsServerTimeout(err) {
		t.Errorf("expected try again later error: %v", err)
	}
}
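The update helpers above all share one shape: get, mutate, update, and retry only when the error is a Conflict or a ServerTimeout. A minimal generic sketch of that shared pattern follows, using the same errors package as the examples; the helper and its parameter names are illustrative, not code from the tree.

// retryOnConflictOrTimeout runs do() up to maxRetries times, stopping early
// on success or on any error other than Conflict/ServerTimeout.
func retryOnConflictOrTimeout(maxRetries int, do func() error) error {
	var err error
	for i := 0; i < maxRetries; i++ {
		if err = do(); err == nil {
			return nil
		}
		if !errors.IsConflict(err) && !errors.IsServerTimeout(err) {
			return err
		}
	}
	return err
}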