func getReferencedServiceAccountToken(c *client.Client, ns string, name string, shouldWait bool) (string, string, error) {
	tokenName := ""
	token := ""

	findToken := func() (bool, error) {
		user, err := c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}

		for _, ref := range user.Secrets {
			secret, err := c.Secrets(ns).Get(ref.Name)
			if errors.IsNotFound(err) {
				continue
			}
			if err != nil {
				return false, err
			}
			if secret.Type != api.SecretTypeServiceAccountToken {
				continue
			}
			name := secret.Annotations[api.ServiceAccountNameKey]
			uid := secret.Annotations[api.ServiceAccountUIDKey]
			tokenData := secret.Data[api.ServiceAccountTokenKey]
			if name == user.Name && uid == string(user.UID) && len(tokenData) > 0 {
				tokenName = secret.Name
				token = string(tokenData)
				return true, nil
			}
		}

		return false, nil
	}

	if shouldWait {
		err := wait.Poll(time.Second, 10*time.Second, findToken)
		if err != nil {
			return "", "", err
		}
	} else {
		ok, err := findToken()
		if err != nil {
			return "", "", err
		}
		if !ok {
			return "", "", fmt.Errorf("No token found for %s/%s", ns, name)
		}
	}
	return tokenName, token, nil
}
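// Hypothetical usage sketch (not part of the original source): it only exercises the
// getReferencedServiceAccountToken helper above, assuming a configured *client.Client
// named c; the namespace and service-account names are made up for illustration.
func exampleWaitForToken(c *client.Client) {
	// shouldWait=true polls for up to 10 seconds until a token secret is issued.
	tokenName, token, err := getReferencedServiceAccountToken(c, "test", "default", true)
	if err != nil {
		fmt.Printf("could not obtain token: %v\n", err)
		return
	}
	fmt.Printf("found token %q (%d bytes)\n", tokenName, len(token))
}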
// syncNamespace makes namespace life-cycle decisions
func syncNamespace(qingClient client.Interface, namespace api.Namespace) (err error) {
	if namespace.DeletionTimestamp == nil {
		return nil
	}

	// if there is a deletion timestamp, and the status is not terminating, then update status
	if !namespace.DeletionTimestamp.IsZero() && namespace.Status.Phase != api.NamespaceTerminating {
		newNamespace := api.Namespace{}
		newNamespace.ObjectMeta = namespace.ObjectMeta
		newNamespace.Status = namespace.Status
		newNamespace.Status.Phase = api.NamespaceTerminating
		result, err := qingClient.Namespaces().Status(&newNamespace)
		if err != nil {
			return err
		}
		// work with the latest copy so we can proceed to clean up right away without another interval
		namespace = *result
	}

	// if the namespace is already finalized, delete it
	if finalized(namespace) {
		err = qingClient.Namespaces().Delete(namespace.Name)
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
		return nil
	}

	// there may still be content for us to remove
	err = deleteAllContent(qingClient, namespace.Name)
	if err != nil {
		return err
	}

	// we have removed content, so mark it finalized by us
	result, err := finalize(qingClient, namespace)
	if err != nil {
		return err
	}

	// now that all finalizers have reported, check whether we can delete immediately
	if finalized(*result) {
		err = qingClient.Namespaces().Delete(namespace.Name)
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
	}
	return nil
}
func (s *ServiceController) persistUpdate(service *api.Service) error {
	var err error
	for i := 0; i < clientRetryCount; i++ {
		_, err = s.qingClient.Services(service.Namespace).Update(service)
		if err == nil {
			return nil
		}
		// If the object no longer exists, we don't want to recreate it. Just bail
		// out so that we can process the delete, which we should soon be receiving
		// if we haven't already.
		if errors.IsNotFound(err) {
			glog.Infof("Not persisting update to service that no longer exists: %v", err)
			return nil
		}
		// TODO: Try to resolve the conflict if the change was unrelated to load
		// balancer status. For now, just rely on the fact that we'll
		// also process the update that caused the resource version to change.
		if errors.IsConflict(err) {
			glog.Infof("Not persisting update to service that has been changed since we received it: %v", err)
			return nil
		}
		glog.Warningf("Failed to persist updated LoadBalancerStatus to service %s after creating its external load balancer: %v", service.Name, err)
		time.Sleep(clientRetryInterval)
	}
	return err
}
// Visit implements Visitor
func (r *Selector) Visit(fn VisitorFunc) error {
	list, err := NewHelper(r.Client, r.Mapping).List(r.Namespace, r.ResourceMapping().APIVersion, r.Selector)
	if err != nil {
		if errors.IsBadRequest(err) || errors.IsNotFound(err) {
			if r.Selector.Empty() {
				glog.V(2).Infof("Unable to list %q: %v", r.Mapping.Resource, err)
			} else {
				glog.V(2).Infof("Unable to find %q that match the selector %q: %v", r.Mapping.Resource, r.Selector, err)
			}
			return nil
		}
		return err
	}
	accessor := r.Mapping.MetadataAccessor
	resourceVersion, _ := accessor.ResourceVersion(list)
	info := &Info{
		Client:          r.Client,
		Mapping:         r.Mapping,
		Namespace:       r.Namespace,
		Object:          list,
		ResourceVersion: resourceVersion,
	}
	return fn(info)
}
func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
	return func(pod *api.Pod, err error) {
		if err == scheduler.ErrNoNodesAvailable {
			glog.V(4).Infof("Unable to schedule %v %v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
		} else {
			glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
		}
		backoff.gc()
		// Retry asynchronously.
		// Note that this is extremely rudimentary and we need a more real error handling path.
		go func() {
			defer util.HandleCrash()
			podID := pod.Name
			podNamespace := pod.Namespace
			backoff.wait(podID)
			// Get the pod again; it may have changed/been scheduled already.
			pod = &api.Pod{}
			err := factory.Client.Get().Namespace(podNamespace).Resource("pods").Name(podID).Do().Into(pod)
			if err != nil {
				if !errors.IsNotFound(err) {
					glog.Errorf("Error getting pod %v for retry: %v; abandoning", podID, err)
				}
				return
			}
			if pod.Spec.NodeName == "" {
				podQueue.Add(pod)
			}
		}()
	}
}
func (t *Tester) TestUpdateFailsOnNotFound(valid runtime.Object) {
	_, _, err := t.storage.(rest.Updater).Update(t.TestContext(), valid)
	if err == nil {
		t.Errorf("Expected an error, but we didn't get one")
	} else if !errors.IsNotFound(err) {
		t.Errorf("Expected NotFound error, got '%v'", err)
	}
}
func TestEtcdDelete(t *testing.T) {
	podA := &api.Pod{
		ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
		Spec:       api.PodSpec{NodeName: "machine"},
	}

	nodeWithPodA := tools.EtcdResponseWithError{
		R: &etcd.Response{
			Node: &etcd.Node{
				Value:         runtime.EncodeOrDie(testapi.Codec(), podA),
				ModifiedIndex: 1,
				CreatedIndex:  1,
			},
		},
		E: nil,
	}

	emptyNode := tools.EtcdResponseWithError{
		R: &etcd.Response{},
		E: tools.EtcdErrorNotFound,
	}

	key := "foo"

	table := map[string]struct {
		existing tools.EtcdResponseWithError
		expect   tools.EtcdResponseWithError
		errOK    func(error) bool
	}{
		"normal": {
			existing: nodeWithPodA,
			expect:   emptyNode,
			errOK:    func(err error) bool { return err == nil },
		},
		"notExisting": {
			existing: emptyNode,
			expect:   emptyNode,
			errOK:    func(err error) bool { return errors.IsNotFound(err) },
		},
	}

	for name, item := range table {
		fakeClient, registry := NewTestGenericEtcdRegistry(t)
		path := etcdtest.AddPrefix("pods/foo")
		fakeClient.Data[path] = item.existing
		obj, err := registry.Delete(api.NewContext(), key, nil)
		if !item.errOK(err) {
			t.Errorf("%v: unexpected error: %v (%#v)", name, err, obj)
		}

		if item.expect.E != nil {
			item.expect.E.(*etcd.EtcdError).Index = fakeClient.ChangeIndex
		}
		if e, a := item.expect, fakeClient.Data[path]; !api.Semantic.DeepDerivative(e, a) {
			t.Errorf("%v:\n%s", name, util.ObjectDiff(e, a))
		}
	}
}
// this pod may be out of sync with respect to the API server registry:
//
//	this pod    |  apiserver registry
//	------------|----------------------
//	host=.*     |  404          ; pod was deleted
//	host=.*     |  5xx          ; failed to sync, try again later?
//	host=""     |  host=""      ; perhaps no updates to process?
//	host=""     |  host="..."   ; pod has been scheduled and assigned, is there a task assigned? (check TaskIdKey in binding?)
//	host="..."  |  host=""      ; pod is no longer scheduled, does it need to be re-queued?
//	host="..."  |  host="..."   ; perhaps no updates to process?
//
// TODO(jdef) this needs an integration test
func (s *schedulingPlugin) reconcilePod(oldPod api.Pod) {
	log.V(1).Infof("reconcile pod %v", oldPod.Name)
	ctx := api.WithNamespace(api.NewDefaultContext(), oldPod.Namespace)
	pod, err := s.client.Pods(api.NamespaceValue(ctx)).Get(oldPod.Name)
	if err != nil {
		if errors.IsNotFound(err) {
			// attempt to delete
			if err = s.deleter.deleteOne(&Pod{Pod: &oldPod}); err != nil && err != noSuchPodErr && err != noSuchTaskErr {
				log.Errorf("failed to delete pod: %v: %v", oldPod.Name, err)
			}
		} else {
			//TODO(jdef) other errors should probably trigger a retry (w/ backoff).
			//For now, drop the pod on the floor
			log.Warningf("aborting reconciliation for pod %v: %v", oldPod.Name, err)
		}
		return
	}
	if oldPod.Spec.NodeName != pod.Spec.NodeName {
		if pod.Spec.NodeName == "" {
			// pod is unscheduled.
			// it's possible that we dropped the pod in the scheduler error handler
			// because of task misalignment with the pod (task.Has(podtask.Launched) == true)
			podKey, err := podtask.MakePodKey(ctx, pod.Name)
			if err != nil {
				log.Error(err)
				return
			}

			s.api.Lock()
			defer s.api.Unlock()

			if _, state := s.api.tasks().ForPod(podKey); state != podtask.StateUnknown {
				//TODO(jdef) reconcile the task
				log.Errorf("task already registered for pod %v", pod.Name)
				return
			}

			now := time.Now()
			log.V(3).Infof("reoffering pod %v", podKey)
			s.qr.reoffer(&Pod{
				Pod:      pod,
				deadline: &now,
			})
		} else {
			// pod is scheduled.
			// not sure how this happened behind our backs. attempt to reconstruct
			// at least a partial podtask.T record.
			//TODO(jdef) reconcile the task
			log.Errorf("pod already scheduled: %v", pod.Name)
		}
	} else {
		//TODO(jdef) for now, ignore the fact that the rest of the spec may be different
		//and assume that our knowledge of the pod aligns with that of the apiserver
		log.Error("pod reconciliation does not support updates; not yet implemented")
	}
}
func LoadExistingNextReplicationController(c *client.Client, namespace, newName string) (*api.ReplicationController, error) {
	if len(newName) == 0 {
		return nil, nil
	}
	newRc, err := c.ReplicationControllers(namespace).Get(newName)
	if err != nil && errors.IsNotFound(err) {
		return nil, nil
	}
	return newRc, err
}
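// Hypothetical usage sketch (not part of the original source), assuming a configured
// *client.Client named c and made-up namespace/controller names: a nil controller with
// a nil error means the "next" controller does not exist yet, while any other error is
// surfaced to the caller.
func exampleLoadNextController(c *client.Client) {
	rc, err := LoadExistingNextReplicationController(c, "default", "frontend-v2")
	if err != nil {
		fmt.Printf("lookup failed: %v\n", err)
		return
	}
	if rc == nil {
		fmt.Println("no existing next replication controller; a new one would be created")
	}
}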
func deletePersistentVolumeClaims(qingClient client.Interface, ns string) error {
	items, err := qingClient.PersistentVolumeClaims(ns).List(labels.Everything(), fields.Everything())
	if err != nil {
		return err
	}
	for i := range items.Items {
		err := qingClient.PersistentVolumeClaims(ns).Delete(items.Items[i].Name)
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
	}
	return nil
}
func deleteReplicationControllers(qingClient client.Interface, ns string) error {
	items, err := qingClient.ReplicationControllers(ns).List(labels.Everything())
	if err != nil {
		return err
	}
	for i := range items.Items {
		err := qingClient.ReplicationControllers(ns).Delete(items.Items[i].Name)
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
	}
	return nil
}
func deleteResourceQuotas(qingClient client.Interface, ns string) error {
	resourceQuotas, err := qingClient.ResourceQuotas(ns).List(labels.Everything())
	if err != nil {
		return err
	}
	for i := range resourceQuotas.Items {
		err := qingClient.ResourceQuotas(ns).Delete(resourceQuotas.Items[i].Name)
		if err != nil && !errors.IsNotFound(err) {
			return err
		}
	}
	return nil
}
func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) {
	objectMeta, err := api.ObjectMetaFor(existing)
	if err != nil {
		t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
	}

	ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
	_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
		t.Errorf("unexpected error, object should not exist: %v", err)
	}
}
func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) {
	existing := createFn()
	objectMeta, err := api.ObjectMetaFor(existing)
	if err != nil {
		t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
	}

	context := t.TestContext()

	t.withStorageError(&etcd.EtcdError{ErrorCode: tools.EtcdErrorCodeNotFound}, func() {
		_, err := t.storage.(rest.GracefulDeleter).Delete(context, objectMeta.Name, nil)
		if err == nil || !errors.IsNotFound(err) {
			t.Fatalf("Unexpected error: %v", err)
		}
	})
}
// Ensure that when the scheduler creates a binding for a pod that has already been deleted
// by the API server, the API server returns a not-found error.
func TestEtcdCreateBindingNoPod(t *testing.T) {
	registry, bindingRegistry, _, fakeClient, _ := newStorage(t)
	ctx := api.NewDefaultContext()
	fakeClient.TestIndex = true

	key, _ := registry.KeyFunc(ctx, "foo")
	key = etcdtest.AddPrefix(key)
	fakeClient.Data[key] = tools.EtcdResponseWithError{
		R: &etcd.Response{
			Node: nil,
		},
		E: tools.EtcdErrorNotFound,
	}
	// Assume that a pod has undergone the following:
	// - Create (apiserver)
	// - Schedule (scheduler)
	// - Delete (apiserver)
	_, err := bindingRegistry.Create(ctx, &api.Binding{
		ObjectMeta: api.ObjectMeta{Namespace: api.NamespaceDefault, Name: "foo"},
		Target:     api.ObjectReference{Name: "machine"},
	})
	if err == nil {
		t.Fatalf("Expected not-found-error but got nothing")
	}
	if !errors.IsNotFound(etcderrors.InterpretGetError(err, "Pod", "foo")) {
		t.Fatalf("Unexpected error returned: %#v", err)
	}

	_, err = registry.Get(ctx, "foo")
	if err == nil {
		t.Fatalf("Expected not-found-error but got nothing")
	}
	if !errors.IsNotFound(etcderrors.InterpretGetError(err, "Pod", "foo")) {
		t.Fatalf("Unexpected error: %v", err)
	}
}
func Rename(c RollingUpdaterClient, rc *api.ReplicationController, newName string) error {
	oldName := rc.Name
	rc.Name = newName
	rc.ResourceVersion = ""

	_, err := c.CreateReplicationController(rc.Namespace, rc)
	if err != nil {
		return err
	}
	err = c.DeleteReplicationController(rc.Namespace, oldName)
	if err != nil && !errors.IsNotFound(err) {
		return err
	}
	return nil
}
func TestEtcdGetNotFound(t *testing.T) {
	registry, _, _, fakeClient, _ := newStorage(t)
	ctx := api.NewDefaultContext()
	key, _ := registry.KeyFunc(ctx, "foo")
	key = etcdtest.AddPrefix(key)
	fakeClient.Data[key] = tools.EtcdResponseWithError{
		R: &etcd.Response{
			Node: nil,
		},
		E: tools.EtcdErrorNotFound,
	}
	_, err := registry.Get(ctx, "foo")
	if !errors.IsNotFound(err) {
		t.Errorf("Unexpected error returned: %#v", err)
	}
}
func podScheduled(c *client.Client, podNamespace, podName string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.Pods(podNamespace).Get(podName)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			// This could be a connection error so we want to retry.
			return false, nil
		}
		if pod.Spec.NodeName == "" {
			return false, nil
		}
		return true, nil
	}
}
func TestEtcdGetServiceNotFound(t *testing.T) {
	ctx := api.NewDefaultContext()
	fakeClient := tools.NewFakeEtcdClient(t)
	registry := NewTestEtcdRegistry(fakeClient)
	key, _ := makeServiceKey(ctx, "foo")
	key = etcdtest.AddPrefix(key)
	fakeClient.Data[key] = tools.EtcdResponseWithError{
		R: &etcd.Response{
			Node: nil,
		},
		E: tools.EtcdErrorNotFound,
	}
	_, err := registry.GetService(ctx, "foo")
	if !errors.IsNotFound(err) {
		t.Errorf("Unexpected error returned: %#v", err)
	}
}
func podRunning(c *client.Client, podNamespace string, podName string) wait.ConditionFunc {
	return func() (bool, error) {
		pod, err := c.Pods(podNamespace).Get(podName)
		if apierrors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			// This could be a connection error so we want to retry, but log the error.
			glog.Errorf("Error when reading pod %q: %v", podName, err)
			return false, nil
		}
		if pod.Status.Phase != api.PodRunning {
			return false, nil
		}
		return true, nil
	}
}
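// Hypothetical usage sketch (not part of the original source): the podScheduled and
// podRunning helpers above return wait.ConditionFunc values, so they can be handed
// directly to wait.Poll; the namespace, pod name, and timeouts here are made up.
func examplePollUntilRunning(c *client.Client) error {
	// Poll every two seconds until the pod reaches Running, giving up after two minutes.
	if err := wait.Poll(2*time.Second, 2*time.Minute, podRunning(c, "default", "my-pod")); err != nil {
		return fmt.Errorf("pod never reached Running: %v", err)
	}
	return nil
}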
func RunDescribe(f *cmdutil.Factory, out io.Writer, cmd *cobra.Command, args []string) error {
	selector := cmdutil.GetFlagString(cmd, "selector")
	cmdNamespace, err := f.DefaultNamespace()
	if err != nil {
		return err
	}

	mapper, typer := f.Object()
	r := resource.NewBuilder(mapper, typer, f.ClientMapperForCommand()).
		ContinueOnError().
		NamespaceParam(cmdNamespace).DefaultNamespace().
		SelectorParam(selector).
		ResourceTypeOrNameArgs(false, args...).
		Flatten().
		Do()
	err = r.Err()
	if err != nil {
		return err
	}

	mapping, err := r.ResourceMapping()
	if err != nil {
		return err
	}

	describer, err := f.Describer(mapping)
	if err != nil {
		return err
	}

	infos, err := r.Infos()
	if err != nil {
		if errors.IsNotFound(err) && len(args) == 2 {
			return DescribeMatchingResources(mapper, typer, describer, f, cmdNamespace, args[0], args[1], out)
		}
		return err
	}

	for _, info := range infos {
		s, err := describer.Describe(info.Namespace, info.Name)
		if err != nil {
			return err
		}
		fmt.Fprintf(out, "%s\n\n", s)
	}

	return nil
}
// DeleteService deletes a Service specified by its name.
func (r *Registry) DeleteService(ctx api.Context, name string) error {
	key, err := makeServiceKey(ctx, name)
	if err != nil {
		return err
	}
	err = r.Delete(key, true)
	if err != nil {
		return etcderr.InterpretDeleteError(err, "service", name)
	}

	// TODO: can leave dangling endpoints, and potentially return incorrect
	// endpoints if a new service is created with the same name
	err = r.endpoints.DeleteEndpoints(ctx, name)
	if err != nil && !errors.IsNotFound(err) {
		return err
	}
	return nil
}
func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
	existing := createFn()
	objectMeta, err := api.ObjectMetaFor(existing)
	if err != nil {
		t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
	}

	ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
	_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(10))
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
		t.Errorf("unexpected error, object should not exist: %v", err)
	}
	if wasGracefulFn() {
		t.Errorf("resource should not support graceful delete")
	}
}
func getServiceAccount(c *client.Client, ns string, name string, shouldWait bool) (*api.ServiceAccount, error) {
	if !shouldWait {
		return c.ServiceAccounts(ns).Get(name)
	}

	var user *api.ServiceAccount
	var err error
	err = wait.Poll(time.Second, 10*time.Second, func() (bool, error) {
		user, err = c.ServiceAccounts(ns).Get(name)
		if errors.IsNotFound(err) {
			return false, nil
		}
		if err != nil {
			return false, err
		}
		return true, nil
	})
	return user, err
}
// getServiceAccount returns the ServiceAccount referenced by the given secret. If the secret is not
// of type ServiceAccountToken, or if the referenced ServiceAccount does not exist, nil is returned
func (e *TokensController) getServiceAccount(secret *api.Secret, fetchOnCacheMiss bool) (*api.ServiceAccount, error) {
	name, uid := serviceAccountNameAndUID(secret)
	if len(name) == 0 {
		return nil, nil
	}

	key := &api.ServiceAccount{ObjectMeta: api.ObjectMeta{Namespace: secret.Namespace}}
	namespaceAccounts, err := e.serviceAccounts.Index("namespace", key)
	if err != nil {
		return nil, err
	}

	for _, obj := range namespaceAccounts {
		serviceAccount := obj.(*api.ServiceAccount)
		if name != serviceAccount.Name {
			// Name must match
			continue
		}
		if len(uid) > 0 && uid != string(serviceAccount.UID) {
			// If UID is specified, it must match
			continue
		}
		return serviceAccount, nil
	}

	if fetchOnCacheMiss {
		serviceAccount, err := e.client.ServiceAccounts(secret.Namespace).Get(name)
		if apierrors.IsNotFound(err) {
			return nil, nil
		}
		if err != nil {
			return nil, err
		}
		if len(uid) > 0 && uid != string(serviceAccount.UID) {
			// If UID is specified, it must match
			return nil, nil
		}
		return serviceAccount, nil
	}

	return nil, nil
}
func TestEtcdGetControllerNotFound(t *testing.T) {
	ctx := api.NewDefaultContext()
	storage, fakeClient := newStorage(t)
	key, _ := makeControllerKey(ctx, validController.Name)
	key = etcdtest.AddPrefix(key)
	fakeClient.Data[key] = tools.EtcdResponseWithError{
		R: &etcd.Response{
			Node: nil,
		},
		E: tools.EtcdErrorNotFound,
	}
	ctrl, err := storage.Get(ctx, validController.Name)
	if ctrl != nil {
		t.Errorf("Unexpected non-nil controller: %#v", ctrl)
	}
	if !errors.IsNotFound(err) {
		t.Errorf("Unexpected error returned: %#v", err)
	}
}
func TestErrors(t *testing.T) {
	o := NewObjects(api.Scheme, api.Scheme)
	o.Add(&api.List{
		Items: []runtime.Object{
			// This first call to List will return this error
			&(errors.NewNotFound("ServiceList", "").(*errors.StatusError).ErrStatus),
			// The second call to List will return this error
			&(errors.NewForbidden("ServiceList", "", nil).(*errors.StatusError).ErrStatus),
		},
	})
	client := &Fake{ReactFn: ObjectReaction(o, latest.RESTMapper)}

	_, err := client.Services("test").List(labels.Everything())
	if !errors.IsNotFound(err) {
		t.Fatalf("unexpected error: %v", err)
	}
	t.Logf("error: %#v", err.(*errors.StatusError).Status())

	_, err = client.Services("test").List(labels.Everything())
	if !errors.IsForbidden(err) {
		t.Fatalf("unexpected error: %v", err)
	}
}
func (k *QingYuanScheduler) reconcileTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
	task, state := k.taskRegistry.UpdateStatus(taskStatus)

	if (state == podtask.StateRunning || state == podtask.StatePending) &&
		taskStatus.SlaveId != nil &&
		((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
			(taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) {
		//--
		// pod-task has metadata that refers to:
		// (1) a task that Mesos no longer knows about, or else
		// (2) a pod that the Qinglet will never report as "failed"
		// For now, destroy the pod and hope that there's a replication controller backing it up.
		// TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
		pod := &task.Pod
		log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
		if err := k.client.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil && !errors.IsNotFound(err) {
			log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
		}
	} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
		// attempt to prevent dangling pods in the pod and task registries
		log.V(1).Infof("request explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue())
		k.reconciler.RequestExplicit()
	} else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
		//TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
		//If we're reconciling and receive this then the executor may be
		//running a task that we need it to kill. It's possible that the framework
		//is unrecognized by the master at this point, so KillTask is not guaranteed
		//to do anything. The underlying driver transport may be able to send a
		//FrameworkMessage directly to the slave to terminate the task.
		log.V(2).Infof("forwarding TASK_LOST message to executor %v on slave %v", taskStatus.ExecutorId, taskStatus.SlaveId)
		data := fmt.Sprintf("task-lost:%s", task.ID) //TODO(jdef) use a real message type
		if _, err := driver.SendFrameworkMessage(taskStatus.ExecutorId, taskStatus.SlaveId, data); err != nil {
			log.Error(err.Error())
		}
	}
}
// getServiceAccount returns the ServiceAccount for the given namespace and name if it exists
func (s *serviceAccount) getServiceAccount(namespace string, name string) (*api.ServiceAccount, error) {
	key := &api.ServiceAccount{ObjectMeta: api.ObjectMeta{Namespace: namespace}}
	index, err := s.serviceAccounts.Index("namespace", key)
	if err != nil {
		return nil, err
	}

	for _, obj := range index {
		serviceAccount := obj.(*api.ServiceAccount)
		if serviceAccount.Name == name {
			return serviceAccount, nil
		}
	}

	// Could not find in cache, attempt to look up directly
	numAttempts := 1
	if name == DefaultServiceAccountName {
		// If this is the default serviceaccount, attempt more times, since it should be auto-created by the controller
		numAttempts = 10
	}
	retryInterval := time.Duration(rand.Int63n(100)+int64(100)) * time.Millisecond
	for i := 0; i < numAttempts; i++ {
		if i != 0 {
			time.Sleep(retryInterval)
		}
		serviceAccount, err := s.client.ServiceAccounts(namespace).Get(name)
		if err == nil {
			return serviceAccount, nil
		}
		if !errors.IsNotFound(err) {
			return nil, err
		}
	}

	return nil, nil
}
func (e *endpointController) syncService(key string) {
	startTime := time.Now()
	defer func() {
		glog.V(4).Infof("Finished syncing service %q endpoints. (%v)", key, time.Now().Sub(startTime))
	}()
	obj, exists, err := e.serviceStore.Store.GetByKey(key)
	if err != nil || !exists {
		// Delete the corresponding endpoint, as the service has been deleted.
		// TODO: Please note that this will delete an endpoint when a
		// service is deleted. However, if we're down at the time when
		// the service is deleted, we will miss that deletion, so this
		// doesn't completely solve the problem. See #6877.
		namespace, name, err := cache.SplitMetaNamespaceKey(key)
		if err != nil {
			glog.Errorf("Need to delete endpoint with key %q, but couldn't understand the key: %v", key, err)
			// Don't retry, as the key isn't going to magically become understandable.
			return
		}
		err = e.client.Endpoints(namespace).Delete(name)
		if err != nil && !errors.IsNotFound(err) {
			glog.Errorf("Error deleting endpoint %q: %v", key, err)
			e.queue.Add(key) // Retry
		}
		return
	}

	service := obj.(*api.Service)
	if service.Spec.Selector == nil {
		// services without a selector receive no endpoints from this controller;
		// these services will receive the endpoints that are created out-of-band via the REST API.
		return
	}

	glog.V(5).Infof("About to update endpoints for service %q", key)
	pods, err := e.podStore.Pods(service.Namespace).List(labels.Set(service.Spec.Selector).AsSelector())
	if err != nil {
		// Since we're getting stuff from a local cache, it is
		// basically impossible to get this error.
		glog.Errorf("Error syncing service %q: %v", key, err)
		e.queue.Add(key) // Retry
		return
	}

	subsets := []api.EndpointSubset{}
	for i := range pods.Items {
		pod := &pods.Items[i]

		for i := range service.Spec.Ports {
			servicePort := &service.Spec.Ports[i]

			portName := servicePort.Name
			portProto := servicePort.Protocol
			portNum, err := findPort(pod, servicePort)
			if err != nil {
				glog.V(4).Infof("Failed to find port for service %s/%s: %v", service.Namespace, service.Name, err)
				continue
			}
			// HACK(jdef): use HostIP instead of pod.CurrentState.PodIP for generic mesos compat
			if len(pod.Status.HostIP) == 0 {
				glog.V(4).Infof("Failed to find a host IP for pod %s/%s", pod.Namespace, pod.Name)
				continue
			}
			if !api.IsPodReady(pod) {
				glog.V(5).Infof("Pod is out of service: %v/%v", pod.Namespace, pod.Name)
				continue
			}

			// HACK(jdef): use HostIP instead of pod.CurrentState.PodIP for generic mesos compat
			epp := api.EndpointPort{Name: portName, Port: portNum, Protocol: portProto}
			epa := api.EndpointAddress{IP: pod.Status.HostIP, TargetRef: &api.ObjectReference{
				Kind:            "Pod",
				Namespace:       pod.ObjectMeta.Namespace,
				Name:            pod.ObjectMeta.Name,
				UID:             pod.ObjectMeta.UID,
				ResourceVersion: pod.ObjectMeta.ResourceVersion,
			}}
			subsets = append(subsets, api.EndpointSubset{Addresses: []api.EndpointAddress{epa}, Ports: []api.EndpointPort{epp}})
		}
	}
	subsets = endpoints.RepackSubsets(subsets)

	// See if there's actually an update here.
	currentEndpoints, err := e.client.Endpoints(service.Namespace).Get(service.Name)
	if err != nil {
		if errors.IsNotFound(err) {
			currentEndpoints = &api.Endpoints{
				ObjectMeta: api.ObjectMeta{
					Name:   service.Name,
					Labels: service.Labels,
				},
			}
		} else {
			glog.Errorf("Error getting endpoints: %v", err)
			e.queue.Add(key) // Retry
			return
		}
	}
	if reflect.DeepEqual(currentEndpoints.Subsets, subsets) && reflect.DeepEqual(currentEndpoints.Labels, service.Labels) {
		glog.V(5).Infof("endpoints are equal for %s/%s, skipping update", service.Namespace, service.Name)
		return
	}
	newEndpoints := currentEndpoints
	newEndpoints.Subsets = subsets
	newEndpoints.Labels = service.Labels

	if len(currentEndpoints.ResourceVersion) == 0 {
		// No previous endpoints, create them
		_, err = e.client.Endpoints(service.Namespace).Create(newEndpoints)
	} else {
		// Pre-existing
		_, err = e.client.Endpoints(service.Namespace).Update(newEndpoints)
	}
	if err != nil {
		glog.Errorf("Error updating endpoints: %v", err)
		e.queue.Add(key) // Retry
	}
}