func (ks *QingYuanScheduler) recoverTasks() error { ctx := api.NewDefaultContext() podList, err := ks.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything()) if err != nil { log.V(1).Infof("failed to recover pod registry, madness may ensue: %v", err) return err } recoverSlave := func(t *podtask.T) { slaveId := t.Spec.SlaveID ks.slaves.checkAndAdd(slaveId, t.Offer.Host()) } for _, pod := range podList.Items { if t, ok, err := podtask.RecoverFrom(pod); err != nil { log.Errorf("failed to recover task from pod, will attempt to delete '%v/%v': %v", pod.Namespace, pod.Name, err) err := ks.client.Pods(pod.Namespace).Delete(pod.Name, nil) //TODO(jdef) check for temporary or not-found errors if err != nil { log.Errorf("failed to delete pod '%v/%v': %v", pod.Namespace, pod.Name, err) } } else if ok { ks.taskRegistry.Register(t, nil) recoverSlave(t) log.Infof("recovered task %v from pod %v/%v", t.ID, pod.Namespace, pod.Name) } } return nil }
// TestValidNamespace validates that namespace rules are enforced on a resource prior to create or update func TestValidNamespace(t *testing.T) { ctx := api.NewDefaultContext() namespace, _ := api.NamespaceFrom(ctx) resource := api.ReplicationController{} if !api.ValidNamespace(ctx, &resource.ObjectMeta) { t.Errorf("expected success") } if namespace != resource.Namespace { t.Errorf("expected resource to have the default namespace assigned during validation") } resource = api.ReplicationController{ObjectMeta: api.ObjectMeta{Namespace: "other"}} if api.ValidNamespace(ctx, &resource.ObjectMeta) { t.Errorf("Expected error that resource and context errors do not match because resource has different namespace") } ctx = api.NewContext() if api.ValidNamespace(ctx, &resource.ObjectMeta) { t.Errorf("Expected error that resource and context errors do not match since context has no namespace") } ctx = api.NewContext() ns := api.NamespaceValue(ctx) if ns != "" { t.Errorf("Expected the empty string") } }
// reconciler action factory, performs explicit task reconciliation for non-terminal // tasks identified by annotations in the QingYuan pod registry. func (k *QingYuanScheduler) makePodRegistryReconciler() ReconcilerAction { return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error { ctx := api.NewDefaultContext() podList, err := k.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything()) if err != nil { return proc.ErrorChanf("failed to reconcile pod registry: %v", err) } taskToSlave := make(map[string]string) for _, pod := range podList.Items { if len(pod.Annotations) == 0 { continue } taskId, found := pod.Annotations[meta.TaskIdKey] if !found { continue } slaveId, found := pod.Annotations[meta.SlaveIdKey] if !found { continue } taskToSlave[taskId] = slaveId } return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel)) }) }
// Bind just does a POST binding RPC. func (b *binder) Bind(binding *api.Binding) error { glog.V(2).Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) ctx := api.WithNamespace(api.NewContext(), binding.Namespace) return b.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error() // TODO: use Pods interface for binding once clusters are upgraded // return b.Pods(binding.Namespace).Bind(binding) }
// this pod may be out of sync with respect to the API server registry: // this pod | apiserver registry // -------------|---------------------- // host=.* | 404 ; pod was deleted // host=.* | 5xx ; failed to sync, try again later? // host="" | host="" ; perhaps no updates to process? // host="" | host="..." ; pod has been scheduled and assigned, is there a task assigned? (check TaskIdKey in binding?) // host="..." | host="" ; pod is no longer scheduled, does it need to be re-queued? // host="..." | host="..." ; perhaps no updates to process? // // TODO(jdef) this needs an integration test func (s *schedulingPlugin) reconcilePod(oldPod api.Pod) { log.V(1).Infof("reconcile pod %v", oldPod.Name) ctx := api.WithNamespace(api.NewDefaultContext(), oldPod.Namespace) pod, err := s.client.Pods(api.NamespaceValue(ctx)).Get(oldPod.Name) if err != nil { if errors.IsNotFound(err) { // attempt to delete if err = s.deleter.deleteOne(&Pod{Pod: &oldPod}); err != nil && err != noSuchPodErr && err != noSuchTaskErr { log.Errorf("failed to delete pod: %v: %v", oldPod.Name, err) } } else { //TODO(jdef) other errors should probably trigger a retry (w/ backoff). //For now, drop the pod on the floor log.Warning("aborting reconciliation for pod %v: %v", oldPod.Name, err) } return } if oldPod.Spec.NodeName != pod.Spec.NodeName { if pod.Spec.NodeName == "" { // pod is unscheduled. // it's possible that we dropped the pod in the scheduler error handler // because of task misalignment with the pod (task.Has(podtask.Launched) == true) podKey, err := podtask.MakePodKey(ctx, pod.Name) if err != nil { log.Error(err) return } s.api.Lock() defer s.api.Unlock() if _, state := s.api.tasks().ForPod(podKey); state != podtask.StateUnknown { //TODO(jdef) reconcile the task log.Errorf("task already registered for pod %v", pod.Name) return } now := time.Now() log.V(3).Infof("reoffering pod %v", podKey) s.qr.reoffer(&Pod{ Pod: pod, deadline: &now, }) } else { // pod is scheduled. // not sure how this happened behind our backs. attempt to reconstruct // at least a partial podtask.T record. //TODO(jdef) reconcile the task log.Errorf("pod already scheduled: %v", pod.Name) } } else { //TODO(jdef) for now, ignore the fact that the rest of the spec may be different //and assume that our knowledge of the pod aligns with that of the apiserver log.Error("pod reconciliation does not support updates; not yet implemented") } }
func TestRESTCreate(t *testing.T) { table := []struct { ctx api.Context event *api.Event valid bool }{ { ctx: api.NewDefaultContext(), event: testEvent("foo"), valid: true, }, { ctx: api.NewContext(), event: testEvent("bar"), valid: true, }, { ctx: api.WithNamespace(api.NewContext(), "nondefault"), event: testEvent("bazzzz"), valid: false, }, } for _, item := range table { _, storage := NewTestREST() c, err := storage.Create(item.ctx, item.event) if !item.valid { if err == nil { ctxNS := api.NamespaceValue(item.ctx) t.Errorf("unexpected non-error for %v (%v, %v)", item.event.Name, ctxNS, item.event.Namespace) } continue } if err != nil { t.Errorf("%v: Unexpected error %v", item.event.Name, err) continue } if !api.HasObjectMetaSystemFieldValues(&item.event.ObjectMeta) { t.Errorf("storage did not populate object meta field values") } if e, a := item.event, c; !reflect.DeepEqual(e, a) { t.Errorf("diff: %s", util.ObjectDiff(e, a)) } // Ensure we implement the interface _ = rest.Watcher(storage) } }
func (rs *REST) Create(ctx api.Context, obj runtime.Object) (runtime.Object, error) { event, ok := obj.(*api.Event) if !ok { return nil, fmt.Errorf("invalid object type") } if api.NamespaceValue(ctx) != "" { if !api.ValidNamespace(ctx, &event.ObjectMeta) { return nil, errors.NewConflict("event", event.Namespace, fmt.Errorf("event.namespace does not match the provided context")) } } if errs := validation.ValidateEvent(event); len(errs) > 0 { return nil, errors.NewInvalid("event", event.Name, errs) } api.FillObjectMetaSystemFields(ctx, &event.ObjectMeta) err := rs.registry.CreateWithName(ctx, event.Name, event) if err != nil { return nil, err } return rs.registry.Get(ctx, event.Name) }
// async continuation of LaunchTask func (k *QingYuanExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) { //HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go binding := &api.Binding{ ObjectMeta: api.ObjectMeta{ Namespace: pod.Namespace, Name: pod.Name, Annotations: make(map[string]string), }, Target: api.ObjectReference{ Kind: "Node", Name: pod.Annotations[meta.BindingHostKey], }, } // forward the annotations that the scheduler wants to apply for k, v := range pod.Annotations { binding.Annotations[k] = v } deleteTask := func() { k.lock.Lock() defer k.lock.Unlock() delete(k.tasks, taskId) k.resetSuicideWatch(driver) } log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations) ctx := api.WithNamespace(api.NewContext(), binding.Namespace) // TODO(k8s): use Pods interface for binding once clusters are upgraded // return b.Pods(binding.Namespace).Bind(binding) err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error() if err != nil { deleteTask() k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, messages.CreateBindingFailure)) return } podFullName := container.GetPodFullName(pod) // allow a recently failed-over scheduler the chance to recover the task/pod binding: // it may have failed and recovered before the apiserver is able to report the updated // binding information. replays of this status event will signal to the scheduler that // the apiserver should be up-to-date. data, err := json.Marshal(api.PodStatusResult{ ObjectMeta: api.ObjectMeta{ Name: podFullName, SelfLink: "/podstatusresult", }, }) if err != nil { deleteTask() log.Errorf("failed to marshal pod status result: %v", err) k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, err.Error())) return } k.lock.Lock() defer k.lock.Unlock() // Add the task. task, found := k.tasks[taskId] if !found { log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId) k.reportLostTask(driver, taskId, messages.LaunchTaskFailed) return } //TODO(jdef) check for duplicate pod name, if found send TASK_ERROR // from here on, we need to delete containers associated with the task // upon it going into a terminal state task.podName = podFullName k.pods[podFullName] = pod // send the latest snapshot of the set of pods to the qinglet via the pod update channel. // this results in the qinglet spinning up the new pod. update := qinglet.PodUpdate{Op: qinglet.SET} for _, p := range k.pods { update.Pods = append(update.Pods, p) } k.updateChan <- update statusUpdate := &mesos.TaskStatus{ TaskId: mutil.NewTaskID(taskId), State: mesos.TaskState_TASK_STARTING.Enum(), Message: proto.String(messages.CreateBindingSuccess), Data: data, } k.sendStatus(driver, statusUpdate) // Delay reporting 'task running' until container is up. psf := podStatusFunc(func() (*api.PodStatus, error) { status, err := k.podStatusFunc(k.kl, pod) if err != nil { return nil, err } status.Phase = qinglet.GetPhase(&pod.Spec, status.ContainerStatuses) hostIP, err := k.kl.GetHostIP() if err != nil { log.Errorf("Cannot get host IP: %v", err) } else { status.HostIP = hostIP.String() } return status, nil }) go k._launchTask(driver, taskId, podFullName, psf) }