// Generates skydns records for a headless service.
func (ks *kube2sky) newHeadlessService(subdomain string, service *kapi.Service) error {
	// Create an A record for every pod in the service.
	// This record must be periodically updated.
	// Format is as follows:
	// For a service x, with pods a and b create DNS records,
	// a.x.ns.domain. and, b.x.ns.domain.
	ks.mlock.Lock()
	defer ks.mlock.Unlock()
	key, err := kcache.MetaNamespaceKeyFunc(service)
	if err != nil {
		return err
	}
	e, exists, err := ks.endpointsStore.GetByKey(key)
	if err != nil {
		return fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
	}
	if !exists {
		glog.V(1).Infof("could not find endpoints for service %q in namespace %q. DNS records will be created once endpoints show up.", service.Name, service.Namespace)
		return nil
	}
	if e, ok := e.(*kapi.Endpoints); ok {
		return ks.generateRecordsForHeadlessService(subdomain, e, service)
	}
	return nil
}
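// A minimal sketch (an assumption, not part of kube2sky): composing the record
// name described in the comment above, i.e. <pod>.<service>.<ns>.<domain>. for
// a headless service. The helper name and parameters are illustrative only.
func headlessRecordName(pod, service, namespace, domain string) string {
	return fmt.Sprintf("%s.%s.%s.%s.", pod, service, namespace, domain)
}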
// implements Unique
func (p *Pod) GetUID() string {
	if id, err := cache.MetaNamespaceKeyFunc(p.Pod); err != nil {
		panic(fmt.Sprintf("failed to determine pod id for '%+v'", p.Pod))
	} else {
		return id
	}
}
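// Hedged illustration (not part of the original source): GetUID identifies a
// pod by its cache key "namespace/name" rather than by the API object's UID.
// The pod literal below is an example only.
func examplePodKey() (string, error) {
	pod := &api.Pod{ObjectMeta: api.ObjectMeta{Name: "nginx", Namespace: "default"}}
	return cache.MetaNamespaceKeyFunc(pod) // "default/nginx", nil
}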
// Schedule implements the Scheduler interface of Kubernetes.
// It returns the selectedMachine's name and error (if there's any).
func (k *kubeScheduler) Schedule(pod *api.Pod, unused algorithm.NodeLister) (string, error) {
	log.Infof("Try to schedule pod %v\n", pod.Name)
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)

	// default upstream scheduler passes pod.Name as binding.PodID
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		return "", err
	}

	k.api.Lock()
	defer k.api.Unlock()

	switch task, state := k.api.tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// There's a bit of a potential race here, a pod could have been yielded() and
		// then before we get *here* it could be deleted.
		// We use meta to index the pod in the store since that's what k8s reflector does.
		podName, err := cache.MetaNamespaceKeyFunc(pod)
		if err != nil {
			log.Warningf("aborting Schedule, unable to understand pod object %+v", pod)
			return "", noSuchPodErr
		}
		if deleted := k.podUpdates.Poll(podName, queue.DELETE_EVENT); deleted {
			// avoid scheduling a pod that's been deleted between yieldPod() and Schedule()
			log.Infof("aborting Schedule, pod has been deleted %+v", pod)
			return "", noSuchPodErr
		}
		return k.doSchedule(k.api.tasks().Register(k.api.createPodTask(ctx, pod)))

	//TODO(jdef) it's possible that the pod state has diverged from what
	//we knew previously, we should probably update the task.Pod state here
	//before proceeding with scheduling
	case podtask.StatePending:
		if pod.UID != task.Pod.UID {
			// we're dealing with a brand new pod spec here, so the old one must have been
			// deleted -- and so our task store is out of sync w/ respect to reality
			//TODO(jdef) reconcile task
			return "", fmt.Errorf("task %v spec is out of sync with pod %v spec, aborting schedule", task.ID, pod.Name)
		} else if task.Has(podtask.Launched) {
			// task has been marked as "launched" but the pod binding creation may have failed in k8s,
			// but we're going to let someone else handle it, probably the mesos task error handler
			return "", fmt.Errorf("task %s has already been launched, aborting schedule", task.ID)
		} else {
			return k.doSchedule(task, nil)
		}

	default:
		return "", fmt.Errorf("task %s is not pending, nothing to schedule", task.ID)
	}
}
func (ks *kube2sky) getServiceFromEndpoints(e *kapi.Endpoints) (*kapi.Service, error) {
	key, err := kcache.MetaNamespaceKeyFunc(e)
	if err != nil {
		return nil, err
	}
	obj, exists, err := ks.servicesStore.GetByKey(key)
	if err != nil {
		return nil, fmt.Errorf("failed to get service object from services store - %v", err)
	}
	if !exists {
		glog.V(1).Infof("could not find service for endpoint %q in namespace %q", e.Name, e.Namespace)
		return nil, nil
	}
	if svc, ok := obj.(*kapi.Service); ok {
		return svc, nil
	}
	return nil, fmt.Errorf("got a non service object in services store %v", obj)
}
// implementation of scheduling plugin's NextPod func; see k8s plugin/pkg/scheduler
func (q *queuer) yield() *api.Pod {
	log.V(2).Info("attempting to yield a pod")
	q.lock.Lock()
	defer q.lock.Unlock()

	for {
		// limit blocking here to short intervals so that we don't block the
		// enqueuer Run() routine for very long
		kpod := q.podQueue.Await(yieldPopTimeout)
		if kpod == nil {
			signalled := runtime.After(q.unscheduledCond.Wait)
			// lock is yielded at this point and we're going to wait for either
			// a timeout, or a signal that there's data
			select {
			case <-time.After(yieldWaitTimeout):
				q.unscheduledCond.Broadcast() // abort Wait()
				<-signalled                   // wait for the go-routine, and the lock
				log.V(4).Infoln("timed out waiting for a pod to yield")
			case <-signalled:
				// we have acquired the lock, and there
				// may be a pod for us to pop now
			}
			continue
		}

		pod := kpod.(*Pod).Pod
		if podName, err := cache.MetaNamespaceKeyFunc(pod); err != nil {
			log.Warningf("yield unable to understand pod object %+v, will skip: %v", pod, err)
		} else if !q.podUpdates.Poll(podName, queue.POP_EVENT) {
			log.V(1).Infof("yield popped a transitioning pod, skipping: %+v", pod)
		} else if recoverAssignedSlave(pod) != "" {
			// should never happen if enqueuePods is filtering properly
			log.Warningf("yield popped an already-scheduled pod, skipping: %+v", pod)
		} else {
			return pod
		}
	}
}
// DeletionHandlingMetaNamespaceKeyFunc checks for
// cache.DeletedFinalStateUnknown objects before calling
// cache.MetaNamespaceKeyFunc.
func DeletionHandlingMetaNamespaceKeyFunc(obj interface{}) (string, error) {
	if d, ok := obj.(cache.DeletedFinalStateUnknown); ok {
		return d.Key, nil
	}
	return cache.MetaNamespaceKeyFunc(obj)
}
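// A minimal usage sketch (an assumption, not from the original source): using
// DeletionHandlingMetaNamespaceKeyFunc as the KeyFunc for a cache.Store, so
// that objects delivered as DeletedFinalStateUnknown tombstones keep indexing
// under the same "namespace/name" key they had while live. The constructor is
// the standard cache.NewStore; the store's purpose here is illustrative.
func newTombstoneAwareStore() cache.Store {
	return cache.NewStore(DeletionHandlingMetaNamespaceKeyFunc)
}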
func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore cache.Store) {
	goodCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionTrue,
		Reason:            fmt.Sprintf("schedulable condition"),
		LastHeartbeatTime: unversioned.Time{time.Now()},
	}
	badCondition := api.NodeCondition{
		Type:              api.NodeReady,
		Status:            api.ConditionUnknown,
		Reason:            fmt.Sprintf("unschedulable condition"),
		LastHeartbeatTime: unversioned.Time{time.Now()},
	}
	// Create a new schedulable node, since we're first going to apply
	// the unschedulable condition and verify that pods aren't scheduled.
	node := &api.Node{
		ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-node"},
		Spec:       api.NodeSpec{Unschedulable: false},
		Status: api.NodeStatus{
			Capacity: api.ResourceList{
				api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
			},
			Conditions: []api.NodeCondition{goodCondition},
		},
	}
	nodeKey, err := cache.MetaNamespaceKeyFunc(node)
	if err != nil {
		t.Fatalf("Couldn't retrieve key for node %v", node.Name)
	}

	// The test does the following for each nodeStateManager in this list:
	// 1. Create a new node
	// 2. Apply the makeUnSchedulable function
	// 3. Create a new pod
	// 4. Check that the pod doesn't get assigned to the node
	// 5. Apply the schedulable function
	// 6. Check that the pod *does* get assigned to the node
	// 7. Delete the pod and node.
	nodeModifications := []nodeStateManager{
		// Test node.Spec.Unschedulable=true/false
		{
			makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Spec.Unschedulable = true
				if _, err := c.Nodes().Update(n); err != nil {
					t.Fatalf("Failed to update node with unschedulable=true: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					// An unschedulable node should get deleted from the store
					return node == nil
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=true: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Spec.Unschedulable = false
				if _, err := c.Nodes().Update(n); err != nil {
					t.Fatalf("Failed to update node with unschedulable=false: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Spec.Unschedulable == false
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for setting unschedulable=false: %v", err)
				}
			},
		},
		// Test node.Status.Conditions=ConditionTrue/Unknown
		{
			makeUnSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Status = api.NodeStatus{
					Capacity: api.ResourceList{
						api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
					},
					Conditions: []api.NodeCondition{badCondition},
				}
				if _, err = c.Nodes().UpdateStatus(n); err != nil {
					t.Fatalf("Failed to update node with bad status condition: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Status.Conditions[0].Status == api.ConditionUnknown
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
				}
			},
			makeSchedulable: func(t *testing.T, n *api.Node, s cache.Store, c *client.Client) {
				n.Status = api.NodeStatus{
					Capacity: api.ResourceList{
						api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
					},
					Conditions: []api.NodeCondition{goodCondition},
				}
				if _, err = c.Nodes().UpdateStatus(n); err != nil {
					t.Fatalf("Failed to update node with healthy status condition: %v", err)
				}
				err = waitForReflection(s, nodeKey, func(node interface{}) bool {
					return node != nil && node.(*api.Node).Status.Conditions[0].Status == api.ConditionTrue
				})
				if err != nil {
					t.Fatalf("Failed to observe reflected update for status condition update: %v", err)
				}
			},
		},
	}

	for i, mod := range nodeModifications {
		unSchedNode, err := restClient.Nodes().Create(node)
		if err != nil {
			t.Fatalf("Failed to create node: %v", err)
		}

		// Apply the unschedulable modification to the node, and wait for the reflection
		mod.makeUnSchedulable(t, unSchedNode, nodeStore, restClient)

		// Create the new pod, note that this needs to happen post unschedulable
		// modification or we have a race in the test.
		pod := &api.Pod{
			ObjectMeta: api.ObjectMeta{Name: "node-scheduling-test-pod"},
			Spec: api.PodSpec{
				Containers: []api.Container{{Name: "container", Image: "kubernetes/pause:go"}},
			},
		}
		myPod, err := restClient.Pods(api.NamespaceDefault).Create(pod)
		if err != nil {
			t.Fatalf("Failed to create pod: %v", err)
		}

		// There are no schedulable nodes - the pod shouldn't be scheduled.
		err = wait.Poll(time.Second, util.ForeverTestTimeout, podScheduled(restClient, myPod.Namespace, myPod.Name))
		if err == nil {
			t.Errorf("Pod scheduled successfully on unschedulable nodes")
		}
		if err != wait.ErrWaitTimeout {
			t.Errorf("Test %d: failed while trying to confirm the pod does not get scheduled on the node: %v", i, err)
		} else {
			t.Logf("Test %d: Pod did not get scheduled on an unschedulable node", i)
		}

		// Apply the schedulable modification to the node, and wait for the reflection
		schedNode, err := restClient.Nodes().Get(unSchedNode.Name)
		if err != nil {
			t.Fatalf("Failed to get node: %v", err)
		}
		mod.makeSchedulable(t, schedNode, nodeStore, restClient)

		// Wait until the pod is scheduled.
		err = wait.Poll(time.Second, util.ForeverTestTimeout, podScheduled(restClient, myPod.Namespace, myPod.Name))
		if err != nil {
			t.Errorf("Test %d: failed to schedule a pod: %v", i, err)
		} else {
			t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
		}

		err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, api.NewDeleteOptions(0))
		if err != nil {
			t.Errorf("Failed to delete pod: %v", err)
		}
		err = restClient.Nodes().Delete(schedNode.Name)
		if err != nil {
			t.Errorf("Failed to delete node: %v", err)
		}
	}
}
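// A plausible shape for the nodeStateManager helper used above, inferred from
// the struct literals in nodeModifications (an assumption, not necessarily the
// test file's exact declaration): it pairs a mutation that makes a node
// unschedulable with one that makes it schedulable again.
type nodeMutationFunc func(t *testing.T, n *api.Node, s cache.Store, c *client.Client)

type nodeStateManager struct {
	makeUnSchedulable nodeMutationFunc
	makeSchedulable   nodeMutationFunc
}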