// TODO(jdef): find a way to test this without killing CI builds.
// intended to be run with -race
func TestProc_doWithNestedXConcurrent(t *testing.T) {
	t.Skip("disabled for causing CI timeouts.")

	config := defaultConfig
	config.actionQueueDepth = 0
	p := newConfigured(config)

	var wg sync.WaitGroup
	const CONC = 20
	wg.Add(CONC)

	// this test spins up TONS of goroutines that can take a little while to execute on a busy
	// CI server. drawing the line at 10s because I've never seen it take anywhere near that long.
	timeout := 10 * time.Second

	for i := 0; i < CONC; i++ {
		i := i
		errOnce := NewErrorOnce(p.Done())
		runtime.After(func() {
			runDelegationTest(t, p, fmt.Sprintf("nested%d", i), errOnce, timeout)
		}).Then(wg.Done)
		go func() {
			// Errorf, not Fatalf: Fatalf must only be called from the test goroutine
			select {
			case err := <-errOnce.Err():
				if err != nil {
					t.Errorf("delegate %d: unexpected error: %v", i, err)
				}
			case <-time.After(2 * timeout):
				t.Errorf("delegate %d: timed out waiting for doer result", i)
			}
		}()
	}

	ch := runtime.After(wg.Wait)
	fatalAfter(t, ch, 2*timeout, "timed out waiting for concurrent delegates")
	<-p.End()
	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
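// fatalAfter is used throughout these tests but isn't defined in this excerpt.
// a minimal sketch of the assumed semantics: fail the test unless the signal
// chan closes before the timeout. illustrative only, not the repo's helper.
func fatalAfter(t *testing.T, ch <-chan struct{}, timeout time.Duration, msg string, args ...interface{}) {
	select {
	case <-ch:
		// signal arrived in time; nothing to do
	case <-time.After(timeout):
		t.Fatalf(msg, args...)
	}
}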
func (self *procImpl) begin() runtime.Signal {
	if !self.state.transition(stateNew, stateRunning) {
		panic(fmt.Errorf("failed to transition from New to Running state"))
	}
	defer log.V(2).Infof("started process %d", self.pid)

	var entered runtime.Latch

	// execute actions on the backlog chan
	return runtime.After(func() {
		runtime.Until(func() {
			if entered.Acquire() {
				close(self.running)
				self.wg.Add(1)
			}
			for action := range self.backlog {
				select {
				case <-self.terminate:
					return
				default:
					// signal to indicate there's room in the backlog now
					self.changed.Broadcast()

					// rely on Until to handle action panics
					action()
				}
			}
		}, self.actionHandlerCrashDelay, self.terminate)
	}).Then(func() {
		log.V(2).Infof("finished processing action backlog for process %d", self.pid)
		if !entered.Acquire() {
			self.wg.Done()
		}
	})
}
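// begin() leans on three runtime primitives whose implementations live elsewhere.
// the sketch below only illustrates the assumed contracts, under illustrative
// names: After(f) runs f in a goroutine and returns a signal that closes when f
// returns; Then(g) chains g after that signal; Until(f, delay, stop) re-runs f
// (recovering panics) until stop closes. not the runtime package's actual code.
type sketchSignal <-chan struct{}

func sketchAfter(f func()) sketchSignal {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		if f != nil {
			f()
		}
	}()
	return ch
}

func (s sketchSignal) Then(g func()) sketchSignal {
	return sketchAfter(func() {
		<-s // wait for the prior work to finish
		if g != nil {
			g()
		}
	})
}

func sketchUntil(f func(), delay time.Duration, stop <-chan struct{}) {
	for {
		select {
		case <-stop:
			return
		default:
		}
		func() {
			defer func() { recover() }() // swallow action panics, as Until is assumed to
			f()
		}()
		select {
		case <-stop:
			return
		case <-time.After(delay):
		}
	}
}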
func TestProc_multiAction(t *testing.T) {
	p := New()
	const COUNT = 10
	var called sync.WaitGroup
	called.Add(COUNT)

	// test FIFO property
	next := 0
	for i := 0; i < COUNT; i++ {
		log.Infof("do'ing deferred action %d", i)
		idx := i
		err := p.Do(func() {
			defer called.Done()
			log.Infof("deferred action invoked")
			if next != idx {
				// Errorf, not Fatalf: this runs on the process goroutine, not the test goroutine
				t.Errorf("expected index %d instead of %d", idx, next)
			}
			next++
		})
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
	}

	fatalAfter(t, runtime.After(called.Wait), 2*time.Second, "timed out waiting for deferred actions to be invoked")
	p.End()
	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
func TestProc_manyEndings(t *testing.T) {
	p := New()
	const COUNT = 20
	var wg sync.WaitGroup
	wg.Add(COUNT)
	for i := 0; i < COUNT; i++ {
		runtime.On(p.End(), wg.Done)
	}
	fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
}
// MergeOutput waits for the given tasks to complete. meanwhile it logs each time a task
// process completes or generates an error. when shouldQuit closes, tasks are canceled and this
// func eventually returns once all ongoing event handlers have completed running.
func MergeOutput(tasks []*Task, shouldQuit <-chan struct{}) Events {
	tc := make(chan *Completion)

	var waitForTasks sync.WaitGroup
	waitForTasks.Add(len(tasks))

	for _, t := range tasks {
		t := t
		// translate task dead signal into Done
		go func() {
			<-t.done
			waitForTasks.Done()
		}()
		// fan-in task completion and error events to tc
		go t.forwardUntil(tc, shouldQuit)
	}

	tclistener := make(chan *Completion)
	done := runtime.After(func() {
		completionFinished := runtime.After(func() {
			defer close(tclistener)
			forwardCompletionUntil(tc, tclistener, nil, shouldQuit, func(tt *Completion, shutdown bool) {
				prefix := ""
				if shutdown {
					prefix = "(shutdown) "
				}
				log.Infof(prefix+"task %q exited with status %d", tt.name, tt.code)
			})
		})
		waitForTasks.Wait()
		close(tc)
		<-completionFinished
	})

	ei := newEventsImpl(tclistener, done)
	return ei
}
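// the core choreography of MergeOutput in isolation: fan several producer
// channels into one, and close the merged channel only after every producer has
// finished. a hedged, generic sketch; names and element type are illustrative,
// not the repo's API.
func mergeSketch(producers []<-chan string, quit <-chan struct{}) <-chan string {
	out := make(chan string)
	var wg sync.WaitGroup
	wg.Add(len(producers))
	for _, p := range producers {
		p := p
		go func() {
			defer wg.Done()
			for v := range p {
				select {
				case out <- v:
				case <-quit:
					return
				}
			}
		}()
	}
	go func() {
		wg.Wait() // all producers finished (or quit)
		close(out)
	}()
	return out
}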
// spawns a go-routine to watch for unscheduled pods and queue them up
// for scheduling. returns immediately.
func (q *queuer) Run(done <-chan struct{}) {
	go runtime.Until(func() {
		log.Info("Watching for newly created pods")
		q.lock.Lock()
		defer q.lock.Unlock()

		for {
			// limit blocking here for short intervals so that scheduling
			// may proceed even if there have been no recent pod changes
			p := q.podUpdates.Await(enqueuePopTimeout)
			if p == nil {
				signalled := runtime.After(q.deltaCond.Wait)
				// we've yielded the lock
				select {
				case <-time.After(enqueueWaitTimeout):
					q.deltaCond.Broadcast() // abort Wait()
					<-signalled             // wait for lock re-acquisition
					log.V(4).Infoln("timed out waiting for a pod update")
				case <-signalled:
					// we've acquired the lock and there may be
					// changes for us to process now
				}
				continue
			}

			pod := p.(*Pod)
			if recoverAssignedSlave(pod.Pod) != "" {
				log.V(3).Infof("dequeuing assigned pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else if pod.InGracefulTermination() {
				// pods which are pre-scheduled (i.e. NodeName is set) may be gracefully deleted,
				// even though they are not running yet
				log.V(3).Infof("dequeuing gracefully deleted pre-scheduled pod for scheduling: %v", pod.Pod.Name)
				q.dequeue(pod.GetUID())
			} else {
				// use ReplaceExisting because we are always pushing the latest state
				now := time.Now()
				pod.deadline = &now
				if q.podQueue.Offer(pod, queue.ReplaceExisting) {
					q.unscheduledCond.Broadcast()
					log.V(3).Infof("queued pod for scheduling: %v", pod.Pod.Name)
				} else {
					log.Warningf("failed to queue pod for scheduling: %v", pod.Pod.Name)
				}
			}
		}
	}, 1*time.Second, done)
}
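// the abortable-condition-wait trick used above, reduced to plain sync
// primitives. sync.Cond.Wait cannot be interrupted directly, so Wait runs on a
// helper goroutine that closes a channel when it returns, and the timeout path
// Broadcast()s to unblock it. a sketch of the pattern only; it assumes cond.L
// is held by the caller, per Cond.Wait's contract. Go mutexes are not
// owner-tracked, so the helper goroutine may release (via Wait) a mutex locked
// by the caller -- the same trick queuer.Run relies on with runtime.After.
func waitWithTimeout(cond *sync.Cond, timeout time.Duration) (timedOut bool) {
	signalled := make(chan struct{})
	go func() {
		defer close(signalled)
		cond.Wait() // releases cond.L while blocked, re-acquires before returning
	}()
	select {
	case <-time.After(timeout):
		cond.Broadcast() // abort Wait()
		<-signalled      // wait until Wait() has re-acquired cond.L and returned
		return true
	case <-signalled:
		return false
	}
}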
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
			eventLoop:
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							// watch channel closed; restart the election
							// (a bare break would only exit the select)
							break eventLoop
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							break
						}
					sendElected:
						for {
							select {
							case <-abort:
								return
							case n.masters <- electedMaster:
								break sendElected
							default:
								// ring full, discard old value and add the new
								select {
								case <-abort:
									return
								case <-n.masters:
								default:
									// ring was cleared for us?!
								}
							}
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
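// the "size-1 ring" send used above, in isolation: try to publish the newest
// value; if the buffer is full, evict the stale value and retry, so a slow
// consumer always observes the latest election result. a generic sketch with
// illustrative names; ring is assumed to have capacity 1.
func publishLatest(ring chan Master, v Master, abort <-chan struct{}) (ok bool) {
	for {
		select {
		case <-abort:
			return false
		case ring <- v:
			return true
		default:
			// ring full: evict the old value and try the send again
			select {
			case <-abort:
				return false
			case <-ring:
			default:
				// consumer drained it first; just retry the send
			}
		}
	}
}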
// OnError spawns a goroutine that waits for an error. if a non-nil error is read from
// the channel then the handler func is invoked, otherwise (nil error or closed chan)
// the handler is skipped. if a nil handler is specified then it's not invoked.
// the signal chan that's returned closes once the error process logic (and handler,
// if any) has completed.
func OnError(ch <-chan error, f func(error), abort <-chan struct{}) <-chan struct{} {
	return runtime.After(func() {
		if ch == nil {
			return
		}
		select {
		case err, ok := <-ch:
			if ok && err != nil && f != nil {
				f(err)
			}
		case <-abort:
			if f != nil {
				f(errProcessTerminated)
			}
		}
	})
}
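// example usage of OnError, grounded in the signature above: surface an
// asynchronous error without blocking the caller, then wait for the handler to
// finish. errCh and stopCh are illustrative names, not part of the package.
func exampleOnError(errCh <-chan error, stopCh <-chan struct{}) {
	handled := OnError(errCh, func(err error) {
		log.Errorf("async operation failed: %v", err)
	}, stopCh)
	<-handled // closes once the error logic (and handler, if any) has completed
}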
func Test(t *testing.T) {
	m := NewFake()
	changes := make(chan bool, 1500)
	done := make(chan struct{})

	s := &slowService{t: t, changes: changes, done: done}

	// change master to "me" such that the initial m.Elect call inside Notify
	// will trigger an observable event. we will wait for it to make sure the
	// Notify loop will see those master changes triggered by the go routine below.
	m.ChangeMaster(Master("me"))
	temporaryWatch := m.mux.Watch()
	ch := temporaryWatch.ResultChan()

	notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })

	// wait for the event triggered by the initial m.Elect of Notify. then drain
	// the channel so nothing blocks.
	<-ch
	temporaryWatch.Stop()
	for i := 0; i < len(ch); i += 1 { // go 1.3 and 1.4 compatible loop
		<-ch
	}

	go func() {
		defer close(done)
		for i := 0; i < 500; i++ {
			for _, key := range []string{"me", "notme", "alsonotme"} {
				m.ChangeMaster(Master(key))
			}
		}
	}()

	<-notifyDone
	close(changes)

	changesNum := len(changes)
	if changesNum > 1000 || changesNum == 0 {
		t.Errorf("unexpected number of changes: %v", changesNum)
	}
}
// Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
	n := &notifier{id: Master(id), service: s}
	n.changed = make(chan struct{})
	finished := runtime.After(func() {
		runtime.Until(func() {
			for {
				w := m.Elect(path, id)
			eventLoop:
				for {
					select {
					case <-abort:
						return
					case event, open := <-w.ResultChan():
						if !open {
							// watch channel closed; restart the election
							// (a bare break would only exit the select)
							break eventLoop
						}
						if event.Type != watch.Modified {
							continue
						}
						electedMaster, ok := event.Object.(Master)
						if !ok {
							glog.Errorf("Unexpected object from election channel: %v", event.Object)
							break
						}
						n.lock.Lock()
						n.desired = electedMaster
						n.lock.Unlock()

						// notify serviceLoop, but don't block. if a change
						// is queued already it will see the new n.desired.
						select {
						case n.changed <- struct{}{}:
						default:
						}
					}
				}
			}
		}, 0, abort)
	})
	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
}
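// the coalescing notification used above, reduced to its essentials: writers
// store the latest desired state under a lock, then attempt a non-blocking
// send on a notification channel. multiple updates collapse into one wakeup
// and the consumer re-reads the latest state. illustrative sketch; note that
// the sketch buffers one slot so a wakeup stays pending even when the consumer
// is busy, whereas Notify above uses an unbuffered channel, which only wakes a
// consumer that is already blocked receiving.
type coalescer struct {
	mu      sync.Mutex
	desired Master
	changed chan struct{} // assumed: make(chan struct{}, 1)
}

func (c *coalescer) update(m Master) {
	c.mu.Lock()
	c.desired = m
	c.mu.Unlock()
	select {
	case c.changed <- struct{}{}: // wake the consumer
	default: // a wakeup is already pending; it will observe the new desired value
	}
}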
// implementation of scheduling plugin's NextPod func; see k8s plugin/pkg/scheduler
func (q *queuer) yield() *api.Pod {
	log.V(2).Info("attempting to yield a pod")
	q.lock.Lock()
	defer q.lock.Unlock()

	for {
		// limit blocking here to short intervals so that we don't block the
		// enqueuer Run() routine for very long
		kpod := q.podQueue.Await(yieldPopTimeout)
		if kpod == nil {
			signalled := runtime.After(q.unscheduledCond.Wait)
			// lock is yielded at this point and we're going to wait for either
			// a timeout, or a signal that there's data
			select {
			case <-time.After(yieldWaitTimeout):
				q.unscheduledCond.Broadcast() // abort Wait()
				<-signalled                   // wait for the go-routine, and the lock
				log.V(4).Infoln("timed out waiting for a pod to yield")
			case <-signalled:
				// we have acquired the lock, and there
				// may be a pod for us to pop now
			}
			continue
		}

		pod := kpod.(*Pod).Pod
		if podName, err := cache.MetaNamespaceKeyFunc(pod); err != nil {
			log.Warningf("yield unable to understand pod object %+v, will skip: %v", pod, err)
		} else if !q.podUpdates.Poll(podName, queue.POP_EVENT) {
			log.V(1).Infof("yield popped a transitioning pod, skipping: %+v", pod)
		} else if annotatedForExecutor(pod) {
			// should never happen if enqueuePods is filtering properly
			log.Warningf("yield popped an already-scheduled pod, skipping: %+v", pod)
		} else {
			return pod
		}
	}
}
// intended to be run with -race
func TestProc_doWithNestedXConcurrent(t *testing.T) {
	config := defaultConfig
	config.actionQueueDepth = 4000
	p := newConfigured(config)

	var wg sync.WaitGroup
	const CONC = 20
	wg.Add(CONC)

	for i := 0; i < CONC; i++ {
		i := i
		errOnce := NewErrorOnce(p.Done())
		runtime.After(func() {
			runDelegationTest(t, p, fmt.Sprintf("nested%d", i), errOnce)
		}).Then(wg.Done)
		go func() {
			err := <-errOnce.Err()
			if err != nil {
				t.Errorf("delegate %d: unexpected error: %v", i, err)
			}
		}()
	}

	wg.Wait()
	<-p.End()
	<-p.Done()
}
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
// and kill tasks while properly bookkeeping its tasks.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	// create a fake pod watch. we use that below to submit new pods to the scheduler
	podListWatch := NewMockPodsListWatch(api.PodList{})

	// create fake apiserver
	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
	defer testApiServer.server.Close()

	mockDriver := &MockExecutorDriver{}
	updates := make(chan interface{}, 1024)
	config := Config{
		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
		Updates: updates,
		APIClient: client.NewOrDie(&client.Config{
			Host:    testApiServer.server.URL,
			Version: testapi.Default.Version(),
		}),
		Kubelet: &fakeKubelet{
			Kubelet: &kubelet.Kubelet{},
			hostIP:  net.IPv4(127, 0, 0, 1),
		},
		PodStatusFunc: func(kl KubeletInterface, pod *api.Pod) (*api.PodStatus, error) {
			return &api.PodStatus{
				ContainerStatuses: []api.ContainerStatus{
					{
						Name: "foo",
						State: api.ContainerState{
							Running: &api.ContainerStateRunning{},
						},
					},
				},
				Phase: api.PodRunning,
			}, nil
		},
	}
	executor := New(config)

	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	select {
	case <-updates:
	case <-time.After(time.Second):
		t.Fatalf("Executor should send an initial update on Registration")
	}

	pod := NewTestPod(1)
	podTask, err := podtask.New(api.NewDefaultContext(), "", *pod, &mesosproto.ExecutorInfo{})
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	taskInfo := podTask.BuildTaskInfo()
	data, err := testapi.Default.Codec().Encode(pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data

	var statusUpdateCalls sync.WaitGroup
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 1 && len(executor.pods) == 1
	}, "executor must be able to create a task and a pod")

	gotPodUpdate := false
	select {
	case m := <-updates:
		update, ok := m.(kubelet.PodUpdate)
		if ok && len(update.Pods) == 1 {
			gotPodUpdate = true
		}
	case <-time.After(time.Second):
	}
	assert.Equal(t, true, gotPodUpdate,
		"the executor should send an update about a new pod to "+
			"the updates chan when creating a new one.")

	// allow some time for asynchronous requests to the driver
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return len(executor.tasks) == 0 && len(executor.pods) == 0
	}, "executor must be able to kill a created task and pod")

	// allow some time for asynchronous requests to the driver
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(5 * time.Second):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
}
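// assertext.EventuallyTrue is used by these executor tests but not defined in
// this excerpt. a minimal sketch of the assumed semantics: poll the predicate
// until it returns true or the deadline passes. the poll interval is an
// assumption; this is illustrative only, not the repo's implementation.
func eventuallyTrue(t *testing.T, timeout time.Duration, pred func() bool, msg string) {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if pred() {
			return
		}
		time.Sleep(10 * time.Millisecond) // assumed poll interval
	}
	t.Fatalf("condition not satisfied within %v: %s", timeout, msg)
}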
// TestExecutorLaunchAndKillTask ensures that the executor is able to launch tasks and generates
// appropriate status messages for mesos. It then kills the task and validates that appropriate
// actions are taken by the executor.
func TestExecutorLaunchAndKillTask(t *testing.T) {
	var (
		mockDriver = &MockExecutorDriver{}
		registry   = newFakeRegistry()
		executor   = New(Config{
			Docker:    dockertools.ConnectToDockerOrDie("fake://", 0),
			NodeInfos: make(chan NodeInfo, 1),
			Registry:  registry,
		})
		mockKubeAPI  = &mockKubeAPI{}
		pod          = NewTestPod(1)
		executorinfo = &mesosproto.ExecutorInfo{}
	)
	executor.kubeAPI = mockKubeAPI
	executor.Init(mockDriver)
	executor.Registered(mockDriver, nil, nil, nil)

	podTask, err := podtask.New(
		api.NewDefaultContext(),
		podtask.Config{
			Prototype:        executorinfo,
			HostPortStrategy: hostport.StrategyWildcard,
		},
		pod,
	)
	assert.Equal(t, nil, err, "must be able to create a task from a pod")

	pod.Annotations = map[string]string{
		"k8s.mesosphere.io/taskId": podTask.ID,
	}

	podTask.Spec = &podtask.Spec{Executor: executorinfo}
	taskInfo, err := podTask.BuildTaskInfo()
	assert.Equal(t, nil, err, "must be able to build task info")

	data, err := runtime.Encode(testapi.Default.Codec(), pod)
	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
	taskInfo.Data = data

	var statusUpdateCalls sync.WaitGroup
	statusUpdateCalls.Add(1)
	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }

	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_STARTING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_RUNNING,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	executor.LaunchTask(mockDriver, taskInfo)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return !registry.empty()
	}, "executor must be able to create a task and a pod")

	// simulate a pod source update; normally this update is generated when binding a pod
	err = registry.phaseChange(pod, api.PodPending)
	assert.NoError(t, err)

	// simulate a pod source update; normally this update is generated by the kubelet
	// once the pod is healthy
	err = registry.phaseChange(pod, api.PodRunning)
	assert.NoError(t, err)

	// allow some time for asynchronous requests to the driver
	finished := kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	statusUpdateCalls.Add(1)
	mockDriver.On(
		"SendStatusUpdate",
		mesosproto.TaskState_TASK_KILLED,
	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()

	// simulate what happens when the apiserver is told to delete a pod
	mockKubeAPI.On("killPod", pod.Namespace, pod.Name).Return(nil).Run(func(_ mock.Arguments) {
		registry.Remove(podTask.ID)
	})

	executor.KillTask(mockDriver, taskInfo.TaskId)

	assertext.EventuallyTrue(t, wait.ForeverTestTimeout, func() bool {
		executor.lock.Lock()
		defer executor.lock.Unlock()
		return registry.empty()
	}, "executor must be able to kill a created task and pod")

	// allow some time for asynchronous requests to the driver
	finished = kmruntime.After(statusUpdateCalls.Wait)
	select {
	case <-finished:
	case <-time.After(wait.ForeverTestTimeout):
		t.Fatalf("timed out waiting for status update calls to finish")
	}

	mockDriver.AssertExpectations(t)
	mockKubeAPI.AssertExpectations(t)
}