Ejemplo n.º 1
0
// Runs e; will not return until stopCh is closed. workers determines how many
// endpoints will be handled in parallel.
func (e *EndpointController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go e.serviceController.Run(stopCh)
	go e.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(e.worker, time.Second, stopCh)
	}
	go func() {
		defer util.HandleCrash()
		time.Sleep(5 * time.Minute) // give time for our cache to fill
		e.checkLeftoverEndpoints()
	}()
	<-stopCh
	e.queue.ShutDown()
}
Ejemplo n.º 2
0
// Copy the reader to the response. The created WebSocket is closed after this
// method completes.
func (r *Reader) Copy(w http.ResponseWriter, req *http.Request) error {
	go func() {
		defer util.HandleCrash()
		websocket.Server{Handshake: r.handshake, Handler: r.handle}.ServeHTTP(w, req)
	}()
	return <-r.err
}
Ejemplo n.º 3
0
func (c *clusterResourceVersionObserver) ObserveResourceVersion(resourceVersion string, timeout time.Duration) error {
	if len(c.watchers) == 0 {
		return nil
	}

	wg := &sync.WaitGroup{}
	backendErrors := make([]error, len(c.watchers), len(c.watchers))
	for i, watcher := range c.watchers {
		wg.Add(1)
		go func(i int, watcher rest.Watcher) {
			defer kutil.HandleCrash()
			defer wg.Done()
			backendErrors[i] = watchForResourceVersion(c.versioner, watcher, resourceVersion, timeout)
		}(i, watcher)
	}

	glog.V(5).Infof("waiting for resourceVersion %s to be distributed", resourceVersion)
	wg.Wait()

	successes := 0
	for _, err := range backendErrors {
		if err == nil {
			successes++
		} else {
			glog.V(4).Infof("error verifying resourceVersion %s: %v", resourceVersion, err)
		}
	}
	glog.V(5).Infof("resourceVersion %s was distributed to %d etcd cluster members (out of %d)", resourceVersion, successes, len(c.watchers))

	if successes >= c.successThreshold {
		return nil
	}

	return fmt.Errorf("resourceVersion %s was observed on %d cluster members (threshold %d): %v", resourceVersion, successes, c.successThreshold, backendErrors)
}
Ejemplo n.º 4
0
// finishRequest makes a given resultFunc asynchronous and handles errors returned by the response.
// Any api.Status object returned is considered an "error", which interrupts the normal response flow.
func finishRequest(timeout time.Duration, fn resultFunc) (result runtime.Object, err error) {
	// these channels need to be buffered to prevent the goroutine below from hanging indefinitely
	// when the select statement reads something other than the one the goroutine sends on.
	ch := make(chan runtime.Object, 1)
	errCh := make(chan error, 1)
	panicCh := make(chan interface{}, 1)
	go func() {
		// panics don't cross goroutine boundaries, so we have to handle ourselves
		defer util.HandleCrash(func(panicReason interface{}) {
			// Propagate to parent goroutine
			panicCh <- panicReason
		})

		if result, err := fn(); err != nil {
			errCh <- err
		} else {
			ch <- result
		}
	}()

	select {
	case result = <-ch:
		if status, ok := result.(*unversioned.Status); ok {
			return nil, errors.FromObject(status)
		}
		return result, nil
	case err = <-errCh:
		return nil, err
	case p := <-panicCh:
		panic(p)
	case <-time.After(timeout):
		return nil, errors.NewTimeoutError("request did not complete within allowed duration", 0)
	}
}
Ejemplo n.º 5
0
// buildImage invokes a docker build on a particular directory
func buildImage(client DockerClient, dir string, noCache bool, tag string, tar tar.Tar, pullAuth *docker.AuthConfigurations, forcePull bool) error {
	// TODO: be able to pass a stream directly to the Docker build to avoid the double temp hit
	r, w := io.Pipe()
	go func() {
		defer util.HandleCrash()
		defer w.Close()
		if err := tar.CreateTarStream(dir, false, w); err != nil {
			w.CloseWithError(err)
		}
	}()
	defer w.Close()
	glog.V(5).Infof("Invoking Docker build to create %q", tag)
	opts := docker.BuildImageOptions{
		Name:           tag,
		RmTmpContainer: true,
		OutputStream:   os.Stdout,
		InputStream:    r,
		NoCache:        noCache,
		Pull:           forcePull,
	}
	if pullAuth != nil {
		opts.AuthConfigs = *pullAuth
	}
	return client.BuildImage(opts)
}
Ejemplo n.º 6
0
func (udp *udpProxySocket) getBackendConn(activeClients *clientCache, cliAddr net.Addr, proxier *Proxier, service proxy.ServicePortName, timeout time.Duration) (net.Conn, error) {
	activeClients.mu.Lock()
	defer activeClients.mu.Unlock()

	svrConn, found := activeClients.clients[cliAddr.String()]
	if !found {
		// TODO: This could spin up a new goroutine to make the outbound connection,
		// and keep accepting inbound traffic.
		glog.V(3).Infof("New UDP connection from %s", cliAddr)
		var err error
		svrConn, err = tryConnect(service, cliAddr, "udp", proxier)
		if err != nil {
			return nil, err
		}
		if err = svrConn.SetDeadline(time.Now().Add(timeout)); err != nil {
			glog.Errorf("SetDeadline failed: %v", err)
			return nil, err
		}
		activeClients.clients[cliAddr.String()] = svrConn
		go func(cliAddr net.Addr, svrConn net.Conn, activeClients *clientCache, timeout time.Duration) {
			defer util.HandleCrash()
			udp.proxyClient(cliAddr, svrConn, activeClients, timeout)
		}(cliAddr, svrConn, activeClients, timeout)
	}
	return svrConn, nil
}
Ejemplo n.º 7
0
// buildImage invokes a docker build on a particular directory
func buildImage(client DockerClient, dir string, dockerfilePath string, noCache bool, tag string, tar tar.Tar, pullAuth *docker.AuthConfigurations, forcePull bool, cgLimits *s2iapi.CGroupLimits) error {
	// TODO: be able to pass a stream directly to the Docker build to avoid the double temp hit
	r, w := io.Pipe()
	go func() {
		defer util.HandleCrash()
		defer w.Close()
		if err := tar.CreateTarStream(dir, false, w); err != nil {
			w.CloseWithError(err)
		}
	}()
	defer w.Close()
	glog.V(5).Infof("Invoking Docker build to create %q", tag)
	opts := docker.BuildImageOptions{
		Name:           tag,
		RmTmpContainer: true,
		OutputStream:   os.Stdout,
		InputStream:    r,
		Dockerfile:     dockerfilePath,
		NoCache:        noCache,
		Pull:           forcePull,
	}
	if cgLimits != nil {
		opts.Memory = cgLimits.MemoryLimitBytes
		opts.Memswap = cgLimits.MemorySwap
		opts.CPUShares = cgLimits.CPUShares
		opts.CPUPeriod = cgLimits.CPUPeriod
		opts.CPUQuota = cgLimits.CPUQuota
	}
	if pullAuth != nil {
		opts.AuthConfigs = *pullAuth
	}
	return client.BuildImage(opts)
}
Ejemplo n.º 8
0
// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called
// as a goroutine.
func (w *etcdWatcher) etcdWatch(ctx context.Context, client etcd.KeysAPI, key string, resourceVersion uint64) {
	defer util.HandleCrash()
	defer close(w.etcdError)
	defer close(w.etcdIncoming)
	if resourceVersion == 0 {
		latest, err := etcdGetInitialWatchState(ctx, client, key, w.list, w.etcdIncoming)
		if err != nil {
			w.etcdError <- err
			return
		}
		resourceVersion = latest
	}

	opts := etcd.WatcherOptions{
		Recursive:  w.list,
		AfterIndex: resourceVersion,
	}
	watcher := client.Watcher(key, &opts)
	w.stopLock.Lock()
	w.ctx, w.cancel = context.WithCancel(ctx)
	w.stopLock.Unlock()

	for {
		resp, err := watcher.Next(w.ctx)
		if err != nil {
			w.etcdError <- err
			return
		}
		w.etcdIncoming <- resp
	}
}
Ejemplo n.º 9
0
// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called
// as a goroutine.
func (w *etcdWatcher) etcdWatch(client tools.EtcdClient, key string, resourceVersion uint64) {
	// glog.Infof("Watching")
	defer util.HandleCrash()
	defer close(w.etcdError)
	if resourceVersion == 0 {
		latest, err := etcdGetInitialWatchState(client, key, w.list, w.etcdIncoming)
		if err != nil {
			if etcdError, ok := err.(*etcd.EtcdError); ok && etcdError != nil && etcdError.ErrorCode == tools.EtcdErrorCodeNotFound {
				// glog.Errorf("Error getting initial watch, key not found: %v", err)

				return
			}
			glog.Errorf("Error getting initial watch: %v", err)
			w.etcdError <- err
			return
		}
		resourceVersion = latest + 1
	}
	response, err := client.Watch(key, resourceVersion, w.list, w.etcdIncoming, w.etcdStop)
	glog.Infof("response is %v", response)
	if err != nil && err != etcd.ErrWatchStoppedByUser {
		glog.Errorf("Error watch: %v", err)
		w.etcdError <- err
	}
}
Ejemplo n.º 10
0
func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
	return func(pod *api.Pod, err error) {
		if err == scheduler.ErrNoNodesAvailable {
			glog.V(4).Infof("Unable to schedule %v %v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
		} else {
			glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
		}
		backoff.gc()
		// Retry asynchronously.
		// Note that this is extremely rudimentary and we need a more real error handling path.
		go func() {
			defer util.HandleCrash()
			podID := pod.Name
			podNamespace := pod.Namespace
			backoff.wait(podID)
			// Get the pod again; it may have changed/been scheduled already.
			pod = &api.Pod{}
			err := factory.Client.Get().Namespace(podNamespace).Resource("pods").Name(podID).Do().Into(pod)
			if err != nil {
				if !errors.IsNotFound(err) {
					glog.Errorf("Error getting pod %v for retry: %v; abandoning", podID, err)
				}
				return
			}
			if pod.Spec.NodeName == "" {
				podQueue.Add(pod)
			}
		}()
	}
}
Ejemplo n.º 11
0
// addServiceOnPort starts listening for a new service, returning the serviceInfo.
// Pass proxyPort=0 to allocate a random port. The timeout only applies to UDP
// connections, for now.
func (proxier *Proxier) addServiceOnPort(service proxy.ServicePortName, protocol api.Protocol, proxyPort int, timeout time.Duration) (*serviceInfo, error) {
	sock, err := newProxySocket(protocol, proxier.listenIP, proxyPort)
	if err != nil {
		return nil, err
	}
	_, portStr, err := net.SplitHostPort(sock.Addr().String())
	if err != nil {
		sock.Close()
		return nil, err
	}
	portNum, err := strconv.Atoi(portStr)
	if err != nil {
		sock.Close()
		return nil, err
	}
	si := &serviceInfo{
		proxyPort:           portNum,
		protocol:            protocol,
		socket:              sock,
		timeout:             timeout,
		sessionAffinityType: api.ServiceAffinityNone, // default
		stickyMaxAgeMinutes: 180,                     // TODO: paramaterize this in the API.
	}
	proxier.setServiceInfo(service, si)

	glog.V(2).Infof("Proxying for service %q on %s port %d", service, protocol, portNum)
	go func(service proxy.ServicePortName, proxier *Proxier) {
		defer util.HandleCrash()
		atomic.AddInt32(&proxier.numProxyLoops, 1)
		sock.ProxyLoop(service, si, proxier)
		atomic.AddInt32(&proxier.numProxyLoops, -1)
	}(service, proxier)

	return si, nil
}
Ejemplo n.º 12
0
// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler
func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) {

	if schedulingErr == noSuchPodErr {
		log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name)
		return
	}

	log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr)
	defer util.HandleCrash()

	// default upstream scheduler passes pod.Name as binding.PodID
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err)
		return
	}

	k.backoff.GC()
	k.api.Lock()
	defer k.api.Unlock()

	switch task, state := k.api.tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// if we don't have a mapping here any more then someone deleted the pod
		log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey)
		return

	case podtask.StatePending:
		if task.Has(podtask.Launched) {
			log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
			return
		}
		breakoutEarly := queue.BreakChan(nil)
		if schedulingErr == noSuitableOffersErr {
			log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
			breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool {
				k.api.Lock()
				defer k.api.Unlock()
				switch task, state := k.api.tasks().Get(task.ID); state {
				case podtask.StatePending:
					// Assess fitness of pod with the current offer. The scheduler normally
					// "backs off" when it can't find an offer that matches up with a pod.
					// The backoff period for a pod can terminate sooner if an offer becomes
					// available that matches up.
					return !task.Has(podtask.Launched) && k.api.algorithm().FitPredicate()(task, offer, nil)
				default:
					// no point in continuing to check for matching offers
					return true
				}
			}))
		}
		delay := k.backoff.Get(podKey)
		log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
		k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly})

	default:
		log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
	}
}
Ejemplo n.º 13
0
func (cc *cadvisorClient) exportHTTP(port uint) error {
	// Register the handlers regardless as this registers the prometheus
	// collector properly.
	mux := http.NewServeMux()
	err := cadvisorHttp.RegisterHandlers(mux, cc, "", "", "", "", "/metrics")
	if err != nil {
		return err
	}

	// Only start the http server if port > 0
	if port > 0 {
		serv := &http.Server{
			Addr:    fmt.Sprintf(":%d", port),
			Handler: mux,
		}

		// TODO(vmarmol): Remove this when the cAdvisor port is once again free.
		// If export failed, retry in the background until we are able to bind.
		// This allows an existing cAdvisor to be killed before this one registers.
		go func() {
			defer util.HandleCrash()

			err := serv.ListenAndServe()
			for err != nil {
				glog.Infof("Failed to register cAdvisor on port %d, retrying. Error: %v", port, err)
				time.Sleep(time.Minute)
				err = serv.ListenAndServe()
			}
		}()
	}

	return nil
}
Ejemplo n.º 14
0
// receive reads result from the decoder in a loop and sends down the result channel.
func (sw *StreamWatcher) receive() {
	defer close(sw.result)
	defer sw.Stop()
	defer util.HandleCrash()
	for {
		action, obj, err := sw.source.Decode()
		if err != nil {
			// Ignore expected error.
			if sw.stopping() {
				return
			}
			switch err {
			case io.EOF:
				// watch closed normally
			case io.ErrUnexpectedEOF:
				glog.V(1).Infof("Unexpected EOF during watch stream event decoding: %v", err)
			default:
				msg := "Unable to decode an event from the watch stream: %v"
				if net.IsProbableEOF(err) {
					glog.V(5).Infof(msg, err)
				} else {
					glog.Errorf(msg, err)
				}
			}
			return
		}
		sw.result <- Event{
			Type:   action,
			Object: obj,
		}
	}
}
Ejemplo n.º 15
0
// translate pulls stuff from etcd, converts, and pushes out the outgoing channel. Meant to be
// called as a goroutine.
func (w *etcdWatcher) translate() {
	defer close(w.outgoing)
	defer util.HandleCrash()

	for {
		select {
		case err := <-w.etcdError:
			if err != nil {
				w.emit(watch.Event{
					Type: watch.Error,
					Object: &unversioned.Status{
						Status:  unversioned.StatusFailure,
						Message: err.Error(),
					},
				})
			}
			return
		case <-w.userStop:
			w.etcdStop <- true
			return
		case res, ok := <-w.etcdIncoming:
			if ok {
				if curLen := int64(len(w.etcdIncoming)); watchChannelHWM.Check(curLen) {
					// Monitor if this gets backed up, and how much.
					glog.V(2).Infof("watch: %v objects queued in channel.", curLen)
				}
				w.sendResult(res)
			}
			// If !ok, don't return here-- must wait for etcdError channel
			// to give an error or be closed.
		}
	}
}
Ejemplo n.º 16
0
// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called
// as a goroutine.
func (w *etcdWatcher) etcdWatch(ctx context.Context, client etcd.KeysAPI, key string, resourceVersion uint64) {
	defer util.HandleCrash()
	defer close(w.etcdError)
	defer close(w.etcdIncoming)

	// All calls to etcd are coming from this function - once it is finished
	// no other call to etcd should be generated by this watcher.
	done := func() {}

	// We need to be prepared, that Stop() can be called at any time.
	// It can potentially also be called, even before this function is called.
	// If that is the case, we simply skip all the code here.
	// See #18928 for more details.
	var watcher etcd.Watcher
	returned := func() bool {
		w.stopLock.Lock()
		defer w.stopLock.Unlock()
		if w.stopped {
			// Watcher has already been stopped - don't event initiate it here.
			return true
		}
		w.wg.Add(1)
		done = w.wg.Done
		// Perform initialization of watcher under lock - we want to avoid situation when
		// Stop() is called in the meantime (which in tests can cause etcd termination and
		// strange behavior here).
		if resourceVersion == 0 {
			latest, err := etcdGetInitialWatchState(ctx, client, key, w.list, w.etcdIncoming)
			if err != nil {
				w.etcdError <- err
				return true
			}
			resourceVersion = latest
		}

		opts := etcd.WatcherOptions{
			Recursive:  w.list,
			AfterIndex: resourceVersion,
		}
		watcher = client.Watcher(key, &opts)
		w.ctx, w.cancel = context.WithCancel(ctx)
		return false
	}()
	defer done()
	if returned {
		return
	}

	for {
		resp, err := watcher.Next(w.ctx)
		if err != nil {
			w.etcdError <- err
			return
		}
		w.etcdIncoming <- resp
	}
}
Ejemplo n.º 17
0
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func (w *worker) doProbe() (keepGoing bool) {
	defer util.HandleCrash(func(_ interface{}) { keepGoing = true })

	status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
	if !ok {
		// Either the pod has not been created yet, or it was already deleted.
		glog.V(3).Infof("No status for pod: %v", kubeletutil.FormatPodName(w.pod))
		return true
	}

	// Worker should terminate if pod is terminated.
	if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded {
		glog.V(3).Infof("Pod %v %v, exiting probe worker",
			kubeletutil.FormatPodName(w.pod), status.Phase)
		return false
	}

	c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name)
	if !ok {
		// Either the container has not been created yet, or it was deleted.
		glog.V(3).Infof("Non-existant container probed: %v - %v",
			kubeletutil.FormatPodName(w.pod), w.container.Name)
		return true // Wait for more information.
	}

	if w.containerID.String() != c.ContainerID {
		if !w.containerID.IsEmpty() {
			w.resultsManager.Remove(w.containerID)
		}
		w.containerID = kubecontainer.ParseContainerID(c.ContainerID)
	}

	if c.State.Running == nil {
		glog.V(3).Infof("Non-running container probed: %v - %v",
			kubeletutil.FormatPodName(w.pod), w.container.Name)
		if !w.containerID.IsEmpty() {
			w.resultsManager.Set(w.containerID, results.Failure, w.pod)
		}
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil ||
			w.pod.Spec.RestartPolicy != api.RestartPolicyNever
	}

	if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
		w.resultsManager.Set(w.containerID, w.initialValue, w.pod)
		return true
	}

	// TODO: Move error handling out of prober.
	result, _ := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID)
	if result != probe.Unknown {
		w.resultsManager.Set(w.containerID, result != probe.Failure, w.pod)
	}

	return true
}
Ejemplo n.º 18
0
// ignoreReceives reads from a WebSocket until it is closed, then returns. If timeout is set, the
// read and write deadlines are pushed every time a new message is received.
func ignoreReceives(ws *websocket.Conn, timeout time.Duration) {
	defer util.HandleCrash()
	var data []byte
	for {
		resetTimeout(ws, timeout)
		if err := websocket.Message.Receive(ws, &data); err != nil {
			return
		}
	}
}
Ejemplo n.º 19
0
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func doProbe(m *manager, w *worker) (keepGoing bool) {
	defer util.HandleCrash(func(_ interface{}) { keepGoing = true })

	status, ok := m.statusManager.GetPodStatus(w.pod.UID)
	if !ok {
		// Either the pod has not been created yet, or it was already deleted.
		glog.V(3).Infof("No status for pod: %v", kubeutil.FormatPodName(w.pod))
		return true
	}

	// Worker should terminate if pod is terminated.
	if status.Phase == api.PodFailed || status.Phase == api.PodSucceeded {
		glog.V(3).Infof("Pod %v %v, exiting probe worker",
			kubeutil.FormatPodName(w.pod), status.Phase)
		return false
	}

	c, ok := api.GetContainerStatus(status.ContainerStatuses, w.container.Name)
	if !ok {
		// Either the container has not been created yet, or it was deleted.
		glog.V(3).Infof("Non-existant container probed: %v - %v",
			kubeutil.FormatPodName(w.pod), w.container.Name)
		return true // Wait for more information.
	}

	if w.containerID != types.UID(c.ContainerID) {
		if w.containerID != "" {
			m.readinessCache.removeReadiness(string(w.containerID))
		}
		w.containerID = types.UID(kubecontainer.TrimRuntimePrefix(c.ContainerID))
	}

	if c.State.Running == nil {
		glog.V(3).Infof("Non-running container probed: %v - %v",
			kubeutil.FormatPodName(w.pod), w.container.Name)
		m.readinessCache.setReadiness(string(w.containerID), false)
		// Abort if the container will not be restarted.
		return c.State.Terminated == nil ||
			w.pod.Spec.RestartPolicy != api.RestartPolicyNever
	}

	if int64(time.Since(c.State.Running.StartedAt.Time).Seconds()) < w.spec.InitialDelaySeconds {
		// Readiness defaults to false during the initial delay.
		m.readinessCache.setReadiness(string(w.containerID), false)
		return true
	}

	// TODO: Move error handling out of prober.
	result, _ := m.prober.ProbeReadiness(w.pod, status, w.container, string(w.containerID))
	if result != probe.Unknown {
		m.readinessCache.setReadiness(string(w.containerID), result != probe.Failure)
	}

	return true
}
Ejemplo n.º 20
0
// Detaches the specified persistent disk device from node, verifies that it is detached, and retries if it fails.
// This function is intended to be called asynchronously as a go routine.
func detachDiskAndVerify(c *gcePersistentDiskCleaner) {
	glog.V(5).Infof("detachDiskAndVerify(...) for pd %q. Will block for pending operations", c.pdName)
	defer util.HandleCrash()

	// Block execution until any pending attach/detach operations for this PD have completed
	attachDetachMutex.LockKey(c.pdName)
	defer attachDetachMutex.UnlockKey(c.pdName)

	glog.V(5).Infof("detachDiskAndVerify(...) for pd %q. Awake and ready to execute.", c.pdName)

	devicePaths := getDiskByIdPaths(c.gcePersistentDisk)
	var gceCloud *gcecloud.GCECloud
	for numRetries := 0; numRetries < maxRetries; numRetries++ {
		var err error
		if gceCloud == nil {
			gceCloud, err = getCloudProvider()
			if err != nil || gceCloud == nil {
				// Retry on error. See issue #11321
				glog.Errorf("Error getting GCECloudProvider while detaching PD %q: %v", c.pdName, err)
				time.Sleep(errorSleepDuration)
				continue
			}
		}

		if numRetries > 0 {
			glog.Warningf("Retrying detach for GCE PD %q (retry count=%v).", c.pdName, numRetries)
		}

		if err := gceCloud.DetachDisk(c.pdName, c.plugin.host.GetHostName()); err != nil {
			glog.Errorf("Error detaching PD %q: %v", c.pdName, err)
			time.Sleep(errorSleepDuration)
			continue
		}

		for numChecks := 0; numChecks < maxChecks; numChecks++ {
			allPathsRemoved, err := verifyAllPathsRemoved(devicePaths)
			if err != nil {
				// Log error, if any, and continue checking periodically.
				glog.Errorf("Error verifying GCE PD (%q) is detached: %v", c.pdName, err)
			} else if allPathsRemoved {
				// All paths to the PD have been succefully removed
				unmountPDAndRemoveGlobalPath(c)
				glog.Infof("Successfully detached GCE PD %q.", c.pdName)
				return
			}

			// Sleep then check again
			glog.V(3).Infof("Waiting for GCE PD %q to detach.", c.pdName)
			time.Sleep(checkSleepDuration)
		}

	}

	glog.Errorf("Failed to detach GCE PD %q. One or more mount paths was not removed.", c.pdName)
}
Ejemplo n.º 21
0
// Run the main goroutine responsible for watching and syncing jobs.
func (jm *JobController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go jm.jobController.Run(stopCh)
	go jm.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(jm.worker, time.Second, stopCh)
	}
	<-stopCh
	glog.Infof("Shutting down Job Manager")
	jm.queue.ShutDown()
}
Ejemplo n.º 22
0
// Run begins watching and syncing.
func (rm *ReplicationManager) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go rm.rcController.Run(stopCh)
	go rm.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(rm.worker, time.Second, stopCh)
	}
	<-stopCh
	glog.Infof("Shutting down RC Manager")
	rm.queue.ShutDown()
}
Ejemplo n.º 23
0
// Run starts the leader election loop
func (le *LeaderElector) Run() {
	defer func() {
		util.HandleCrash()
		le.config.Callbacks.OnStoppedLeading()
	}()
	le.acquire()
	stop := make(chan struct{})
	go le.config.Callbacks.OnStartedLeading(stop)
	le.renew()
	close(stop)
}
Ejemplo n.º 24
0
// spawn a goroutine to execute a func, immediately returns a chan that closes
// upon completion of the func. returns a nil signal chan if the given func is nil.
func After(f func()) Signal {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		defer util.HandleCrash()
		if f != nil {
			f()
		}
	}()
	return Signal(ch)
}
Ejemplo n.º 25
0
// Run begins watching and syncing.
func (rsc *ReplicaSetController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go rsc.rsController.Run(stopCh)
	go rsc.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(rsc.worker, time.Second, stopCh)
	}
	<-stopCh
	glog.Infof("Shutting down ReplicaSet Controller")
	rsc.queue.ShutDown()
}
Ejemplo n.º 26
0
// Run begins quota controller using the specified number of workers
func (rq *ResourceQuotaController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go rq.rqController.Run(stopCh)
	go rq.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(rq.worker, time.Second, stopCh)
	}
	go util.Until(func() { rq.enqueueAll() }, rq.resyncPeriod(), stopCh)
	<-stopCh
	glog.Infof("Shutting down ResourceQuotaController")
	rq.queue.ShutDown()
}
Ejemplo n.º 27
0
// Run begins watching and syncing daemon sets.
func (dsc *DaemonSetsController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go dsc.dsController.Run(stopCh)
	go dsc.podController.Run(stopCh)
	go dsc.nodeController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(dsc.worker, time.Second, stopCh)
	}
	<-stopCh
	glog.Infof("Shutting down Daemon Set Controller")
	dsc.queue.ShutDown()
}
Ejemplo n.º 28
0
// Run begins watching and syncing.
func (dc *DeploymentController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go dc.dController.Run(stopCh)
	go dc.rcController.Run(stopCh)
	go dc.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(dc.worker, time.Second, stopCh)
	}
	<-stopCh
	glog.Infof("Shutting down deployment controller")
	dc.queue.ShutDown()
}
Ejemplo n.º 29
0
// assumes that caller has obtained state lock
func (k *KubernetesExecutor) doShutdown(driver bindings.ExecutorDriver) {
	defer func() {
		log.Errorf("exiting with unclean shutdown: %v", recover())
		if k.exitFunc != nil {
			k.exitFunc(1)
		}
	}()

	(&k.state).transitionTo(terminalState)

	// signal to all listeners that this KubeletExecutor is done!
	close(k.terminate)
	close(k.updateChan)
	close(k.nodeInfos)

	if k.shutdownAlert != nil {
		func() {
			util.HandleCrash()
			k.shutdownAlert()
		}()
	}

	log.Infoln("Stopping executor driver")
	_, err := driver.Stop()
	if err != nil {
		log.Warningf("failed to stop executor driver: %v", err)
	}

	log.Infoln("Shutdown the executor")

	// according to docs, mesos will generate TASK_LOST updates for us
	// if needed, so don't take extra time to do that here.
	k.tasks = map[string]*kuberTask{}

	select {
	// the main Run() func may still be running... wait for it to finish: it will
	// clear the pod configuration cleanly, telling k8s "there are no pods" and
	// clean up resources (pods, volumes, etc).
	case <-k.kubeletFinished:

	//TODO(jdef) attempt to wait for events to propagate to API server?

	// TODO(jdef) extract constant, should be smaller than whatever the
	// slave graceful shutdown timeout period is.
	case <-time.After(15 * time.Second):
		log.Errorf("timed out waiting for kubelet Run() to die")
	}
	log.Infoln("exiting")
	if k.exitFunc != nil {
		k.exitFunc(0)
	}
}
Ejemplo n.º 30
0
// Open the connection and create channels for reading and writing.
func (conn *Conn) Open(w http.ResponseWriter, req *http.Request) ([]io.ReadWriteCloser, error) {
	go func() {
		defer util.HandleCrash()
		defer conn.Close()
		websocket.Server{Handshake: conn.handshake, Handler: conn.handle}.ServeHTTP(w, req)
	}()
	<-conn.ready
	rwc := make([]io.ReadWriteCloser, len(conn.channels))
	for i := range conn.channels {
		rwc[i] = conn.channels[i]
	}
	return rwc, nil
}