Example #1
// Runs e; will not return until stopCh is closed. workers determines how many
// endpoints will be handled in parallel.
func (e *endpointController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go e.serviceController.Run(stopCh)
	go e.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(e.worker, time.Second, stopCh)
	}
	go func() {
		defer util.HandleCrash()
		time.Sleep(5 * time.Minute) // give time for our cache to fill
		e.checkLeftoverEndpoints()
	}()
	<-stopCh
	e.queue.ShutDown()
}
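Every example in this collection defers util.HandleCrash() at the top of a goroutine so that a panic inside the goroutine is recovered and logged instead of killing the whole process. As a rough orientation, a minimal stdlib-only sketch of such a helper might look like the following (the real Kubernetes helper logs through glog and supports a flag to re-panic; handleCrash here is an illustrative stand-in, not the actual implementation):

package main

import (
	"log"
	"runtime"
)

// handleCrash recovers from a panic in the current goroutine and logs the
// panic value together with a stack trace, mimicking util.HandleCrash.
func handleCrash() {
	if r := recover(); r != nil {
		buf := make([]byte, 4096)
		n := runtime.Stack(buf, false)
		log.Printf("recovered from panic: %v\n%s", r, buf[:n])
	}
}

func main() {
	done := make(chan struct{})
	go func() {
		defer close(done) // runs after handleCrash, so consumers still unblock
		defer handleCrash()
		panic("worker blew up") // logged, not fatal to the process
	}()
	<-done
	log.Print("process still alive")
}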
Example #2
func (rm *ReplicationManager) WatchControllers() {
	watchChannel := make(chan *etcd.Response)
	go func() {
		defer util.HandleCrash()
		defer func() {
			close(watchChannel)
		}()
		rm.etcdClient.Watch("/registry/controllers", 0, true, watchChannel, nil)
	}()

	for {
		watchResponse, ok := <-watchChannel
		if !ok {
			// watchChannel has been closed. Let the util.Forever() that
			// called us call us again.
			return
		}
		if watchResponse == nil {
			time.Sleep(time.Second * 10)
			continue
		}
		log.Printf("Got watch: %#v", watchResponse)
		controller, err := rm.handleWatchResponse(watchResponse)
		if err != nil {
			log.Printf("Error handling data: %#v, %#v", err, watchResponse)
			continue
		}
		rm.syncReplicationController(*controller)
	}
}
Example #3
// UpdateAllContainers updates information about all containers.
// Callers should let one call to UpdateAllContainers finish before
// calling again, or risk having new info getting clobbered by delayed
// old info.
func (p *PodCache) UpdateAllContainers() {
	// TODO: this is silly, we should pro-actively update the pod status when
	// the API server makes changes.
	p.clearNodeStatus()

	ctx := api.NewContext()
	pods, err := p.pods.ListPods(ctx, labels.Everything())
	if err != nil {
		glog.Errorf("Error getting pod list: %v", err)
		return
	}

	// TODO: this algorithm is 1 goroutine & RPC per pod. With a little work,
	// it should be possible to make it 1 per *node*, which will be important
	// at very large scales. (To be clear, the goroutines shouldn't matter--
	// it's the RPCs that need to be minimized.)
	var wg sync.WaitGroup
	for i := range pods.Items {
		pod := &pods.Items[i]
		wg.Add(1)
		go func() {
			defer util.HandleCrash()
			defer wg.Done()
			err := p.updatePodStatus(pod)
			if err != nil && err != client.ErrPodInfoNotAvailable {
				glog.Errorf("Error getting info for pod %v/%v: %v", pod.Namespace, pod.Name, err)
			}
		}()
	}
	wg.Wait()
}
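The TODO above calls for one RPC per node instead of one per pod. A hedged sketch of that reshaping, grouping work items by a key before fanning out one goroutine per group (the pod struct and field names are illustrative only):

package main

import (
	"fmt"
	"sync"
)

type pod struct{ name, node string }

func main() {
	pods := []pod{{"a", "node1"}, {"b", "node1"}, {"c", "node2"}}

	// Group pods by node so the fan-out below issues one batched
	// status fetch (and one goroutine) per node, not per pod.
	byNode := map[string][]pod{}
	for _, p := range pods {
		byNode[p.node] = append(byNode[p.node], p)
	}

	var wg sync.WaitGroup
	for node, group := range byNode {
		wg.Add(1)
		go func(node string, group []pod) {
			defer wg.Done()
			// A single per-node status RPC would go here.
			fmt.Printf("fetching status for %d pods on %s\n", len(group), node)
		}(node, group)
	}
	wg.Wait()
}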
Example #4
func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
	return func(pod *api.Pod, err error) {
		glog.Errorf("Error scheduling %v: %v; retrying", pod.Name, err)
		backoff.gc()
		// Retry asynchronously.
		// Note that this is extremely rudimentary and we need a more real error handling path.
		go func() {
			defer util.HandleCrash()
			podID := pod.Name
			podNamespace := pod.Namespace
			// Stash the scheduler failure count so it can be restored after the pod is re-fetched below.
			schedulerFailureCount := pod.Status.SchedulerFailureCount
			backoff.wait(podID)
			// Get the pod again; it may have changed/been scheduled already.
			pod = &api.Pod{}
			err := factory.Client.Get().Namespace(podNamespace).Path("pods").Path(podID).Do().Into(pod)
			if err != nil {
				glog.Errorf("Error getting pod %v for retry: %v; abandoning", podID, err)
				return
			}
			pod.Status.SchedulerFailureCount = schedulerFailureCount
			if pod.Status.Host == "" {
				podQueue.Add(pod.Name, pod)
			}
		}()
	}
}
Example #5
func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
	return func(pod *api.Pod, err error) {
		if err == scheduler.ErrNoNodesAvailable {
			glog.V(4).Infof("Unable to schedule %v %v: no nodes are registered to the cluster; waiting", pod.Namespace, pod.Name)
		} else {
			glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
		}
		backoff.gc()
		// Retry asynchronously.
		// Note that this is extremely rudimentary and we need a more real error handling path.
		go func() {
			defer util.HandleCrash()
			podID := pod.Name
			podNamespace := pod.Namespace
			backoff.wait(podID)
			// Get the pod again; it may have changed/been scheduled already.
			pod = &api.Pod{}
			err := factory.Client.Get().Namespace(podNamespace).Resource("pods").Name(podID).Do().Into(pod)
			if err != nil {
				if !errors.IsNotFound(err) {
					glog.Errorf("Error getting pod %v for retry: %v; abandoning", podID, err)
				}
				return
			}
			if pod.Spec.NodeName == "" {
				podQueue.Add(pod)
			}
		}()
	}
}
Example #6
// receive reads result from the decoder in a loop and sends down the result channel.
func (sw *StreamWatcher) receive() {
	defer close(sw.result)
	defer sw.Stop()
	defer util.HandleCrash()
	for {
		action, obj, err := sw.source.Decode()
		if err != nil {
			// Ignore expected error.
			if sw.stopping() {
				return
			}
			switch err {
			case io.EOF:
				// watch closed normally
			case io.ErrUnexpectedEOF:
				glog.V(1).Infof("Unexpected EOF during watch stream event decoding: %v", err)
			default:
				msg := "Unable to decode an event from the watch stream: %v"
				if util.IsProbableEOF(err) {
					glog.V(5).Infof(msg, err)
				} else {
					glog.Errorf(msg, err)
				}
			}
			return
		}
		sw.result <- Event{
			Type:   action,
			Object: obj,
		}
	}
}
Example #7
// translate pulls stuff from etcd, converts, and pushes out the outgoing channel. Meant to be
// called as a goroutine.
func (w *etcdWatcher) translate() {
	defer close(w.outgoing)
	defer util.HandleCrash()

	for {
		select {
		case err := <-w.etcdError:
			if err != nil {
				w.emit(watch.Event{
					Type:   watch.Error,
					Object: &api.Status{
						Status:  api.StatusFailure,
						Message: err.Error(),
					},
				})
			}
			return
		case <-w.userStop:
			w.etcdStop <- true
			return
		case res, ok := <-w.etcdIncoming:
			if ok {
				if curLen := int64(len(w.etcdIncoming)); watchChannelHWM.Check(curLen) {
					// Monitor if this gets backed up, and how much.
					glog.V(2).Infof("watch: %v objects queued in channel.", curLen)
				}
				w.sendResult(res)
			}
			// If !ok, don't return here-- must wait for etcdError channel
			// to give an error or be closed.
		}
	}
}
Example #8
func (cc *cadvisorClient) exportHTTP(port uint) error {
	mux := http.NewServeMux()
	err := cadvisorHttp.RegisterHandlers(mux, cc, "", "", "", "", "/metrics")
	if err != nil {
		return err
	}

	serv := &http.Server{
		Addr:    fmt.Sprintf(":%d", port),
		Handler: mux,
	}

	// TODO(vmarmol): Remove this when the cAdvisor port is once again free.
	// If export failed, retry in the background until we are able to bind.
	// This allows an existing cAdvisor to be killed before this one registers.
	go func() {
		defer util.HandleCrash()

		err := serv.ListenAndServe()
		for err != nil {
			glog.Infof("Failed to register cAdvisor on port %d, retrying. Error: %v", port, err)
			time.Sleep(time.Minute)
			err = serv.ListenAndServe()
		}
	}()

	return nil
}
Example #9
// addServiceOnPort starts listening for a new service, returning the serviceInfo.
// Pass proxyPort=0 to allocate a random port. The timeout only applies to UDP
// connections, for now.
func (proxier *Proxier) addServiceOnPort(service string, protocol api.Protocol, proxyPort int, timeout time.Duration) (*serviceInfo, error) {
	sock, err := newProxySocket(protocol, proxier.listenAddress, proxyPort)
	if err != nil {
		return nil, err
	}
	_, portStr, err := net.SplitHostPort(sock.Addr().String())
	if err != nil {
		sock.Close()
		return nil, err
	}
	portNum, err := strconv.Atoi(portStr)
	if err != nil {
		sock.Close()
		return nil, err
	}
	si := &serviceInfo{
		proxyPort: portNum,
		protocol:  protocol,
		active:    true,
		socket:    sock,
		timeout:   timeout,
	}
	proxier.setServiceInfo(service, si)

	glog.V(1).Infof("Proxying for service %q on %s port %d", service, protocol, portNum)
	go func(service string, info *serviceInfo, proxier *Proxier) {
		defer util.HandleCrash()
		sock.ProxyLoop(service, info, proxier)
	}(service, si, proxier)

	return si, nil
}
Example #10
func (proxier *Proxier) startAccepting(service string, sock proxySocket) {
	glog.Infof("Listening for %s on %s:%s", service, sock.Addr().Network(), sock.Addr().String())
	go func(service string, proxier *Proxier) {
		defer util.HandleCrash()
		sock.ProxyLoop(service, proxier)
	}(service, proxier)
}
Example #11
// translate pulls stuff from etcd, converts, and pushes out the outgoing channel. Meant to be
// called as a goroutine.
func (w *etcdWatcher) translate() {
	defer close(w.outgoing)
	defer util.HandleCrash()

	for {
		select {
		case err := <-w.etcdError:
			if err != nil {
				w.emit(watch.Event{
					Type:   watch.Error,
					Object: &api.Status{
						Status:  api.StatusFailure,
						Message: err.Error(),
					},
				})
			}
			return
		case <-w.userStop:
			w.etcdStop <- true
			return
		case res, ok := <-w.etcdIncoming:
			if ok {
				w.sendResult(res)
			}
			// If !ok, don't return here-- must wait for etcdError channel
			// to give an error or be closed.
		}
	}
}
Example #12
// Apply the new settings to the specified pod. updateComplete is called when the update completes.
func (p *podWorkers) UpdatePod(pod *api.Pod, mirrorPod *api.Pod, updateComplete func()) {
	uid := pod.UID
	var podUpdates chan workUpdate
	var exists bool

	p.podLock.Lock()
	defer p.podLock.Unlock()
	if podUpdates, exists = p.podUpdates[uid]; !exists {
		// We need to have a buffer here, because checkForUpdates() method that
		// puts an update into channel is called from the same goroutine where
		// the channel is consumed. However, it is guaranteed that in such case
		// the channel is empty, so buffer of size 1 is enough.
		podUpdates = make(chan workUpdate, 1)
		p.podUpdates[uid] = podUpdates
		go func() {
			defer util.HandleCrash()
			p.managePodLoop(podUpdates)
		}()
	}
	if !p.isWorking[pod.UID] {
		p.isWorking[pod.UID] = true
		podUpdates <- workUpdate{
			pod:              pod,
			mirrorPod:        mirrorPod,
			updateCompleteFn: updateComplete,
		}
	} else {
		p.lastUndeliveredWorkUpdate[pod.UID] = workUpdate{
			pod:              pod,
			mirrorPod:        mirrorPod,
			updateCompleteFn: updateComplete,
		}
	}
}
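The size-1 buffer called out in the comment above matters because the goroutine that drains podUpdates can also be the one that enqueues the next update; on an unbuffered channel that self-send would deadlock. A toy sketch of the pattern (illustrative names, not the podWorkers implementation):

package main

import "fmt"

func main() {
	// One buffered slot: the consumer re-enqueues at most one follow-up
	// from inside its own loop, so a single slot prevents deadlock.
	updates := make(chan string, 1)
	done := make(chan struct{})
	go func() {
		defer close(done)
		for u := range updates {
			fmt.Println("processing", u)
			if u == "first" {
				updates <- "follow-up" // self-send; needs the buffer
			} else {
				close(updates) // stand-in for running out of queued work
			}
		}
	}()
	updates <- "first"
	<-done
}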
Example #13
// MakeAsync takes a function and executes it, delivering the result in the way required
// by RESTStorage's Update, Delete, and Create methods.
func MakeAsync(fn WorkFunc) <-chan interface{} {
	channel := make(chan interface{})
	go func() {
		defer util.HandleCrash()
		obj, err := fn()
		if err != nil {
			status := http.StatusInternalServerError
			switch {
			case tools.IsEtcdTestFailed(err):
				status = http.StatusConflict
			}
			channel <- &api.Status{
				Status:  api.StatusFailure,
				Message: err.Error(),
				Code:    status,
			}
		} else {
			channel <- obj
		}
		// 'close' is used to signal that no further values will
		// be written to the channel. Not strictly necessary, but
		// also won't hurt.
		close(channel)
	}()
	return channel
}
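Callers of MakeAsync receive exactly one value from the returned channel: either the object produced by the WorkFunc or a failure description. A simplified, self-contained sketch of that call pattern (makeAsync here delivers the raw error where the real version wraps it in an api.Status):

package main

import (
	"errors"
	"fmt"
)

// WorkFunc mirrors the signature MakeAsync expects: perform some work and
// return either an object or an error.
type WorkFunc func() (interface{}, error)

// makeAsync is a stripped-down stand-in for the MakeAsync above.
func makeAsync(fn WorkFunc) <-chan interface{} {
	ch := make(chan interface{}, 1)
	go func() {
		defer close(ch)
		obj, err := fn()
		if err != nil {
			ch <- err
			return
		}
		ch <- obj
	}()
	return ch
}

func main() {
	ok := <-makeAsync(func() (interface{}, error) { return "deleted", nil })
	fmt.Println(ok) // deleted

	failed := <-makeAsync(func() (interface{}, error) { return nil, errors.New("conflict") })
	fmt.Println(failed) // conflict
}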
Example #14
func (udp *udpProxySocket) getBackendConn(activeClients *clientCache, cliAddr net.Addr, proxier *Proxier, service ServicePortName, timeout time.Duration) (net.Conn, error) {
	activeClients.mu.Lock()
	defer activeClients.mu.Unlock()

	svrConn, found := activeClients.clients[cliAddr.String()]
	if !found {
		// TODO: This could spin up a new goroutine to make the outbound connection,
		// and keep accepting inbound traffic.
		glog.V(2).Infof("New UDP connection from %s", cliAddr)
		var err error
		svrConn, err = tryConnect(service, cliAddr, "udp", proxier)
		if err != nil {
			return nil, err
		}
		if err = svrConn.SetDeadline(time.Now().Add(timeout)); err != nil {
			glog.Errorf("SetDeadline failed: %v", err)
			return nil, err
		}
		activeClients.clients[cliAddr.String()] = svrConn
		go func(cliAddr net.Addr, svrConn net.Conn, activeClients *clientCache, timeout time.Duration) {
			defer util.HandleCrash()
			udp.proxyClient(cliAddr, svrConn, activeClients, timeout)
		}(cliAddr, svrConn, activeClients, timeout)
	}
	return svrConn, nil
}
Example #15
func (udp *udpProxySocket) getBackendConn(activeClients *clientCache, cliAddr net.Addr, proxier *Proxier, service string, timeout time.Duration) (net.Conn, error) {
	activeClients.mu.Lock()
	defer activeClients.mu.Unlock()

	svrConn, found := activeClients.clients[cliAddr.String()]
	if !found {
		// TODO: This could spin up a new goroutine to make the outbound connection,
		// and keep accepting inbound traffic.
		glog.V(2).Infof("New UDP connection from %s", cliAddr)
		endpoint, err := proxier.loadBalancer.NextEndpoint(service, cliAddr)
		if err != nil {
			glog.Errorf("Couldn't find an endpoint for %s %v", service, err)
			return nil, err
		}
		glog.V(3).Infof("Mapped service %q to endpoint %s", service, endpoint)
		svrConn, err = net.DialTimeout("udp", endpoint, endpointDialTimeout)
		if err != nil {
			// TODO: Try another endpoint?
			glog.Errorf("Dial failed: %v", err)
			return nil, err
		}
		activeClients.clients[cliAddr.String()] = svrConn
		go func(cliAddr net.Addr, svrConn net.Conn, activeClients *clientCache, timeout time.Duration) {
			defer util.HandleCrash()
			udp.proxyClient(cliAddr, svrConn, activeClients, timeout)
		}(cliAddr, svrConn, activeClients, timeout)
	}
	return svrConn, nil
}
Example #16
// addServiceOnPort starts listening for a new service, returning the serviceInfo.
// Pass proxyPort=0 to allocate a random port. The timeout only applies to UDP
// connections, for now.
func (proxier *Proxier) addServiceOnPort(service ServicePortName, protocol api.Protocol, proxyPort int, timeout time.Duration) (*serviceInfo, error) {
	sock, err := newProxySocket(protocol, proxier.listenIP, proxyPort)
	if err != nil {
		return nil, err
	}
	_, portStr, err := net.SplitHostPort(sock.Addr().String())
	if err != nil {
		sock.Close()
		return nil, err
	}
	portNum, err := strconv.Atoi(portStr)
	if err != nil {
		sock.Close()
		return nil, err
	}
	si := &serviceInfo{
		proxyPort:           portNum,
		protocol:            protocol,
		socket:              sock,
		timeout:             timeout,
		sessionAffinityType: api.ServiceAffinityNone, // default
		stickyMaxAgeMinutes: 180,                     // TODO: parameterize this in the API.
	}
	proxier.setServiceInfo(service, si)

	glog.V(2).Infof("Proxying for service %q on %s port %d", service, protocol, portNum)
	go func(service ServicePortName, proxier *Proxier) {
		defer util.HandleCrash()
		atomic.AddInt32(&proxier.numProxyLoops, 1)
		sock.ProxyLoop(service, si, proxier)
		atomic.AddInt32(&proxier.numProxyLoops, -1)
	}(service, proxier)

	return si, nil
}
Example #17
// Watch etcd for changes, receives config objects from the etcd client watch.
// This function loops forever and is intended to be run as a goroutine.
func (kl *Kubelet) WatchEtcd(watchChannel <-chan *etcd.Response, changeChannel chan<- []api.ContainerManifest) {
	defer util.HandleCrash()
	for {
		watchResponse := <-watchChannel
		log.Printf("Got change: %#v", watchResponse)

		// This means the channel has been closed.
		if watchResponse == nil {
			return
		}

		if watchResponse.Node == nil || len(watchResponse.Node.Value) == 0 {
			log.Printf("No nodes field: %#v", watchResponse)
			if watchResponse.Node != nil {
				log.Printf("Node: %#v", watchResponse.Node)
			}
			// Skip this response; dereferencing a nil Node below would panic.
			continue
		}
		log.Printf("Got data: %v", watchResponse.Node.Value)
		var manifests []api.ContainerManifest
		if err := kl.ExtractYAMLData([]byte(watchResponse.Node.Value), &manifests); err != nil {
			continue
		}
		// Ok, we have a valid configuration, send to channel for
		// rejiggering.
		changeChannel <- manifests
	}
}
Example #18
func MakeAsync(fn func() interface{}) <-chan interface{} {
	channel := make(chan interface{}, 1)
	go func() {
		defer util.HandleCrash()
		channel <- fn()
	}()
	return channel
}
Example #19
// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called
// as a goroutine.
func (w *etcdWatcher) etcdWatch(client EtcdGetSet, key string) {
	defer util.HandleCrash()
	defer close(w.etcdCallEnded)
	_, err := client.Watch(key, 0, w.list, w.etcdIncoming, w.etcdStop)
	if err != etcd.ErrWatchStoppedByUser {
		glog.Errorf("etcd.Watch stopped unexpectedly: %v (%#v)", err, err)
	}
}
Example #20
// SyncPods synchronizes the configured list of pods (desired state) with the host current state.
func (kl *Kubelet) SyncPods(pods []Pod) error {
	glog.Infof("Desired [%s]: %+v", kl.hostname, pods)
	var err error
	dockerIdsToKeep := map[DockerID]empty{}
	keepChannel := make(chan DockerID, defaultChanSize)
	waitGroup := sync.WaitGroup{}

	dockerContainers, err := getKubeletDockerContainers(kl.dockerClient)
	if err != nil {
		glog.Errorf("Error listing containers %#v", dockerContainers)
		return err
	}

	// Check for any containers that need starting
	for i := range pods {
		waitGroup.Add(1)
		go func(index int) {
			defer util.HandleCrash()
			defer waitGroup.Done()
			// Necessary to dereference by index here because otherwise every
			// goroutine would share and reuse the same loop variable.
			err := kl.syncPod(&pods[index], dockerContainers, keepChannel)
			if err != nil {
				glog.Errorf("Error syncing pod: %v; skipping.", err)
			}
		}(i)
	}
	ch := make(chan bool)
	go func() {
		for id := range keepChannel {
			dockerIdsToKeep[id] = empty{}
		}
		ch <- true
	}()
	if len(pods) > 0 {
		waitGroup.Wait()
	}
	close(keepChannel)
	<-ch

	// Kill any containers we don't need
	existingContainers, err := getKubeletDockerContainers(kl.dockerClient)
	if err != nil {
		glog.Errorf("Error listing containers: %v", err)
		return err
	}
	for id, container := range existingContainers {
		if _, ok := dockerIdsToKeep[id]; !ok {
			glog.Infof("Killing: %s", id)
			err = kl.killContainer(*container)
			if err != nil {
				glog.Errorf("Error killing container: %v", err)
			}
		}
	}
	return err
}
Example #21
// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler
func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) {

	if schedulingErr == noSuchPodErr {
		log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name)
		return
	}

	log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr)
	defer util.HandleCrash()

	// default upstream scheduler passes pod.Name as binding.PodID
	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
	podKey, err := podtask.MakePodKey(ctx, pod.Name)
	if err != nil {
		log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err)
		return
	}

	k.backoff.GC()
	k.api.Lock()
	defer k.api.Unlock()

	switch task, state := k.api.tasks().ForPod(podKey); state {
	case podtask.StateUnknown:
		// if we don't have a mapping here any more then someone deleted the pod
		log.V(2).Infof("Could not resolve pod to task, aborting pod reschdule: %s", podKey)
		return

	case podtask.StatePending:
		if task.Has(podtask.Launched) {
			log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
			return
		}
		breakoutEarly := queue.BreakChan(nil)
		if schedulingErr == noSuitableOffersErr {
			log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
			breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool {
				k.api.Lock()
				defer k.api.Unlock()
				switch task, state := k.api.tasks().Get(task.ID); state {
				case podtask.StatePending:
					return !task.Has(podtask.Launched) && task.AcceptOffer(offer)
				default:
					// no point in continuing to check for matching offers
					return true
				}
			}))
		}
		delay := k.backoff.Get(podKey)
		log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
		k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly})

	default:
		log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
	}
}
Example #22
// Runs e; will not return until stopCh is closed. workers determines how many
// endpoints will be handled in parallel.
func (e *EndpointController) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go e.serviceController.Run(stopCh)
	go e.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(e.worker, time.Second, stopCh)
	}
	<-stopCh
	e.queue.ShutDown()
}
Example #23
// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called
// as a goroutine.
func (w *etcdWatcher) etcdWatch(client EtcdGetSet, key string) {
	defer util.HandleCrash()
	_, err := client.Watch(key, 0, w.list, w.etcdIncoming, w.etcdStop)
	if err == etcd.ErrWatchStoppedByUser {
		// etcd doesn't close the channel in this case.
		close(w.etcdIncoming)
	} else {
		glog.Errorf("etcd.Watch stopped unexpectedly: %v (%#v)", err, err)
	}
}
Example #24
// spawn a goroutine to execute a func, immediately returning a chan that closes
// upon completion of the func. If the given func is nil, the chan closes immediately.
func After(f func()) Signal {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		defer util.HandleCrash()
		if f != nil {
			f()
		}
	}()
	return Signal(ch)
}
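After hands callers a completion signal they can block on or select over. A short usage sketch with a local stand-in (crash handling elided; after and Signal here mirror the helper above):

package main

import "fmt"

// Signal mirrors After's return type: a channel closed when the work is done.
type Signal <-chan struct{}

// after is a simplified stand-in for the After helper above.
func after(f func()) Signal {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		if f != nil {
			f()
		}
	}()
	return Signal(ch)
}

func main() {
	done := after(func() { fmt.Println("cleanup ran") })
	<-done // unblocks once f returns; immediately after spawn if f was nil
	fmt.Println("continuing")
}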
Example #25
// Verify that the specified persistent disk device has been successfully detached, retrying if it fails.
// This function is intended to be called asynchronously as a goroutine.
func verifyDetached(pd *gcePersistentDisk, gce cloudprovider.Interface) {
	defer util.HandleCrash()

	// Setting bufferSize to 0 so that when senders send, they are blocked until we receive. This avoids the need for a separate exit check.
	ch, err := detachCleanupManager.Start(pd.pdName, 0 /* bufferSize */)
	if err != nil {
		glog.Errorf("Error adding %q to detachCleanupManager: %v", pd.pdName, err)
		return
	}
	defer detachCleanupManager.Close(pd.pdName)

	devicePaths := getDiskByIdPaths(pd)
	for numRetries := 0; numRetries < maxRetries; numRetries++ {
		for numChecks := 0; numChecks < maxChecks; numChecks++ {
			select {
			case <-ch:
				glog.Warningf("Terminating GCE PD %q detach verification. Another attach/detach call was made for this PD.", pd.pdName)
				return
			default:
				allPathsRemoved := true
				for _, path := range devicePaths {
					if err := udevadmChangeToDrive(path); err != nil {
						// udevadm errors should not block disk detachment, log and continue
						glog.Errorf("%v", err)
					}
					if exists, err := pathExists(path); err != nil {
						glog.Errorf("Error check path: %v", err)
						return
					} else {
						allPathsRemoved = allPathsRemoved && !exists
					}
				}
				if allPathsRemoved {
					// All paths to the PD have been successfully removed
					glog.V(5).Infof("Successfully detached GCE PD %q.", pd.pdName)
					return
				}

				// Sleep then check again
				glog.V(5).Infof("Waiting for GCE PD %q to detach.", pd.pdName)
				time.Sleep(checkSleepDuration)
			}
		}

		// Try detaching disk again
		glog.Warningf("Timed out waiting for GCE PD %q to detach. Retrying detach.", pd.pdName)
		if err := gce.(*gce_cloud.GCECloud).DetachDisk(pd.pdName); err != nil {
			glog.Errorf("Error on retry detach PD %q: %v", pd.pdName, err)
			return
		}
	}

	glog.Errorf("Could not detach GCE PD %q. One or more mount paths was not removed.", pd.pdName)
}
Example #26
// Run begins watching and syncing.
func (rm *ReplicationManager) Run(workers int, stopCh <-chan struct{}) {
	defer util.HandleCrash()
	go rm.rcController.Run(stopCh)
	go rm.podController.Run(stopCh)
	for i := 0; i < workers; i++ {
		go util.Until(rm.worker, time.Second, stopCh)
	}
	<-stopCh
	glog.Infof("Shutting down RC Manager")
	rm.queue.ShutDown()
}
Example #27
// Run begins processing items, and will continue until a value is sent down stopCh.
// It's an error to call Run more than once.
// Run blocks; call via go.
func (c *Controller) Run(stopCh <-chan struct{}) {
	defer util.HandleCrash()
	cache.NewReflector(
		c.config.ListerWatcher,
		c.config.ObjectType,
		c.config.Queue,
		c.config.FullResyncPeriod,
	).RunUntil(stopCh)

	util.Until(c.processLoop, time.Second, stopCh)
}
Example #28
// Waits forever for the operation to complete; call via go when
// the operation is created. Sets op.finished when the operation
// does complete, and closes the notify channel, in case there
// are any WaitFor() calls in progress.
// Does not keep op locked while waiting.
func (op *Operation) wait() {
	defer util.HandleCrash()
	result := <-op.awaiting

	op.lock.Lock()
	defer op.lock.Unlock()
	op.result = result
	finished := time.Now()
	op.finished = &finished
	close(op.notify)
}
Example #29
// TODO: move this into the kubelet itself
func MonitorCAdvisor(k *Kubelet, cp uint) {
	defer util.HandleCrash()
	// TODO: Monitor this connection, reconnect if needed?
	glog.V(1).Infof("Trying to create cadvisor client.")
	cadvisorClient, err := cadvisor.NewClient("http://127.0.0.1:" + strconv.Itoa(int(cp)))
	if err != nil {
		glog.Errorf("Error on creating cadvisor client: %v", err)
		return
	}
	glog.V(1).Infof("Successfully created cadvisor client.")
	k.SetCadvisorClient(cadvisorClient)
}
Example #30
// SyncManifests synchronizes the configured list of containers (desired state) with the host current state.
func (kl *Kubelet) SyncManifests(config []api.ContainerManifest) error {
	glog.Infof("Desired: %+v", config)
	var err error
	dockerIdsToKeep := map[DockerID]empty{}
	keepChannel := make(chan DockerID)
	waitGroup := sync.WaitGroup{}

	// Check for any containers that need starting
	for ix := range config {
		waitGroup.Add(1)
		go func(index int) {
			defer util.HandleCrash()
			defer waitGroup.Done()
			// Necessary to dereference by index here because otherwise every
			// goroutine would share and reuse the same loop variable.
			err := kl.syncManifest(&config[index], keepChannel)
			if err != nil {
				glog.Errorf("Error syncing manifest: %v; skipping.", err)
			}
		}(ix)
	}
	ch := make(chan bool)
	go func() {
		for id := range keepChannel {
			dockerIdsToKeep[id] = empty{}
		}
		ch <- true
	}()
	if len(config) > 0 {
		waitGroup.Wait()
	}
	close(keepChannel)
	<-ch

	// Kill any containers we don't need
	existingContainers, err := kl.getDockerContainers()
	if err != nil {
		glog.Errorf("Error listing containers: %v", err)
		return err
	}
	for id, container := range existingContainers {
		if _, ok := dockerIdsToKeep[id]; !ok {
			glog.Infof("Killing: %s", id)
			err = kl.killContainer(container)
			if err != nil {
				glog.Errorf("Error killing container: %v", err)
			}
		}
	}
	return err
}