// UpgradeResponse upgrades an HTTP response to one that supports multiplexed
// streams. newStreamHandler will be called synchronously whenever the
// other end of the upgraded connection creates a new stream.
func (u responseUpgrader) UpgradeResponse(w http.ResponseWriter, req *http.Request, newStreamHandler httpstream.NewStreamHandler) httpstream.Connection {
	connectionHeader := strings.ToLower(req.Header.Get(httpstream.HeaderConnection))
	upgradeHeader := strings.ToLower(req.Header.Get(httpstream.HeaderUpgrade))
	if !strings.Contains(connectionHeader, strings.ToLower(httpstream.HeaderUpgrade)) || !strings.Contains(upgradeHeader, strings.ToLower(HeaderSpdy31)) {
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "unable to upgrade: missing upgrade headers in request: %#v", req.Header)
		return nil
	}

	hijacker, ok := w.(http.Hijacker)
	if !ok {
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "unable to upgrade: unable to hijack response")
		return nil
	}

	w.Header().Add(httpstream.HeaderConnection, httpstream.HeaderUpgrade)
	w.Header().Add(httpstream.HeaderUpgrade, HeaderSpdy31)
	w.WriteHeader(http.StatusSwitchingProtocols)

	conn, _, err := hijacker.Hijack()
	if err != nil {
		util.HandleError(fmt.Errorf("unable to upgrade: error hijacking response: %v", err))
		return nil
	}

	spdyConn, err := NewServerConnection(conn, newStreamHandler)
	if err != nil {
		util.HandleError(fmt.Errorf("unable to upgrade: error creating SPDY server connection: %v", err))
		return nil
	}

	return spdyConn
}
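// Illustrative sketch (not part of the original source): the upgrade check above
// boils down to case-insensitive substring matching on the Connection and Upgrade
// headers before hijacking the response. A minimal, self-contained version using
// only the standard library, with hypothetical header values:
package main

import (
	"fmt"
	"net/http"
	"strings"
)

// canUpgrade reports whether req carries the headers required for a SPDY/3.1
// upgrade: "Connection: Upgrade" and "Upgrade: SPDY/3.1" (case-insensitive).
func canUpgrade(req *http.Request) bool {
	connection := strings.ToLower(req.Header.Get("Connection"))
	upgrade := strings.ToLower(req.Header.Get("Upgrade"))
	return strings.Contains(connection, "upgrade") && strings.Contains(upgrade, strings.ToLower("SPDY/3.1"))
}

func main() {
	req, _ := http.NewRequest("GET", "http://example.com", nil)
	req.Header.Set("Connection", "Upgrade")
	req.Header.Set("Upgrade", "SPDY/3.1")
	fmt.Println(canUpgrade(req)) // true
}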
// watchHandler watches w and keeps *resourceVersion up to date.
func (r *Reflector) watchHandler(w watch.Interface, resourceVersion *string, resyncCh <-chan time.Time, stopCh <-chan struct{}) error {
	start := time.Now()
	eventCount := 0

	// Stopping the watcher should be idempotent and if we return from this function there's no way
	// we're coming back in with the same watch interface.
	defer w.Stop()

loop:
	for {
		select {
		case <-stopCh:
			return errorStopRequested
		case <-resyncCh:
			return errorResyncRequested
		case event, ok := <-w.ResultChan():
			if !ok {
				break loop
			}
			if event.Type == watch.Error {
				return apierrs.FromObject(event.Object)
			}
			if e, a := r.expectedType, reflect.TypeOf(event.Object); e != nil && e != a {
				util.HandleError(fmt.Errorf("%s: expected type %v, but watch event object had type %v", r.name, e, a))
				continue
			}
			meta, err := meta.Accessor(event.Object)
			if err != nil {
				util.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", r.name, event))
				continue
			}
			newResourceVersion := meta.ResourceVersion()
			switch event.Type {
			case watch.Added:
				r.store.Add(event.Object)
			case watch.Modified:
				r.store.Update(event.Object)
			case watch.Deleted:
				// TODO: Will any consumers need access to the "last known
				// state", which is passed in event.Object? If so, may need
				// to change this.
				r.store.Delete(event.Object)
			default:
				util.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", r.name, event))
			}
			*resourceVersion = newResourceVersion
			r.setLastSyncResourceVersion(newResourceVersion)
			eventCount++
		}
	}

	watchDuration := time.Now().Sub(start)
	if watchDuration < 1*time.Second && eventCount == 0 {
		glog.V(4).Infof("%s: Unexpected watch close - watch lasted less than a second and no items received", r.name)
		return errors.New("very short watch")
	}
	glog.V(4).Infof("%s: Watch close - %v total %v items received", r.name, r.expectedType, eventCount)
	return nil
}
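// Illustrative sketch (not part of the original source): the watch loop above
// uses a labeled break to leave the outer for loop from inside a select (a bare
// break would only exit the select statement). Minimal, self-contained form of
// the idiom, with a hypothetical events channel standing in for w.ResultChan():
package main

import "fmt"

func main() {
	events := make(chan int)
	stop := make(chan struct{})

	go func() {
		for i := 1; i <= 3; i++ {
			events <- i
		}
		close(events) // closing the channel ends the loop below
	}()

loop:
	for {
		select {
		case <-stop:
			break loop
		case ev, ok := <-events:
			if !ok {
				break loop // channel closed: exit the for loop, not just the select
			}
			fmt.Println("event", ev)
		}
	}
	fmt.Println("loop exited")
}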
func forcefullyDeletePod(c client.Interface, pod *api.Pod) {
	var zero int64
	err := c.Pods(pod.Namespace).Delete(pod.Name, &api.DeleteOptions{GracePeriodSeconds: &zero})
	if err != nil {
		util.HandleError(err)
	}
}
// forward dials the remote host specified in req, upgrades the request, starts
// listeners for each port specified in ports, and forwards local connections
// to the remote host via streams.
func (pf *PortForwarder) forward() error {
	var err error

	listenSuccess := false
	for _, port := range pf.ports {
		err = pf.listenOnPort(&port)
		switch {
		case err == nil:
			listenSuccess = true
		default:
			glog.Warningf("Unable to listen on port %d: %v", port.Local, err)
		}
	}

	if !listenSuccess {
		return fmt.Errorf("Unable to listen on any of the requested ports: %v", pf.ports)
	}

	close(pf.Ready)

	// wait for interrupt or conn closure
	select {
	case <-pf.stopChan:
	case <-pf.streamConn.CloseChan():
		util.HandleError(errors.New("lost connection to pod"))
	}

	return nil
}
// RunUntil starts the controller until the provided ch is closed.
func (c *Repair) RunUntil(ch chan struct{}) {
	util.Until(func() {
		if err := c.RunOnce(); err != nil {
			util.HandleError(err)
		}
	}, c.interval, ch)
}
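// Illustrative sketch (not part of the original source): util.Until runs a
// function repeatedly with a fixed period until the stop channel is closed.
// A minimal, self-contained approximation of that behavior using only the
// standard library (the real helper also recovers from panics and jitters):
package main

import (
	"fmt"
	"time"
)

// until calls f every period until stopCh is closed.
func until(f func(), period time.Duration, stopCh <-chan struct{}) {
	for {
		select {
		case <-stopCh:
			return
		default:
		}
		f()
		select {
		case <-stopCh:
			return
		case <-time.After(period):
		}
	}
}

func main() {
	stop := make(chan struct{})
	go until(func() { fmt.Println("tick") }, 100*time.Millisecond, stop)
	time.Sleep(350 * time.Millisecond)
	close(stop)
}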
func (gcc *GCController) gc() {
	terminatedPods, _ := gcc.podStore.List(labels.Everything())
	terminatedPodCount := len(terminatedPods)
	sort.Sort(byCreationTimestamp(terminatedPods))

	deleteCount := terminatedPodCount - gcc.threshold

	if deleteCount > terminatedPodCount {
		deleteCount = terminatedPodCount
	}
	if deleteCount > 0 {
		glog.Infof("garbage collecting %v pods", deleteCount)
	}

	var wait sync.WaitGroup
	for i := 0; i < deleteCount; i++ {
		wait.Add(1)
		go func(namespace string, name string) {
			defer wait.Done()
			if err := gcc.deletePod(namespace, name); err != nil {
				// ignore not founds
				defer util.HandleError(err)
			}
		}(terminatedPods[i].Namespace, terminatedPods[i].Name)
	}
	wait.Wait()
}
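// Illustrative sketch (not part of the original source): the garbage collector
// above fans deletions out to one goroutine per pod and waits for all of them
// with a sync.WaitGroup, passing the identifying values as arguments so each
// goroutine captures its own copy. Minimal, self-contained form of the pattern,
// with a hypothetical deletePod stand-in:
package main

import (
	"fmt"
	"sync"
)

func main() {
	pods := []struct{ namespace, name string }{
		{"default", "pod-a"},
		{"default", "pod-b"},
		{"kube-system", "pod-c"},
	}

	// deletePod is a hypothetical stand-in for the real deletion call.
	deletePod := func(namespace, name string) error {
		fmt.Printf("deleting %s/%s\n", namespace, name)
		return nil
	}

	var wait sync.WaitGroup
	for i := range pods {
		wait.Add(1)
		go func(namespace, name string) {
			defer wait.Done()
			if err := deletePod(namespace, name); err != nil {
				fmt.Println("delete failed:", err)
			}
		}(pods[i].namespace, pods[i].name)
	}
	wait.Wait()
}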
// ServePortForward handles a port forwarding request. A single request is
// kept alive as long as the client is still alive and the connection has not
// been timed out due to idleness. This function handles multiple forwarded
// connections; i.e., multiple `curl http://localhost:8888/` requests will be
// handled by a single invocation of ServePortForward.
func ServePortForward(w http.ResponseWriter, req *http.Request, portForwarder PortForwarder, podName string, uid types.UID, idleTimeout time.Duration, streamCreationTimeout time.Duration) {
	supportedPortForwardProtocols := []string{portforward.PortForwardProtocolV1Name}
	_, err := httpstream.Handshake(req, w, supportedPortForwardProtocols, portforward.PortForwardProtocolV1Name)
	// negotiated protocol isn't currently used server side, but could be in the future
	if err != nil {
		// Handshake writes the error to the client
		util.HandleError(err)
		return
	}

	streamChan := make(chan httpstream.Stream, 1)

	glog.V(5).Infof("Upgrading port forward response")
	upgrader := spdy.NewResponseUpgrader()
	conn := upgrader.UpgradeResponse(w, req, portForwardStreamReceived(streamChan))
	if conn == nil {
		return
	}
	defer conn.Close()

	glog.V(5).Infof("(conn=%p) setting port forwarding streaming connection idle timeout to %v", conn, idleTimeout)
	conn.SetIdleTimeout(idleTimeout)

	h := &portForwardStreamHandler{
		conn:                  conn,
		streamChan:            streamChan,
		streamPairs:           make(map[string]*portForwardStreamPair),
		streamCreationTimeout: streamCreationTimeout,
		pod:                   podName,
		uid:                   uid,
		forwarder:             portForwarder,
	}
	h.run()
}
// run is the main loop for the portForwardStreamHandler. It processes new
// streams, invoking portForward for each complete stream pair. The loop exits
// when the httpstream.Connection is closed.
func (h *portForwardStreamHandler) run() {
	glog.V(5).Infof("(conn=%p) waiting for port forward streams", h.conn)
Loop:
	for {
		select {
		case <-h.conn.CloseChan():
			glog.V(5).Infof("(conn=%p) upgraded connection closed", h.conn)
			break Loop
		case stream := <-h.streamChan:
			requestID := h.requestID(stream)
			streamType := stream.Headers().Get(api.StreamType)
			glog.V(5).Infof("(conn=%p, request=%s) received new stream of type %s", h.conn, requestID, streamType)

			p, created := h.getStreamPair(requestID)
			if created {
				go h.monitorStreamPair(p, time.After(h.streamCreationTimeout))
			}
			if complete, err := p.add(stream); err != nil {
				msg := fmt.Sprintf("error processing stream for request %s: %v", requestID, err)
				util.HandleError(errors.New(msg))
				p.printError(msg)
			} else if complete {
				go h.portForward(p)
			}
		}
	}
}
// RunKubernetesService periodically updates the kubernetes service
func (c *Controller) RunKubernetesService(ch chan struct{}) {
	util.Until(func() {
		if err := c.UpdateKubernetesService(); err != nil {
			util.HandleError(fmt.Errorf("unable to sync kubernetes service: %v", err))
		}
	}, c.EndpointInterval, ch)
}
func (pf *PortForwarder) Close() {
	// stop all listeners
	for _, l := range pf.listeners {
		if err := l.Close(); err != nil {
			util.HandleError(fmt.Errorf("error closing listener: %v", err))
		}
	}
}
// manageReplicas checks and updates replicas for the given replication controller.
func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, rc *api.ReplicationController) {
	diff := len(filteredPods) - rc.Spec.Replicas
	rcKey, err := controller.KeyFunc(rc)
	if err != nil {
		glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
		return
	}
	if diff < 0 {
		diff *= -1
		if diff > rm.burstReplicas {
			diff = rm.burstReplicas
		}
		rm.expectations.ExpectCreations(rcKey, diff)
		wait := sync.WaitGroup{}
		wait.Add(diff)
		glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", rc.Namespace, rc.Name, rc.Spec.Replicas, diff)
		for i := 0; i < diff; i++ {
			go func() {
				defer wait.Done()
				if err := rm.podControl.CreatePods(rc.Namespace, rc.Spec.Template, rc); err != nil {
					// Decrement the expected number of creates because the informer won't observe this pod
					glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name)
					rm.expectations.CreationObserved(rcKey)
					util.HandleError(err)
				}
			}()
		}
		wait.Wait()
	} else if diff > 0 {
		if diff > rm.burstReplicas {
			diff = rm.burstReplicas
		}
		rm.expectations.ExpectDeletions(rcKey, diff)
		glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", rc.Namespace, rc.Name, rc.Spec.Replicas, diff)
		// No need to sort pods if we are about to delete all of them
		if rc.Spec.Replicas != 0 {
			// Sort the pods in the order such that not-ready < ready, unscheduled
			// < scheduled, and pending < running. This ensures that we delete pods
			// in the earlier stages whenever possible.
			sort.Sort(controller.ActivePods(filteredPods))
		}

		wait := sync.WaitGroup{}
		wait.Add(diff)
		for i := 0; i < diff; i++ {
			go func(ix int) {
				defer wait.Done()
				if err := rm.podControl.DeletePod(rc.Namespace, filteredPods[ix].Name); err != nil {
					// Decrement the expected number of deletes because the informer won't observe this deletion
					glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name)
					rm.expectations.DeletionObserved(rcKey)
				}
			}(i)
		}
		wait.Wait()
	}
}
// monitorStreamPair waits for the pair to receive both its error and data
// streams, or for the timeout to expire (whichever happens first), and then
// removes the pair.
func (h *portForwardStreamHandler) monitorStreamPair(p *portForwardStreamPair, timeout <-chan time.Time) {
	select {
	case <-timeout:
		err := fmt.Errorf("(conn=%p, request=%s) timed out waiting for streams", h.conn, p.requestID)
		util.HandleError(err)
		p.printError(err.Error())
	case <-p.complete:
		glog.V(5).Infof("(conn=%p, request=%s) successfully received error and data streams", h.conn, p.requestID)
	}
	h.removeStreamPair(p.requestID)
}
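// Illustrative sketch (not part of the original source): the timeout handling
// above is the standard "wait for completion or a time.After channel" select.
// A minimal, self-contained version with a hypothetical done channel:
package main

import (
	"fmt"
	"time"
)

// waitOrTimeout reports whether done fired before the timeout channel did.
func waitOrTimeout(done <-chan struct{}, timeout <-chan time.Time) bool {
	select {
	case <-done:
		return true
	case <-timeout:
		return false
	}
}

func main() {
	done := make(chan struct{})
	go func() {
		time.Sleep(50 * time.Millisecond)
		close(done)
	}()
	fmt.Println(waitOrTimeout(done, time.After(200*time.Millisecond))) // true
}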
// waitForConnection waits for new connections to listener and handles them in
// the background.
func (pf *PortForwarder) waitForConnection(listener net.Listener, port ForwardedPort) {
	for {
		conn, err := listener.Accept()
		if err != nil {
			// TODO consider using something like https://github.com/hydrogen18/stoppableListener?
			if !strings.Contains(strings.ToLower(err.Error()), "use of closed network connection") {
				util.HandleError(fmt.Errorf("Error accepting connection on port %d: %v", port.Local, err))
			}
			return
		}
		go pf.handleConnection(conn, port)
	}
}
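// Illustrative sketch (not part of the original source): the accept loop above
// treats "use of closed network connection" as the normal shutdown signal
// produced by closing the listener. A minimal, self-contained accept loop with
// the same shape:
package main

import (
	"fmt"
	"net"
	"strings"
	"time"
)

func acceptLoop(listener net.Listener, handle func(net.Conn)) {
	for {
		conn, err := listener.Accept()
		if err != nil {
			// Closing the listener is the expected way to stop this loop.
			if !strings.Contains(strings.ToLower(err.Error()), "use of closed network connection") {
				fmt.Println("accept error:", err)
			}
			return
		}
		go handle(conn)
	}
}

func main() {
	l, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	go acceptLoop(l, func(c net.Conn) { c.Close() })
	time.Sleep(100 * time.Millisecond)
	l.Close() // stops the accept loop
	time.Sleep(100 * time.Millisecond)
}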
// errorJSONFatal renders an error to the response, and if codec fails will render plaintext.
// Returns the HTTP status code of the error.
func errorJSONFatal(err error, codec runtime.Codec, w http.ResponseWriter) int {
	util.HandleError(fmt.Errorf("apiserver was unable to write a JSON response: %v", err))
	status := errToAPIStatus(err)
	output, err := codec.Encode(status)
	if err != nil {
		w.WriteHeader(status.Code)
		fmt.Fprintf(w, "%s: %s", status.Reason, status.Message)
		return status.Code
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status.Code)
	w.Write(output)
	return status.Code
}
// maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating
// that should not be gracefully terminated.
func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) {
	pod, ok := obj.(*api.Pod)
	if !ok {
		return
	}

	// consider only terminating pods
	if pod.DeletionTimestamp == nil {
		return
	}

	// delete terminating pods that have not yet been scheduled
	if len(pod.Spec.NodeName) == 0 {
		nc.forcefullyDeletePod(pod)
		return
	}

	nodeObj, found, err := nc.nodeStore.GetByKey(pod.Spec.NodeName)
	if err != nil {
		// this can only happen if the Store.KeyFunc has a problem creating
		// a key for the pod. If it happens once, it will happen again so
		// don't bother requeuing the pod.
		util.HandleError(err)
		return
	}

	// delete terminating pods that have been scheduled on
	// nonexistent nodes
	if !found {
		nc.forcefullyDeletePod(pod)
		return
	}

	// delete terminating pods that have been scheduled on
	// nodes that do not support graceful termination
	// TODO(mikedanese): this can be removed when we no longer
	// guarantee backwards compatibility of master API to kubelets with
	// versions less than 1.1.0
	node := nodeObj.(*api.Node)
	v, err := version.Parse(node.Status.NodeInfo.KubeletVersion)
	if err != nil {
		glog.Infof("couldn't parse version %q of minion: %v", node.Status.NodeInfo.KubeletVersion, err)
		nc.forcefullyDeletePod(pod)
		return
	}
	if gracefulDeletionVersion.GT(v) {
		nc.forcefullyDeletePod(pod)
		return
	}
}
// portForward invokes the portForwardStreamHandler's forwarder.PortForward
// function for the given stream pair.
func (h *portForwardStreamHandler) portForward(p *portForwardStreamPair) {
	defer p.dataStream.Close()
	defer p.errorStream.Close()

	portString := p.dataStream.Headers().Get(api.PortHeader)
	port, _ := strconv.ParseUint(portString, 10, 16)

	glog.V(5).Infof("(conn=%p, request=%s) invoking forwarder.PortForward for port %s", h.conn, p.requestID, portString)
	err := h.forwarder.PortForward(h.pod, h.uid, uint16(port), p.dataStream)
	glog.V(5).Infof("(conn=%p, request=%s) done invoking forwarder.PortForward for port %s", h.conn, p.requestID, portString)

	if err != nil {
		msg := fmt.Errorf("error forwarding port %d to pod %s, uid %v: %v", port, h.pod, h.uid, err)
		util.HandleError(msg)
		fmt.Fprint(p.errorStream, msg.Error())
	}
}
// getListener creates a listener on the interface targeted by the given hostname on the given port with
// the given protocol. protocol is in net.Listen style which basically admits values like tcp, tcp4, tcp6
func (pf *PortForwarder) getListener(protocol string, hostname string, port *ForwardedPort) (net.Listener, error) {
	listener, err := net.Listen(protocol, fmt.Sprintf("%s:%d", hostname, port.Local))
	if err != nil {
		util.HandleError(fmt.Errorf("Unable to create listener: Error %s", err))
		return nil, err
	}
	listenerAddress := listener.Addr().String()
	host, localPort, _ := net.SplitHostPort(listenerAddress)
	localPortUInt, err := strconv.ParseUint(localPort, 10, 16)

	if err != nil {
		return nil, fmt.Errorf("Error parsing local port: %s from %s (%s)", err, listenerAddress, host)
	}
	port.Local = uint16(localPortUInt)
	glog.Infof("Forwarding from %s:%d -> %d", hostname, localPortUInt, port.Remote)

	return listener, nil
}
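// Illustrative sketch (not part of the original source): when port.Local is 0,
// net.Listen picks a free port, and the code above reads the real port back
// from the listener address. Self-contained demonstration of that step:
package main

import (
	"fmt"
	"net"
	"strconv"
)

func main() {
	// Ask the kernel for any free TCP port on localhost.
	listener, err := net.Listen("tcp4", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	defer listener.Close()

	// Recover the port that was actually assigned.
	_, portStr, err := net.SplitHostPort(listener.Addr().String())
	if err != nil {
		panic(err)
	}
	port, err := strconv.ParseUint(portStr, 10, 16)
	if err != nil {
		panic(err)
	}
	fmt.Printf("listening on local port %d\n", uint16(port))
}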
// Loop infinitely, processing all service updates provided by the queue.
func (s *ServiceController) watchServices(serviceQueue *cache.DeltaFIFO) {
	for {
		newItem := serviceQueue.Pop()
		deltas, ok := newItem.(cache.Deltas)
		if !ok {
			glog.Errorf("Received object from service watcher that wasn't Deltas: %+v", newItem)
		}
		delta := deltas.Newest()
		if delta == nil {
			glog.Errorf("Received nil delta from watcher queue.")
			continue
		}
		err, shouldRetry := s.processDelta(delta)
		if shouldRetry {
			// Add the failed service back to the queue so we'll retry it.
			glog.Errorf("Failed to process service delta. Retrying: %v", err)
			time.Sleep(processingRetryInterval)
			serviceQueue.AddIfNotPresent(deltas)
		} else if err != nil {
			util.HandleError(fmt.Errorf("Failed to process service delta. Not retrying: %v", err))
		}
	}
}
// errToAPIStatus converts an error to an unversioned.Status object.
func errToAPIStatus(err error) *unversioned.Status {
	switch t := err.(type) {
	case statusError:
		status := t.Status()
		if len(status.Status) == 0 {
			status.Status = unversioned.StatusFailure
		}
		if status.Code == 0 {
			switch status.Status {
			case unversioned.StatusSuccess:
				status.Code = http.StatusOK
			case unversioned.StatusFailure:
				status.Code = http.StatusInternalServerError
			}
		}
		//TODO: check for invalid responses
		return &status
	default:
		status := http.StatusInternalServerError
		switch {
		//TODO: replace me with NewConflictErr
		case etcdstorage.IsEtcdTestFailed(err):
			status = http.StatusConflict
		}
		// Log errors that were not converted to an error status
		// by REST storage - these typically indicate programmer
		// error by not using pkg/api/errors, or unexpected failure
		// cases.
		util.HandleError(fmt.Errorf("apiserver received an error that is not an unversioned.Status: %v", err))
		return &unversioned.Status{
			Status:  unversioned.StatusFailure,
			Code:    status,
			Reason:  unversioned.StatusReasonUnknown,
			Message: err.Error(),
		}
	}
}
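// Illustrative sketch (not part of the original source): the conversion above is
// a type switch that prefers errors carrying their own status and falls back to
// 500 otherwise. A minimal, self-contained version using a hypothetical
// statusCoder interface in place of the apiserver's statusError:
package main

import (
	"errors"
	"fmt"
	"net/http"
)

// statusCoder is a hypothetical stand-in for errors that know their HTTP code.
type statusCoder interface {
	error
	StatusCode() int
}

type conflictError struct{ msg string }

func (e conflictError) Error() string   { return e.msg }
func (e conflictError) StatusCode() int { return http.StatusConflict }

// errToStatusCode maps an error to an HTTP status code.
func errToStatusCode(err error) int {
	switch t := err.(type) {
	case statusCoder:
		return t.StatusCode()
	default:
		// Errors that don't carry a status typically indicate programmer error;
		// surface them as internal server errors.
		return http.StatusInternalServerError
	}
}

func main() {
	fmt.Println(errToStatusCode(conflictError{"already exists"})) // 409
	fmt.Println(errToStatusCode(errors.New("boom")))              // 500
}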
func (e *streamProtocolV2) stream(conn httpstream.Connection) error {
	headers := http.Header{}

	// set up error stream
	errorChan := make(chan error)
	headers.Set(api.StreamType, api.StreamTypeError)
	errorStream, err := conn.CreateStream(headers)
	if err != nil {
		return err
	}

	go func() {
		message, err := ioutil.ReadAll(errorStream)
		switch {
		case err != nil && err != io.EOF:
			errorChan <- fmt.Errorf("error reading from error stream: %s", err)
		case len(message) > 0:
			errorChan <- fmt.Errorf("error executing remote command: %s", message)
		default:
			errorChan <- nil
		}
		close(errorChan)
	}()

	var wg sync.WaitGroup
	var once sync.Once

	// set up stdin stream
	if e.stdin != nil {
		headers.Set(api.StreamType, api.StreamTypeStdin)
		remoteStdin, err := conn.CreateStream(headers)
		if err != nil {
			return err
		}

		// copy from client's stdin to container's stdin
		go func() {
			// if e.stdin is noninteractive, e.g. `echo abc | kubectl exec -i <pod> -- cat`, make sure
			// we close remoteStdin as soon as the copy from e.stdin to remoteStdin finishes. Otherwise
			// the executed command will remain running.
			defer once.Do(func() { remoteStdin.Close() })

			if _, err := io.Copy(remoteStdin, e.stdin); err != nil {
				util.HandleError(err)
			}
		}()

		// read from remoteStdin until the stream is closed. this is essential to
		// be able to exit interactive sessions cleanly and not leak goroutines or
		// hang the client's terminal.
		//
		// go-dockerclient's current hijack implementation
		// (https://github.com/fsouza/go-dockerclient/blob/89f3d56d93788dfe85f864a44f85d9738fca0670/client.go#L564)
		// waits for all three streams (stdin/stdout/stderr) to finish copying
		// before returning. When hijack finishes copying stdout/stderr, it calls
		// Close() on its side of remoteStdin, which allows this copy to complete.
		// When that happens, we must Close() on our side of remoteStdin, to
		// allow the copy in hijack to complete, and hijack to return.
		go func() {
			defer once.Do(func() { remoteStdin.Close() })

			// this "copy" doesn't actually read anything - it's just here to wait for
			// the server to close remoteStdin.
			if _, err := io.Copy(ioutil.Discard, remoteStdin); err != nil {
				util.HandleError(err)
			}
		}()
	}

	// set up stdout stream
	if e.stdout != nil {
		headers.Set(api.StreamType, api.StreamTypeStdout)
		remoteStdout, err := conn.CreateStream(headers)
		if err != nil {
			return err
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			if _, err := io.Copy(e.stdout, remoteStdout); err != nil {
				util.HandleError(err)
			}
		}()
	}

	// set up stderr stream
	if e.stderr != nil && !e.tty {
		headers.Set(api.StreamType, api.StreamTypeStderr)
		remoteStderr, err := conn.CreateStream(headers)
		if err != nil {
			return err
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			if _, err := io.Copy(e.stderr, remoteStderr); err != nil {
				util.HandleError(err)
			}
		}()
	}

	// we're waiting for stdout/stderr to finish copying
	wg.Wait()

	// waits for errorStream to finish reading with an error or nil
	return <-errorChan
}
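// Illustrative sketch (not part of the original source): remoteStdin above is
// closed from two different goroutines, and sync.Once guarantees the Close runs
// exactly once regardless of which copy finishes first. A minimal,
// self-contained demonstration of that idiom:
package main

import (
	"fmt"
	"sync"
)

type closer struct{ name string }

func (c *closer) Close() error {
	fmt.Println("closing", c.name)
	return nil
}

func main() {
	var once sync.Once
	var wg sync.WaitGroup
	stream := &closer{name: "remoteStdin"}

	// Two goroutines race to close the stream; once.Do ensures a single Close.
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			once.Do(func() { stream.Close() })
		}()
	}
	wg.Wait() // prints "closing remoteStdin" exactly once
}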
func (jm *JobController) manageJob(activePods []*api.Pod, succeeded int, job *extensions.Job) int {
	var activeLock sync.Mutex
	active := len(activePods)
	parallelism := *job.Spec.Parallelism
	jobKey, err := controller.KeyFunc(job)
	if err != nil {
		glog.Errorf("Couldn't get key for job %#v: %v", job, err)
		return 0
	}

	if active > parallelism {
		diff := active - parallelism
		jm.expectations.ExpectDeletions(jobKey, diff)
		glog.V(4).Infof("Too many pods running job %q, need %d, deleting %d", jobKey, parallelism, diff)
		// Sort the pods in the order such that not-ready < ready, unscheduled
		// < scheduled, and pending < running. This ensures that we delete pods
		// in the earlier stages whenever possible.
		sort.Sort(controller.ActivePods(activePods))

		active -= diff
		wait := sync.WaitGroup{}
		wait.Add(diff)
		for i := 0; i < diff; i++ {
			go func(ix int) {
				defer wait.Done()
				if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name); err != nil {
					defer util.HandleError(err)
					// Decrement the expected number of deletes because the informer won't observe this deletion
					jm.expectations.DeletionObserved(jobKey)
					activeLock.Lock()
					active++
					activeLock.Unlock()
				}
			}(i)
		}
		wait.Wait()
	} else if active < parallelism {
		// how many executions are left to run
		diff := *job.Spec.Completions - succeeded
		// limit to parallelism and count active pods as well
		if diff > parallelism {
			diff = parallelism
		}
		diff -= active
		jm.expectations.ExpectCreations(jobKey, diff)
		glog.V(4).Infof("Too few pods running job %q, need %d, creating %d", jobKey, parallelism, diff)

		active += diff
		wait := sync.WaitGroup{}
		wait.Add(diff)
		for i := 0; i < diff; i++ {
			go func() {
				defer wait.Done()
				if err := jm.podControl.CreatePods(job.Namespace, &job.Spec.Template, job); err != nil {
					defer util.HandleError(err)
					// Decrement the expected number of creates because the informer won't observe this pod
					jm.expectations.CreationObserved(jobKey)
					activeLock.Lock()
					active--
					activeLock.Unlock()
				}
			}()
		}
		wait.Wait()
	}

	return active
}
// handleConnection copies data between the local connection and the stream to
// the remote server.
func (pf *PortForwarder) handleConnection(conn net.Conn, port ForwardedPort) {
	defer conn.Close()

	glog.Infof("Handling connection for %d", port.Local)

	requestID := pf.nextRequestID()

	// create error stream
	headers := http.Header{}
	headers.Set(api.StreamType, api.StreamTypeError)
	headers.Set(api.PortHeader, fmt.Sprintf("%d", port.Remote))
	headers.Set(api.PortForwardRequestIDHeader, strconv.Itoa(requestID))
	errorStream, err := pf.streamConn.CreateStream(headers)
	if err != nil {
		util.HandleError(fmt.Errorf("error creating error stream for port %d -> %d: %v", port.Local, port.Remote, err))
		return
	}
	// we're not writing to this stream
	errorStream.Close()

	errorChan := make(chan error)
	go func() {
		message, err := ioutil.ReadAll(errorStream)
		switch {
		case err != nil:
			errorChan <- fmt.Errorf("error reading from error stream for port %d -> %d: %v", port.Local, port.Remote, err)
		case len(message) > 0:
			errorChan <- fmt.Errorf("an error occurred forwarding %d -> %d: %v", port.Local, port.Remote, string(message))
		}
		close(errorChan)
	}()

	// create data stream
	headers.Set(api.StreamType, api.StreamTypeData)
	dataStream, err := pf.streamConn.CreateStream(headers)
	if err != nil {
		util.HandleError(fmt.Errorf("error creating forwarding stream for port %d -> %d: %v", port.Local, port.Remote, err))
		return
	}

	localError := make(chan struct{})
	remoteDone := make(chan struct{})

	go func() {
		// Copy from the remote side to the local port.
		if _, err := io.Copy(conn, dataStream); err != nil && !strings.Contains(err.Error(), "use of closed network connection") {
			util.HandleError(fmt.Errorf("error copying from remote stream to local connection: %v", err))
		}

		// inform the select below that the remote copy is done
		close(remoteDone)
	}()

	go func() {
		// inform server we're not sending any more data after copy unblocks
		defer dataStream.Close()

		// Copy from the local port to the remote side.
		if _, err := io.Copy(dataStream, conn); err != nil && !strings.Contains(err.Error(), "use of closed network connection") {
			util.HandleError(fmt.Errorf("error copying from local connection to remote stream: %v", err))
			// break out of the select below without waiting for the other copy to finish
			close(localError)
		}
	}()

	// wait for either a local->remote error or for copying from remote->local to finish
	select {
	case <-remoteDone:
	case <-localError:
	}

	// always expect something on errorChan (it may be nil)
	err = <-errorChan
	if err != nil {
		util.HandleError(err)
	}
}
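// Illustrative sketch (not part of the original source): the forwarding above is
// two concurrent io.Copy calls, one per direction, plus channels to learn when
// either side finishes. A minimal, self-contained version of that shape using
// in-memory pipes instead of real streams:
package main

import (
	"fmt"
	"io"
	"net"
)

// proxy copies data in both directions between a and b until one side closes.
func proxy(a, b net.Conn) {
	done := make(chan struct{}, 2)

	go func() {
		io.Copy(a, b) // remote -> local
		done <- struct{}{}
	}()
	go func() {
		io.Copy(b, a) // local -> remote
		done <- struct{}{}
	}()

	// Wait for either direction to finish, then tear both sides down.
	<-done
	a.Close()
	b.Close()
}

func main() {
	local, localPeer := net.Pipe()
	remote, remotePeer := net.Pipe()

	go proxy(localPeer, remotePeer)

	// The "remote" side echoes one message back to the "local" side.
	go func() {
		buf := make([]byte, 5)
		io.ReadFull(remote, buf)
		remote.Write(buf)
		remote.Close()
	}()

	local.Write([]byte("hello"))
	reply := make([]byte, 5)
	io.ReadFull(local, reply)
	fmt.Println(string(reply)) // hello
	local.Close()
}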
func (dsc *DaemonSetsController) manage(ds *extensions.DaemonSet) {
	// Find out which nodes are running the daemon pods selected by ds.
	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ds)
	if err != nil {
		glog.Errorf("Error getting node to daemon pod mapping for daemon set %+v: %v", ds, err)
	}

	// For each node, if the node is running the daemon pod but isn't supposed to, kill the daemon
	// pod. If the node is supposed to run the daemon pod, but isn't, create the daemon pod on the node.
	nodeList, err := dsc.nodeStore.List()
	if err != nil {
		glog.Errorf("Couldn't get list of nodes when syncing daemon set %+v: %v", ds, err)
	}
	var nodesNeedingDaemonPods, podsToDelete []string
	for i, node := range nodeList.Items {
		// Check if the node satisfies the daemon set's node selector.
		nodeSelector := labels.Set(ds.Spec.Template.Spec.NodeSelector).AsSelector()
		shouldRun := nodeSelector.Matches(labels.Set(nodeList.Items[i].Labels))
		// If the daemon set specifies a node name, check that it matches with nodeName.
		nodeName := nodeList.Items[i].Name
		shouldRun = shouldRun && (ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == nodeName)

		// If the node is not ready, don't run on it.
		// TODO(mikedanese): remove this once daemonpods forgive nodes
		shouldRun = shouldRun && api.IsNodeReady(&node)

		daemonPods, isRunning := nodeToDaemonPods[nodeName]

		if shouldRun && !isRunning {
			// If daemon pod is supposed to be running on node, but isn't, create daemon pod.
			nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodeName)
		} else if shouldRun && len(daemonPods) > 1 {
			// If daemon pod is supposed to be running on node, but more than 1 daemon pod is running, delete the excess daemon pods.
			// TODO: sort the daemon pods by creation time, so that the oldest is preserved.
			for i := 1; i < len(daemonPods); i++ {
				podsToDelete = append(podsToDelete, daemonPods[i].Name)
			}
		} else if !shouldRun && isRunning {
			// If daemon pod isn't supposed to run on node, but it is, delete all daemon pods on node.
			for i := range daemonPods {
				podsToDelete = append(podsToDelete, daemonPods[i].Name)
			}
		}
	}

	// We need to set expectations before creating/deleting pods to avoid race conditions.
	dsKey, err := controller.KeyFunc(ds)
	if err != nil {
		glog.Errorf("Couldn't get key for object %+v: %v", ds, err)
		return
	}
	dsc.expectations.SetExpectations(dsKey, len(nodesNeedingDaemonPods), len(podsToDelete))

	glog.V(4).Infof("Nodes needing daemon pods for daemon set %s: %+v", ds.Name, nodesNeedingDaemonPods)
	for i := range nodesNeedingDaemonPods {
		if err := dsc.podControl.CreatePodsOnNode(nodesNeedingDaemonPods[i], ds.Namespace, ds.Spec.Template, ds); err != nil {
			glog.V(2).Infof("Failed creation, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
			dsc.expectations.CreationObserved(dsKey)
			util.HandleError(err)
		}
	}

	glog.V(4).Infof("Pods to delete for daemon set %s: %+v", ds.Name, podsToDelete)
	for i := range podsToDelete {
		if err := dsc.podControl.DeletePod(ds.Namespace, podsToDelete[i]); err != nil {
			glog.V(2).Infof("Failed deletion, decrementing expectations for set %q/%q", ds.Namespace, ds.Name)
			dsc.expectations.DeletionObserved(dsKey)
			util.HandleError(err)
		}
	}
}
// RunOnce verifies the state of the cluster IP allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) RunOnce() error {
	// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
	// or if they are executed against different leaders,
	// the ordering guarantee required to ensure no IP is allocated twice is violated.
	// ListServices must return a ResourceVersion higher than the etcd index Get triggers,
	// and the release code must not release services that have had IPs allocated but not yet been created
	// See #8295

	// If etcd server is not running we should wait for some time and fail only then. This is particularly
	// important when we start apiserver and etcd at the same time.
	var latest *api.RangeAllocation
	var err error
	err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
		latest, err = c.alloc.Get()
		return err == nil, err
	})
	if err != nil {
		return fmt.Errorf("unable to refresh the service IP block: %v", err)
	}

	ctx := api.WithNamespace(api.NewDefaultContext(), api.NamespaceAll)
	list, err := c.registry.ListServices(ctx, labels.Everything(), fields.Everything())
	if err != nil {
		return fmt.Errorf("unable to refresh the service IP block: %v", err)
	}

	r := ipallocator.NewCIDRRange(c.network)
	for _, svc := range list.Items {
		if !api.IsServiceIPSet(&svc) {
			continue
		}
		ip := net.ParseIP(svc.Spec.ClusterIP)
		if ip == nil {
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not a valid IP; please recreate", svc.Spec.ClusterIP, svc.Name, svc.Namespace))
			continue
		}
		switch err := r.Allocate(ip); err {
		case nil:
		case ipallocator.ErrAllocated:
			// TODO: send event
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s was assigned to multiple services; please recreate", ip, svc.Name, svc.Namespace))
		case ipallocator.ErrNotInRange:
			// TODO: send event
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not within the service CIDR %s; please recreate", ip, svc.Name, svc.Namespace, c.network))
		case ipallocator.ErrFull:
			// TODO: send event
			return fmt.Errorf("the service CIDR %v is full; you must widen the CIDR in order to create new services", r)
		default:
			return fmt.Errorf("unable to allocate cluster IP %s for service %s/%s due to an unknown error, exiting: %v", ip, svc.Name, svc.Namespace, err)
		}
	}

	err = r.Snapshot(latest)
	if err != nil {
		return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
	}

	if err := c.alloc.CreateOrUpdate(latest); err != nil {
		return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
	}
	return nil
}
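// Illustrative sketch (not part of the original source): wait.PollImmediate runs
// a condition function immediately and then on an interval until it succeeds,
// returns an error, or the timeout expires. A rough, self-contained
// approximation of that behavior using only the standard library:
package main

import (
	"errors"
	"fmt"
	"time"
)

// pollImmediate calls condition right away and then every interval until it
// returns done=true, returns an error, or timeout elapses.
func pollImmediate(interval, timeout time.Duration, condition func() (bool, error)) error {
	deadline := time.Now().Add(timeout)
	for {
		done, err := condition()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timed out waiting for the condition")
		}
		time.Sleep(interval)
	}
}

func main() {
	attempts := 0
	err := pollImmediate(10*time.Millisecond, time.Second, func() (bool, error) {
		attempts++
		return attempts >= 3, nil // succeed on the third try
	})
	fmt.Println(attempts, err) // 3 <nil>
}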
// Returns error if ListAndWatch didn't even try to initialize watch.
func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
	var resourceVersion string
	resyncCh, cleanup := r.resyncChan()
	defer cleanup()

	if r.verbose {
		glog.Infof("watch %v: listwatch cycle started.", r.name)
		defer glog.Infof("watch %v: listwatch cycle exited.", r.name)
	}
	list, err := r.listerWatcher.List()
	if err != nil {
		return fmt.Errorf("%s: Failed to list %v: %v", r.name, r.expectedType, err)
	}
	meta, err := meta.Accessor(list)
	if err != nil {
		return fmt.Errorf("%s: Unable to understand list result %#v", r.name, list)
	}
	resourceVersion = meta.ResourceVersion()
	items, err := runtime.ExtractList(list)
	if err != nil {
		return fmt.Errorf("%s: Unable to understand list result %#v (%v)", r.name, list, err)
	}
	if err := r.syncWith(items, resourceVersion); err != nil {
		return fmt.Errorf("%s: Unable to sync list result: %v", r.name, err)
	}
	r.setLastSyncResourceVersion(resourceVersion)

	for {
		w, err := r.listerWatcher.Watch(resourceVersion)
		if err != nil {
			switch err {
			case io.EOF:
				// watch closed normally
			case io.ErrUnexpectedEOF:
				glog.V(1).Infof("%s: Watch for %v closed with unexpected EOF: %v", r.name, r.expectedType, err)
			default:
				util.HandleError(fmt.Errorf("%s: Failed to watch %v: %v", r.name, r.expectedType, err))
			}
			// If this is "connection refused" error, it means that most likely apiserver is not responsive.
			// It doesn't make sense to re-list all objects because most likely we will be able to restart
			// watch where we ended.
			// If that's the case wait and resend watch request.
			if urlError, ok := err.(*url.Error); ok {
				if opError, ok := urlError.Err.(*net.OpError); ok {
					if errno, ok := opError.Err.(syscall.Errno); ok && errno == syscall.ECONNREFUSED {
						time.Sleep(time.Second)
						continue
					}
				}
			}
			return nil
		}
		if err := r.watchHandler(w, &resourceVersion, resyncCh, stopCh); err != nil {
			if err != errorResyncRequested && err != errorStopRequested {
				glog.Warningf("%s: watch of %v ended with: %v", r.name, r.expectedType, err)
			}
			return nil
		}
	}
}
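// Illustrative sketch (not part of the original source): the nested type
// assertions above dig a syscall.Errno out of a *url.Error to detect
// "connection refused". On newer Go versions the errno is wrapped one level
// deeper (in *os.SyscallError), so errors.Is is the more robust check; both
// forms are shown below. Assumes nothing is listening on the chosen port.
package main

import (
	"errors"
	"fmt"
	"net"
	"net/http"
	"net/url"
	"syscall"
)

// isConnectionRefusedLegacy mirrors the reflector's unwrapping; it may miss the
// errno on Go versions that insert the extra wrapping layer.
func isConnectionRefusedLegacy(err error) bool {
	if urlError, ok := err.(*url.Error); ok {
		if opError, ok := urlError.Err.(*net.OpError); ok {
			if errno, ok := opError.Err.(syscall.Errno); ok {
				return errno == syscall.ECONNREFUSED
			}
		}
	}
	return false
}

func main() {
	_, err := http.Get("http://127.0.0.1:59999/") // hypothetical closed port
	fmt.Println(isConnectionRefusedLegacy(err))
	fmt.Println(errors.Is(err, syscall.ECONNREFUSED))
}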
// NewNamespaceController creates a new NamespaceController
func NewNamespaceController(kubeClient client.Interface, versions *unversioned.APIVersions, resyncPeriod time.Duration) *NamespaceController {
	var controller *framework.Controller
	_, controller = framework.NewInformer(
		&cache.ListWatch{
			ListFunc: func() (runtime.Object, error) {
				return kubeClient.Namespaces().List(labels.Everything(), fields.Everything())
			},
			WatchFunc: func(resourceVersion string) (watch.Interface, error) {
				return kubeClient.Namespaces().Watch(labels.Everything(), fields.Everything(), resourceVersion)
			},
		},
		&api.Namespace{},
		// TODO: Can we have much longer period here?
		resyncPeriod,
		framework.ResourceEventHandlerFuncs{
			AddFunc: func(obj interface{}) {
				namespace := obj.(*api.Namespace)
				if err := syncNamespace(kubeClient, versions, namespace); err != nil {
					if estimate, ok := err.(*contentRemainingError); ok {
						go func() {
							// Estimate is the aggregate total of TerminationGracePeriodSeconds, which defaults to 30s
							// for pods. However, most processes will terminate faster - within a few seconds, probably
							// with a peak within 5-10s. So this division is a heuristic that avoids waiting the full
							// duration when in many cases things complete more quickly. The extra second added is to
							// ensure we never wait 0 seconds.
							t := estimate.Estimate/2 + 1
							glog.V(4).Infof("Content remaining in namespace %s, waiting %d seconds", namespace.Name, t)
							time.Sleep(time.Duration(t) * time.Second)
							if err := controller.Requeue(namespace); err != nil {
								util.HandleError(err)
							}
						}()
						return
					}
					util.HandleError(err)
				}
			},
			UpdateFunc: func(oldObj, newObj interface{}) {
				namespace := newObj.(*api.Namespace)
				if err := syncNamespace(kubeClient, versions, namespace); err != nil {
					if estimate, ok := err.(*contentRemainingError); ok {
						go func() {
							t := estimate.Estimate/2 + 1
							glog.V(4).Infof("Content remaining in namespace %s, waiting %d seconds", namespace.Name, t)
							time.Sleep(time.Duration(t) * time.Second)
							if err := controller.Requeue(namespace); err != nil {
								util.HandleError(err)
							}
						}()
						return
					}
					util.HandleError(err)
				}
			},
		},
	)

	return &NamespaceController{
		controller: controller,
	}
}
// Returns error if ListAndWatch didn't even try to initialize watch.
func (r *Reflector) ListAndWatch(stopCh <-chan struct{}) error {
	var resourceVersion string
	resyncCh, cleanup := r.resyncChan()
	defer cleanup()

	list, err := r.listerWatcher.List()
	if err != nil {
		return fmt.Errorf("%s: Failed to list %v: %v", r.name, r.expectedType, err)
	}
	meta, err := meta.Accessor(list)
	if err != nil {
		return fmt.Errorf("%s: Unable to understand list result %#v", r.name, list)
	}
	resourceVersion = meta.ResourceVersion()
	items, err := runtime.ExtractList(list)
	if err != nil {
		return fmt.Errorf("%s: Unable to understand list result %#v (%v)", r.name, list, err)
	}
	if err := r.syncWith(items, resourceVersion); err != nil {
		return fmt.Errorf("%s: Unable to sync list result: %v", r.name, err)
	}
	r.setLastSyncResourceVersion(resourceVersion)

	for {
		options := api.ListOptions{
			ResourceVersion: resourceVersion,
			// We want to avoid situations when resyncing is breaking the TCP connection
			// - see comment for 'timeoutForWatch()' for more details.
			TimeoutSeconds: r.timeoutForWatch(),
		}
		w, err := r.listerWatcher.Watch(options)
		if err != nil {
			switch err {
			case io.EOF:
				// watch closed normally
			case io.ErrUnexpectedEOF:
				glog.V(1).Infof("%s: Watch for %v closed with unexpected EOF: %v", r.name, r.expectedType, err)
			default:
				util.HandleError(fmt.Errorf("%s: Failed to watch %v: %v", r.name, r.expectedType, err))
			}
			// If this is "connection refused" error, it means that most likely apiserver is not responsive.
			// It doesn't make sense to re-list all objects because most likely we will be able to restart
			// watch where we ended.
			// If that's the case wait and resend watch request.
			if urlError, ok := err.(*url.Error); ok {
				if opError, ok := urlError.Err.(*net.OpError); ok {
					if errno, ok := opError.Err.(syscall.Errno); ok && errno == syscall.ECONNREFUSED {
						time.Sleep(time.Second)
						continue
					}
				}
			}
			return nil
		}
		if err := r.watchHandler(w, &resourceVersion, resyncCh, stopCh); err != nil {
			if err != errorResyncRequested && err != errorStopRequested {
				glog.Warningf("%s: watch of %v ended with: %v", r.name, r.expectedType, err)
			}
			return nil
		}
		if r.canForceResyncNow() {
			glog.V(4).Infof("%s: next resync planned for %#v, forcing now", r.name, r.nextResync)
			return nil
		}
	}
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *NodeController) Run(period time.Duration) {
	go nc.nodeController.Run(util.NeverStop)
	go nc.podController.Run(util.NeverStop)

	// Incorporate the results of node status pushed from kubelet to master.
	go util.Until(func() {
		if err := nc.monitorNodeStatus(); err != nil {
			glog.Errorf("Error monitoring node status: %v", err)
		}
	}, nc.nodeMonitorPeriod, util.NeverStop)

	// Managing eviction of nodes:
	// 1. when we delete pods off a node, if the node was not empty at the time we then
	//    queue a termination watcher
	//    a. If we hit an error, retry deletion
	// 2. The terminator loop ensures that pods are eventually cleaned and we never
	//    terminate a pod in a time period less than nc.maximumGracePeriod. AddedAt
	//    is the time from which we measure "has this pod been terminating too long",
	//    after which we will delete the pod with grace period 0 (force delete).
	//    a. If we hit errors, retry instantly
	//    b. If there are no pods left terminating, exit
	//    c. If there are pods still terminating, wait for their estimated completion
	//       before retrying
	go util.Until(func() {
		nc.evictorLock.Lock()
		defer nc.evictorLock.Unlock()
		nc.podEvictor.Try(func(value TimedValue) (bool, time.Duration) {
			remaining, err := nc.deletePods(value.Value)
			if err != nil {
				util.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
				return false, 0
			}
			if remaining {
				nc.terminationEvictor.Add(value.Value)
			}
			return true, 0
		})
	}, nodeEvictionPeriod, util.NeverStop)

	// TODO: replace with a controller that ensures pods that are terminating complete
	// in a particular time period
	go util.Until(func() {
		nc.evictorLock.Lock()
		defer nc.evictorLock.Unlock()
		nc.terminationEvictor.Try(func(value TimedValue) (bool, time.Duration) {
			completed, remaining, err := nc.terminatePods(value.Value, value.AddedAt)
			if err != nil {
				util.HandleError(fmt.Errorf("unable to terminate pods on node %q: %v", value.Value, err))
				return false, 0
			}

			if completed {
				glog.Infof("All pods terminated on %s", value.Value)
				nc.recordNodeEvent(value.Value, "TerminatedAllPods", fmt.Sprintf("Terminated all Pods on Node %s.", value.Value))
				return true, 0
			}

			glog.V(2).Infof("Pods terminating since %s on %q, estimated completion %s", value.AddedAt, value.Value, remaining)
			// clamp very short intervals
			if remaining < nodeEvictionPeriod {
				remaining = nodeEvictionPeriod
			}
			return false, remaining
		})
	}, nodeEvictionPeriod, util.NeverStop)
}
// RunOnce verifies the state of the port allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) RunOnce() error {
	// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
	// or if they are executed against different leaders,
	// the ordering guarantee required to ensure no port is allocated twice is violated.
	// ListServices must return a ResourceVersion higher than the etcd index Get triggers,
	// and the release code must not release services that have had ports allocated but not yet been created
	// See #8295

	// If etcd server is not running we should wait for some time and fail only then. This is particularly
	// important when we start apiserver and etcd at the same time.
	var latest *api.RangeAllocation
	var err error
	for i := 0; i < 10; i++ {
		if latest, err = c.alloc.Get(); err != nil {
			time.Sleep(time.Second)
		} else {
			break
		}
	}
	if err != nil {
		return fmt.Errorf("unable to refresh the port block: %v", err)
	}

	ctx := api.WithNamespace(api.NewDefaultContext(), api.NamespaceAll)
	list, err := c.registry.ListServices(ctx, labels.Everything(), fields.Everything())
	if err != nil {
		return fmt.Errorf("unable to refresh the port block: %v", err)
	}

	r := portallocator.NewPortAllocator(c.portRange)
	for i := range list.Items {
		svc := &list.Items[i]
		ports := service.CollectServiceNodePorts(svc)
		if len(ports) == 0 {
			continue
		}

		for _, port := range ports {
			switch err := r.Allocate(port); err {
			case nil:
			case portallocator.ErrAllocated:
				// TODO: send event
				// port is broken, reallocate
				util.HandleError(fmt.Errorf("the port %d for service %s/%s was assigned to multiple services; please recreate", port, svc.Name, svc.Namespace))
			case portallocator.ErrNotInRange:
				// TODO: send event
				// port is broken, reallocate
				util.HandleError(fmt.Errorf("the port %d for service %s/%s is not within the port range %v; please recreate", port, svc.Name, svc.Namespace, c.portRange))
			case portallocator.ErrFull:
				// TODO: send event
				return fmt.Errorf("the port range %v is full; you must widen the port range in order to create new services", c.portRange)
			default:
				return fmt.Errorf("unable to allocate port %d for service %s/%s due to an unknown error, exiting: %v", port, svc.Name, svc.Namespace, err)
			}
		}
	}

	err = r.Snapshot(latest)
	if err != nil {
		return fmt.Errorf("unable to persist the updated port allocations: %v", err)
	}

	if err := c.alloc.CreateOrUpdate(latest); err != nil {
		return fmt.Errorf("unable to persist the updated port allocations: %v", err)
	}
	return nil
}