func createStreams(req *http.Request, w http.ResponseWriter, supportedStreamProtocols []string, idleTimeout, streamCreationTimeout time.Duration) (*context, bool) { opts, err := newOptions(req) if err != nil { runtime.HandleError(err) w.WriteHeader(http.StatusBadRequest) fmt.Fprint(w, err.Error()) return nil, false } if wsstream.IsWebSocketRequest(req) { return createWebSocketStreams(req, w, opts, idleTimeout) } protocol, err := httpstream.Handshake(req, w, supportedStreamProtocols) if err != nil { w.WriteHeader(http.StatusBadRequest) fmt.Fprint(w, err.Error()) return nil, false } streamCh := make(chan streamAndReply) upgrader := spdy.NewResponseUpgrader() conn := upgrader.UpgradeResponse(w, req, func(stream httpstream.Stream, replySent <-chan struct{}) error { streamCh <- streamAndReply{Stream: stream, replySent: replySent} return nil }) // from this point on, we can no longer call methods on response if conn == nil { // The upgrader is responsible for notifying the client of any errors that // occurred during upgrading. All we can do is return here at this point // if we weren't successful in upgrading. return nil, false } conn.SetIdleTimeout(idleTimeout) var handler protocolHandler switch protocol { case StreamProtocolV2Name: handler = &v2ProtocolHandler{} case "": glog.V(4).Infof("Client did not request protocol negotiation. Falling back to %q", StreamProtocolV1Name) fallthrough case StreamProtocolV1Name: handler = &v1ProtocolHandler{} } expired := time.NewTimer(streamCreationTimeout) ctx, err := handler.waitForStreams(streamCh, opts.expectedStreams, expired.C) if err != nil { runtime.HandleError(err) return nil, false } ctx.conn = conn ctx.tty = opts.tty return ctx, true }
func (w *etcdWatcher) decodeObject(node *etcd.Node) (runtime.Object, error) { if obj, found := w.cache.getFromCache(node.ModifiedIndex, storage.Everything); found { return obj, nil } obj, err := runtime.Decode(w.encoding, []byte(node.Value)) if err != nil { return nil, err } // ensure resource version is set on the object we load from etcd if err := w.versioner.UpdateObject(obj, node.ModifiedIndex); err != nil { utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", node.ModifiedIndex, obj, err)) } // perform any necessary transformation if w.transform != nil { obj, err = w.transform(obj) if err != nil { utilruntime.HandleError(fmt.Errorf("failure to transform api object %#v: %v", obj, err)) return nil, err } } if node.ModifiedIndex != 0 { w.cache.addToCache(node.ModifiedIndex, obj) } return obj, nil }
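// The w.cache consulted above is not shown in this excerpt. As a rough illustration only
// (the type name, the locking, and the omitted filter argument are assumptions, not the
// real implementation), the getFromCache/addToCache contract it relies on can be
// satisfied by a small mutex-guarded map keyed by etcd ModifiedIndex:
type indexCacheSketch struct {
	mu      sync.RWMutex
	objects map[uint64]runtime.Object
}

func (c *indexCacheSketch) getFromCache(index uint64) (runtime.Object, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	obj, found := c.objects[index]
	return obj, found
}

func (c *indexCacheSketch) addToCache(index uint64, obj runtime.Object) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.objects[index] = obj
}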
func (w *etcdWatcher) sendDelete(res *etcd.Response) { if res.PrevNode == nil { utilruntime.HandleError(fmt.Errorf("unexpected nil prev node: %#v", res)) return } if w.include != nil && !w.include(res.PrevNode.Key) { return } node := *res.PrevNode if res.Node != nil { // Note that this sends the *old* object with the etcd index for the time at // which it gets deleted. This will allow users to restart the watch at the right // index. node.ModifiedIndex = res.Node.ModifiedIndex } obj, err := w.decodeObject(&node) if err != nil { utilruntime.HandleError(fmt.Errorf("failure to decode api object: %v\nfrom %#v %#v", err, res, res.Node)) // TODO: expose an error through watch.Interface? // Ignore this value. If we stop the watch on a bad value, a client that uses // the resourceVersion to resume will never be able to get past a bad value. return } if !w.filter(obj) { return } w.emit(watch.Event{ Type: watch.Deleted, Object: obj, }) }
func (w *etcdWatcher) sendAdd(res *etcd.Response) { if res.Node == nil { utilruntime.HandleError(fmt.Errorf("unexpected nil node: %#v", res)) return } if w.include != nil && !w.include(res.Node.Key) { return } obj, err := w.decodeObject(res.Node) if err != nil { utilruntime.HandleError(fmt.Errorf("failure to decode api object: %v\n'%v' from %#v %#v", err, string(res.Node.Value), res, res.Node)) // TODO: expose an error through watch.Interface? // Ignore this value. If we stop the watch on a bad value, a client that uses // the resourceVersion to resume will never be able to get past a bad value. return } if !w.filter(obj) { return } action := watch.Added if res.Node.ModifiedIndex != res.Node.CreatedIndex { action = watch.Modified } w.emit(watch.Event{ Type: action, Object: obj, }) }
// Loop infinitely, processing all service updates provided by the queue. func (s *ServiceController) watchServices(serviceQueue *cache.DeltaFIFO) { for { serviceQueue.Pop(func(obj interface{}) error { deltas, ok := obj.(cache.Deltas) if !ok { runtime.HandleError(fmt.Errorf("Received object from service watcher that wasn't Deltas: %+v", obj)) return nil } delta := deltas.Newest() if delta == nil { runtime.HandleError(fmt.Errorf("Received nil delta from watcher queue.")) return nil } err, retryDelay := s.processDelta(delta) if retryDelay != 0 { // Add the failed service back to the queue so we'll retry it. runtime.HandleError(fmt.Errorf("Failed to process service delta. Retrying in %s: %v", retryDelay, err)) go func(deltas cache.Deltas, delay time.Duration) { time.Sleep(delay) if err := serviceQueue.AddIfNotPresent(deltas); err != nil { runtime.HandleError(fmt.Errorf("Error requeuing service delta - will not retry: %v", err)) } }(deltas, retryDelay) } else if err != nil { runtime.HandleError(fmt.Errorf("Failed to process service delta. Not retrying: %v", err)) } return nil }) } }
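// A standalone sketch (not from the original source; the queue type and processItem are
// hypothetical) of the requeue-with-delay pattern used above: when processing fails with
// a retry delay, the item is re-added from a separate goroutine after sleeping, so the
// consumer loop itself never blocks on a retry.
func consumeWithRetry(queue chan string, processItem func(string) (error, time.Duration)) {
	for item := range queue {
		err, retryDelay := processItem(item)
		if retryDelay != 0 {
			go func(item string, delay time.Duration) {
				time.Sleep(delay)
				queue <- item // retry later without blocking the consumer
			}(item, retryDelay)
		} else if err != nil {
			fmt.Printf("failed to process %q, not retrying: %v\n", item, err)
		}
	}
}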
// UpgradeResponse upgrades an HTTP response to one that supports multiplexed // streams. newStreamHandler will be called synchronously whenever the // other end of the upgraded connection creates a new stream. func (u responseUpgrader) UpgradeResponse(w http.ResponseWriter, req *http.Request, newStreamHandler httpstream.NewStreamHandler) httpstream.Connection { connectionHeader := strings.ToLower(req.Header.Get(httpstream.HeaderConnection)) upgradeHeader := strings.ToLower(req.Header.Get(httpstream.HeaderUpgrade)) if !strings.Contains(connectionHeader, strings.ToLower(httpstream.HeaderUpgrade)) || !strings.Contains(upgradeHeader, strings.ToLower(HeaderSpdy31)) { w.WriteHeader(http.StatusBadRequest) fmt.Fprintf(w, "unable to upgrade: missing upgrade headers in request: %#v", req.Header) return nil } hijacker, ok := w.(http.Hijacker) if !ok { w.WriteHeader(http.StatusInternalServerError) fmt.Fprintf(w, "unable to upgrade: unable to hijack response") return nil } w.Header().Add(httpstream.HeaderConnection, httpstream.HeaderUpgrade) w.Header().Add(httpstream.HeaderUpgrade, HeaderSpdy31) w.WriteHeader(http.StatusSwitchingProtocols) conn, _, err := hijacker.Hijack() if err != nil { runtime.HandleError(fmt.Errorf("unable to upgrade: error hijacking response: %v", err)) return nil } spdyConn, err := NewServerConnection(conn, newStreamHandler) if err != nil { runtime.HandleError(fmt.Errorf("unable to upgrade: error creating SPDY server connection: %v", err)) return nil } return spdyConn }
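// A hedged usage sketch: a plain net/http handler can hand its ResponseWriter/Request
// pair to the upgrader above and then work with the returned multiplexed connection.
// The handler body and the echo-style stream handling are illustrative assumptions, not
// code from the source.
func exampleSpdyHandler(w http.ResponseWriter, req *http.Request) {
	upgrader := NewResponseUpgrader()
	conn := upgrader.UpgradeResponse(w, req, func(stream httpstream.Stream, replySent <-chan struct{}) error {
		// Each stream the client opens arrives here; echoing it back is for illustration only.
		go io.Copy(stream, stream)
		return nil
	})
	if conn == nil {
		// UpgradeResponse has already written an error response to the client.
		return
	}
	defer conn.Close()
	<-conn.CloseChan()
}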
// watchHandler watches w and keeps *resourceVersion up to date. func (r *Reflector) watchHandler(w watch.Interface, resourceVersion *string, errc chan error, stopCh <-chan struct{}) error { start := time.Now() eventCount := 0 // Stopping the watcher should be idempotent and if we return from this function there's no way // we're coming back in with the same watch interface. defer w.Stop() loop: for { select { case <-stopCh: return errorStopRequested case err := <-errc: return err case event, ok := <-w.ResultChan(): if !ok { break loop } if event.Type == watch.Error { return apierrs.FromObject(event.Object) } if e, a := r.expectedType, reflect.TypeOf(event.Object); e != nil && e != a { utilruntime.HandleError(fmt.Errorf("%s: expected type %v, but watch event object had type %v", r.name, e, a)) continue } meta, err := meta.Accessor(event.Object) if err != nil { utilruntime.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", r.name, event)) continue } newResourceVersion := meta.GetResourceVersion() switch event.Type { case watch.Added: r.store.Add(event.Object) case watch.Modified: r.store.Update(event.Object) case watch.Deleted: // TODO: Will any consumers need access to the "last known // state", which is passed in event.Object? If so, may need // to change this. r.store.Delete(event.Object) default: utilruntime.HandleError(fmt.Errorf("%s: unable to understand watch event %#v", r.name, event)) } *resourceVersion = newResourceVersion r.setLastSyncResourceVersion(newResourceVersion) eventCount++ } } watchDuration := time.Now().Sub(start) if watchDuration < 1*time.Second && eventCount == 0 { glog.V(4).Infof("%s: Unexpected watch close - watch lasted less than a second and no items received", r.name) return errors.New("very short watch") } glog.V(4).Infof("%s: Watch close - %v total %v items received", r.name, r.expectedType, eventCount) return nil }
// HandleWS implements a websocket handler. func (s *WatchServer) HandleWS(ws *websocket.Conn) { defer ws.Close() done := make(chan struct{}) go wsstream.IgnoreReceives(ws, 0) var unknown runtime.Unknown internalEvent := &versioned.InternalEvent{} buf := &bytes.Buffer{} streamBuf := &bytes.Buffer{} ch := s.watching.ResultChan() for { select { case <-done: s.watching.Stop() return case event, ok := <-ch: if !ok { // End of results. return } obj := event.Object s.fixup(obj) if err := s.embeddedEncoder.Encode(obj, buf); err != nil { // unexpected error utilruntime.HandleError(fmt.Errorf("unable to encode watch object: %v", err)) return } // ContentType is not required here because we are defaulting to the serializer // type unknown.Raw = buf.Bytes() event.Object = &unknown // the internal event will be versioned by the encoder *internalEvent = versioned.InternalEvent(event) if err := s.encoder.Encode(internalEvent, streamBuf); err != nil { // encoding error utilruntime.HandleError(fmt.Errorf("unable to encode event: %v", err)) s.watching.Stop() return } if s.useTextFraming { if err := websocket.Message.Send(ws, streamBuf.String()); err != nil { // Client disconnect. s.watching.Stop() return } } else { if err := websocket.Message.Send(ws, streamBuf.Bytes()); err != nil { // Client disconnect. s.watching.Stop() return } } buf.Reset() streamBuf.Reset() } } }
// maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating // that should not be gracefully terminated. func (nc *NodeController) maybeDeleteTerminatingPod(obj interface{}) { pod, ok := obj.(*api.Pod) if !ok { return } // consider only terminating pods if pod.DeletionTimestamp == nil { return } // delete terminating pods that have not yet been scheduled if len(pod.Spec.NodeName) == 0 { utilruntime.HandleError(nc.forcefullyDeletePod(pod)) return } nodeObj, found, err := nc.nodeStore.GetByKey(pod.Spec.NodeName) if err != nil { // this can only happen if the Store.KeyFunc has a problem creating // a key for the pod. If it happens once, it will happen again so // don't bother requeuing the pod. utilruntime.HandleError(err) return } // delete terminating pods that have been scheduled on // nonexistent nodes if !found { glog.Warningf("Unable to find Node: %v, deleting all assigned Pods.", pod.Spec.NodeName) utilruntime.HandleError(nc.forcefullyDeletePod(pod)) return } // delete terminating pods that have been scheduled on // nodes that do not support graceful termination // TODO(mikedanese): this can be removed when we no longer // guarantee backwards compatibility of master API to kubelets with // versions less than 1.1.0 node := nodeObj.(*api.Node) v, err := version.Parse(node.Status.NodeInfo.KubeletVersion) if err != nil { glog.V(0).Infof("couldn't parse version %q of kubelet: %v", node.Status.NodeInfo.KubeletVersion, err) utilruntime.HandleError(nc.forcefullyDeletePod(pod)) return } if gracefulDeletionVersion.GT(v) { utilruntime.HandleError(nc.forcefullyDeletePod(pod)) return } }
func (w *etcdWatcher) sendModify(res *etcd.Response) { if res.Node == nil { glog.Errorf("unexpected nil node: %#v", res) return } if w.include != nil && !w.include(res.Node.Key) { return } curObj, err := w.decodeObject(res.Node) if err != nil { utilruntime.HandleError(fmt.Errorf("failure to decode api object: %v\n'%v' from %#v %#v", err, string(res.Node.Value), res, res.Node)) // TODO: expose an error through watch.Interface? // Ignore this value. If we stop the watch on a bad value, a client that uses // the resourceVersion to resume will never be able to get past a bad value. return } curObjPasses := w.filter(curObj) oldObjPasses := false var oldObj runtime.Object if res.PrevNode != nil && res.PrevNode.Value != "" { // Ignore problems reading the old object. if oldObj, err = w.decodeObject(res.PrevNode); err == nil { if err := w.versioner.UpdateObject(oldObj, res.Node.ModifiedIndex); err != nil { utilruntime.HandleError(fmt.Errorf("failure to version api object (%d) %#v: %v", res.Node.ModifiedIndex, oldObj, err)) } oldObjPasses = w.filter(oldObj) } } // Some changes to an object may cause it to start or stop matching a filter. // We need to report those as adds/deletes. So we have to check both the previous // and current value of the object. switch { case curObjPasses && oldObjPasses: w.emit(watch.Event{ Type: watch.Modified, Object: curObj, }) case curObjPasses && !oldObjPasses: w.emit(watch.Event{ Type: watch.Added, Object: curObj, }) case !curObjPasses && oldObjPasses: w.emit(watch.Event{ Type: watch.Deleted, Object: oldObj, }) } // Do nothing if neither new nor old object passed the filter. }
func getPidsForProcess(name, pidFile string) ([]int, error) { if len(pidFile) > 0 { if pid, err := getPidFromPidFile(pidFile); err == nil { return []int{pid}, nil } else { // log the error and fall back to pidof runtime.HandleError(err) } } out, err := exec.Command("pidof", name).Output() if err != nil { return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err) } // The output of pidof is a list of pids. pids := []int{} for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") { pid, err := strconv.Atoi(pidStr) if err != nil { continue } pids = append(pids, pid) } return pids, nil }
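// Illustrative call site (the process name and pid-file path below are made-up examples):
// the pid file is preferred when one is configured; otherwise getPidsForProcess falls
// back to scanning by name via pidof, as shown above.
func examplePidLookup() {
	pids, err := getPidsForProcess("dockerd", "/var/run/docker.pid")
	if err != nil {
		fmt.Printf("could not locate process: %v\n", err)
		return
	}
	fmt.Printf("found pids: %v\n", pids)
}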
func recordToSink(sink EventSink, event *api.Event, eventCorrelator *EventCorrelator, randGen *rand.Rand, sleepDuration time.Duration) { // Make a copy before modification, because there could be multiple listeners. // Events are safe to copy like this. eventCopy := *event event = &eventCopy result, err := eventCorrelator.EventCorrelate(event) if err != nil { utilruntime.HandleError(err) } if result.Skip { return } tries := 0 for { if recordEvent(sink, result.Event, result.Patch, result.Event.Count > 1, eventCorrelator) { break } tries++ if tries >= maxTriesPerEvent { glog.Errorf("Unable to write event '%#v' (retry limit exceeded!)", event) break } // Randomize the first sleep so that various clients won't all be // synced up if the master goes down. if tries == 1 { time.Sleep(time.Duration(float64(sleepDuration) * randGen.Float64())) } else { time.Sleep(sleepDuration) } } }
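// A minimal sketch of the backoff policy applied above, extracted here only for
// illustration (the helper itself is not part of the original source): the first retry
// sleeps a random fraction of sleepDuration so that many clients restarting together do
// not retry in lockstep; subsequent retries use the full duration.
func retrySleep(tries int, sleepDuration time.Duration, randGen *rand.Rand) time.Duration {
	if tries == 1 {
		return time.Duration(float64(sleepDuration) * randGen.Float64())
	}
	return sleepDuration
}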
func (gcc *PodGCController) gc() { terminatedPods, _ := gcc.podStore.List(labels.Everything()) terminatedPodCount := len(terminatedPods) sort.Sort(byCreationTimestamp(terminatedPods)) deleteCount := terminatedPodCount - gcc.threshold if deleteCount > terminatedPodCount { deleteCount = terminatedPodCount } if deleteCount > 0 { glog.Infof("garbage collecting %v pods", deleteCount) } var wait sync.WaitGroup for i := 0; i < deleteCount; i++ { wait.Add(1) go func(namespace string, name string) { defer wait.Done() if err := gcc.deletePod(namespace, name); err != nil { // ignore not founds defer utilruntime.HandleError(err) } }(terminatedPods[i].Namespace, terminatedPods[i].Name) } wait.Wait() }
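// Standalone sketch of the fan-out used by gc above (the generic helper and its names
// are assumptions; the real deletePod takes a namespace and a name): one goroutine per
// deletion and a WaitGroup so the caller blocks until every attempt has finished, with
// per-item errors reported rather than aborting the batch.
func deleteAllSketch(keys []string, deleteOne func(string) error) {
	var wg sync.WaitGroup
	for _, key := range keys {
		wg.Add(1)
		go func(key string) {
			defer wg.Done()
			if err := deleteOne(key); err != nil {
				fmt.Printf("deleting %q failed: %v\n", key, err)
			}
		}(key)
	}
	wg.Wait()
}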
// RunUntil starts a watch and handles watch events. Will restart the watch if it is closed. // RunUntil starts a goroutine and returns immediately. It will exit when stopCh is closed. func (r *Reflector) RunUntil(stopCh <-chan struct{}) { glog.V(3).Infof("Starting reflector %v (%s) from %s", r.expectedType, r.resyncPeriod, r.name) go wait.Until(func() { if err := r.ListAndWatch(stopCh); err != nil { utilruntime.HandleError(err) } }, r.period, stopCh) }
// Run starts a watch and handles watch events. Will restart the watch if it is closed. // Run starts a goroutine and returns immediately. func (r *Reflector) Run() { glog.V(3).Infof("Starting reflector %v (%s) from %s", r.expectedType, r.resyncPeriod, r.name) go wait.Until(func() { if err := r.ListAndWatch(wait.NeverStop); err != nil { utilruntime.HandleError(err) } }, r.period, wait.NeverStop) }
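// Both Run and RunUntil lean on wait.Until for their restart loop. As a rough sketch of
// those semantics using only the standard library (an assumption, not the real
// implementation of wait.Until): run f, pause for period, repeat, and return as soon as
// stopCh is closed.
func untilSketch(f func(), period time.Duration, stopCh <-chan struct{}) {
	for {
		select {
		case <-stopCh:
			return
		default:
		}
		f()
		select {
		case <-stopCh:
			return
		case <-time.After(period):
		}
	}
}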
// cleanupOrphanedPods deletes pods that are bound to nodes that don't // exist. func (nc *NodeController) cleanupOrphanedPods() { pods, err := nc.podStore.List(labels.Everything()) if err != nil { utilruntime.HandleError(err) return } for _, pod := range pods { if pod.Spec.NodeName == "" { continue } if _, exists, _ := nc.nodeStore.Store.GetByKey(pod.Spec.NodeName); exists { continue } if err := nc.forcefullyDeletePod(pod); err != nil { utilruntime.HandleError(err) } } }
// ObjectReplenishmentDeleteFunc will replenish on every delete func ObjectReplenishmentDeleteFunc(options *ReplenishmentControllerOptions) func(obj interface{}) { return func(obj interface{}) { metaObject, err := meta.Accessor(obj) if err != nil { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { glog.Errorf("replenishment controller could not get object from tombstone %+v, could take up to %v before quota is replenished", obj, options.ResyncPeriod()) utilruntime.HandleError(err) return } metaObject, err = meta.Accessor(tombstone.Obj) if err != nil { glog.Errorf("replenishment controller tombstone contained object that is not a meta %+v, could take up to %v before quota is replenished", tombstone.Obj, options.ResyncPeriod()) utilruntime.HandleError(err) return } } options.ReplenishmentFunc(options.GroupKind, metaObject.GetNamespace(), nil) } }
func (w *etcdWatcher) sendResult(res *etcd.Response) { switch res.Action { case EtcdCreate, EtcdGet: w.sendAdd(res) case EtcdSet, EtcdCAS: w.sendModify(res) case EtcdDelete, EtcdExpire, EtcdCAD: w.sendDelete(res) default: utilruntime.HandleError(fmt.Errorf("unknown action: %v", res.Action)) } }
func (*v1ProtocolHandler) waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error) { ctx := &context{} receivedStreams := 0 replyChan := make(chan struct{}) stop := make(chan struct{}) defer close(stop) WaitForStreams: for { select { case stream := <-streams: streamType := stream.Headers().Get(api.StreamType) switch streamType { case api.StreamTypeError: ctx.errorStream = stream // This defer statement shouldn't be here, but due to previous refactoring, it ended up in // here. This is what 1.0.x kubelets do, so we're retaining that behavior. This is fixed in // the v2ProtocolHandler. defer stream.Reset() go waitStreamReply(stream.replySent, replyChan, stop) case api.StreamTypeStdin: ctx.stdinStream = stream go waitStreamReply(stream.replySent, replyChan, stop) case api.StreamTypeStdout: ctx.stdoutStream = stream go waitStreamReply(stream.replySent, replyChan, stop) case api.StreamTypeStderr: ctx.stderrStream = stream go waitStreamReply(stream.replySent, replyChan, stop) default: runtime.HandleError(fmt.Errorf("Unexpected stream type: %q", streamType)) } case <-replyChan: receivedStreams++ if receivedStreams == expectedStreams { break WaitForStreams } case <-expired: // TODO find a way to return the error to the user. Maybe use a separate // stream to report errors? return nil, errors.New("timed out waiting for client to create streams") } } if ctx.stdinStream != nil { ctx.stdinStream.Close() } return ctx, nil }
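// waitStreamReply is not included in this excerpt. Based on how it is invoked above, a
// plausible minimal sketch (an assumption, not the real helper) forwards one signal to
// notify once the stream's reply has been sent, unless the handler is shutting down.
func waitStreamReplySketch(replySent <-chan struct{}, notify chan<- struct{}, stop <-chan struct{}) {
	select {
	case <-replySent:
		select {
		case notify <- struct{}{}:
		case <-stop:
		}
	case <-stop:
	}
}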
// ServeAttach handles requests to attach to a container. After creating/receiving the required // streams, it delegates the actual attaching to attacher. func ServeAttach(w http.ResponseWriter, req *http.Request, attacher Attacher, podName string, uid types.UID, container string, idleTimeout, streamCreationTimeout time.Duration, supportedProtocols []string) { ctx, ok := createStreams(req, w, supportedProtocols, idleTimeout, streamCreationTimeout) if !ok { // error is handled by createStreams return } defer ctx.conn.Close() err := attacher.AttachContainer(podName, uid, container, ctx.stdinStream, ctx.stdoutStream, ctx.stderrStream, ctx.tty) if err != nil { msg := fmt.Sprintf("error attaching to container: %v", err) runtime.HandleError(errors.New(msg)) fmt.Fprint(ctx.errorStream, msg) } }
// errorJSONFatal renders an error to the response, and if codec fails will render plaintext. // Returns the HTTP status code of the error. func errorJSONFatal(err error, codec runtime.Encoder, w http.ResponseWriter) int { utilruntime.HandleError(fmt.Errorf("apiserver was unable to write a JSON response: %v", err)) status := errToAPIStatus(err) code := int(status.Code) output, err := runtime.Encode(codec, status) if err != nil { w.WriteHeader(code) fmt.Fprintf(w, "%s: %s", status.Reason, status.Message) return code } w.Header().Set("Content-Type", "application/json") w.WriteHeader(code) w.Write(output) return code }
func (*v2ProtocolHandler) waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error) { ctx := &context{} receivedStreams := 0 replyChan := make(chan struct{}) stop := make(chan struct{}) defer close(stop) WaitForStreams: for { select { case stream := <-streams: streamType := stream.Headers().Get(api.StreamType) switch streamType { case api.StreamTypeError: ctx.errorStream = stream go waitStreamReply(stream.replySent, replyChan, stop) case api.StreamTypeStdin: ctx.stdinStream = stream go waitStreamReply(stream.replySent, replyChan, stop) case api.StreamTypeStdout: ctx.stdoutStream = stream go waitStreamReply(stream.replySent, replyChan, stop) case api.StreamTypeStderr: ctx.stderrStream = stream go waitStreamReply(stream.replySent, replyChan, stop) default: runtime.HandleError(fmt.Errorf("Unexpected stream type: %q", streamType)) } case <-replyChan: receivedStreams++ if receivedStreams == expectedStreams { break WaitForStreams } case <-expired: // TODO find a way to return the error to the user. Maybe use a separate // stream to report errors? return nil, errors.New("timed out waiting for client to create streams") } } return ctx, nil }
// etcdGetInitialWatchState turns an etcd Get request into a watch equivalent func etcdGetInitialWatchState(ctx context.Context, client etcd.KeysAPI, key string, recursive bool, quorum bool, incoming chan<- *etcd.Response) (resourceVersion uint64, err error) { opts := etcd.GetOptions{ Recursive: recursive, Sort: false, Quorum: quorum, } resp, err := client.Get(ctx, key, &opts) if err != nil { if !etcdutil.IsEtcdNotFound(err) { utilruntime.HandleError(fmt.Errorf("watch was unable to retrieve the current index for the provided key (%q): %v", key, err)) return resourceVersion, toStorageErr(err, key, 0) } if etcdError, ok := err.(etcd.Error); ok { resourceVersion = etcdError.Index } return resourceVersion, nil } resourceVersion = resp.Index convertRecursiveResponse(resp.Node, resp, incoming) return }
// serveWatch handles serving requests to the server // TODO: the functionality in this method and in WatchServer.Serve is not cleanly decoupled. func serveWatch(watcher watch.Interface, scope RequestScope, req *restful.Request, res *restful.Response, timeout time.Duration) { // negotiate for the stream serializer serializer, err := negotiateOutputStreamSerializer(req.Request, scope.Serializer) if err != nil { scope.err(err, res.ResponseWriter, req.Request) return } if serializer.Framer == nil { scope.err(fmt.Errorf("no framer defined for %q available for embedded encoding", serializer.MediaType), res.ResponseWriter, req.Request) return } encoder := scope.Serializer.EncoderForVersion(serializer.Serializer, scope.Kind.GroupVersion()) useTextFraming := serializer.EncodesAsText // find the embedded serializer matching the media type embeddedEncoder := scope.Serializer.EncoderForVersion(serializer.Embedded.Serializer, scope.Kind.GroupVersion()) server := &WatchServer{ watching: watcher, scope: scope, useTextFraming: useTextFraming, mediaType: serializer.MediaType, framer: serializer.Framer, encoder: encoder, embeddedEncoder: embeddedEncoder, fixup: func(obj runtime.Object) { if err := setSelfLink(obj, req, scope.Namer); err != nil { utilruntime.HandleError(fmt.Errorf("failed to set link for object %v: %v", reflect.TypeOf(obj), err)) } }, t: &realTimeoutFactory{timeout}, } server.ServeHTTP(res.ResponseWriter, req.Request) }
// worker runs a worker thread that just dequeues items, processes them, and marks them done. // It enforces that the syncHandler is never invoked concurrently with the same key. func (rq *ResourceQuotaController) worker() { workFunc := func() bool { key, quit := rq.queue.Get() if quit { return true } defer rq.queue.Done(key) err := rq.syncHandler(key.(string)) if err == nil { rq.queue.Forget(key) return false } utilruntime.HandleError(err) rq.queue.AddRateLimited(key) return false } for { if quit := workFunc(); quit { glog.Infof("resource quota controller worker shutting down") return } } }
// errToAPIStatus converts an error to an unversioned.Status object. func errToAPIStatus(err error) *unversioned.Status { switch t := err.(type) { case statusError: status := t.Status() if len(status.Status) == 0 { status.Status = unversioned.StatusFailure } if status.Code == 0 { switch status.Status { case unversioned.StatusSuccess: status.Code = http.StatusOK case unversioned.StatusFailure: status.Code = http.StatusInternalServerError } } //TODO: check for invalid responses return &status default: status := http.StatusInternalServerError switch { //TODO: replace me with NewConflictErr case storage.IsTestFailed(err): status = http.StatusConflict } // Log errors that were not converted to an error status // by REST storage - these typically indicate programmer // error by not using pkg/api/errors, or unexpected failure // cases. runtime.HandleError(fmt.Errorf("apiserver received an error that is not an unversioned.Status: %v", err)) return &unversioned.Status{ Status: unversioned.StatusFailure, Code: int32(status), Reason: unversioned.StatusReasonUnknown, Message: err.Error(), } } }
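// The statusError type asserted on above is an interface; its definition is not part of
// this excerpt, but it is essentially the following (a sketch of the assumed shape):
type statusErrorSketch interface {
	Status() unversioned.Status
}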
// write renders a returned runtime.Object to the response as a stream or an encoded object. If the object // returned by the response implements rest.ResourceStreamer that interface will be used to render the // response. The Accept header and current API version will be passed in, and the output will be copied // directly to the response body. If content type is returned it is used, otherwise the content type will // be "application/octet-stream". All other objects are sent to standard JSON serialization. func write(statusCode int, gv unversioned.GroupVersion, s runtime.NegotiatedSerializer, object runtime.Object, w http.ResponseWriter, req *http.Request) { if stream, ok := object.(rest.ResourceStreamer); ok { out, flush, contentType, err := stream.InputStream(gv.String(), req.Header.Get("Accept")) if err != nil { errorNegotiated(err, s, gv, w, req) return } if out == nil { // No output provided - return StatusNoContent w.WriteHeader(http.StatusNoContent) return } defer out.Close() if wsstream.IsWebSocketRequest(req) { r := wsstream.NewReader(out, true) if err := r.Copy(w, req); err != nil { utilruntime.HandleError(fmt.Errorf("error encountered while streaming results via websocket: %v", err)) } return } if len(contentType) == 0 { contentType = "application/octet-stream" } w.Header().Set("Content-Type", contentType) w.WriteHeader(statusCode) writer := w.(io.Writer) if flush { writer = flushwriter.Wrap(w) } io.Copy(writer, out) return } writeNegotiated(s, gv, w, req, statusCode, object) }
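// flushwriter.Wrap is referenced above but not shown. A minimal sketch of the idea (an
// assumption, not the real package): wrap the ResponseWriter so every Write is followed
// by a Flush, letting streamed bytes reach the client immediately instead of waiting in
// the response buffer.
type flushOnWrite struct {
	w io.Writer
	f http.Flusher
}

func (fw flushOnWrite) Write(p []byte) (int, error) {
	n, err := fw.w.Write(p)
	fw.f.Flush()
	return n, err
}

func wrapFlushSketch(w http.ResponseWriter) io.Writer {
	if f, ok := w.(http.Flusher); ok {
		return flushOnWrite{w: w, f: f}
	}
	return w
}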
// manageJob is the core method responsible for managing the number of running // pods according to what is specified in the job.Spec. func (jm *JobController) manageJob(activePods []*api.Pod, succeeded int32, job *batch.Job) int32 { var activeLock sync.Mutex active := int32(len(activePods)) parallelism := *job.Spec.Parallelism jobKey, err := controller.KeyFunc(job) if err != nil { glog.Errorf("Couldn't get key for job %#v: %v", job, err) return 0 } if active > parallelism { diff := active - parallelism jm.expectations.ExpectDeletions(jobKey, int(diff)) glog.V(4).Infof("Too many pods running job %q, need %d, deleting %d", jobKey, parallelism, diff) // Sort the pods in the order such that not-ready < ready, unscheduled // < scheduled, and pending < running. This ensures that we delete pods // in the earlier stages whenever possible. sort.Sort(controller.ActivePods(activePods)) active -= diff wait := sync.WaitGroup{} wait.Add(int(diff)) for i := int32(0); i < diff; i++ { go func(ix int32) { defer wait.Done() if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, job); err != nil { defer utilruntime.HandleError(err) // Decrement the expected number of deletes because the informer won't observe this deletion jm.expectations.DeletionObserved(jobKey) activeLock.Lock() active++ activeLock.Unlock() } }(i) } wait.Wait() } else if active < parallelism { wantActive := int32(0) if job.Spec.Completions == nil { // Job does not specify a number of completions. Therefore, number active // should be equal to parallelism, unless the job has seen at least // one success, in which case leave whatever is running, running. if succeeded > 0 { wantActive = active } else { wantActive = parallelism } } else { // Job specifies a specific number of completions. Therefore, number // active should not ever exceed number of remaining completions. wantActive = *job.Spec.Completions - succeeded if wantActive > parallelism { wantActive = parallelism } } diff := wantActive - active if diff < 0 { glog.Errorf("More active than wanted: job %q, want %d, have %d", jobKey, wantActive, active) diff = 0 } jm.expectations.ExpectCreations(jobKey, int(diff)) glog.V(4).Infof("Too few pods running job %q, need %d, creating %d", jobKey, wantActive, diff) active += diff wait := sync.WaitGroup{} wait.Add(int(diff)) for i := int32(0); i < diff; i++ { go func() { defer wait.Done() if err := jm.podControl.CreatePods(job.Namespace, &job.Spec.Template, job); err != nil { defer utilruntime.HandleError(err) // Decrement the expected number of creates because the informer won't observe this pod jm.expectations.CreationObserved(jobKey) activeLock.Lock() active-- activeLock.Unlock() } }() } wait.Wait() } return active }
// syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning // it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked // concurrently with the same key. func (jm *JobController) syncJob(key string) error { startTime := time.Now() defer func() { glog.V(4).Infof("Finished syncing job %q (%v)", key, time.Now().Sub(startTime)) }() if !jm.podStoreSynced() { // Sleep so we give the pod reflector goroutine a chance to run. time.Sleep(replicationcontroller.PodStoreSyncedPollPeriod) glog.V(4).Infof("Waiting for pods controller to sync, requeuing job %v", key) jm.queue.Add(key) return nil } obj, exists, err := jm.jobStore.Store.GetByKey(key) if !exists { glog.V(4).Infof("Job has been deleted: %v", key) jm.expectations.DeleteExpectations(key) return nil } if err != nil { glog.Errorf("Unable to retrieve job %v from store: %v", key, err) jm.queue.Add(key) return err } job := *obj.(*batch.Job) // Check the expectations of the job before counting active pods, otherwise a new pod can sneak in // and update the expectations after we've retrieved active pods from the store. If a new pod enters // the store after we've checked the expectation, the job sync is just deferred till the next relist. jobKey, err := controller.KeyFunc(&job) if err != nil { glog.Errorf("Couldn't get key for job %#v: %v", job, err) return err } jobNeedsSync := jm.expectations.SatisfiedExpectations(jobKey) selector, _ := unversioned.LabelSelectorAsSelector(job.Spec.Selector) podList, err := jm.podStore.Pods(job.Namespace).List(selector) if err != nil { glog.Errorf("Error getting pods for job %q: %v", key, err) jm.queue.Add(key) return err } activePods := controller.FilterActivePods(podList.Items) active := int32(len(activePods)) succeeded, failed := getStatus(podList.Items) conditions := len(job.Status.Conditions) if job.Status.StartTime == nil { now := unversioned.Now() job.Status.StartTime = &now } // if job was finished previously, we don't want to redo the termination if isJobFinished(&job) { return nil } if pastActiveDeadline(&job) { // TODO: below code should be replaced with pod termination resulting in // pod failures, rather than killing pods. Unfortunately no such solution // exists at the moment. There's an open discussion on the topic at // https://github.com/kubernetes/kubernetes/issues/14602 which might give // some sort of solution to the above problem. // kill remaining active pods wait := sync.WaitGroup{} wait.Add(int(active)) for i := int32(0); i < active; i++ { go func(ix int32) { defer wait.Done() if err := jm.podControl.DeletePod(job.Namespace, activePods[ix].Name, &job); err != nil { defer utilruntime.HandleError(err) } }(i) } wait.Wait() // update status values accordingly failed += active active = 0 job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobFailed, "DeadlineExceeded", "Job was active longer than specified deadline")) jm.recorder.Event(&job, api.EventTypeNormal, "DeadlineExceeded", "Job was active longer than specified deadline") } else { if jobNeedsSync { active = jm.manageJob(activePods, succeeded, &job) } completions := succeeded complete := false if job.Spec.Completions == nil { // This type of job is complete when any pod exits with success. // Each pod is capable of // determining whether or not the entire Job is done. Subsequent pods are // not expected to fail, but if they do, the failure is ignored. Once any // pod succeeds, the controller waits for remaining pods to finish, and // then the job is complete. if succeeded > 0 && active == 0 { complete = true } } else { // Job specifies a number of completions. This type of job signals // success by having that number of successes. Since we do not // start more pods than there are remaining completions, there should // not be any remaining active pods once this count is reached. if completions >= *job.Spec.Completions { complete = true if active > 0 { jm.recorder.Event(&job, api.EventTypeWarning, "TooManyActivePods", "Too many active pods running after completion count reached") } if completions > *job.Spec.Completions { jm.recorder.Event(&job, api.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached") } } } if complete { job.Status.Conditions = append(job.Status.Conditions, newCondition(batch.JobComplete, "", "")) now := unversioned.Now() job.Status.CompletionTime = &now } } // no need to update the job if the status hasn't changed since last time if job.Status.Active != active || job.Status.Succeeded != succeeded || job.Status.Failed != failed || len(job.Status.Conditions) != conditions { job.Status.Active = active job.Status.Succeeded = succeeded job.Status.Failed = failed if err := jm.updateHandler(&job); err != nil { glog.Errorf("Failed to update job %v, requeuing. Error: %v", job.Name, err) jm.enqueueController(&job) } } return nil }
// monitorNodeStatus verifies that node statuses are constantly updated by the kubelet and, if not, // posts "NodeReady==ConditionUnknown". It also evicts all pods if the node is not ready or // not reachable for a long period of time. func (nc *NodeController) monitorNodeStatus() error { nodes, err := nc.kubeClient.Core().Nodes().List(api.ListOptions{}) if err != nil { return err } for _, node := range nodes.Items { if !nc.knownNodeSet.Has(node.Name) { glog.V(1).Infof("NodeController observed a new Node: %#v", node) nc.recordNodeEvent(node.Name, api.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in NodeController", node.Name)) nc.cancelPodEviction(node.Name) nc.knownNodeSet.Insert(node.Name) } } // If there's a difference between lengths of known Nodes and observed nodes // we must have removed some Node. if len(nc.knownNodeSet) != len(nodes.Items) { observedSet := make(sets.String) for _, node := range nodes.Items { observedSet.Insert(node.Name) } deleted := nc.knownNodeSet.Difference(observedSet) for nodeName := range deleted { glog.V(1).Infof("NodeController observed a Node deletion: %v", nodeName) nc.recordNodeEvent(nodeName, api.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from NodeController", nodeName)) nc.evictPods(nodeName) nc.knownNodeSet.Delete(nodeName) } } seenReady := false for i := range nodes.Items { var gracePeriod time.Duration var observedReadyCondition api.NodeCondition var currentReadyCondition *api.NodeCondition node := &nodes.Items[i] for rep := 0; rep < nodeStatusUpdateRetry; rep++ { gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node) if err == nil { break } name := node.Name node, err = nc.kubeClient.Core().Nodes().Get(name) if err != nil { glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name) break } } if err != nil { glog.Errorf("Update status of Node %v from NodeController exceeds retry count. "+ "Skipping - no pods will be evicted.", node.Name) continue } decisionTimestamp := nc.now() if currentReadyCondition != nil { // Check eviction timeout against decisionTimestamp if observedReadyCondition.Status == api.ConditionFalse && decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) { if nc.evictPods(node.Name) { glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout) } } if observedReadyCondition.Status == api.ConditionUnknown && decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) { if nc.evictPods(node.Name) { glog.V(4).Infof("Evicting pods on node %s: %v is later than %v + %v", node.Name, decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod) } } if observedReadyCondition.Status == api.ConditionTrue { // We do not treat a master node as a part of the cluster for network segmentation checking. if !system.IsMasterNode(node) { seenReady = true } if nc.cancelPodEviction(node.Name) { glog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name) } } // Report node event. if currentReadyCondition.Status != api.ConditionTrue && observedReadyCondition.Status == api.ConditionTrue { nc.recordNodeStatusChange(node, "NodeNotReady") if err = nc.markAllPodsNotReady(node.Name); err != nil { utilruntime.HandleError(fmt.Errorf("Unable to mark all pods NotReady on node %v: %v", node.Name, err)) } } // Check with the cloud provider to see if the node still exists. If it // doesn't, delete the node immediately. if currentReadyCondition.Status != api.ConditionTrue && nc.cloud != nil { exists, err := nc.nodeExistsInCloudProvider(node.Name) if err != nil { glog.Errorf("Error determining if node %v exists in cloud: %v", node.Name, err) continue } if !exists { glog.V(2).Infof("Deleting node (no longer present in cloud provider): %s", node.Name) nc.recordNodeEvent(node.Name, api.EventTypeNormal, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name)) go func(nodeName string) { defer utilruntime.HandleCrash() // Kubelet is not reporting and Cloud Provider says node // is gone. Delete it without worrying about grace // periods. if err := nc.forcefullyDeleteNode(nodeName); err != nil { glog.Errorf("Unable to forcefully delete node %q: %v", nodeName, err) } }(node.Name) continue } } } } // NC doesn't see any Ready Node. We assume that the network is segmented and Nodes cannot connect to the API server and // update their statuses. NC enters network segmentation mode and cancels all evictions in progress. if !seenReady { nc.networkSegmentationMode = true nc.stopAllPodEvictions() glog.V(2).Info("NodeController is entering network segmentation mode.") } else { if nc.networkSegmentationMode { nc.forceUpdateAllProbeTimes() nc.networkSegmentationMode = false glog.V(2).Info("NodeController exited network segmentation mode.") } } return nil }