// RunQingYuanService periodically updates the qingyuan service.
func (c *Controller) RunQingYuanService(ch chan struct{}) {
	util.Until(func() {
		if err := c.UpdateQingYuanService(); err != nil {
			util.HandleError(fmt.Errorf("unable to sync qingyuan service: %v", err))
		}
	}, c.EndpointInterval, ch)
}
// RunUntil starts the controller until the provided ch is closed.
func (c *Repair) RunUntil(ch chan struct{}) {
	util.Until(func() {
		if err := c.RunOnce(); err != nil {
			util.HandleError(err)
		}
	}, c.interval, ch)
}
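// Both loops above delegate their scheduling to util.Until. As a rough point
// of reference, here is a minimal sketch of such a helper, assuming the
// signature (f func(), period time.Duration, stopCh <-chan struct{}); the real
// util package may differ. It runs f repeatedly until the channel is closed:
func until(f func(), period time.Duration, stopCh <-chan struct{}) {
	for {
		// Bail out immediately if the stop channel is already closed.
		select {
		case <-stopCh:
			return
		default:
		}
		f()
		// Wait one period, but wake up early if asked to stop.
		select {
		case <-stopCh:
			return
		case <-time.After(period):
		}
	}
}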
// manageReplicas checks and updates replicas for the given replication controller.
func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller *api.ReplicationController) {
	diff := len(filteredPods) - controller.Spec.Replicas
	if diff < 0 {
		diff *= -1
		if diff > rm.burstReplicas {
			diff = rm.burstReplicas
		}
		rm.expectations.ExpectCreations(controller, diff)
		wait := sync.WaitGroup{}
		wait.Add(diff)
		glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
		for i := 0; i < diff; i++ {
			go func() {
				defer wait.Done()
				if err := rm.podControl.createReplica(controller.Namespace, controller); err != nil {
					// Decrement the expected number of creates because the informer won't observe this pod
					glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
					rm.expectations.CreationObserved(controller)
					util.HandleError(err)
				}
			}()
		}
		wait.Wait()
	} else if diff > 0 {
		if diff > rm.burstReplicas {
			diff = rm.burstReplicas
		}
		rm.expectations.ExpectDeletions(controller, diff)
		glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
		// No need to sort pods if we are about to delete all of them
		if controller.Spec.Replicas != 0 {
			// Sort the pods in the order such that not-ready < ready, unscheduled
			// < scheduled, and pending < running. This ensures that we delete pods
			// in the earlier stages whenever possible.
			sort.Sort(activePods(filteredPods))
		}
		wait := sync.WaitGroup{}
		wait.Add(diff)
		for i := 0; i < diff; i++ {
			go func(ix int) {
				defer wait.Done()
				if err := rm.podControl.deletePod(controller.Namespace, filteredPods[ix].Name); err != nil {
					// Decrement the expected number of deletes because the informer won't observe this deletion
					glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
					rm.expectations.DeletionObserved(controller)
					util.HandleError(err)
				}
			}(i)
		}
		wait.Wait()
	}
}
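// manageReplicas leans on rm.expectations to avoid acting on stale informer
// state: it records how many creates and deletes it has issued, then decrements
// the count as each one is observed (or is known to never arrive). A minimal
// sketch of that bookkeeping for a single controller, using sync/atomic; this
// is an illustration only, the real expectations type is keyed per controller
// and richer than this:
type expectationSketch struct {
	adds, dels int64
}

// ExpectCreations records that count creations are in flight.
func (e *expectationSketch) ExpectCreations(count int) {
	atomic.AddInt64(&e.adds, int64(count))
}

// CreationObserved notes one creation that was seen, or that will never arrive.
func (e *expectationSketch) CreationObserved() {
	atomic.AddInt64(&e.adds, -1)
}

// Fulfilled reports whether a new sync can safely proceed.
func (e *expectationSketch) Fulfilled() bool {
	return atomic.LoadInt64(&e.adds) <= 0 && atomic.LoadInt64(&e.dels) <= 0
}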
// errorJSONFatal renders an error to the response, and if the codec fails, renders plaintext.
// Returns the HTTP status code of the error.
func errorJSONFatal(err error, codec runtime.Codec, w http.ResponseWriter) int {
	util.HandleError(fmt.Errorf("apiserver was unable to write a JSON response: %v", err))
	status := errToAPIStatus(err)
	output, err := codec.Encode(status)
	if err != nil {
		w.WriteHeader(status.Code)
		fmt.Fprintf(w, "%s: %s", status.Reason, status.Message)
		return status.Code
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status.Code)
	w.Write(output)
	return status.Code
}
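// errorJSONFatal is a last resort: it should only run after an ordinary JSON
// write has already failed. A hypothetical caller (writeJSONOrFallback is an
// illustrative name, not a function defined in this package) might use it so:
func writeJSONOrFallback(obj runtime.Object, codec runtime.Codec, w http.ResponseWriter) {
	output, err := codec.Encode(obj)
	if err != nil {
		// Degrades to an api.Status body, or plaintext if even that fails to encode.
		errorJSONFatal(err, codec, w)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	w.Write(output)
}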
// Loop infinitely, processing all service updates provided by the queue.
func (s *ServiceController) watchServices(serviceQueue *cache.DeltaFIFO) {
	for {
		newItem := serviceQueue.Pop()
		deltas, ok := newItem.(cache.Deltas)
		if !ok {
			glog.Errorf("Received object from service watcher that wasn't Deltas: %+v", newItem)
			continue
		}
		delta := deltas.Newest()
		if delta == nil {
			glog.Errorf("Received nil delta from watcher queue.")
			continue
		}
		err, shouldRetry := s.processDelta(delta)
		if shouldRetry {
			// Add the failed service back to the queue so we'll retry it.
			glog.Errorf("Failed to process service delta. Retrying: %v", err)
			time.Sleep(processingRetryInterval)
			serviceQueue.AddIfNotPresent(deltas)
		} else if err != nil {
			util.HandleError(fmt.Errorf("Failed to process service delta. Not retrying: %v", err))
		}
	}
}
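// watchServices treats processDelta's (err, shouldRetry) pair as a contract:
// retryable failures go back on the queue, permanent ones are only logged. A
// hedged sketch of a method honoring that contract; ensureLoadBalancer is a
// hypothetical helper, and the real processDelta inspects the delta type as well:
func (s *ServiceController) processDeltaSketch(delta *cache.Delta) (error, bool) {
	service, ok := delta.Object.(*api.Service)
	if !ok {
		// A malformed object will never become valid, so retrying is pointless.
		return fmt.Errorf("delta contained object that wasn't a service: %+v", delta.Object), false
	}
	if err := s.ensureLoadBalancer(service); err != nil {
		// Cloud-provider hiccups are usually transient, so ask for a retry.
		return err, true
	}
	return nil, false
}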
// errToAPIStatus converts an error to an api.Status object.
func errToAPIStatus(err error) *api.Status {
	switch t := err.(type) {
	case statusError:
		status := t.Status()
		if len(status.Status) == 0 {
			status.Status = api.StatusFailure
		}
		if status.Code == 0 {
			switch status.Status {
			case api.StatusSuccess:
				status.Code = http.StatusOK
			case api.StatusFailure:
				status.Code = http.StatusInternalServerError
			}
		}
		// TODO: check for invalid responses
		return &status
	default:
		status := http.StatusInternalServerError
		switch {
		// TODO: replace me with NewConflictErr
		case tools.IsEtcdTestFailed(err):
			status = http.StatusConflict
		}
		// Log errors that were not converted to an error status
		// by REST storage - these typically indicate programmer
		// error by not using pkg/api/errors, or unexpected failure
		// cases.
		util.HandleError(fmt.Errorf("apiserver received an error that is not an api.Status: %v", err))
		return &api.Status{
			Status:  api.StatusFailure,
			Code:    status,
			Reason:  api.StatusReasonUnknown,
			Message: err.Error(),
		}
	}
}
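// The default branch above guarantees callers always receive a populated
// api.Status, even for plain Go errors. A small illustration, using only the
// standard errors package:
func illustrateErrToAPIStatus() {
	// A plain error takes the default branch: 500, StatusReasonUnknown.
	status := errToAPIStatus(errors.New("connection reset"))
	fmt.Println(status.Code, status.Reason, status.Message)
	// An etcd compare-and-swap failure would instead surface as 409 Conflict,
	// via the tools.IsEtcdTestFailed case in the switch above.
}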
// RunOnce verifies the state of the port allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) RunOnce() error {
	// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
	// or if they are executed against different leaders,
	// the ordering guarantee required to ensure no port is allocated twice is violated.
	// ListServices must return a ResourceVersion higher than the etcd index Get triggers,
	// and the release code must not release services that have had ports allocated but not yet been created.
	// See #8295

	// If the etcd server is not running, we should wait for some time and fail only then. This is
	// particularly important when we start the apiserver and etcd at the same time.
	var latest *api.RangeAllocation
	var err error
	for i := 0; i < 10; i++ {
		if latest, err = c.alloc.Get(); err != nil {
			time.Sleep(time.Second)
		} else {
			break
		}
	}
	if err != nil {
		return fmt.Errorf("unable to refresh the port block: %v", err)
	}

	ctx := api.WithNamespace(api.NewDefaultContext(), api.NamespaceAll)
	list, err := c.registry.ListServices(ctx)
	if err != nil {
		return fmt.Errorf("unable to refresh the port block: %v", err)
	}

	r := portallocator.NewPortAllocator(c.portRange)
	for i := range list.Items {
		svc := &list.Items[i]
		ports := service.CollectServiceNodePorts(svc)
		if len(ports) == 0 {
			continue
		}
		for _, port := range ports {
			switch err := r.Allocate(port); err {
			case nil:
			case portallocator.ErrAllocated:
				// TODO: send event
				// port is broken, reallocate
				util.HandleError(fmt.Errorf("the port %d for service %s/%s was assigned to multiple services; please recreate", port, svc.Name, svc.Namespace))
			case portallocator.ErrNotInRange:
				// TODO: send event
				// port is broken, reallocate
				util.HandleError(fmt.Errorf("the port %d for service %s/%s is not within the port range %v; please recreate", port, svc.Name, svc.Namespace, c.portRange))
			case portallocator.ErrFull:
				// TODO: send event
				return fmt.Errorf("the port range %v is full; you must widen the port range in order to create new services", c.portRange)
			default:
				return fmt.Errorf("unable to allocate port %d for service %s/%s due to an unknown error, exiting: %v", port, svc.Name, svc.Namespace, err)
			}
		}
	}

	err = r.Snapshot(latest)
	if err != nil {
		return fmt.Errorf("unable to persist the updated port allocations: %v", err)
	}
	if err := c.alloc.CreateOrUpdate(latest); err != nil {
		return fmt.Errorf("unable to persist the updated port allocations: %v", err)
	}
	return nil
}
// RunOnce verifies the state of the cluster IP allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) RunOnce() error {
	// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
	// or if they are executed against different leaders,
	// the ordering guarantee required to ensure no IP is allocated twice is violated.
	// ListServices must return a ResourceVersion higher than the etcd index Get triggers,
	// and the release code must not release services that have had IPs allocated but not yet been created.
	// See #8295

	// If the etcd server is not running, we should wait for some time and fail only then. This is
	// particularly important when we start the apiserver and etcd at the same time.
	var latest *api.RangeAllocation
	var err error
	for i := 0; i < 10; i++ {
		if latest, err = c.alloc.Get(); err != nil {
			time.Sleep(time.Second)
		} else {
			break
		}
	}
	if err != nil {
		return fmt.Errorf("unable to refresh the service IP block: %v", err)
	}

	ctx := api.WithNamespace(api.NewDefaultContext(), api.NamespaceAll)
	list, err := c.registry.ListServices(ctx)
	if err != nil {
		return fmt.Errorf("unable to refresh the service IP block: %v", err)
	}

	r := ipallocator.NewCIDRRange(c.network)
	for _, svc := range list.Items {
		if !api.IsServiceIPSet(&svc) {
			continue
		}
		ip := net.ParseIP(svc.Spec.ClusterIP)
		if ip == nil {
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not a valid IP; please recreate", svc.Spec.ClusterIP, svc.Name, svc.Namespace))
			continue
		}
		switch err := r.Allocate(ip); err {
		case nil:
		case ipallocator.ErrAllocated:
			// TODO: send event
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s was assigned to multiple services; please recreate", ip, svc.Name, svc.Namespace))
		case ipallocator.ErrNotInRange:
			// TODO: send event
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not within the service CIDR %s; please recreate", ip, svc.Name, svc.Namespace, c.network))
		case ipallocator.ErrFull:
			// TODO: send event
			return fmt.Errorf("the service CIDR %s is full; you must widen the CIDR in order to create new services", c.network)
		default:
			return fmt.Errorf("unable to allocate cluster IP %s for service %s/%s due to an unknown error, exiting: %v", ip, svc.Name, svc.Namespace, err)
		}
	}

	err = r.Snapshot(latest)
	if err != nil {
		return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
	}
	if err := c.alloc.CreateOrUpdate(latest); err != nil {
		return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
	}
	return nil
}
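// Both RunOnce implementations above begin with the same bounded wait for etcd
// to come up. A sketch of factoring that retry into a helper; getLatest is a
// hypothetical name, and both functions currently inline this logic:
func getLatest(get func() (*api.RangeAllocation, error)) (*api.RangeAllocation, error) {
	var latest *api.RangeAllocation
	var err error
	for i := 0; i < 10; i++ {
		if latest, err = get(); err == nil {
			return latest, nil
		}
		// etcd may still be starting; back off briefly before the next attempt.
		time.Sleep(time.Second)
	}
	return nil, err
}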