func testNotReachable(ip string, port int) {
	url := fmt.Sprintf("http://%s:%d", ip, port)
	if ip == "" {
		Failf("Got empty IP for non-reachability check (%s)", url)
	}
	if port == 0 {
		Failf("Got port==0 for non-reachability check (%s)", url)
	}

	desc := fmt.Sprintf("the url %s to be *not* reachable", url)
	By(fmt.Sprintf("Waiting up to %v for %s", podStartTimeout, desc))
	err := wait.PollImmediate(poll, podStartTimeout, func() (bool, error) {
		resp, err := httpGetNoConnectionPool(url)
		if err != nil {
			Logf("Successfully waited for %s", desc)
			return true, nil
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			Logf("Expecting %s to be unreachable but was reachable and got an error reading response: %v", url, err)
			return false, nil
		}
		Logf("Able to reach service %s when should no longer have been reachable, status: %q and body: %s", url, resp.Status, string(body))
		return false, nil
	})
	Expect(err).NotTo(HaveOccurred(), "Error waiting for %s", desc)
}
func testReachableInTime(ip string, port int, timeout time.Duration) bool {
	url := fmt.Sprintf("http://%s:%d", ip, port)
	if ip == "" {
		Failf("Got empty IP for reachability check (%s)", url)
		return false
	}
	if port == 0 {
		Failf("Got port==0 for reachability check (%s)", url)
		return false
	}

	desc := fmt.Sprintf("the url %s to be reachable", url)
	By(fmt.Sprintf("Waiting up to %v for %s", timeout, desc))
	start := time.Now()
	err := wait.PollImmediate(poll, timeout, func() (bool, error) {
		resp, err := httpGetNoConnectionPool(url)
		if err != nil {
			Logf("Got error waiting for reachability of %s: %v (%v)", url, err, time.Since(start))
			return false, nil
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			Logf("Got error reading response from %s: %v", url, err)
			return false, nil
		}
		if resp.StatusCode != 200 {
			return false, fmt.Errorf("received non-success return status %q trying to access %s; got body: %s", resp.Status, url, string(body))
		}
		if !strings.Contains(string(body), "test-webserver") {
			return false, fmt.Errorf("received response body without expected substring 'test-webserver': %s", string(body))
		}
		Logf("Successfully reached %v", url)
		return true, nil
	})
	if err != nil {
		Expect(err).NotTo(HaveOccurred(), "Error waiting for %s", desc)
		return false
	}
	return true
}
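// Illustrative only: a minimal sketch of how the two helpers above might be
// combined in an e2e flow. The host, port, and timeout values below are
// assumptions for the example, not taken from any real test.
func exampleReachabilityFlow() {
	host := "10.0.0.1" // hypothetical node or service IP
	port := 80         // hypothetical exposed port

	// Wait for the endpoint to start serving, then assert that it stops
	// serving (for instance, after the backing service has been deleted).
	if testReachableInTime(host, port, 2*time.Minute) {
		testNotReachable(host, port)
	}
}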
// Run runs the CMServer. This should never exit.
func (s *CMServer) Run(_ []string) error {
	if s.Kubeconfig == "" && s.Master == "" {
		glog.Warningf("Neither --kubeconfig nor --master was specified. Using default API client. This might not work.")
	}

	// This creates a client, first loading any specified kubeconfig
	// file, and then overriding the Master flag, if non-empty.
	kubeconfig, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
		&clientcmd.ClientConfigLoadingRules{ExplicitPath: s.Kubeconfig},
		&clientcmd.ConfigOverrides{ClusterInfo: clientcmdapi.Cluster{Server: s.Master}}).ClientConfig()
	if err != nil {
		return err
	}

	kubeconfig.QPS = 20.0
	kubeconfig.Burst = 30

	kubeClient, err := client.New(kubeconfig)
	if err != nil {
		glog.Fatalf("Invalid API configuration: %v", err)
	}

	go func() {
		mux := http.NewServeMux()
		healthz.InstallHandler(mux)
		if s.EnableProfiling {
			mux.HandleFunc("/debug/pprof/", pprof.Index)
			mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
			mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
		}
		mux.Handle("/metrics", prometheus.Handler())

		server := &http.Server{
			Addr:    net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)),
			Handler: mux,
		}
		glog.Fatal(server.ListenAndServe())
	}()

	go endpointcontroller.NewEndpointController(kubeClient, s.resyncPeriod).
		Run(s.ConcurrentEndpointSyncs, util.NeverStop)

	go replicationControllerPkg.NewReplicationManager(kubeClient, s.resyncPeriod, replicationControllerPkg.BurstReplicas).
		Run(s.ConcurrentRCSyncs, util.NeverStop)

	if s.TerminatedPodGCThreshold > 0 {
		go gc.New(kubeClient, s.resyncPeriod, s.TerminatedPodGCThreshold).
			Run(util.NeverStop)
	}

	cloud, err := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
	if err != nil {
		glog.Fatalf("Cloud provider could not be initialized: %v", err)
	}

	nodeController := nodecontroller.NewNodeController(cloud, kubeClient,
		s.PodEvictionTimeout, util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
		util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
		s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod, &s.ClusterCIDR, s.AllocateNodeCIDRs)
	nodeController.Run(s.NodeSyncPeriod)

	serviceController := servicecontroller.New(cloud, kubeClient, s.ClusterName)
	if err := serviceController.Run(s.ServiceSyncPeriod, s.NodeSyncPeriod); err != nil {
		glog.Errorf("Failed to start service controller: %v", err)
	}

	if s.AllocateNodeCIDRs {
		if cloud == nil {
			glog.Warning("allocate-node-cidrs is set, but no cloud provider specified. Will not manage routes.")
		} else if routes, ok := cloud.Routes(); !ok {
			glog.Warning("allocate-node-cidrs is set, but cloud provider does not support routes. Will not manage routes.")
		} else {
			routeController := routecontroller.New(routes, kubeClient, s.ClusterName, &s.ClusterCIDR)
			routeController.Run(s.NodeSyncPeriod)
		}
	}

	resourcequotacontroller.NewResourceQuotaController(kubeClient).Run(s.ResourceQuotaSyncPeriod)

	// If apiserver is not running we should wait for some time and fail only then. This is particularly
	// important when we start apiserver and controller manager at the same time.
	var versionStrings []string
	err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
		if versionStrings, err = client.ServerAPIVersions(kubeconfig); err == nil {
			return true, nil
		}
		glog.Errorf("Failed to get api versions from server: %v", err)
		return false, nil
	})
	if err != nil {
		glog.Fatalf("Failed to get api versions from server: %v", err)
	}
	versions := &unversioned.APIVersions{Versions: versionStrings}

	resourceMap, err := kubeClient.Discovery().ServerResources()
	if err != nil {
		glog.Fatalf("Failed to get supported resources from server: %v", err)
	}

	namespacecontroller.NewNamespaceController(kubeClient, versions, s.NamespaceSyncPeriod).Run()

	groupVersion := "extensions/v1beta1"
	resources, found := resourceMap[groupVersion]
	// TODO: this needs to be dynamic so users don't have to restart their controller manager if they change the apiserver
	if containsVersion(versions, groupVersion) && found {
		glog.Infof("Starting %s apis", groupVersion)
		if containsResource(resources, "horizontalpodautoscalers") {
			glog.Infof("Starting horizontal pod controller.")
			podautoscaler.NewHorizontalController(kubeClient, metrics.NewHeapsterMetricsClient(kubeClient)).
				Run(s.HorizontalPodAutoscalerSyncPeriod)
		}

		if containsResource(resources, "daemonsets") {
			glog.Infof("Starting daemon set controller")
			go daemon.NewDaemonSetsController(kubeClient, s.resyncPeriod).
				Run(s.ConcurrentDSCSyncs, util.NeverStop)
		}

		if containsResource(resources, "jobs") {
			glog.Infof("Starting job controller")
			go job.NewJobController(kubeClient, s.resyncPeriod).
				Run(s.ConcurrentJobSyncs, util.NeverStop)
		}

		if containsResource(resources, "deployments") {
			glog.Infof("Starting deployment controller")
			deployment.New(kubeClient).
				Run(s.DeploymentControllerSyncPeriod)
		}
	}

	pvclaimBinder := volumeclaimbinder.NewPersistentVolumeClaimBinder(kubeClient, s.PVClaimBinderSyncPeriod)
	pvclaimBinder.Run()

	pvRecycler, err := volumeclaimbinder.NewPersistentVolumeRecycler(kubeClient, s.PVClaimBinderSyncPeriod, ProbeRecyclableVolumePlugins(s.VolumeConfigFlags))
	if err != nil {
		glog.Fatalf("Failed to start persistent volume recycler: %+v", err)
	}
	pvRecycler.Run()

	var rootCA []byte

	if s.RootCAFile != "" {
		rootCA, err = ioutil.ReadFile(s.RootCAFile)
		if err != nil {
			return fmt.Errorf("error reading root-ca-file at %s: %v", s.RootCAFile, err)
		}
		if _, err := util.CertsFromPEM(rootCA); err != nil {
			return fmt.Errorf("error parsing root-ca-file at %s: %v", s.RootCAFile, err)
		}
	} else {
		rootCA = kubeconfig.CAData
	}

	if len(s.ServiceAccountKeyFile) > 0 {
		privateKey, err := serviceaccount.ReadPrivateKey(s.ServiceAccountKeyFile)
		if err != nil {
			glog.Errorf("Error reading key for service account token controller: %v", err)
		} else {
			serviceaccount.NewTokensController(
				kubeClient,
				serviceaccount.TokensControllerOptions{
					TokenGenerator: serviceaccount.JWTTokenGenerator(privateKey),
					RootCA:         rootCA,
				},
			).Run()
		}
	}

	serviceaccount.NewServiceAccountsController(
		kubeClient,
		serviceaccount.DefaultServiceAccountsControllerOptions(),
	).Run()

	select {}
}
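// Illustrative only: a plausible sketch of the containsVersion/containsResource
// helpers referenced in Run above. Their real definitions live elsewhere in this
// package and may differ in detail; this sketch only shows the membership check
// they are assumed to perform.
func containsVersion(versions *unversioned.APIVersions, version string) bool {
	for _, v := range versions.Versions {
		if v == version {
			return true
		}
	}
	return false
}

func containsResource(resources *unversioned.APIResourceList, resourceName string) bool {
	if resources == nil {
		return false
	}
	for _, r := range resources.APIResources {
		if r.Name == resourceName {
			return true
		}
	}
	return false
}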
// RunOnce verifies the state of the cluster IP allocations and returns an error if an unrecoverable problem occurs.
func (c *Repair) RunOnce() error {
	// TODO: (per smarterclayton) if Get() or ListServices() is a weak consistency read,
	// or if they are executed against different leaders,
	// the ordering guarantee required to ensure no IP is allocated twice is violated.
	// ListServices must return a ResourceVersion higher than the etcd index Get triggers,
	// and the release code must not release services that have had IPs allocated but not yet been created
	// See #8295

	// If etcd server is not running we should wait for some time and fail only then. This is particularly
	// important when we start apiserver and etcd at the same time.
	var latest *api.RangeAllocation
	var err error
	err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) {
		latest, err = c.alloc.Get()
		return err == nil, err
	})
	if err != nil {
		return fmt.Errorf("unable to refresh the service IP block: %v", err)
	}

	ctx := api.WithNamespace(api.NewDefaultContext(), api.NamespaceAll)
	list, err := c.registry.ListServices(ctx, labels.Everything(), fields.Everything())
	if err != nil {
		return fmt.Errorf("unable to refresh the service IP block: %v", err)
	}

	r := ipallocator.NewCIDRRange(c.network)
	for _, svc := range list.Items {
		if !api.IsServiceIPSet(&svc) {
			continue
		}
		ip := net.ParseIP(svc.Spec.ClusterIP)
		if ip == nil {
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not a valid IP; please recreate", svc.Spec.ClusterIP, svc.Name, svc.Namespace))
			continue
		}
		switch err := r.Allocate(ip); err {
		case nil:
		case ipallocator.ErrAllocated:
			// TODO: send event
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s was assigned to multiple services; please recreate", ip, svc.Name, svc.Namespace))
		case ipallocator.ErrNotInRange:
			// TODO: send event
			// cluster IP is broken, reallocate
			util.HandleError(fmt.Errorf("the cluster IP %s for service %s/%s is not within the service CIDR %s; please recreate", ip, svc.Name, svc.Namespace, c.network))
		case ipallocator.ErrFull:
			// TODO: send event
			return fmt.Errorf("the service CIDR %v is full; you must widen the CIDR in order to create new services", r)
		default:
			return fmt.Errorf("unable to allocate cluster IP %s for service %s/%s due to an unknown error, exiting: %v", ip, svc.Name, svc.Namespace, err)
		}
	}

	err = r.Snapshot(latest)
	if err != nil {
		return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
	}

	if err := c.alloc.CreateOrUpdate(latest); err != nil {
		return fmt.Errorf("unable to persist the updated service IP allocations: %v", err)
	}
	return nil
}
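// Illustrative only: a minimal sketch of how a caller might drive RunOnce
// periodically. The function name, interval, and stop channel are assumptions
// for the example and are not part of this file.
func runRepairLoop(c *Repair, interval time.Duration, stopCh <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		// Report failures through the shared error handler but keep retrying,
		// since a failed pass is expected to be recoverable on the next tick.
		if err := c.RunOnce(); err != nil {
			util.HandleError(err)
		}
		select {
		case <-ticker.C:
		case <-stopCh:
			return
		}
	}
}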