func getGCEClient(config io.Reader) *gce.GCECloud { // Creating the cloud interface involves resolving the metadata server to get // an oauth token. If this fails, the token provider assumes it's not on GCE. // No errors are thrown. So we need to keep retrying till it works because // we know we're on GCE. for { cloudInterface, err := cloudprovider.GetCloudProvider("gce", config) if err == nil { cloud := cloudInterface.(*gce.GCECloud) // If this controller is scheduled on a node without compute/rw // it won't be allowed to list backends. We can assume that the // user has no need for Ingress in this case. If they grant // permissions to the node they will have to restart the controller // manually to re-create the client. if _, err = cloud.ListBackendServices(); err == nil || utils.IsHTTPErrorCode(err, http.StatusForbidden) { return cloud } glog.Warningf("Failed to list backend services, retrying: %v", err) } else { glog.Warningf("Failed to retrieve cloud interface, retrying: %v", err) } time.Sleep(cloudClientRetryInterval) } }
// Delete deletes the Backend for the given port. func (b *Backends) Delete(port int64) (err error) { name := b.namer.BeName(port) glog.Infof("Deleting backend %v", name) defer func() { if utils.IsHTTPErrorCode(err, http.StatusNotFound) { err = nil } if err == nil { b.snapshotter.Delete(portKey(port)) } }() // Try deleting health checks even if a backend is not found. if err = b.cloud.DeleteBackendService(name); err != nil && !utils.IsHTTPErrorCode(err, http.StatusNotFound) { return err } if err = b.healthChecker.Delete(port); err != nil && !utils.IsHTTPErrorCode(err, http.StatusNotFound) { return err } return nil }
// IsHealthy returns an error if the cluster manager is unhealthy. func (c *ClusterManager) IsHealthy() (err error) { // TODO: Expand on this, for now we just want to detect when the GCE client // is broken. _, err = c.backendPool.List() // If this container is scheduled on a node without compute/rw it is // effectively useless, but it is healthy. Reporting it as unhealthy // will lead to container crashlooping. if utils.IsHTTPErrorCode(err, http.StatusForbidden) { glog.Infof("Reporting cluster as healthy, but unable to list backends: %v", err) return nil } return }
// Sync syncs kubernetes instances with the instances in the instance group. func (i *Instances) Sync(nodes []string) (err error) { glog.V(4).Infof("Syncing nodes %v", nodes) defer func() { // The node pool is only responsible for syncing nodes to instance // groups. It never creates/deletes, so if an instance groups is // not found there's nothing it can do about it anyway. Most cases // this will happen because the backend pool has deleted the instance // group, however if it happens because a user deletes the IG by mistake // we should just wait till the backend pool fixes it. if utils.IsHTTPErrorCode(err, http.StatusNotFound) { glog.Infof("Node pool encountered a 404, ignoring: %v", err) err = nil } }() pool := i.snapshotter.Snapshot() for igName := range pool { gceNodes := sets.NewString() gceNodes, err = i.list(igName) if err != nil { return err } kubeNodes := sets.NewString(nodes...) // A node deleted via kubernetes could still exist as a gce vm. We don't // want to route requests to it. Similarly, a node added to kubernetes // needs to get added to the instance group so we do route requests to it. removeNodes := gceNodes.Difference(kubeNodes).List() addNodes := kubeNodes.Difference(gceNodes).List() if len(removeNodes) != 0 { if err = i.Remove( igName, gceNodes.Difference(kubeNodes).List()); err != nil { return err } } if len(addNodes) != 0 { if err = i.Add( igName, kubeNodes.Difference(gceNodes).List()); err != nil { return err } } } return nil }
// DeleteInstanceGroup deletes the given IG by name, from all zones. func (i *Instances) DeleteInstanceGroup(name string) error { defer i.snapshotter.Delete(name) errs := []error{} zones, err := i.ListZones() if err != nil { return err } for _, zone := range zones { if err := i.cloud.DeleteInstanceGroup(name, zone); err != nil { if !utils.IsHTTPErrorCode(err, http.StatusNotFound) { errs = append(errs, err) } } else { glog.Infof("Deleted instance group %v in zone %v", name, zone) } } if len(errs) == 0 { return nil } return fmt.Errorf("%v", errs) }
// sync manages Ingress create/updates/deletes. func (lbc *LoadBalancerController) sync(key string) (err error) { if !lbc.hasSynced() { time.Sleep(storeSyncPollPeriod) return fmt.Errorf("Waiting for stores to sync") } glog.V(3).Infof("Syncing %v", key) paths, err := lbc.ingLister.List() if err != nil { return err } nodePorts := lbc.tr.toNodePorts(&paths) lbNames := lbc.ingLister.Store.ListKeys() lbs, err := lbc.ListRuntimeInfo() if err != nil { return err } nodeNames, err := lbc.getReadyNodeNames() if err != nil { return err } obj, ingExists, err := lbc.ingLister.Store.GetByKey(key) if err != nil { return err } // This performs a 2 phase checkpoint with the cloud: // * Phase 1 creates/verifies resources are as expected. At the end of a // successful checkpoint we know that existing L7s are WAI, and the L7 // for the Ingress associated with "key" is ready for a UrlMap update. // If this encounters an error, eg for quota reasons, we want to invoke // Phase 2 right away and retry checkpointing. // * Phase 2 performs GC by refcounting shared resources. This needs to // happen periodically whether or not stage 1 fails. At the end of a // successful GC we know that there are no dangling cloud resources that // don't have an associated Kubernetes Ingress/Service/Endpoint. defer func() { if deferErr := lbc.CloudClusterManager.GC(lbNames, nodePorts); deferErr != nil { err = fmt.Errorf("Error during sync %v, error during GC %v", err, deferErr) } glog.V(3).Infof("Finished syncing %v", key) }() // Record any errors during sync and throw a single error at the end. This // allows us to free up associated cloud resources ASAP. var syncError error if err := lbc.CloudClusterManager.Checkpoint(lbs, nodeNames, nodePorts); err != nil { // TODO: Implement proper backoff for the queue. eventMsg := "GCE" if utils.IsHTTPErrorCode(err, http.StatusForbidden) { eventMsg += " :Quota" } if ingExists { lbc.recorder.Eventf(obj.(*extensions.Ingress), api.EventTypeWarning, eventMsg, err.Error()) } else { err = fmt.Errorf("%v Error: %v", eventMsg, err) } syncError = err } if !ingExists { return syncError } // Update the UrlMap of the single loadbalancer that came through the watch. l7, err := lbc.CloudClusterManager.l7Pool.Get(key) if err != nil { return fmt.Errorf("%v, unable to get loadbalancer: %v", syncError, err) } ing := *obj.(*extensions.Ingress) if urlMap, err := lbc.tr.toURLMap(&ing); err != nil { syncError = fmt.Errorf("%v, convert to url map error %v", syncError, err) } else if err := l7.UpdateUrlMap(urlMap); err != nil { lbc.recorder.Eventf(&ing, api.EventTypeWarning, "UrlMap", err.Error()) syncError = fmt.Errorf("%v, update url map error: %v", syncError, err) } else if err := lbc.updateIngressStatus(l7, ing); err != nil { lbc.recorder.Eventf(&ing, api.EventTypeWarning, "Status", err.Error()) syncError = fmt.Errorf("%v, update ingress error: %v", syncError, err) } return syncError }