Example #1
func getGCEClient(config io.Reader) *gce.GCECloud {
	// Creating the cloud interface involves resolving the metadata server to get
	// an oauth token. If this fails, the token provider assumes it's not on GCE
	// and returns no error, so we need to keep retrying until it works, because
	// we know we're on GCE.
	for {
		cloudInterface, err := cloudprovider.GetCloudProvider("gce", config)
		if err == nil {
			cloud := cloudInterface.(*gce.GCECloud)

			// If this controller is scheduled on a node without compute/rw
			// it won't be allowed to list backends. We can assume that the
			// user has no need for Ingress in this case. If they grant
			// permissions to the node they will have to restart the controller
			// manually to re-create the client.
			if _, err = cloud.ListBackendServices(); err == nil || utils.IsHTTPErrorCode(err, http.StatusForbidden) {
				return cloud
			}
			glog.Warningf("Failed to list backend services, retrying: %v", err)
		} else {
			glog.Warningf("Failed to retrieve cloud interface, retrying: %v", err)
		}
		time.Sleep(cloudClientRetryInterval)
	}
}
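
Every example on this page leans on utils.IsHTTPErrorCode to classify errors coming back from the GCE API client. A minimal sketch of what such a helper might look like, assuming the client surfaces errors as *googleapi.Error values (the controller's real utils package may differ in detail):

package utils

import "google.golang.org/api/googleapi"

// IsHTTPErrorCode reports whether err is a googleapi error carrying the given
// HTTP status code. Sketch only: it assumes the GCE client returns
// *googleapi.Error values directly, without wrapping.
func IsHTTPErrorCode(err error, code int) bool {
	apiErr, ok := err.(*googleapi.Error)
	return ok && apiErr.Code == code
}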
Example #2
// Delete deletes the Backend for the given port.
func (b *Backends) Delete(port int64) (err error) {
	name := b.namer.BeName(port)
	glog.Infof("Deleting backend %v", name)
	defer func() {
		if utils.IsHTTPErrorCode(err, http.StatusNotFound) {
			err = nil
		}
		if err == nil {
			b.snapshotter.Delete(portKey(port))
		}
	}()
	// Try deleting health checks even if a backend is not found.
	if err = b.cloud.DeleteBackendService(name); err != nil &&
		!utils.IsHTTPErrorCode(err, http.StatusNotFound) {
		return err
	}
	if err = b.healthChecker.Delete(port); err != nil &&
		!utils.IsHTTPErrorCode(err, http.StatusNotFound) {
		return err
	}
	return nil
}
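
The deferred closure in Delete is what makes deletion idempotent: a 404 from the cloud means the backend is already gone, so the error is normalized to success before the snapshotter entry is dropped. The same idea as a standalone sketch (the ignoreNotFound name is hypothetical, not part of the controller; it assumes the utils and net/http imports used above):

// ignoreNotFound normalizes a NotFound error from the cloud API to nil, since
// the resource the caller wanted removed no longer exists anyway.
func ignoreNotFound(err error) error {
	if err == nil || utils.IsHTTPErrorCode(err, http.StatusNotFound) {
		return nil
	}
	return err
}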
Example #3
// IsHealthy returns an error if the cluster manager is unhealthy.
func (c *ClusterManager) IsHealthy() (err error) {
	// TODO: Expand on this, for now we just want to detect when the GCE client
	// is broken.
	_, err = c.backendPool.List()

	// If this container is scheduled on a node without compute/rw it is
	// effectively useless, but it is healthy. Reporting it as unhealthy
	// will lead to container crashlooping.
	if utils.IsHTTPErrorCode(err, http.StatusForbidden) {
		glog.Infof("Reporting cluster as healthy, but unable to list backends: %v", err)
		return nil
	}
	return
}
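
One plausible way to consume IsHealthy is to wire it into an HTTP health-check endpoint. The wiring below is hypothetical and only illustrates the call; the real controller's healthz plumbing may differ:

// registerHealthz exposes the cluster manager's health on /healthz, returning
// 500 when IsHealthy reports an error and 200 otherwise.
func registerHealthz(mux *http.ServeMux, c *ClusterManager) {
	mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
		if err := c.IsHealthy(); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.Write([]byte("ok"))
	})
}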
Example #4
// Sync syncs kubernetes instances with the instances in the instance group.
func (i *Instances) Sync(nodes []string) (err error) {
	glog.V(4).Infof("Syncing nodes %v", nodes)

	defer func() {
		// The node pool is only responsible for syncing nodes to instance
		// groups. It never creates or deletes them, so if an instance group is
		// not found there's nothing it can do about it anyway. In most cases
		// this happens because the backend pool has deleted the instance
		// group; if it happens because a user deleted the IG by mistake,
		// we should just wait until the backend pool fixes it.
		if utils.IsHTTPErrorCode(err, http.StatusNotFound) {
			glog.Infof("Node pool encountered a 404, ignoring: %v", err)
			err = nil
		}
	}()

	pool := i.snapshotter.Snapshot()
	for igName := range pool {
		gceNodes := sets.NewString()
		gceNodes, err = i.list(igName)
		if err != nil {
			return err
		}
		kubeNodes := sets.NewString(nodes...)

		// A node deleted via kubernetes could still exist as a gce vm. We don't
		// want to route requests to it. Similarly, a node added to kubernetes
		// needs to get added to the instance group so we do route requests to it.

		removeNodes := gceNodes.Difference(kubeNodes).List()
		addNodes := kubeNodes.Difference(gceNodes).List()
		if len(removeNodes) != 0 {
			if err = i.Remove(igName, removeNodes); err != nil {
				return err
			}
		}

		if len(addNodes) != 0 {
			if err = i.Add(igName, addNodes); err != nil {
				return err
			}
		}
	}
	return nil
}
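
The heart of Sync is a two-way set difference between the nodes Kubernetes reports and the instances already in the group. Stripped of the surrounding plumbing, the diff looks like the sketch below; the sets import path is an assumption (older Kubernetes trees vendor it as k8s.io/kubernetes/pkg/util/sets):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	kubeNodes := sets.NewString("node-a", "node-b", "node-c")
	gceNodes := sets.NewString("node-b", "node-c", "node-d")

	// Instances GCE still has but Kubernetes no longer knows about must be
	// removed from the instance group; nodes new to Kubernetes must be added.
	removeNodes := gceNodes.Difference(kubeNodes).List() // [node-d]
	addNodes := kubeNodes.Difference(gceNodes).List()    // [node-a]

	fmt.Println("remove:", removeNodes)
	fmt.Println("add:", addNodes)
}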
Example #5
// DeleteInstanceGroup deletes the given IG by name, from all zones.
func (i *Instances) DeleteInstanceGroup(name string) error {
	defer i.snapshotter.Delete(name)
	errs := []error{}

	zones, err := i.ListZones()
	if err != nil {
		return err
	}
	for _, zone := range zones {
		if err := i.cloud.DeleteInstanceGroup(name, zone); err != nil {
			if !utils.IsHTTPErrorCode(err, http.StatusNotFound) {
				errs = append(errs, err)
			}
		} else {
			glog.Infof("Deleted instance group %v in zone %v", name, zone)
		}
	}
	if len(errs) == 0 {
		return nil
	}
	return fmt.Errorf("%v", errs)
}
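
DeleteInstanceGroup collects per-zone failures instead of returning on the first one, so a single bad zone cannot mask the others, then flattens the slice into one error. A hypothetical helper making that final aggregation step explicit (assumes the fmt import used above plus strings):

// joinErrors flattens a slice of errors into a single error, or returns nil
// when there were no failures.
func joinErrors(errs []error) error {
	if len(errs) == 0 {
		return nil
	}
	msgs := make([]string, 0, len(errs))
	for _, e := range errs {
		msgs = append(msgs, e.Error())
	}
	return fmt.Errorf("%s", strings.Join(msgs, "; "))
}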
Example #6
// sync manages Ingress create/update/delete operations.
func (lbc *LoadBalancerController) sync(key string) (err error) {
	if !lbc.hasSynced() {
		time.Sleep(storeSyncPollPeriod)
		return fmt.Errorf("Waiting for stores to sync")
	}
	glog.V(3).Infof("Syncing %v", key)

	paths, err := lbc.ingLister.List()
	if err != nil {
		return err
	}
	nodePorts := lbc.tr.toNodePorts(&paths)
	lbNames := lbc.ingLister.Store.ListKeys()
	lbs, err := lbc.ListRuntimeInfo()
	if err != nil {
		return err
	}
	nodeNames, err := lbc.getReadyNodeNames()
	if err != nil {
		return err
	}
	obj, ingExists, err := lbc.ingLister.Store.GetByKey(key)
	if err != nil {
		return err
	}

	// This performs a two-phase checkpoint with the cloud:
	// * Phase 1 creates/verifies resources are as expected. At the end of a
	//   successful checkpoint we know that existing L7s are working as
	//   intended, and the L7 for the Ingress associated with "key" is ready
	//   for a UrlMap update. If this encounters an error, e.g. for quota
	//   reasons, we want to invoke Phase 2 right away and retry checkpointing.
	// * Phase 2 performs GC by refcounting shared resources. This needs to
	//   happen periodically whether or not Phase 1 fails. At the end of a
	//   successful GC we know that there are no dangling cloud resources that
	//   don't have an associated Kubernetes Ingress/Service/Endpoint.

	defer func() {
		if deferErr := lbc.CloudClusterManager.GC(lbNames, nodePorts); deferErr != nil {
			err = fmt.Errorf("Error during sync %v, error during GC %v", err, deferErr)
		}
		glog.V(3).Infof("Finished syncing %v", key)
	}()

	// Record any errors during sync and return a single error at the end. This
	// allows us to free up associated cloud resources ASAP.
	var syncError error
	if err := lbc.CloudClusterManager.Checkpoint(lbs, nodeNames, nodePorts); err != nil {
		// TODO: Implement proper backoff for the queue.
		eventMsg := "GCE"
		if utils.IsHTTPErrorCode(err, http.StatusForbidden) {
			eventMsg += " :Quota"
		}
		if ingExists {
			lbc.recorder.Eventf(obj.(*extensions.Ingress), api.EventTypeWarning, eventMsg, err.Error())
		} else {
			err = fmt.Errorf("%v Error: %v", eventMsg, err)
		}
		syncError = err
	}

	if !ingExists {
		return syncError
	}
	// Update the UrlMap of the single loadbalancer that came through the watch.
	l7, err := lbc.CloudClusterManager.l7Pool.Get(key)
	if err != nil {
		return fmt.Errorf("%v, unable to get loadbalancer: %v", syncError, err)
	}

	ing := *obj.(*extensions.Ingress)
	if urlMap, err := lbc.tr.toURLMap(&ing); err != nil {
		syncError = fmt.Errorf("%v, convert to url map error %v", syncError, err)
	} else if err := l7.UpdateUrlMap(urlMap); err != nil {
		lbc.recorder.Eventf(&ing, api.EventTypeWarning, "UrlMap", err.Error())
		syncError = fmt.Errorf("%v, update url map error: %v", syncError, err)
	} else if err := lbc.updateIngressStatus(l7, ing); err != nil {
		lbc.recorder.Eventf(&ing, api.EventTypeWarning, "Status", err.Error())
		syncError = fmt.Errorf("%v, update ingress error: %v", syncError, err)
	}
	return syncError
}
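
Because err is a named return value, the deferred GC in sync can fold its own failure into whatever error the function was about to return: Phase 2 runs on every sync regardless of whether Checkpoint succeeded, and if the GC itself fails, that error is merged with the sync error rather than silently dropped.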