Example #1
0
// Run executes the controller's main loop and block until the service exits
// according to it's restart policy or Close() is called.
func (c *Controller) Run() (err error) {
	defer c.shutdown()
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc,
		syscall.SIGINT,
		syscall.SIGTERM,
		syscall.SIGQUIT)

	env := os.Environ()
	env = append(env, "CONTROLPLANE=1")
	env = append(env, fmt.Sprintf("CONTROLPLANE_CONSUMER_URL=http://localhost%s/api/metrics/store", c.options.Metric.Address))
	env = append(env, fmt.Sprintf("CONTROLPLANE_HOST_ID=%s", c.hostID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_TENANT_ID=%s", c.tenantID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_INSTANCE_ID=%s", c.options.Service.InstanceID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_SERVICED_ID=%s", c.options.Service.ID))

	if err := writeEnvFile(env); err != nil {
		return err
	}

	args := []string{"-c", "exec " + strings.Join(c.options.Service.Command, " ")}

	startService := func() (*subprocess.Instance, chan error) {
		service, serviceExited, _ := subprocess.New(time.Second*10, env, "/bin/sh", args...)
		return service, serviceExited
	}

	sendSignal := func(service *subprocess.Instance, sig os.Signal) bool {
		switch {
		case c.PIDFile != "":
			c.forwardSignal(sig)
		case service != nil:
			service.Notify(sig)
		default:
			return false
		}
		return true
	}

	rpcDead, err := c.rpcHealthCheck()
	if err != nil {
		glog.Error("Could not setup RPC ping check: %s", err)
		return err
	}

	storageDead, err := c.storageHealthCheck()
	if err != nil {
		glog.Errorf("Could not set up storage check: %s", err)
		return err
	}

	prereqsPassed := make(chan bool)
	var startAfter <-chan time.Time
	var exitAfter <-chan time.Time
	var service *subprocess.Instance = nil
	serviceExited := make(chan error, 1)
	c.watchRemotePorts()
	if err := c.handleControlCenterImports(rpcDead); err != nil {
		glog.Error("Could not setup Control Center specific imports: ", err)
		return err
	}
	go c.checkPrereqs(prereqsPassed, rpcDead)
	go c.reapZombies(rpcDead)
	healthExit := make(chan struct{})
	defer close(healthExit)
	c.kickOffHealthChecks(healthExit)
	doRegisterEndpoints := true
	exited := false

	var shutdownService = func(service *subprocess.Instance, sig os.Signal) {
		c.options.Service.Autorestart = false
		if sendSignal(service, sig) {
			sigc = nil
			prereqsPassed = nil
			startAfter = nil
			rpcDead = nil
			exitAfter = time.After(time.Second * 30)
			close(healthExit)
		} else {
			c.exitStatus = 1
			exited = true
		}
	}

	var reregister <-chan struct{}

	for !exited {
		select {
		case sig := <-sigc:
			glog.Infof("Notifying subprocess of signal %v", sig)
			shutdownService(service, sig)

		case <-exitAfter:
			glog.Infof("Killing unresponsive subprocess")
			sendSignal(service, syscall.SIGKILL)
			c.exitStatus = 1
			exited = true

		case <-prereqsPassed:
			startAfter = time.After(time.Millisecond * 1)
			prereqsPassed = nil

		case exitError := <-serviceExited:
			if !c.options.Service.Autorestart {
				exitStatus, _ := utils.GetExitStatus(exitError)
				if c.options.Logforwarder.Enabled {
					time.Sleep(c.options.Logforwarder.SettleTime)
				}
				glog.Infof("Service Exited with status:%d due to %+v", exitStatus, exitError)
				//set loop to end
				exited = true
				//exit with exit code, defer so that other cleanup can happen
				c.exitStatus = exitStatus

			} else {
				glog.Infof("Restarting service process in 10 seconds.")
				service = nil
				startAfter = time.After(time.Second * 10)
			}

		case <-startAfter:
			glog.Infof("Starting service process.")
			service, serviceExited = startService()
			if doRegisterEndpoints {
				reregister = registerExportedEndpoints(c, rpcDead)
				doRegisterEndpoints = false
			}
			startAfter = nil
		case <-reregister:
			reregister = registerExportedEndpoints(c, rpcDead)
		case <-rpcDead:
			glog.Infof("RPC Server has gone away, cleaning up")
			shutdownService(service, syscall.SIGTERM)
		case <-storageDead:
			glog.Infof("Distributed storage for service %s has gone away; shutting down", c.options.Service.ID)
			shutdownService(service, syscall.SIGTERM)
		}
	}
	// Signal to health check registry that this instance is giving up the ghost.
	client, err := node.NewLBClient(c.options.ServicedEndpoint)
	if err != nil {
		glog.Errorf("Could not create a client to endpoint: %s, %s", c.options.ServicedEndpoint, err)
		return nil
	}
	defer client.Close()
	c.Close()
	var unused int
	client.LogHealthCheck(domain.HealthCheckResult{c.options.Service.ID, c.options.Service.InstanceID, "__instance_shutdown", time.Now().String(), "passed"}, &unused)
	return nil
}
Example #2
0
// NewController creates a new Controller for the given options
func NewController(options ControllerOptions) (*Controller, error) {
	c := &Controller{
		options: options,
	}
	c.closing = make(chan chan error)

	if len(options.ServicedEndpoint) <= 0 {
		return nil, ErrInvalidEndpoint
	}

	// set vifs subnet
	if err := vifs.SetSubnet(options.VirtualAddressSubnet); err != nil {
		glog.Errorf("Could not set VirtualAddressSubnet:%s %s", options.VirtualAddressSubnet, err)
		return c, fmt.Errorf("container: invalid VirtualAddressSubnet:%s error:%s", options.VirtualAddressSubnet, err)
	}

	// get service
	instanceID, err := strconv.Atoi(options.Service.InstanceID)
	if err != nil {
		glog.Errorf("Invalid instance from instanceID:%s", options.Service.InstanceID)
		return c, fmt.Errorf("Invalid instance from instanceID:%s", options.Service.InstanceID)
	}
	service, err := getService(options.ServicedEndpoint, options.Service.ID, instanceID)
	if err != nil {
		glog.Errorf("%+v", err)
		glog.Errorf("Invalid service from serviceID:%s", options.Service.ID)
		return c, ErrInvalidService
	}

	c.allowDirectConn = !service.HasEndpointsFor("import_all")
	glog.Infof("Allow container to container connections: %t", c.allowDirectConn)

	if service.PIDFile != "" {
		if strings.HasPrefix(service.PIDFile, "exec ") {
			cmd := service.PIDFile[5:len(service.PIDFile)]
			out, err := exec.Command("sh", "-c", cmd).Output()
			if err != nil {
				glog.Errorf("Unable to run command '%s'", cmd)
			} else {
				c.PIDFile = strings.Trim(string(out), "\n ")
			}
		} else {
			c.PIDFile = service.PIDFile
		}
	}

	// create config files
	if err := setupConfigFiles(service); err != nil {
		glog.Errorf("Could not setup config files error:%s", err)
		return c, fmt.Errorf("container: invalid ConfigFiles error:%s", err)
	}

	// get service tenantID
	c.tenantID, err = getServiceTenantID(options.ServicedEndpoint, options.Service.ID)
	if err != nil {
		glog.Errorf("Invalid tenantID from serviceID:%s", options.Service.ID)
		return c, ErrInvalidTenantID
	}

	// get host id
	c.hostID, err = getAgentHostID(options.ServicedEndpoint)
	if err != nil {
		glog.Errorf("Invalid hostID")
		return c, ErrInvalidHostID
	}

	if options.Logforwarder.Enabled {
		if err := setupLogstashFiles(service, options.Service.InstanceID, filepath.Dir(options.Logforwarder.Path)); err != nil {
			glog.Errorf("Could not setup logstash files error:%s", err)
			return c, fmt.Errorf("container: invalid LogStashFiles error:%s", err)
		}

		// make sure we pick up any logfile that was modified within the
		// last three years
		// TODO: Either expose the 3 years a configurable or get rid of it
		logforwarder, exited, err := subprocess.New(time.Second,
			nil,
			options.Logforwarder.Path,
			fmt.Sprintf("-idle-flush-time=%s", options.Logforwarder.IdleFlushTime),
			"-old-files-hours=26280",
			"-config", options.Logforwarder.ConfigFile)
		if err != nil {
			return nil, err
		}
		c.logforwarder = logforwarder
		c.logforwarderExited = exited
	}

	//build metric redirect url -- assumes 8444 is port mapped
	metricRedirect := options.Metric.RemoteEndoint
	if len(metricRedirect) == 0 {
		glog.V(1).Infof("container.Controller does not have metric forwarding")
	} else if !options.MetricForwarding {
		glog.V(1).Infof("Not forwarding metrics for this container (%v)", c.tenantID)
	} else {
		if len(c.tenantID) <= 0 {
			return nil, ErrInvalidTenantID
		}
		if len(c.hostID) <= 0 {
			return nil, ErrInvalidHostID
		}
		if len(options.Service.ID) <= 0 {
			return nil, ErrInvalidServiceID
		}

		metricRedirect += "?controlplane_tenant_id=" + c.tenantID
		metricRedirect += "&controlplane_service_id=" + options.Service.ID
		metricRedirect += "&controlplane_host_id=" + c.hostID
		metricRedirect += "&controlplane_instance_id=" + options.Service.InstanceID

		//build and serve the container metric forwarder
		forwarder, err := NewMetricForwarder(options.Metric.Address, metricRedirect)
		if err != nil {
			return c, err
		}
		c.metricForwarder = forwarder

		// setup network stats
		destination := fmt.Sprintf("http://localhost%s/api/metrics/store", options.Metric.Address)
		glog.Infof("pushing network stats to: %s", destination)
		go statReporter(destination, time.Second*15)
	}

	// Keep a copy of the service prerequisites in the Controller object.
	c.prereqs = service.Prereqs

	// get endpoints
	if err := c.getEndpoints(service); err != nil {
		return c, err
	}

	// check command
	glog.Infof("command: %v [%d]", options.Service.Command, len(options.Service.Command))
	if len(options.Service.Command) < 1 {
		glog.Errorf("Invalid commandif ")
		return c, ErrInvalidCommand
	}

	return c, nil
}