// Run executes the controller's main loop and block until the service exits // according to it's restart policy or Close() is called. func (c *Controller) Run() (err error) { defer c.shutdown() sigc := make(chan os.Signal, 1) signal.Notify(sigc, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) env := os.Environ() env = append(env, "CONTROLPLANE=1") env = append(env, fmt.Sprintf("CONTROLPLANE_CONSUMER_URL=http://localhost%s/api/metrics/store", c.options.Metric.Address)) env = append(env, fmt.Sprintf("CONTROLPLANE_HOST_ID=%s", c.hostID)) env = append(env, fmt.Sprintf("CONTROLPLANE_TENANT_ID=%s", c.tenantID)) env = append(env, fmt.Sprintf("CONTROLPLANE_INSTANCE_ID=%s", c.options.Service.InstanceID)) env = append(env, fmt.Sprintf("CONTROLPLANE_SERVICED_ID=%s", c.options.Service.ID)) if err := writeEnvFile(env); err != nil { return err } args := []string{"-c", "exec " + strings.Join(c.options.Service.Command, " ")} startService := func() (*subprocess.Instance, chan error) { service, serviceExited, _ := subprocess.New(time.Second*10, env, "/bin/sh", args...) return service, serviceExited } sendSignal := func(service *subprocess.Instance, sig os.Signal) bool { switch { case c.PIDFile != "": c.forwardSignal(sig) case service != nil: service.Notify(sig) default: return false } return true } rpcDead, err := c.rpcHealthCheck() if err != nil { glog.Error("Could not setup RPC ping check: %s", err) return err } storageDead, err := c.storageHealthCheck() if err != nil { glog.Errorf("Could not set up storage check: %s", err) return err } prereqsPassed := make(chan bool) var startAfter <-chan time.Time var exitAfter <-chan time.Time var service *subprocess.Instance = nil serviceExited := make(chan error, 1) c.watchRemotePorts() if err := c.handleControlCenterImports(rpcDead); err != nil { glog.Error("Could not setup Control Center specific imports: ", err) return err } go c.checkPrereqs(prereqsPassed, rpcDead) go c.reapZombies(rpcDead) healthExit := make(chan struct{}) defer close(healthExit) c.kickOffHealthChecks(healthExit) doRegisterEndpoints := true exited := false var shutdownService = func(service *subprocess.Instance, sig os.Signal) { c.options.Service.Autorestart = false if sendSignal(service, sig) { sigc = nil prereqsPassed = nil startAfter = nil rpcDead = nil exitAfter = time.After(time.Second * 30) close(healthExit) } else { c.exitStatus = 1 exited = true } } var reregister <-chan struct{} for !exited { select { case sig := <-sigc: glog.Infof("Notifying subprocess of signal %v", sig) shutdownService(service, sig) case <-exitAfter: glog.Infof("Killing unresponsive subprocess") sendSignal(service, syscall.SIGKILL) c.exitStatus = 1 exited = true case <-prereqsPassed: startAfter = time.After(time.Millisecond * 1) prereqsPassed = nil case exitError := <-serviceExited: if !c.options.Service.Autorestart { exitStatus, _ := utils.GetExitStatus(exitError) if c.options.Logforwarder.Enabled { time.Sleep(c.options.Logforwarder.SettleTime) } glog.Infof("Service Exited with status:%d due to %+v", exitStatus, exitError) //set loop to end exited = true //exit with exit code, defer so that other cleanup can happen c.exitStatus = exitStatus } else { glog.Infof("Restarting service process in 10 seconds.") service = nil startAfter = time.After(time.Second * 10) } case <-startAfter: glog.Infof("Starting service process.") service, serviceExited = startService() if doRegisterEndpoints { reregister = registerExportedEndpoints(c, rpcDead) doRegisterEndpoints = false } startAfter = nil case <-reregister: reregister = registerExportedEndpoints(c, rpcDead) case <-rpcDead: glog.Infof("RPC Server has gone away, cleaning up") shutdownService(service, syscall.SIGTERM) case <-storageDead: glog.Infof("Distributed storage for service %s has gone away; shutting down", c.options.Service.ID) shutdownService(service, syscall.SIGTERM) } } // Signal to health check registry that this instance is giving up the ghost. client, err := node.NewLBClient(c.options.ServicedEndpoint) if err != nil { glog.Errorf("Could not create a client to endpoint: %s, %s", c.options.ServicedEndpoint, err) return nil } defer client.Close() c.Close() var unused int client.LogHealthCheck(domain.HealthCheckResult{c.options.Service.ID, c.options.Service.InstanceID, "__instance_shutdown", time.Now().String(), "passed"}, &unused) return nil }
// NewController creates a new Controller for the given options func NewController(options ControllerOptions) (*Controller, error) { c := &Controller{ options: options, } c.closing = make(chan chan error) if len(options.ServicedEndpoint) <= 0 { return nil, ErrInvalidEndpoint } // set vifs subnet if err := vifs.SetSubnet(options.VirtualAddressSubnet); err != nil { glog.Errorf("Could not set VirtualAddressSubnet:%s %s", options.VirtualAddressSubnet, err) return c, fmt.Errorf("container: invalid VirtualAddressSubnet:%s error:%s", options.VirtualAddressSubnet, err) } // get service instanceID, err := strconv.Atoi(options.Service.InstanceID) if err != nil { glog.Errorf("Invalid instance from instanceID:%s", options.Service.InstanceID) return c, fmt.Errorf("Invalid instance from instanceID:%s", options.Service.InstanceID) } service, err := getService(options.ServicedEndpoint, options.Service.ID, instanceID) if err != nil { glog.Errorf("%+v", err) glog.Errorf("Invalid service from serviceID:%s", options.Service.ID) return c, ErrInvalidService } c.allowDirectConn = !service.HasEndpointsFor("import_all") glog.Infof("Allow container to container connections: %t", c.allowDirectConn) if service.PIDFile != "" { if strings.HasPrefix(service.PIDFile, "exec ") { cmd := service.PIDFile[5:len(service.PIDFile)] out, err := exec.Command("sh", "-c", cmd).Output() if err != nil { glog.Errorf("Unable to run command '%s'", cmd) } else { c.PIDFile = strings.Trim(string(out), "\n ") } } else { c.PIDFile = service.PIDFile } } // create config files if err := setupConfigFiles(service); err != nil { glog.Errorf("Could not setup config files error:%s", err) return c, fmt.Errorf("container: invalid ConfigFiles error:%s", err) } // get service tenantID c.tenantID, err = getServiceTenantID(options.ServicedEndpoint, options.Service.ID) if err != nil { glog.Errorf("Invalid tenantID from serviceID:%s", options.Service.ID) return c, ErrInvalidTenantID } // get host id c.hostID, err = getAgentHostID(options.ServicedEndpoint) if err != nil { glog.Errorf("Invalid hostID") return c, ErrInvalidHostID } if options.Logforwarder.Enabled { if err := setupLogstashFiles(service, options.Service.InstanceID, filepath.Dir(options.Logforwarder.Path)); err != nil { glog.Errorf("Could not setup logstash files error:%s", err) return c, fmt.Errorf("container: invalid LogStashFiles error:%s", err) } // make sure we pick up any logfile that was modified within the // last three years // TODO: Either expose the 3 years a configurable or get rid of it logforwarder, exited, err := subprocess.New(time.Second, nil, options.Logforwarder.Path, fmt.Sprintf("-idle-flush-time=%s", options.Logforwarder.IdleFlushTime), "-old-files-hours=26280", "-config", options.Logforwarder.ConfigFile) if err != nil { return nil, err } c.logforwarder = logforwarder c.logforwarderExited = exited } //build metric redirect url -- assumes 8444 is port mapped metricRedirect := options.Metric.RemoteEndoint if len(metricRedirect) == 0 { glog.V(1).Infof("container.Controller does not have metric forwarding") } else if !options.MetricForwarding { glog.V(1).Infof("Not forwarding metrics for this container (%v)", c.tenantID) } else { if len(c.tenantID) <= 0 { return nil, ErrInvalidTenantID } if len(c.hostID) <= 0 { return nil, ErrInvalidHostID } if len(options.Service.ID) <= 0 { return nil, ErrInvalidServiceID } metricRedirect += "?controlplane_tenant_id=" + c.tenantID metricRedirect += "&controlplane_service_id=" + options.Service.ID metricRedirect += "&controlplane_host_id=" + c.hostID metricRedirect += "&controlplane_instance_id=" + options.Service.InstanceID //build and serve the container metric forwarder forwarder, err := NewMetricForwarder(options.Metric.Address, metricRedirect) if err != nil { return c, err } c.metricForwarder = forwarder // setup network stats destination := fmt.Sprintf("http://localhost%s/api/metrics/store", options.Metric.Address) glog.Infof("pushing network stats to: %s", destination) go statReporter(destination, time.Second*15) } // Keep a copy of the service prerequisites in the Controller object. c.prereqs = service.Prereqs // get endpoints if err := c.getEndpoints(service); err != nil { return c, err } // check command glog.Infof("command: %v [%d]", options.Service.Command, len(options.Service.Command)) if len(options.Service.Command) < 1 { glog.Errorf("Invalid commandif ") return c, ErrInvalidCommand } return c, nil }