func (r *ExecRunner) run() { log.Infof("[%s] Starting (%s driver)", r.name, r.backend.Driver()) if r.stderr != "" { err := common.CreatePathToFile(r.stderr) if err != nil { backends.SetStatusLogErrorf(r.name, "Failed to create path to collector's stderr log: %s", r.stderr) } f := common.GetRotatedLog(r.stderr, r.context.UserConfig.LogRotationTime, r.context.UserConfig.LogMaxAge) defer f.Close() r.cmd.Stderr = f } if r.stdout != "" { err := common.CreatePathToFile(r.stdout) if err != nil { backends.SetStatusLogErrorf(r.name, "Failed to create path to collector's stdout log: %s", r.stdout) } f := common.GetRotatedLog(r.stderr, r.context.UserConfig.LogRotationTime, r.context.UserConfig.LogMaxAge) defer f.Close() r.cmd.Stdout = f } r.backend.SetStatus(backends.StatusRunning, "Running") err := r.cmd.Start() if err != nil { backends.SetStatusLogErrorf(r.name, "Failed to start collector: %s", err) } }
func (r *SvcRunner) ValidateBeforeStart() error { execPath, err := exec.LookPath(r.exec) if err != nil { return backends.SetStatusLogErrorf(r.name, "Failed to find collector executable %s", r.exec) } m, err := mgr.Connect() if err != nil { return backends.SetStatusLogErrorf(r.name, "Failed to connect to service manager: %v", err) } defer m.Disconnect() serviceConfig := mgr.Config{ DisplayName: "Graylog collector sidecar - " + r.name + " backend", Description: "Wrapper service for the NXLog backend", BinaryPathName: "\"" + r.exec + "\" " + strings.Join(r.args, " ")} s, err := m.OpenService(r.serviceName) // service exist so we only update the properties if err == nil { defer s.Close() log.Debugf("[%s] service %s already exists, updating properties", r.name) currentConfig, err := s.Config() if err == nil { currentConfig.DisplayName = serviceConfig.DisplayName currentConfig.Description = serviceConfig.Description currentConfig.BinaryPathName = serviceConfig.BinaryPathName } err = s.UpdateConfig(currentConfig) if err != nil { backends.SetStatusLogErrorf(r.name, "Failed to update service: %v", err) } // service needs to be created } else { s, err = m.CreateService(r.serviceName, execPath, serviceConfig) if err != nil { backends.SetStatusLogErrorf(r.name, "Failed to install service: %v", err) } defer s.Close() err = eventlog.InstallAsEventCreate(r.serviceName, eventlog.Error|eventlog.Warning|eventlog.Info) if err != nil { s.Delete() backends.SetStatusLogErrorf(r.name, "SetupEventLogSource() failed: %v", err) } } return nil }
func (r *SvcRunner) Start() error { if err := r.ValidateBeforeStart(); err != nil { log.Error(err.Error()) return err } r.startTime = time.Now() log.Infof("[%s] Starting (%s driver)", r.name, r.backend.Driver()) m, err := mgr.Connect() if err != nil { return backends.SetStatusLogErrorf(r.name, "Failed to connect to service manager: %v", err) } defer m.Disconnect() ws, err := m.OpenService(r.serviceName) if err != nil { return backends.SetStatusLogErrorf(r.name, "Could not access service: %v", err) } defer ws.Close() err = ws.Start("is", "manual-started") if err != nil { return backends.SetStatusLogErrorf(r.name, "Could not start service: %v", err) } r.isRunning = true go func() { for { time.Sleep(10 * time.Second) if r.isRunning && !r.Running() { backends.SetStatusLogErrorf(r.name, "Backend crashed, sending restart signal") r.Start() break } if !r.isRunning { break } } }() r.backend.SetStatus(backends.StatusRunning, "Running") return err }
func (r *ExecRunner) ValidateBeforeStart() error { _, err := exec.LookPath(r.exec) if err != nil { return backends.SetStatusLogErrorf(r.name, "Failed to find collector executable %q: %v", r.exec, err) } if r.isRunning { return errors.New("Failed to start collector, it's already running") } return nil }
func (r *SvcRunner) Running() bool { m, err := mgr.Connect() if err != nil { backends.SetStatusLogErrorf(r.name, "Failed to connect to service manager: %v", err) } defer m.Disconnect() s, err := m.OpenService(r.serviceName) // service exist so we only update the properties if err != nil { backends.SetStatusLogErrorf(r.name, "Can't get status of service %s cause it doesn't exist: %v", r.serviceName, err) } defer s.Close() status, err := s.Query() if err != nil { backends.SetStatusLogErrorf(r.name, "Can't query status of service %s: %v", r.serviceName, err) } return status.State == svc.Running }
func (r *SvcRunner) Stop() error { log.Infof("[%s] Stopping", r.name) // deactivate supervisor r.isRunning = false m, err := mgr.Connect() if err != nil { return backends.SetStatusLogErrorf(r.name, "Failed to connect to service manager: %v", err) } defer m.Disconnect() ws, err := m.OpenService(r.serviceName) if err != nil { return backends.SetStatusLogErrorf(r.name, "Could not access service: %v", err) } defer ws.Close() status, err := ws.Control(svc.Stop) if err != nil { return backends.SetStatusLogErrorf(r.name, "Could not send stop control: %v", err) } timeout := time.Now().Add(10 * time.Second) for status.State != svc.Stopped { if timeout.Before(time.Now()) { return backends.SetStatusLogErrorf(r.name, "Timeout waiting for service to go to stopped state: %v", err) } time.Sleep(300 * time.Millisecond) status, err = ws.Query() if err != nil { return backends.SetStatusLogErrorf(r.name, "Could not retrieve service status: %v", err) } } return nil }
func (r *ExecRunner) StartSupervisor() { if r.isSupervised == true { log.Debugf("[%s] Won't start second supervisor", r.Name()) return } r.isSupervised = true r.restartCount = 1 go func() { for { // blocks till process exits r.cmd.Wait() // ignore regular shutdown if !r.isRunning { time.Sleep(300 * time.Millisecond) continue } // After 60 seconds we can reset the restart counter if time.Since(r.startTime) > 60*time.Second { r.restartCount = 1 } // don't continue to restart after 3 tries, exit supervisor and wait for a configuration update if r.restartCount > 3 { backends.SetStatusLogErrorf(r.name, "Unable to start collector after 3 tries, giving up!") r.cmd.Wait() r.isRunning = false break } log.Errorf("[%s] Backend crashed, trying to restart %d/3", r.name, r.restartCount) r.restartCount += 1 r.Restart() } r.isSupervised = false }() }
// fetch configuration periodically func checkForUpdateAndRestart(httpClient *http.Client, checksum string, context *context.Ctx) string { time.Sleep(time.Duration(context.UserConfig.UpdateInterval) * time.Second) jsonConfig, err := api.RequestConfiguration(httpClient, checksum, context) if err != nil { log.Error("Can't fetch configuration from Graylog API: ", err) return "" } if jsonConfig.IsEmpty() { // etag match, skipping all other actions return jsonConfig.Checksum } for name, runner := range daemon.Daemon.Runner { backend := backends.Store.GetBackend(name) if backend.RenderOnChange(jsonConfig) { if !backend.ValidateConfigurationFile() { backends.SetStatusLogErrorf(name, "Collector configuration file is not valid, waiting for the next update.") continue } if runner.Running() { // collector was already started so a Restart will not fail err = runner.Restart() } else { // collector is not running, we do a fresh start err = runner.Start() } if err != nil { msg := "Failed to restart collector" backend.SetStatus(backends.StatusError, msg) log.Errorf("[%s] %s: %v", name, msg, err) } } } return jsonConfig.Checksum }