func (c *CmdWatchdog) Run() (err error) { // Watch over the running service // until it goes away, which will mean one of: // - crash // - system shutdown // - uninstall // - legitimate stoppage (ctl stop) // Testing loop: // - dial pipe. Pipe up equals service up. // - Do a blocking read. Server won't be writing anything, // so this is our canary for shutdown. // - If down, test existence of PidFile (maybe wait a moment). // - No PidFile: normal shutdown. We stop too. // - PidFile still there: crashed(?) Restart. // (file should be writable in that case) // // Note that we give up after 10 consecutive crashes if c.G().SocketInfo == nil { return errors.New("Uninitialized socket") } pipeName := c.G().SocketInfo.GetFile() if len(pipeName) == 0 { return errors.New("No pipe name") } countdown := c.restarts for { conn, err := npipe.DialTimeout(pipeName, time.Second*10) if conn == nil { // no service started. exit. return err } for { answer, err := bufio.NewReader(conn).ReadString('\n') // We should not have received anything, this should mean // the pipe has been closed - but test just in case if len(answer) == 0 || err != nil { break } } conn.Close() // Give the service a second to clean up its file time.Sleep(time.Second * 1) var fn string if fn, err = c.G().Env.GetPidFile(); err != nil { return err } crashed, _ := libkb.FileExists(fn) if !crashed { // apparently legitimate shutdown return nil } if countdown <= 0 { break } // restart server case (is this the right command line?) if _, err = ForkServer(c.G(), c.G().Env.GetCommandLine(), false); err != nil { return err } countdown-- } return fmt.Errorf("Watchdog observed %d crashes in a row. NOT reforking.", c.restarts) }
// DialSocket dials the named pipe at s.file, waiting up to ten seconds
// for the connection to be established.
//
// It returns the open connection, or a nil net.Conn together with the
// dial error (if any) when no connection was made.
func (s SocketInfo) DialSocket() (ret net.Conn, err error) {
	// The timeout is a time.Duration, which counts nanoseconds: a bare 10
	// would give up after 10ns instead of 10s.
	conn, err := npipe.DialTimeout(s.file, time.Second*10)
	if err != nil || conn == nil {
		// Return a literal nil. If DialTimeout yields a concrete pointer
		// type, passing a nil pointer through would produce a non-nil
		// net.Conn and defeat callers that test ret == nil.
		return nil, err
	}
	return conn, nil
}