func (r *remote) Cleanup() { if r.daemonPid == -1 { return } r.closeManually = true r.rpcConn.Close() // Ask the daemon to quit syscall.Kill(r.daemonPid, syscall.SIGTERM) // Wait up to 15secs for it to stop for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second { if !system.IsProcessAlive(r.daemonPid) { break } time.Sleep(time.Second) } if system.IsProcessAlive(r.daemonPid) { logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid) syscall.Kill(r.daemonPid, syscall.SIGKILL) } // cleanup some files os.Remove(filepath.Join(r.stateDir, containerdPidFilename)) os.Remove(filepath.Join(r.stateDir, containerdSockFilename)) }
func (r *remote) handleConnectionChange() { var transientFailureCount = 0 ticker := time.NewTicker(500 * time.Millisecond) defer ticker.Stop() healthClient := grpc_health_v1.NewHealthClient(r.rpcConn) for { <-ticker.C ctx, cancel := context.WithTimeout(context.Background(), containerdHealthCheckTimeout) _, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{}) cancel() if err == nil { continue } logrus.Debugf("libcontainerd: containerd health check returned error: %v", err) if r.daemonPid != -1 { if strings.Contains(err.Error(), "is closing") { // Well, we asked for it to stop, just return return } // all other errors are transient // Reset state to be notified of next failure transientFailureCount++ if transientFailureCount >= maxConnectionRetryCount { transientFailureCount = 0 if system.IsProcessAlive(r.daemonPid) { system.KillProcess(r.daemonPid) } <-r.daemonWaitCh if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error logrus.Errorf("libcontainerd: error restarting containerd: %v", err) } continue } } } }
func (r *remote) runContainerdDaemon() error { pidFilename := filepath.Join(r.stateDir, containerdPidFilename) f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600) if err != nil { return err } defer f.Close() // File exist, check if the daemon is alive b := make([]byte, 8) n, err := f.Read(b) if err != nil && err != io.EOF { return err } if n > 0 { pid, err := strconv.ParseUint(string(b[:n]), 10, 64) if err != nil { return err } if system.IsProcessAlive(int(pid)) { logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid) r.daemonPid = int(pid) return nil } } // rewind the file _, err = f.Seek(0, os.SEEK_SET) if err != nil { return err } // Truncate it err = f.Truncate(0) if err != nil { return err } // Start a new instance args := []string{ "-l", fmt.Sprintf("unix://%s", r.rpcAddr), "--metrics-interval=0", "--start-timeout", "2m", "--state-dir", filepath.Join(r.stateDir, containerdStateDir), } if goruntime.GOOS == "solaris" { args = append(args, "--shim", "containerd-shim", "--runtime", "runc") } else { args = append(args, "--shim", "docker-containerd-shim") if r.runtime != "" { args = append(args, "--runtime") args = append(args, r.runtime) } } if r.debugLog { args = append(args, "--debug") } if len(r.runtimeArgs) > 0 { for _, v := range r.runtimeArgs { args = append(args, "--runtime-args") args = append(args, v) } logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args) } cmd := exec.Command(containerdBinary, args...) // redirect containerd logs to docker logs cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr cmd.SysProcAttr = setSysProcAttr(true) cmd.Env = nil // clear the NOTIFY_SOCKET from the env when starting containerd for _, e := range os.Environ() { if !strings.HasPrefix(e, "NOTIFY_SOCKET") { cmd.Env = append(cmd.Env, e) } } if err := cmd.Start(); err != nil { return err } logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid) if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil { system.KillProcess(cmd.Process.Pid) return err } if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil { system.KillProcess(cmd.Process.Pid) return err } r.daemonWaitCh = make(chan struct{}) go func() { cmd.Wait() close(r.daemonWaitCh) }() // Reap our child when needed r.daemonPid = cmd.Process.Pid return nil }