Exemple #1
0
// wait waits on pid via sys.Wait4, always adding sys.WALL to options, and
// returns the pid reported by Wait4, the wait status and any error.
//
// When pid is not its thread group leader (pid != tgid), or when options is
// non-zero, a single Wait4 call is made. Otherwise Wait4 is polled with
// WNOHANG; in that mode the returned status pointer is nil if the thread is
// found to be a zombie before Wait4 reports anything.
func wait(pid, tgid, options int) (int, *sys.WaitStatus, error) {
	var ws sys.WaitStatus

	if pid != tgid || options != 0 {
		waited, err := sys.Wait4(pid, &ws, sys.WALL|options, nil)
		return waited, &ws, err
	}

	// Calling wait4/waitpid with options == 0 on a thread group leader while
	// ptracing hangs forever if the leader has exited and left zombies of
	// its own; this is apparently intended behaviour in the linux kernel.
	// To avoid the hang, poll wait4 with WNOHANG, sleeping between attempts,
	// and stop when either wait4 reports something or the thread turns out
	// to be a zombie.
	// References:
	// https://sourceware.org/bugzilla/show_bug.cgi?id=12702
	// https://sourceware.org/bugzilla/show_bug.cgi?id=10095
	// https://sourceware.org/bugzilla/attachment.cgi?id=5685
	for {
		waited, err := sys.Wait4(pid, &ws, sys.WNOHANG|sys.WALL|options, nil)
		switch {
		case err != nil:
			return 0, nil, err
		case waited != 0:
			return waited, &ws, nil
		case status(pid) == STATUS_ZOMBIE:
			return pid, nil, nil
		}
		time.Sleep(200 * time.Millisecond)
	}
}
Exemple #2
0
// ReapChildren is a long-running routine that blocks waiting for child
// processes to exit and reaps them, reporting reaped process IDs to the
// optional pids channel and any errors to the optional errors channel.
//
// The routine returns when a value is received from done (or done is
// closed). Either of pids and errors may be nil, in which case the
// corresponding reports are dropped.
func ReapChildren(pids PidCh, errors ErrorCh, done chan struct{}) {
	c := make(chan os.Signal, 1)
	signal.Notify(c, unix.SIGCHLD)
	// Undo the registration on return so SIGCHLD is no longer relayed to a
	// channel nobody reads from.
	defer signal.Stop(c)

	for {
		// Block for an incoming signal that a child has exited.
		select {
		case <-c:
			// Got a child signal, drop out and reap.
		case <-done:
			return
		}

		// Try to reap children until there aren't any more. We never
		// block in here so that we are always responsive to signals, at
		// the expense of possibly leaving a child behind if we get
		// here too quickly. Any stragglers should get reaped the next
		// time we see a signal, so we won't leak in the long run.
	reap:
		for {
			var status unix.WaitStatus
			pid, err := unix.Wait4(-1, &status, unix.WNOHANG, nil)
			switch err {
			case nil:
				// With WNOHANG a pid of 0 means children exist but none
				// has exited yet. Go back to waiting for a signal rather
				// than reporting a bogus pid and spinning on Wait4.
				if pid <= 0 {
					break reap
				}

				// Got a child, clean this up and poll again.
				if pids != nil {
					pids <- pid
				}

			case unix.ECHILD:
				// No more children, we are done.
				break reap

			case unix.EINTR:
				// We got interrupted, try again. This likely can't
				// happen since we are calling Wait4 in a non-blocking
				// fashion, but it's good to be complete and handle
				// this case rather than fail.

			default:
				// We got some other error we didn't expect. Wait for
				// another SIGCHLD so we don't potentially spam in here
				// and chew up CPU.
				if errors != nil {
					errors <- err
				}
				break reap
			}
		}
	}
}
Exemple #3
0
// wait calls sys.Wait4 on pid with the given options and returns the pid
// Wait4 reported, a pointer to the collected wait status, and Wait4's error.
// tgid is accepted but not used by this implementation.
func wait(pid, tgid, options int) (int, *sys.WaitStatus, error) {
	ws := new(sys.WaitStatus)
	waited, err := sys.Wait4(pid, ws, options, nil)
	return waited, ws, err
}
Exemple #4
0
// wait calls sys.Wait4 on pid with the given options and returns the pid
// Wait4 reported, a pointer to the collected wait status, and Wait4's error.
// The receiver is not consulted.
func (dbp *Process) wait(pid, options int) (int, *sys.WaitStatus, error) {
	ws := new(sys.WaitStatus)
	waited, err := sys.Wait4(pid, ws, options, nil)
	return waited, ws, err
}
Exemple #5
0
// ReapChildren is a long-running routine that blocks waiting for child
// processes to exit and reaps them, reporting reaped process IDs to the
// optional pids channel and any errors to the optional errors channel.
//
// The optional reapLock will be used to prevent reaping during periods
// when you know your application is waiting for subprocesses to return.
// You need to use care in order to prevent the reaper from stealing your
// return values from uses of packages like Go's exec. We use an RWMutex
// so that we don't serialize all of the application's execution of sub
// processes with each other, but we do serialize them with reaping. The
// application should get a read lock when it wants to do a wait.
//
// The routine returns when a value is received from done (or done is
// closed).
func ReapChildren(pids PidCh, errors ErrorCh, done chan struct{}, reapLock *sync.RWMutex) {
	c := make(chan os.Signal, 1)
	signal.Notify(c, unix.SIGCHLD)
	// Undo the registration on return so SIGCHLD is no longer relayed to a
	// channel nobody reads from.
	defer signal.Stop(c)

	for {
		// Block for an incoming signal that a child has exited.
		select {
		case <-c:
			// Got a child signal, drop out and reap.
		case <-done:
			return
		}

		// Attempt to reap all abandoned child processes after getting
		// the reap lock, which makes sure the application isn't doing
		// any waiting of its own. Note that we do the full write lock
		// here. The closure scopes the deferred unlock to one reaping
		// pass.
		func() {
			if reapLock != nil {
				reapLock.Lock()
				defer reapLock.Unlock()
			}

			// Try to reap children until there aren't any more. We
			// never block in here so that we are always responsive
			// to signals, at the expense of possibly leaving a
			// child behind if we get here too quickly. Any
			// stragglers should get reaped the next time we see a
			// signal, so we won't leak in the long run.
			for {
				var status unix.WaitStatus
				pid, err := unix.Wait4(-1, &status, unix.WNOHANG, nil)
				switch err {
				case nil:
					// A non-positive pid with no error means nothing
					// was reaped on this call; stop polling.
					if pid <= 0 {
						return
					}

					// Got a child, clean this up and poll again.
					if pids != nil {
						pids <- pid
					}

				case unix.ECHILD:
					// No more children, we are done.
					return

				case unix.EINTR:
					// We got interrupted, try again. This likely
					// can't happen since we are calling Wait4 in a
					// non-blocking fashion, but it's good to be
					// complete and handle this case rather than
					// fail.

				default:
					// We got some other error we didn't expect.
					// Wait for another SIGCHLD so we don't
					// potentially spam in here and chew up CPU.
					if errors != nil {
						errors <- err
					}
					return
				}
			}
		}()
	}
}
Exemple #6
0
// handleSigkilledShim deals with the case where the shim supervising p may
// have died (for example from SIGKILL) before recording an exit status.
//
// rst and rerr are the exit status and error the caller already has; they
// are returned unchanged whenever no recovery action applies. Otherwise the
// returned status is 255 when p.pid no longer refers to the same process,
// or 128+SIGKILL when the process had to be killed here.
func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) {
	if p.cmd == nil || p.cmd.Process == nil {
		// No shim command is attached to this process object, so probe
		// the process directly. Signal 0 only checks for existence.
		e := unix.Kill(p.pid, 0)
		if e == syscall.ESRCH {
			return rst, rerr
		}

		// If it's not the same process, just mark it stopped and set
		// the status to 255
		if same, err := p.isSameProcess(); !same {
			logrus.Warnf("containerd: %s:%s (pid %d) is not the same process anymore (%v)", p.container.id, p.id, p.pid, err)
			p.stateLock.Lock()
			p.state = Stopped
			p.stateLock.Unlock()
			// Create the file so we get the exit event generated once monitor kicks in
			// without going to this all process again. A failure to write it
			// does not change the reported status, but is logged so it is not
			// silently lost.
			if werr := ioutil.WriteFile(filepath.Join(p.root, ExitStatusFile), []byte("255"), 0644); werr != nil {
				logrus.Warnf("containerd: %s:%s (pid %d) could not write exit status file: %v", p.container.id, p.id, p.pid, werr)
			}
			return 255, nil
		}

		ppid, err := readProcStatField(p.pid, 4)
		if err != nil {
			return rst, fmt.Errorf("could not check process ppid: %v (%v)", err, rerr)
		}
		// A parent pid of 1 is taken to mean the shim is gone.
		if ppid == "1" {
			logrus.Warnf("containerd: %s:%s shim died, killing associated process", p.container.id, p.id)
			// Best effort: the loop below observes whether the process went away.
			unix.Kill(p.pid, syscall.SIGKILL)
			// wait for the process to die
			for {
				e := unix.Kill(p.pid, 0)
				if e == syscall.ESRCH {
					break
				}
				time.Sleep(10 * time.Millisecond)
			}

			rst = 128 + int(syscall.SIGKILL)
			// Create the file so we get the exit event generated once monitor kicks in
			// without going to this all process again
			rerr = ioutil.WriteFile(filepath.Join(p.root, ExitStatusFile), []byte(fmt.Sprintf("%d", rst)), 0644)
		}

		return rst, rerr
	}

	// Possible that the shim was SIGKILLED
	e := unix.Kill(p.cmd.Process.Pid, 0)
	if e != syscall.ESRCH {
		return rst, rerr
	}

	// Ensure we got the shim ProcessState
	<-p.cmdDoneCh

	shimStatus := p.cmd.ProcessState.Sys().(syscall.WaitStatus)
	if shimStatus.Signaled() && shimStatus.Signal() == syscall.SIGKILL {
		logrus.Debugf("containerd: ExitStatus(container: %s, process: %s): shim was SIGKILL'ed reaping its child with pid %d", p.container.id, p.id, p.pid)

		var (
			status unix.WaitStatus
			rusage unix.Rusage
			wpid   int
		)

		// Some processes change their PR_SET_PDEATHSIG, so force kill them
		unix.Kill(p.pid, syscall.SIGKILL)

		// Poll until Wait4 reports a pid or fails.
		for wpid == 0 {
			wpid, e = unix.Wait4(p.pid, &status, unix.WNOHANG, &rusage)
			if e != nil {
				// Log the Wait4 failure itself, not the caller's error.
				logrus.Debugf("containerd: ExitStatus(container: %s, process: %s): Wait4(%d): %v", p.container.id, p.id, p.pid, e)
				return rst, rerr
			}
			if wpid == 0 {
				// Nothing to reap yet; back off briefly instead of
				// spinning on the syscall.
				time.Sleep(10 * time.Millisecond)
			}
		}

		if wpid == p.pid {
			rerr = nil
			rst = 128 + int(shimStatus.Signal())
		} else {
			logrus.Errorf("containerd: ExitStatus(container: %s, process: %s): unexpected returned pid from wait4 %v (expected %v)", p.container.id, p.id, wpid, p.pid)
		}

		p.stateLock.Lock()
		p.state = Stopped
		p.stateLock.Unlock()
	}

	return rst, rerr
}