func getPids(cgroupName string) ([]int, error) { fsManager := fs.Manager{ Cgroups: &configs.Cgroup{ Name: cgroupName, }, } return fsManager.GetPids() }
// Ensures the system container is created and all non-kernel threads and process 1 // without a container are moved to it. // // The reason of leaving kernel threads at root cgroup is that we don't want to tie the // execution of these threads with to-be defined /system quota and create priority inversions. // func ensureSystemCgroups(rootContainer *fs.Manager, manager *fs.Manager) error { // Move non-kernel PIDs to the system container. attemptsRemaining := 10 var errs []error for attemptsRemaining >= 0 { // Only keep errors on latest attempt. errs = []error{} attemptsRemaining-- allPids, err := rootContainer.GetPids() if err != nil { errs = append(errs, fmt.Errorf("failed to list PIDs for root: %v", err)) continue } // Remove kernel pids and other protected PIDs (pid 1, PIDs already in system & kubelet containers) pids := make([]int, 0, len(allPids)) for _, pid := range allPids { if pid == 1 || isKernelPid(pid) { continue } pids = append(pids, pid) } glog.Infof("Found %d PIDs in root, %d of them are not to be moved", len(allPids), len(allPids)-len(pids)) // Check if we have moved all the non-kernel PIDs. if len(pids) == 0 { break } glog.Infof("Moving non-kernel processes: %v", pids) for _, pid := range pids { err := manager.Apply(pid) if err != nil { errs = append(errs, fmt.Errorf("failed to move PID %d into the system container %q: %v", pid, manager.Cgroups.Name, err)) } } } if attemptsRemaining < 0 { errs = append(errs, fmt.Errorf("ran out of attempts to create system containers %q", manager.Cgroups.Name)) } return utilerrors.NewAggregate(errs) }
func (e *LinuxExecutor) destroyCgroup() error { if e.groups == nil { return errors.New("Can't destroy: cgroup configuration empty") } manager := cgroupFs.Manager{} manager.Cgroups = e.groups pids, err := manager.GetPids() if err != nil { return fmt.Errorf("Failed to get pids in the cgroup %v: %v", e.groups.Name, err) } errs := new(multierror.Error) for _, pid := range pids { process, err := os.FindProcess(pid) if err != nil { multierror.Append(errs, fmt.Errorf("Failed to find Pid %v: %v", pid, err)) continue } if err := process.Kill(); err != nil { multierror.Append(errs, fmt.Errorf("Failed to kill Pid %v: %v", pid, err)) continue } if _, err := process.Wait(); err != nil { multierror.Append(errs, fmt.Errorf("Failed to wait Pid %v: %v", pid, err)) continue } } // Remove the cgroup. if err := manager.Destroy(); err != nil { multierror.Append(errs, fmt.Errorf("Failed to delete the cgroup directories: %v", err)) } if len(errs.Errors) != 0 { return fmt.Errorf("Failed to destroy cgroup: %v", errs) } return nil }
// Ensures the system container is created and all non-kernel threads and process 1 // without a container are moved to it. // // The reason of leaving kernel threads at root cgroup is that we don't want to tie the // execution of these threads with to-be defined /system quota and create priority inversions. // // The reason of leaving process 1 at root cgroup is that libcontainer hardcoded on // the base cgroup path based on process 1. Please see: // https://github.com/kubernetes/kubernetes/issues/12789#issuecomment-132384126 // for detail explanation. func ensureSystemContainer(rootContainer *fs.Manager, manager *fs.Manager) error { // Move non-kernel PIDs to the system container. attemptsRemaining := 10 var errs []error for attemptsRemaining >= 0 { // Only keep errors on latest attempt. errs = []error{} attemptsRemaining-- allPids, err := rootContainer.GetPids() if err != nil { errs = append(errs, fmt.Errorf("failed to list PIDs for root: %v", err)) continue } // Get PIDs already in target group so we can remove them from the list of // PIDs to move. systemCgroupPIDs, err := manager.GetPids() if err != nil { errs = append(errs, fmt.Errorf("failed to list PIDs for %s: %v", manager.Cgroups.Name, err)) continue } systemCgroupPIDMap := make(map[int]struct{}, len(systemCgroupPIDs)) for _, pid := range systemCgroupPIDs { systemCgroupPIDMap[pid] = struct{}{} } // Remove kernel pids and process 1 pids := make([]int, 0, len(allPids)) for _, pid := range allPids { if isKernelPid(pid) { continue } if _, ok := systemCgroupPIDMap[pid]; ok { continue } pids = append(pids, pid) } glog.Infof("Found %d PIDs in root, %d of them are kernel related", len(allPids), len(allPids)-len(pids)) // Check if we moved all the non-kernel PIDs. if len(pids) == 0 { break } glog.Infof("Moving non-kernel threads: %v", pids) for _, pid := range pids { err := manager.Apply(pid) if err != nil { errs = append(errs, fmt.Errorf("failed to move PID %d into the system container %q: %v", pid, manager.Cgroups.Name, err)) } } } if attemptsRemaining < 0 { errs = append(errs, fmt.Errorf("ran out of attempts to create system containers %q", manager.Cgroups.Name)) } return utilerrors.NewAggregate(errs) }