// Creates resource-only containerName if it does not already exist and moves // the current process to it. // // containerName must be an absolute container name. func RunInResourceContainer(containerName string) error { manager := fs.Manager{ Cgroups: &configs.Cgroup{ Name: containerName, AllowAllDevices: true, }, } return manager.Apply(os.Getpid()) }
// Ensures the system container is created and all non-kernel threads and process 1 // without a container are moved to it. // // The reason of leaving kernel threads at root cgroup is that we don't want to tie the // execution of these threads with to-be defined /system quota and create priority inversions. // // The reason of leaving process 1 at root cgroup is that libcontainer hardcoded on // the base cgroup path based on process 1. Please see: // https://github.com/kubernetes/kubernetes/issues/12789#issuecomment-132384126 // for detail explanation. func ensureSystemContainer(rootContainer *fs.Manager, manager *fs.Manager) error { // Move non-kernel PIDs to the system container. attemptsRemaining := 10 var errs []error for attemptsRemaining >= 0 { // Only keep errors on latest attempt. errs = []error{} attemptsRemaining-- allPids, err := rootContainer.GetPids() if err != nil { errs = append(errs, fmt.Errorf("failed to list PIDs for root: %v", err)) continue } // Remove kernel pids and process 1 pids := make([]int, 0, len(allPids)) for _, pid := range allPids { if isKernelPid(pid) { continue } // TODO(dawnchen): Remove this once the hard dependency on process 1 is removed // on systemd node. if pid == 1 { continue } pids = append(pids, pid) } glog.Infof("Found %d PIDs in root, %d of them are kernel related", len(allPids), len(allPids)-len(pids)) // Check if we moved all the non-kernel PIDs. if len(pids) == 0 { break } glog.Infof("Moving non-kernel threads: %v", pids) for _, pid := range pids { err := manager.Apply(pid) if err != nil { errs = append(errs, fmt.Errorf("failed to move PID %d into the system container %q: %v", pid, manager.Cgroups.Name, err)) continue } } } if attemptsRemaining < 0 { errs = append(errs, fmt.Errorf("ran out of attempts to create system containers %q", manager.Cgroups.Name)) } return errors.NewAggregate(errs) }
// Ensures that the Docker daemon is in the desired container. func ensureDockerInContainer(cadvisor cadvisor.Interface, oomScoreAdj int, manager *fs.Manager) error { // What container is Docker in? out, err := exec.Command("pidof", "docker").Output() if err != nil { return fmt.Errorf("failed to find pid of Docker container: %v", err) } // The output of pidof is a list of pids. // Docker may be forking and thus there would be more than one result. pids := []int{} for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") { pid, err := strconv.Atoi(pidStr) if err != nil { continue } pids = append(pids, pid) } // Move if the pid is not already in the desired container. errs := []error{} for _, pid := range pids { cont, err := getContainer(pid) if err != nil { errs = append(errs, fmt.Errorf("failed to find container of PID %d: %v", pid, err)) } if cont != manager.Cgroups.Name { err = manager.Apply(pid) if err != nil { errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q", pid, cont, manager.Cgroups.Name)) } } // Also apply oom-score-adj to processes oomAdjuster := oom.NewOomAdjuster() if err := oomAdjuster.ApplyOomScoreAdj(pid, oomScoreAdj); err != nil { errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid)) } } return errors.NewAggregate(errs) }