Skip to content

Commit

Permalink
use a struct for cgroupscheck results
Browse files Browse the repository at this point in the history
Signed-off-by: Thorsten Klein <[email protected]>
  • Loading branch information
iwilltry42 committed May 12, 2021
1 parent 8e654a1 commit 3f91dcd
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 31 deletions.
4 changes: 2 additions & 2 deletions pkg/agent/containerd/containerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,10 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
}

isRunningInUserNS := system.RunningInUserNS()
_, _, hasCFS, hasPIDs, _ := agent.CheckCgroups()
cgroupsCheck := agent.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
disableCgroup := isRunningInUserNS && (!cgroupsCheck.HasCFS || !cgroupsCheck.HasPIDs || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
}
Expand Down
67 changes: 38 additions & 29 deletions pkg/daemons/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ import (

const unixPrefix = "unix://"

// CgroupCheck groups the results of the cgroup inspection performed by
// CheckCgroups so that callers can read each result by name.
type CgroupCheck struct {
// KubeletRoot, when non-empty, is passed to the kubelet as "kubelet-cgroups".
// CheckCgroups leaves it empty or sets it to "/" + version.Program.
KubeletRoot string
// RuntimeRoot, when non-empty, is passed to the kubelet as "runtime-cgroups".
// CheckCgroups leaves it empty or sets it to "/" + version.Program.
RuntimeRoot string
// HasCFS is set when the "cpu" controller is found (and, on the
// /proc/self/cgroup path, when cpu.cfs_period_us exists). When false, the
// kubelet is started with "cpu-cfs-quota" set to "false".
HasCFS bool
// HasPIDs is set when the "pids" controller is found. When false, the
// kubelet's pod PIDs limit feature is disabled.
HasPIDs bool
// IsV2 is true when cgroups.Mode() reports cgroups.Unified.
IsV2 bool
// V2Evac is set when the root cgroup of a v2 hierarchy still contains
// processes; startKubelet then evacuates them to "init".
V2Evac bool // cgroupv2 needs evacuation of procs from /
}

func Agent(config *config.Agent) error {
rand.Seed(time.Now().UTC().UnixNano())

Expand Down Expand Up @@ -131,29 +140,29 @@ func startKubelet(cfg *config.Agent) error {
if err != nil || defaultIP.String() != cfg.NodeIP {
argsMap["node-ip"] = cfg.NodeIP
}
kubeletRoot, runtimeRoot, hasCFS, hasPIDs, evacuateCgroup2 := CheckCgroups()
if evacuateCgroup2 {
cgroupsCheck := CheckCgroups()
if cgroupsCheck.V2Evac {
// evacuate processes from cgroup / to /init
if err := cgrouputil.EvacuateCgroup2("init"); err != nil {
logrus.Errorf("failed to evacuate cgroup2: %+v", err)
return err
}
}
if !hasCFS {
if !cgroupsCheck.HasCFS {
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
}
if !hasPIDs {
if !cgroupsCheck.HasPIDs {
logrus.Warn("Disabling pod PIDs limit feature due to missing cgroup pids support")
argsMap["cgroups-per-qos"] = "false"
argsMap["enforce-node-allocatable"] = ""
argsMap["feature-gates"] = addFeatureGate(argsMap["feature-gates"], "SupportPodPidsLimit=false")
}
if kubeletRoot != "" {
argsMap["kubelet-cgroups"] = kubeletRoot
if cgroupsCheck.KubeletRoot != "" {
argsMap["kubelet-cgroups"] = cgroupsCheck.KubeletRoot
}
if runtimeRoot != "" {
argsMap["runtime-cgroups"] = runtimeRoot
if cgroupsCheck.RuntimeRoot != "" {
argsMap["runtime-cgroups"] = cgroupsCheck.RuntimeRoot
}
if system.RunningInUserNS() {
argsMap["feature-gates"] = addFeatureGate(argsMap["feature-gates"], "DevicePlugins=false")
Expand All @@ -170,7 +179,7 @@ func startKubelet(cfg *config.Agent) error {
if cfg.Rootless {
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if hasCFS && hasPIDs && cgroupfsWritable {
if cgroupsCheck.HasCFS && cgroupsCheck.HasPIDs && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
// cgroupfs v2, delegated for rootless by systemd
argsMap["cgroup-driver"] = "cgroupfs"
Expand Down Expand Up @@ -201,45 +210,45 @@ func addFeatureGate(current, new string) string {
return current + "," + new
}

func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs, v2Evac bool) {
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
func CheckCgroups() (check CgroupCheck) {
check.IsV2 = cgroups.Mode() == cgroups.Unified

// For Unified (v2) cgroups we can directly check to see what controllers are mounted
// under the unified hierarchy.
if cgroupsModeV2 {
if check.IsV2 {

cgroupRoot, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
logrus.Errorf("Failed to load root cgroup: %+v", err)
return "", "", false, false, v2Evac
return check
}

cgroupRootProcs, err := cgroupRoot.Procs(false)
if err != nil {
return "", "", false, false, v2Evac
return check
}

v2Evac = len(cgroupRootProcs) > 0
check.V2Evac = len(cgroupRootProcs) > 0

cgroupRootControllers, err := cgroupRoot.Controllers()
if err != nil {
return "", "", false, false, v2Evac
return check
}

// Intentionally using an expressionless switch to match the logic below
for _, controller := range cgroupRootControllers {
switch {
case controller == "cpu":
hasCFS = true
check.HasCFS = true
case controller == "pids":
hasPIDs = true
check.HasPIDs = true
}
}
}

f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false, v2Evac
return check
}
defer f.Close()

Expand All @@ -254,7 +263,7 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs, v2Evac bo
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
case controller == "name=systemd" || check.IsV2:
// If we detect that we are running under a `.scope` unit with systemd
// we can assume we are being directly invoked from the command line
// and thus need to set our kubelet root to something out of the context
Expand All @@ -266,7 +275,7 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs, v2Evac bo
last := parts[len(parts)-1]
i := strings.LastIndex(last, ".scope")
if i > 0 {
kubeletRoot = "/" + version.Program
check.KubeletRoot = "/" + version.Program
}
case controller == "cpu":
// It is common for this to show up multiple times in /sys/fs/cgroup if the controllers are comounted:
Expand All @@ -275,25 +284,25 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs, v2Evac bo
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
check.HasCFS = true
}
case controller == "pids":
hasPIDs = true
check.HasPIDs = true
}
}
}

// If we're running with v1 and didn't find a scope assigned by systemd, we need to create our own root cgroup to avoid
// just inheriting from the parent process. The kubelet will take care of moving us into it when we start it up later.
if kubeletRoot == "" {
if check.KubeletRoot == "" {
// Examine process ID 1 to see if there is a cgroup assigned to it.
// When we are not in a container, process 1 is likely to be systemd or some other service manager.
// It either lives at `/` or `/init.scope` according to https://man7.org/linux/man-pages/man7/systemd.special.7.html
// When containerized, process 1 will generally be in a cgroup; otherwise, we may be running in
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false, v2Evac
return check
}
defer g.Close()
scan = bufio.NewScanner(g)
Expand All @@ -307,15 +316,15 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs, v2Evac bo
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
case controller == "name=systemd" || check.IsV2:
last := parts[len(parts)-1]
if last != "/" && last != "/init.scope" {
kubeletRoot = "/" + version.Program
runtimeRoot = "/" + version.Program
check.KubeletRoot = "/" + version.Program
check.RuntimeRoot = "/" + version.Program
}
}
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs, v2Evac
return check
}

0 comments on commit 3f91dcd

Please sign in to comment.