diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index 3161639f218..63a14b4caf3 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -236,53 +236,7 @@ func (m *Manager) Apply(pid int) error { return err } - if err := joinDevices(c, pid); err != nil { - return err - } - - // TODO: CpuQuota and CpuPeriod not available in systemd - // we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us - if err := joinCpu(c, pid); err != nil { - return err - } - - // TODO: MemoryReservation and MemorySwap not available in systemd - if err := joinMemory(c, pid); err != nil { - return err - } - - // we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd - // because it does not currently support it via the dbus api. - if err := joinFreezer(c, pid); err != nil { - return err - } - - if err := joinNetPrio(c, pid); err != nil { - return err - } - if err := joinNetCls(c, pid); err != nil { - return err - } - - if err := joinPids(c, pid); err != nil { - return err - } - - if err := joinCpuset(c, pid); err != nil { - return err - } - - if err := joinHugetlb(c, pid); err != nil { - return err - } - - if err := joinPerfEvent(c, pid); err != nil { - return err - } - // FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem - // using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354), - // so use fs work around for now. - if err := joinBlkio(c, pid); err != nil { + if err := joinCgroups(c, pid); err != nil { return err } @@ -347,43 +301,41 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) { return path, nil } -func joinCpu(c *configs.Cgroup, pid int) error { - _, err := join(c, "cpu", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } - return nil -} - -func joinFreezer(c *configs.Cgroup, pid int) error { - _, err := join(c, "freezer", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } - return nil -} - -func joinNetPrio(c *configs.Cgroup, pid int) error { - _, err := join(c, "net_prio", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } - return nil -} - -func joinNetCls(c *configs.Cgroup, pid int) error { - _, err := join(c, "net_cls", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err +func joinCgroups(c *configs.Cgroup, pid int) error { + for _, sys := range subsystems { + name := sys.Name() + switch name { + case "name=systemd": + // let systemd handle this + break + case "cpuset": + path, err := getSubsystemPath(c, name) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + s := &fs.CpusetGroup{} + if err := s.ApplyDir(path, c, pid); err != nil { + return err + } + break + default: + _, err := join(c, name, pid) + if err != nil { + // Even if it's `not found` error, we'll return err + // because devices cgroup is hard requirement for + // container security. + if name == "devices" { + return err + } + // For other subsystems, omit the `not found` error + // because they are optional. + if !cgroups.IsNotFound(err) { + return err + } + } + } } - return nil -} -func joinPids(c *configs.Cgroup, pid int) error { - _, err := join(c, "pids", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } return nil } @@ -519,27 +471,6 @@ func getUnitName(c *configs.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name) } -// Atm we can't use the systemd device support because of two missing things: -// * Support for wildcards to allow mknod on any device -// * Support for wildcards to allow /dev/pts support -// -// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is -// in wide use. When both these are available we will be able to switch, but need to keep the old -// implementation for backwards compat. -// -// Note: we can't use systemd to set up the initial limits, and then change the cgroup -// because systemd will re-write the device settings if it needs to re-apply the cgroup context. -// This happens at least for v208 when any sibling unit is started. -func joinDevices(c *configs.Cgroup, pid int) error { - _, err := join(c, "devices", pid) - // Even if it's `not found` error, we'll return err because devices cgroup - // is hard requirement for container security. - if err != nil { - return err - } - return nil -} - func setKernelMemory(c *configs.Cgroup) error { path, err := getSubsystemPath(c, "memory") if err != nil && !cgroups.IsNotFound(err) { @@ -554,52 +485,3 @@ func setKernelMemory(c *configs.Cgroup) error { s := &fs.MemoryGroup{} return s.SetKernelMemory(path, c) } - -func joinMemory(c *configs.Cgroup, pid int) error { - _, err := join(c, "memory", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } - return nil -} - -// systemd does not atm set up the cpuset controller, so we must manually -// join it. Additionally that is a very finicky controller where each -// level must have a full setup as the default for a new directory is "no cpus" -func joinCpuset(c *configs.Cgroup, pid int) error { - path, err := getSubsystemPath(c, "cpuset") - if err != nil && !cgroups.IsNotFound(err) { - return err - } - - s := &fs.CpusetGroup{} - - return s.ApplyDir(path, c, pid) -} - -// `BlockIODeviceWeight` property of systemd does not work properly, and systemd -// expects device path instead of major minor numbers, which is also confusing -// for users. So we use fs work around for now. -func joinBlkio(c *configs.Cgroup, pid int) error { - _, err := join(c, "blkio", pid) - if err != nil { - return err - } - return nil -} - -func joinHugetlb(c *configs.Cgroup, pid int) error { - _, err := join(c, "hugetlb", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } - return nil -} - -func joinPerfEvent(c *configs.Cgroup, pid int) error { - _, err := join(c, "perf_event", pid) - if err != nil && !cgroups.IsNotFound(err) { - return err - } - return nil -}