Skip to content

Commit 6462e9d

Browse files
committed
runc update: implement memory.checkBeforeUpdate
This is aimed at solving the problem of cgroup v2 memory controller behavior which is not compatible with that of cgroup v1. In cgroup v1, if the new memory limit being set is lower than the current usage, setting the new limit fails. In cgroup v2, same operation succeeds, and the container is OOM killed. Introduce a new setting, memory.checkBeforeUpdate, and use it to mimic cgroup v1 behavior. Note that this is not 100% reliable because of TOCTOU, but this is the best we can do. Add some test cases. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent b34a547 commit 6462e9d

File tree

11 files changed

+118
-12
lines changed

11 files changed

+118
-12
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ require (
1212
github.com/godbus/dbus/v5 v5.1.0
1313
github.com/moby/sys/mountinfo v0.6.2
1414
github.com/mrunalp/fileutils v0.5.0
15-
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b
15+
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78
1616
github.com/opencontainers/selinux v1.10.2
1717
github.com/seccomp/libseccomp-golang v0.10.0
1818
github.com/sirupsen/logrus v1.9.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vyg
3131
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
3232
github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
3333
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
34-
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b h1:udwtfS44rxYE/ViMLchHQBjfE60GZSB1arY7BFbyxLs=
35-
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
34+
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 h1:R5M2qXZiK/mWPMT4VldCOiSL9HIAMuxQZWdG0CSM5+4=
35+
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
3636
github.com/opencontainers/selinux v1.10.2 h1:NFy2xCsjn7+WspbfZkUd5zyVeisV7VFbPSP96+8/ha4=
3737
github.com/opencontainers/selinux v1.10.2/go.mod h1:cARutUbaUrlRClyvxOICCgKixCs6L05aUsohzA3EkHQ=
3838
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

libcontainer/cgroups/fs2/fs2.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,35 @@ func (m *Manager) OOMKillCount() (uint64, error) {
269269

270270
return c, err
271271
}
272+
273+
func CheckMemoryUsage(dirPath string, r *configs.Resources) error {
274+
if !r.MemoryCheckBeforeUpdate {
275+
return nil
276+
}
277+
278+
if r.Memory <= 0 && r.MemorySwap <= 0 {
279+
return nil
280+
}
281+
282+
usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current")
283+
if err != nil {
284+
// This check is on best-effort basis, so if we can't read the
285+
// current usage (cgroup not yet created, or any other error),
286+
// we should not fail.
287+
return nil
288+
}
289+
290+
if r.MemorySwap > 0 {
291+
if uint64(r.MemorySwap) <= usage {
292+
return fmt.Errorf("rejecting memory+swap limit %d <= usage %d", r.MemorySwap, usage)
293+
}
294+
}
295+
296+
if r.Memory > 0 {
297+
if uint64(r.Memory) <= usage {
298+
return fmt.Errorf("rejecting memory limit %d <= usage %d", r.Memory, usage)
299+
}
300+
}
301+
302+
return nil
303+
}

libcontainer/cgroups/fs2/memory.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ func setMemory(dirPath string, r *configs.Resources) error {
4040
if !isMemorySet(r) {
4141
return nil
4242
}
43+
44+
if err := CheckMemoryUsage(dirPath, r); err != nil {
45+
return err
46+
}
47+
4348
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
4449
if err != nil {
4550
return err

libcontainer/cgroups/systemd/v2.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,14 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
174174
return props, nil
175175
}
176176

177-
func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
177+
func genV2ResourcesProperties(dirPath string, r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
178+
// We need this check before setting systemd properties, otherwise
179+
// the container is OOM-killed and the systemd unit is removed
180+
// before we get to fsMgr.Set().
181+
if err := fs2.CheckMemoryUsage(dirPath, r); err != nil {
182+
return nil, err
183+
}
184+
178185
var properties []systemdDbus.Property
179186

180187
// NOTE: This is of questionable correctness because we insert our own
@@ -437,7 +444,7 @@ func (m *UnifiedManager) Set(r *configs.Resources) error {
437444
if r == nil {
438445
return nil
439446
}
440-
properties, err := genV2ResourcesProperties(r, m.dbus)
447+
properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus)
441448
if err != nil {
442449
return err
443450
}

libcontainer/configs/cgroup_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,9 @@ type Resources struct {
155155
// during Set() to figure out whether the freeze is required. Those
156156
// methods may be relatively slow, thus this flag.
157157
SkipFreezeOnSet bool `json:"-"`
158+
159+
// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
160+
// if the new memory limits (Memory and MemorySwap) being set are lower
161+
// than the current memory usage, and reject if so.
162+
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
158163
}

libcontainer/specconv/spec_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,9 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
723723
if r.Memory.DisableOOMKiller != nil {
724724
c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller
725725
}
726+
if r.Memory.CheckBeforeUpdate != nil {
727+
c.Resources.MemoryCheckBeforeUpdate = *r.Memory.CheckBeforeUpdate
728+
}
726729
}
727730
if r.CPU != nil {
728731
if r.CPU.Shares != nil {

tests/integration/update.bats

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,3 +726,43 @@ EOF
726726
runc resume test_update
727727
[ "$status" -eq 0 ]
728728
}
729+
730+
@test "update memory vs CheckBeforeUpdate" {
731+
requires cgroups_v2
732+
[ $EUID -ne 0 ] && requires rootless_cgroup
733+
734+
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
735+
[ "$status" -eq 0 ]
736+
737+
# Setting memory to low value with checkBeforeUpdate=true should fail.
738+
runc update -r - test_update <<EOF
739+
{
740+
"memory": {
741+
"limit": 1024,
742+
"checkBeforeUpdate": true
743+
}
744+
}
745+
EOF
746+
[ "$status" -ne 0 ]
747+
[[ "$output" == *"rejecting memory limit"* ]]
748+
testcontainer test_update running
749+
750+
# Setting memory+swap to low value with checkBeforeUpdate=true should fail.
751+
runc update -r - test_update <<EOF
752+
{
753+
"memory": {
754+
"limit": 1024,
755+
"swap": 2048,
756+
"checkBeforeUpdate": true
757+
}
758+
}
759+
EOF
760+
[ "$status" -ne 0 ]
761+
[[ "$output" == *"rejecting memory+swap limit"* ]]
762+
testcontainer test_update running
763+
764+
# The container will be OOM killed, and runc might either succeed
765+
# or fail depending on the timing, so we don't check its exit code.
766+
runc update test_update --memory 1024
767+
testcontainer test_update stopped
768+
}

update.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
func i64Ptr(i int64) *int64 { return &i }
2121
func u64Ptr(i uint64) *uint64 { return &i }
2222
func u16Ptr(i uint16) *uint16 { return &i }
23+
func boolPtr(b bool) *bool { return &b }
2324

2425
var updateCommand = cli.Command{
2526
Name: "update",
@@ -37,7 +38,8 @@ The accepted format is as follow (unchanged values can be omitted):
3738
"memory": {
3839
"limit": 0,
3940
"reservation": 0,
40-
"swap": 0
41+
"swap": 0,
42+
"checkBeforeUpdate": true
4143
},
4244
"cpu": {
4345
"shares": 0,
@@ -136,11 +138,12 @@ other options are ignored.
136138

137139
r := specs.LinuxResources{
138140
Memory: &specs.LinuxMemory{
139-
Limit: i64Ptr(0),
140-
Reservation: i64Ptr(0),
141-
Swap: i64Ptr(0),
142-
Kernel: i64Ptr(0),
143-
KernelTCP: i64Ptr(0),
141+
Limit: i64Ptr(0),
142+
Reservation: i64Ptr(0),
143+
Swap: i64Ptr(0),
144+
Kernel: i64Ptr(0),
145+
KernelTCP: i64Ptr(0),
146+
CheckBeforeUpdate: boolPtr(false),
144147
},
145148
CPU: &specs.LinuxCPU{
146149
Shares: u64Ptr(0),
@@ -293,6 +296,7 @@ other options are ignored.
293296
config.Cgroups.Resources.Memory = *r.Memory.Limit
294297
config.Cgroups.Resources.MemoryReservation = *r.Memory.Reservation
295298
config.Cgroups.Resources.MemorySwap = *r.Memory.Swap
299+
config.Cgroups.Resources.MemoryCheckBeforeUpdate = *r.Memory.CheckBeforeUpdate
296300
config.Cgroups.Resources.PidsLimit = r.Pids.Limit
297301
config.Cgroups.Resources.Unified = r.Unified
298302

vendor/github.com/opencontainers/runtime-spec/specs-go/config.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)