This restores the behavior that existed prior to #16235 for setting
OOMKilled, while retaining the additional benefits it introduced around
emitting the oom event.
This also adds a test for the most obvious OOM cases which would have
caught this regression.
Fixes #18510
Signed-off-by: Euan <euank@amazon.com>
| ... | ... |
@@ -304,8 +304,7 @@ func (m *containerMonitor) shouldRestart(exitCode int) bool {
|
| 304 | 304 |
// received ack from the execution drivers |
| 305 | 305 |
func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
|
| 306 | 306 |
go func() {
|
| 307 |
- _, ok := <-chOOM |
|
| 308 |
- if ok {
|
|
| 307 |
+ for range chOOM {
|
|
| 309 | 308 |
m.logEvent("oom")
|
| 310 | 309 |
} |
| 311 | 310 |
}() |
| ... | ... |
@@ -171,7 +171,10 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd |
| 171 | 171 |
return execdriver.ExitStatus{ExitCode: -1}, err
|
| 172 | 172 |
} |
| 173 | 173 |
|
| 174 |
+ // 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used |
|
| 175 |
+ // to set the 'OOMKilled' flag in state |
|
| 174 | 176 |
oom := notifyOnOOM(cont) |
| 177 |
+ oomKilled := notifyOnOOM(cont) |
|
| 175 | 178 |
if hooks.Start != nil {
|
| 176 | 179 |
pid, err := p.Pid() |
| 177 | 180 |
if err != nil {
|
| ... | ... |
@@ -198,7 +201,21 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd |
| 198 | 198 |
} |
| 199 | 199 |
cont.Destroy() |
| 200 | 200 |
destroyed = true |
| 201 |
- _, oomKill := <-oom |
|
| 201 |
+ // oomKilled will have an oom event if any process within the container was |
|
| 202 |
+ // OOM killed at any time, not only if the init process OOMed. |
|
| 203 |
+ // |
|
| 204 |
+ // Perhaps we only want the OOMKilled flag to be set if the OOM |
|
| 205 |
+ // resulted in a container death, but there isn't a good way to do this |
|
| 206 |
+ // because the kernel's cgroup oom notification does not provide information |
|
| 207 |
+ // such as the PID. This could be heuristically done by checking that the OOM |
|
| 208 |
+ // happened within some very small time slice for the container dying (and |
|
| 209 |
+ // optionally exit-code 137), but I don't think the cgroup oom notification |
|
| 210 |
+ // can be used to reliably determine this |
|
| 211 |
+ // |
|
| 212 |
+ // Even if there were multiple OOMs, it's sufficient to read one value |
|
| 213 |
+ // because libcontainer's oom notify will discard the channel after the |
|
| 214 |
+ // cgroup is destroyed |
|
| 215 |
+ _, oomKill := <-oomKilled |
|
| 202 | 216 |
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
|
| 203 | 217 |
} |
| 204 | 218 |
|
| 205 | 219 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,32 @@ |
| 0 |
+// +build !windows |
|
| 1 |
+ |
|
| 2 |
+package main |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "github.com/docker/docker/pkg/integration/checker" |
|
| 6 |
+ "github.com/go-check/check" |
|
| 7 |
+) |
|
| 8 |
+ |
|
| 9 |
+func (s *DockerSuite) TestInspectOomKilledTrue(c *check.C) {
|
|
| 10 |
+ testRequires(c, DaemonIsLinux, memoryLimitSupport) |
|
| 11 |
+ |
|
| 12 |
+ name := "testoomkilled" |
|
| 13 |
+ _, exitCode, _ := dockerCmdWithError("run", "--name", name, "-m", "10MB", "busybox", "sh", "-c", "x=a; while true; do x=$x$x$x$x; done")
|
|
| 14 |
+ |
|
| 15 |
+ c.Assert(exitCode, checker.Equals, 137, check.Commentf("OOM exit should be 137"))
|
|
| 16 |
+ |
|
| 17 |
+ oomKilled, err := inspectField(name, "State.OOMKilled") |
|
| 18 |
+ c.Assert(oomKilled, checker.Equals, "true") |
|
| 19 |
+ c.Assert(err, checker.IsNil) |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func (s *DockerSuite) TestInspectOomKilledFalse(c *check.C) {
|
|
| 23 |
+ testRequires(c, DaemonIsLinux, memoryLimitSupport) |
|
| 24 |
+ |
|
| 25 |
+ name := "testoomkilled" |
|
| 26 |
+ dockerCmd(c, "run", "--name", name, "-m", "10MB", "busybox", "sh", "-c", "echo hello world") |
|
| 27 |
+ |
|
| 28 |
+ oomKilled, err := inspectField(name, "State.OOMKilled") |
|
| 29 |
+ c.Assert(oomKilled, checker.Equals, "false") |
|
| 30 |
+ c.Assert(err, checker.IsNil) |
|
| 31 |
+} |