Browse code

Set OOMKilled state on any OOM event

This restores the behavior that existed prior to #16235 for setting
OOMKilled, while retaining the additional benefits it introduced around
emitting the oom event.

This also adds a test for the most obvious OOM cases which would have
caught this regression.

Fixes #18510

Signed-off-by: Euan <euank@amazon.com>

Euan authored on 2015/12/05 14:01:31
Showing 3 changed files
... ...
@@ -304,8 +304,7 @@ func (m *containerMonitor) shouldRestart(exitCode int) bool {
304 304
 // received ack from the execution drivers
305 305
 func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
306 306
 	go func() {
307
-		_, ok := <-chOOM
308
-		if ok {
307
+		for range chOOM {
309 308
 			m.logEvent("oom")
310 309
 		}
311 310
 	}()
... ...
@@ -171,7 +171,10 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd
171 171
 		return execdriver.ExitStatus{ExitCode: -1}, err
172 172
 	}
173 173
 
174
+	// 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used
175
+	// to set the 'OOMKilled' flag in state
174 176
 	oom := notifyOnOOM(cont)
177
+	oomKilled := notifyOnOOM(cont)
175 178
 	if hooks.Start != nil {
176 179
 		pid, err := p.Pid()
177 180
 		if err != nil {
... ...
@@ -198,7 +201,21 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd
198 198
 	}
199 199
 	cont.Destroy()
200 200
 	destroyed = true
201
-	_, oomKill := <-oom
201
+	// oomKilled will have an oom event if any process within the container was
202
+	// OOM killed at any time, not only if the init process OOMed.
203
+	//
204
+	// Perhaps we only want the OOMKilled flag to be set if the OOM
205
+	// resulted in a container death, but there isn't a good way to do this
206
+	// because the kernel's cgroup oom notification does not provide information
207
+	// such as the PID. This could be heuristically done by checking that the OOM
208
+	// happened within some very small time slice for the container dying (and
209
+	// optionally exit-code 137), but I don't think the cgroup oom notification
210
+	// can be used to reliably determine this
211
+	//
212
+	// Even if there were multiple OOMs, it's sufficient to read one value
213
+	// because libcontainer's oom notify will discard the channel after the
214
+	// cgroup is destroyed
215
+	_, oomKill := <-oomKilled
202 216
 	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
203 217
 }
204 218
 
205 219
new file mode 100644
... ...
@@ -0,0 +1,32 @@
0
+// +build !windows
1
+
2
+package main
3
+
4
+import (
5
+	"github.com/docker/docker/pkg/integration/checker"
6
+	"github.com/go-check/check"
7
+)
8
+
9
+func (s *DockerSuite) TestInspectOomKilledTrue(c *check.C) {
10
+	testRequires(c, DaemonIsLinux, memoryLimitSupport)
11
+
12
+	name := "testoomkilled"
13
+	_, exitCode, _ := dockerCmdWithError("run", "--name", name, "-m", "10MB", "busybox", "sh", "-c", "x=a; while true; do x=$x$x$x$x; done")
14
+
15
+	c.Assert(exitCode, checker.Equals, 137, check.Commentf("OOM exit should be 137"))
16
+
17
+	oomKilled, err := inspectField(name, "State.OOMKilled")
18
+	c.Assert(oomKilled, checker.Equals, "true")
19
+	c.Assert(err, checker.IsNil)
20
+}
21
+
22
+func (s *DockerSuite) TestInspectOomKilledFalse(c *check.C) {
23
+	testRequires(c, DaemonIsLinux, memoryLimitSupport)
24
+
25
+	name := "testoomkilled"
26
+	dockerCmd(c, "run", "--name", name, "-m", "10MB", "busybox", "sh", "-c", "echo hello world")
27
+
28
+	oomKilled, err := inspectField(name, "State.OOMKilled")
29
+	c.Assert(oomKilled, checker.Equals, "false")
30
+	c.Assert(err, checker.IsNil)
31
+}