Browse code

Events for OOM needs to be shift to an earlier time

It's worth to warn user as soon as possilbe when OOM happend.

Signed-off-by: Hu Keping <hukeping@huawei.com>

Hu Keping authored on 2015/09/11 19:01:47
Showing 11 changed files
... ...
@@ -783,7 +783,7 @@ func (container *Container) exec(ExecConfig *ExecConfig) error {
783 783
 	container.Lock()
784 784
 	defer container.Unlock()
785 785
 
786
-	callback := func(processConfig *execdriver.ProcessConfig, pid int) error {
786
+	callback := func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
787 787
 		if processConfig.Tty {
788 788
 			// The callback is called after the process Start()
789 789
 			// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
... ...
@@ -881,7 +881,7 @@ func (daemon *Daemon) run(c *Container, pipes *execdriver.Pipes, startCallback e
881 881
 	hooks := execdriver.Hooks{
882 882
 		Start: startCallback,
883 883
 	}
884
-	hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int) error {
884
+	hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
885 885
 		return c.setNetworkNamespaceKey(pid)
886 886
 	})
887 887
 	return daemon.execDriver.Run(c.command, pipes, hooks)
... ...
@@ -27,8 +27,9 @@ var (
27 27
 // DriverCallback defines a callback function which is used in "Run" and "Exec".
28 28
 // This allows work to be done in the parent process when the child is passing
29 29
 // through PreStart, Start and PostStop events.
30
-// Callbacks are provided a processConfig pointer and the pid of the child
31
-type DriverCallback func(processConfig *ProcessConfig, pid int) error
30
+// Callbacks are provided a processConfig pointer and the pid of the child.
31
+// The channel will be used to notify the OOM events.
32
+type DriverCallback func(processConfig *ProcessConfig, pid int, chOOM <-chan struct{}) error
32 33
 
33 34
 // Hooks is a struct containing function pointers to callbacks
34 35
 // used by any execdriver implementation exploiting hooks capabilities
... ...
@@ -324,13 +324,14 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd
324 324
 
325 325
 	c.ContainerPid = pid
326 326
 
327
+	oomKill := false
328
+	oomKillNotification, err := notifyOnOOM(cgroupPaths)
329
+
327 330
 	if hooks.Start != nil {
328 331
 		logrus.Debugf("Invoking startCallback")
329
-		hooks.Start(&c.ProcessConfig, pid)
330
-	}
332
+		hooks.Start(&c.ProcessConfig, pid, oomKillNotification)
331 333
 
332
-	oomKill := false
333
-	oomKillNotification, err := notifyOnOOM(cgroupPaths)
334
+	}
334 335
 
335 336
 	<-waitLock
336 337
 	exitCode := getExitCode(c)
... ...
@@ -146,7 +146,11 @@ func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command,
146 146
 			configs.NewFunctionHook(func(s configs.HookState) error {
147 147
 				if len(hooks.PreStart) > 0 {
148 148
 					for _, fnHook := range hooks.PreStart {
149
-						if err := fnHook(&c.ProcessConfig, s.Pid); err != nil {
149
+						// A closed channel for OOM is returned here as it will be
150
+						// non-blocking and return the correct result when read.
151
+						chOOM := make(chan struct{})
152
+						close(chOOM)
153
+						if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
150 154
 							return err
151 155
 						}
152 156
 					}
... ...
@@ -165,17 +165,18 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd
165 165
 		return execdriver.ExitStatus{ExitCode: -1}, err
166 166
 	}
167 167
 
168
+	oom := notifyOnOOM(cont)
168 169
 	if hooks.Start != nil {
170
+
169 171
 		pid, err := p.Pid()
170 172
 		if err != nil {
171 173
 			p.Signal(os.Kill)
172 174
 			p.Wait()
173 175
 			return execdriver.ExitStatus{ExitCode: -1}, err
174 176
 		}
175
-		hooks.Start(&c.ProcessConfig, pid)
177
+		hooks.Start(&c.ProcessConfig, pid, oom)
176 178
 	}
177 179
 
178
-	oom := notifyOnOOM(cont)
179 180
 	waitF := p.Wait
180 181
 	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
181 182
 		// we need such hack for tracking processes with inherited fds,
... ...
@@ -52,7 +52,12 @@ func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo
52 52
 			p.Wait()
53 53
 			return -1, err
54 54
 		}
55
-		hooks.Start(&c.ProcessConfig, pid)
55
+
56
+		// A closed channel for OOM is returned here as it will be
57
+		// non-blocking and return the correct result when read.
58
+		chOOM := make(chan struct{})
59
+		close(chOOM)
60
+		hooks.Start(&c.ProcessConfig, pid, chOOM)
56 61
 	}
57 62
 
58 63
 	ps, err := p.Wait()
... ...
@@ -70,7 +70,11 @@ func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo
70 70
 
71 71
 	// Invoke the start callback
72 72
 	if hooks.Start != nil {
73
-		hooks.Start(&c.ProcessConfig, int(pid))
73
+		// A closed channel for OOM is returned here as it will be
74
+		// non-blocking and return the correct result when read.
75
+		chOOM := make(chan struct{})
76
+		close(chOOM)
77
+		hooks.Start(&c.ProcessConfig, int(pid), chOOM)
74 78
 	}
75 79
 
76 80
 	if exitCode, err = hcsshim.WaitForProcessInComputeSystem(c.ID, pid); err != nil {
... ...
@@ -291,7 +291,11 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd
291 291
 	d.Unlock()
292 292
 
293 293
 	if hooks.Start != nil {
294
-		hooks.Start(&c.ProcessConfig, int(pid))
294
+		// A closed channel for OOM is returned here as it will be
295
+		// non-blocking and return the correct result when read.
296
+		chOOM := make(chan struct{})
297
+		close(chOOM)
298
+		hooks.Start(&c.ProcessConfig, int(pid), chOOM)
295 299
 	}
296 300
 
297 301
 	var exitCode int32
... ...
@@ -162,9 +162,6 @@ func (m *containerMonitor) Start() error {
162 162
 
163 163
 		if m.shouldRestart(exitStatus.ExitCode) {
164 164
 			m.container.setRestarting(&exitStatus)
165
-			if exitStatus.OOMKilled {
166
-				m.container.logEvent("oom")
167
-			}
168 165
 			m.container.logEvent("die")
169 166
 			m.resetContainer(true)
170 167
 
... ...
@@ -179,9 +176,7 @@ func (m *containerMonitor) Start() error {
179 179
 			}
180 180
 			continue
181 181
 		}
182
-		if exitStatus.OOMKilled {
183
-			m.container.logEvent("oom")
184
-		}
182
+
185 183
 		m.container.logEvent("die")
186 184
 		m.resetContainer(true)
187 185
 		return err
... ...
@@ -250,7 +245,14 @@ func (m *containerMonitor) shouldRestart(exitCode int) bool {
250 250
 
251 251
 // callback ensures that the container's state is properly updated after we
252 252
 // received ack from the execution drivers
253
-func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int) error {
253
+func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
254
+	go func() {
255
+		_, ok := <-chOOM
256
+		if ok {
257
+			m.container.logEvent("oom")
258
+		}
259
+	}()
260
+
254 261
 	if processConfig.Tty {
255 262
 		// The callback is called after the process Start()
256 263
 		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
... ...
@@ -8,6 +8,8 @@ import (
8 8
 	"io/ioutil"
9 9
 	"os"
10 10
 	"os/exec"
11
+	"strings"
12
+	"time"
11 13
 	"unicode"
12 14
 
13 15
 	"github.com/go-check/check"
... ...
@@ -51,3 +53,99 @@ func (s *DockerSuite) TestEventsRedirectStdout(c *check.C) {
51 51
 	}
52 52
 
53 53
 }
54
+
55
+func (s *DockerSuite) TestEventsOOMDisableFalse(c *check.C) {
56
+	testRequires(c, DaemonIsLinux)
57
+	testRequires(c, oomControl)
58
+
59
+	errChan := make(chan error)
60
+	go func() {
61
+		defer close(errChan)
62
+		out, exitCode, _ := dockerCmdWithError("run", "--name", "oomFalse", "-m", "10MB", "busybox", "sh", "-c", "x=a; while true; do x=$x$x$x$x; done")
63
+		if expected := 137; exitCode != expected {
64
+			errChan <- fmt.Errorf("wrong exit code for OOM container: expected %d, got %d (output: %q)", expected, exitCode, out)
65
+		}
66
+	}()
67
+	select {
68
+	case err := <-errChan:
69
+		c.Assert(err, check.IsNil)
70
+	case <-time.After(30 * time.Second):
71
+		c.Fatal("Timeout waiting for container to die on OOM")
72
+	}
73
+
74
+	out, _ := dockerCmd(c, "events", "--since=0", "-f", "container=oomFalse", fmt.Sprintf("--until=%d", daemonTime(c).Unix()))
75
+	events := strings.Split(strings.TrimSuffix(out, "\n"), "\n")
76
+	if len(events) < 5 {
77
+		c.Fatalf("Missing expected event")
78
+	}
79
+
80
+	createEvent := strings.Fields(events[len(events)-5])
81
+	attachEvent := strings.Fields(events[len(events)-4])
82
+	startEvent := strings.Fields(events[len(events)-3])
83
+	oomEvent := strings.Fields(events[len(events)-2])
84
+	dieEvent := strings.Fields(events[len(events)-1])
85
+	if createEvent[len(createEvent)-1] != "create" {
86
+		c.Fatalf("event should be create, not %#v", createEvent)
87
+	}
88
+	if attachEvent[len(attachEvent)-1] != "attach" {
89
+		c.Fatalf("event should be attach, not %#v", attachEvent)
90
+	}
91
+	if startEvent[len(startEvent)-1] != "start" {
92
+		c.Fatalf("event should be start, not %#v", startEvent)
93
+	}
94
+	if oomEvent[len(oomEvent)-1] != "oom" {
95
+		c.Fatalf("event should be oom, not %#v", oomEvent)
96
+	}
97
+	if dieEvent[len(dieEvent)-1] != "die" {
98
+		c.Fatalf("event should be die, not %#v", dieEvent)
99
+	}
100
+}
101
+
102
+func (s *DockerSuite) TestEventsOOMDisableTrue(c *check.C) {
103
+	testRequires(c, DaemonIsLinux)
104
+	testRequires(c, oomControl)
105
+
106
+	errChan := make(chan error)
107
+	go func() {
108
+		defer close(errChan)
109
+		out, exitCode, _ := dockerCmdWithError("run", "--oom-kill-disable=true", "--name", "oomTrue", "-m", "10MB", "busybox", "sh", "-c", "x=a; while true; do x=$x$x$x$x; done")
110
+		if expected := 137; exitCode != expected {
111
+			errChan <- fmt.Errorf("wrong exit code for OOM container: expected %d, got %d (output: %q)", expected, exitCode, out)
112
+		}
113
+	}()
114
+	select {
115
+	case err := <-errChan:
116
+		c.Assert(err, check.IsNil)
117
+	case <-time.After(20 * time.Second):
118
+		defer dockerCmd(c, "kill", "oomTrue")
119
+
120
+		out, _ := dockerCmd(c, "events", "--since=0", "-f", "container=oomTrue", fmt.Sprintf("--until=%d", daemonTime(c).Unix()))
121
+		events := strings.Split(strings.TrimSuffix(out, "\n"), "\n")
122
+		if len(events) < 4 {
123
+			c.Fatalf("Missing expected event")
124
+		}
125
+
126
+		createEvent := strings.Fields(events[len(events)-4])
127
+		attachEvent := strings.Fields(events[len(events)-3])
128
+		startEvent := strings.Fields(events[len(events)-2])
129
+		oomEvent := strings.Fields(events[len(events)-1])
130
+
131
+		if createEvent[len(createEvent)-1] != "create" {
132
+			c.Fatalf("event should be create, not %#v", createEvent)
133
+		}
134
+		if attachEvent[len(attachEvent)-1] != "attach" {
135
+			c.Fatalf("event should be attach, not %#v", attachEvent)
136
+		}
137
+		if startEvent[len(startEvent)-1] != "start" {
138
+			c.Fatalf("event should be start, not %#v", startEvent)
139
+		}
140
+		if oomEvent[len(oomEvent)-1] != "oom" {
141
+			c.Fatalf("event should be oom, not %#v", oomEvent)
142
+		}
143
+
144
+		out, _ = dockerCmd(c, "inspect", "-f", "{{.State.Status}}", "oomTrue")
145
+		if strings.TrimSpace(out) != "running" {
146
+			c.Fatalf("container should be still running, not %v", out)
147
+		}
148
+	}
149
+}