It's worth to warn user as soon as possilbe when OOM happend.
Signed-off-by: Hu Keping <hukeping@huawei.com>
| ... | ... |
@@ -783,7 +783,7 @@ func (container *Container) exec(ExecConfig *ExecConfig) error {
|
| 783 | 783 |
container.Lock() |
| 784 | 784 |
defer container.Unlock() |
| 785 | 785 |
|
| 786 |
- callback := func(processConfig *execdriver.ProcessConfig, pid int) error {
|
|
| 786 |
+ callback := func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
|
|
| 787 | 787 |
if processConfig.Tty {
|
| 788 | 788 |
// The callback is called after the process Start() |
| 789 | 789 |
// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave |
| ... | ... |
@@ -881,7 +881,7 @@ func (daemon *Daemon) run(c *Container, pipes *execdriver.Pipes, startCallback e |
| 881 | 881 |
hooks := execdriver.Hooks{
|
| 882 | 882 |
Start: startCallback, |
| 883 | 883 |
} |
| 884 |
- hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int) error {
|
|
| 884 |
+ hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
|
|
| 885 | 885 |
return c.setNetworkNamespaceKey(pid) |
| 886 | 886 |
}) |
| 887 | 887 |
return daemon.execDriver.Run(c.command, pipes, hooks) |
| ... | ... |
@@ -27,8 +27,9 @@ var ( |
| 27 | 27 |
// DriverCallback defines a callback function which is used in "Run" and "Exec". |
| 28 | 28 |
// This allows work to be done in the parent process when the child is passing |
| 29 | 29 |
// through PreStart, Start and PostStop events. |
| 30 |
-// Callbacks are provided a processConfig pointer and the pid of the child |
|
| 31 |
-type DriverCallback func(processConfig *ProcessConfig, pid int) error |
|
| 30 |
+// Callbacks are provided a processConfig pointer and the pid of the child. |
|
| 31 |
+// The channel will be used to notify the OOM events. |
|
| 32 |
+type DriverCallback func(processConfig *ProcessConfig, pid int, chOOM <-chan struct{}) error
|
|
| 32 | 33 |
|
| 33 | 34 |
// Hooks is a struct containing function pointers to callbacks |
| 34 | 35 |
// used by any execdriver implementation exploiting hooks capabilities |
| ... | ... |
@@ -324,13 +324,14 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd |
| 324 | 324 |
|
| 325 | 325 |
c.ContainerPid = pid |
| 326 | 326 |
|
| 327 |
+ oomKill := false |
|
| 328 |
+ oomKillNotification, err := notifyOnOOM(cgroupPaths) |
|
| 329 |
+ |
|
| 327 | 330 |
if hooks.Start != nil {
|
| 328 | 331 |
logrus.Debugf("Invoking startCallback")
|
| 329 |
- hooks.Start(&c.ProcessConfig, pid) |
|
| 330 |
- } |
|
| 332 |
+ hooks.Start(&c.ProcessConfig, pid, oomKillNotification) |
|
| 331 | 333 |
|
| 332 |
- oomKill := false |
|
| 333 |
- oomKillNotification, err := notifyOnOOM(cgroupPaths) |
|
| 334 |
+ } |
|
| 334 | 335 |
|
| 335 | 336 |
<-waitLock |
| 336 | 337 |
exitCode := getExitCode(c) |
| ... | ... |
@@ -146,7 +146,11 @@ func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, |
| 146 | 146 |
configs.NewFunctionHook(func(s configs.HookState) error {
|
| 147 | 147 |
if len(hooks.PreStart) > 0 {
|
| 148 | 148 |
for _, fnHook := range hooks.PreStart {
|
| 149 |
- if err := fnHook(&c.ProcessConfig, s.Pid); err != nil {
|
|
| 149 |
+ // A closed channel for OOM is returned here as it will be |
|
| 150 |
+ // non-blocking and return the correct result when read. |
|
| 151 |
+ chOOM := make(chan struct{})
|
|
| 152 |
+ close(chOOM) |
|
| 153 |
+ if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
|
|
| 150 | 154 |
return err |
| 151 | 155 |
} |
| 152 | 156 |
} |
| ... | ... |
@@ -165,17 +165,18 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd |
| 165 | 165 |
return execdriver.ExitStatus{ExitCode: -1}, err
|
| 166 | 166 |
} |
| 167 | 167 |
|
| 168 |
+ oom := notifyOnOOM(cont) |
|
| 168 | 169 |
if hooks.Start != nil {
|
| 170 |
+ |
|
| 169 | 171 |
pid, err := p.Pid() |
| 170 | 172 |
if err != nil {
|
| 171 | 173 |
p.Signal(os.Kill) |
| 172 | 174 |
p.Wait() |
| 173 | 175 |
return execdriver.ExitStatus{ExitCode: -1}, err
|
| 174 | 176 |
} |
| 175 |
- hooks.Start(&c.ProcessConfig, pid) |
|
| 177 |
+ hooks.Start(&c.ProcessConfig, pid, oom) |
|
| 176 | 178 |
} |
| 177 | 179 |
|
| 178 |
- oom := notifyOnOOM(cont) |
|
| 179 | 180 |
waitF := p.Wait |
| 180 | 181 |
if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
|
| 181 | 182 |
// we need such hack for tracking processes with inherited fds, |
| ... | ... |
@@ -52,7 +52,12 @@ func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo |
| 52 | 52 |
p.Wait() |
| 53 | 53 |
return -1, err |
| 54 | 54 |
} |
| 55 |
- hooks.Start(&c.ProcessConfig, pid) |
|
| 55 |
+ |
|
| 56 |
+ // A closed channel for OOM is returned here as it will be |
|
| 57 |
+ // non-blocking and return the correct result when read. |
|
| 58 |
+ chOOM := make(chan struct{})
|
|
| 59 |
+ close(chOOM) |
|
| 60 |
+ hooks.Start(&c.ProcessConfig, pid, chOOM) |
|
| 56 | 61 |
} |
| 57 | 62 |
|
| 58 | 63 |
ps, err := p.Wait() |
| ... | ... |
@@ -70,7 +70,11 @@ func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo |
| 70 | 70 |
|
| 71 | 71 |
// Invoke the start callback |
| 72 | 72 |
if hooks.Start != nil {
|
| 73 |
- hooks.Start(&c.ProcessConfig, int(pid)) |
|
| 73 |
+ // A closed channel for OOM is returned here as it will be |
|
| 74 |
+ // non-blocking and return the correct result when read. |
|
| 75 |
+ chOOM := make(chan struct{})
|
|
| 76 |
+ close(chOOM) |
|
| 77 |
+ hooks.Start(&c.ProcessConfig, int(pid), chOOM) |
|
| 74 | 78 |
} |
| 75 | 79 |
|
| 76 | 80 |
if exitCode, err = hcsshim.WaitForProcessInComputeSystem(c.ID, pid); err != nil {
|
| ... | ... |
@@ -291,7 +291,11 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execd |
| 291 | 291 |
d.Unlock() |
| 292 | 292 |
|
| 293 | 293 |
if hooks.Start != nil {
|
| 294 |
- hooks.Start(&c.ProcessConfig, int(pid)) |
|
| 294 |
+ // A closed channel for OOM is returned here as it will be |
|
| 295 |
+ // non-blocking and return the correct result when read. |
|
| 296 |
+ chOOM := make(chan struct{})
|
|
| 297 |
+ close(chOOM) |
|
| 298 |
+ hooks.Start(&c.ProcessConfig, int(pid), chOOM) |
|
| 295 | 299 |
} |
| 296 | 300 |
|
| 297 | 301 |
var exitCode int32 |
| ... | ... |
@@ -162,9 +162,6 @@ func (m *containerMonitor) Start() error {
|
| 162 | 162 |
|
| 163 | 163 |
if m.shouldRestart(exitStatus.ExitCode) {
|
| 164 | 164 |
m.container.setRestarting(&exitStatus) |
| 165 |
- if exitStatus.OOMKilled {
|
|
| 166 |
- m.container.logEvent("oom")
|
|
| 167 |
- } |
|
| 168 | 165 |
m.container.logEvent("die")
|
| 169 | 166 |
m.resetContainer(true) |
| 170 | 167 |
|
| ... | ... |
@@ -179,9 +176,7 @@ func (m *containerMonitor) Start() error {
|
| 179 | 179 |
} |
| 180 | 180 |
continue |
| 181 | 181 |
} |
| 182 |
- if exitStatus.OOMKilled {
|
|
| 183 |
- m.container.logEvent("oom")
|
|
| 184 |
- } |
|
| 182 |
+ |
|
| 185 | 183 |
m.container.logEvent("die")
|
| 186 | 184 |
m.resetContainer(true) |
| 187 | 185 |
return err |
| ... | ... |
@@ -250,7 +245,14 @@ func (m *containerMonitor) shouldRestart(exitCode int) bool {
|
| 250 | 250 |
|
| 251 | 251 |
// callback ensures that the container's state is properly updated after we |
| 252 | 252 |
// received ack from the execution drivers |
| 253 |
-func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int) error {
|
|
| 253 |
+func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
|
|
| 254 |
+ go func() {
|
|
| 255 |
+ _, ok := <-chOOM |
|
| 256 |
+ if ok {
|
|
| 257 |
+ m.container.logEvent("oom")
|
|
| 258 |
+ } |
|
| 259 |
+ }() |
|
| 260 |
+ |
|
| 254 | 261 |
if processConfig.Tty {
|
| 255 | 262 |
// The callback is called after the process Start() |
| 256 | 263 |
// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave |
| ... | ... |
@@ -8,6 +8,8 @@ import ( |
| 8 | 8 |
"io/ioutil" |
| 9 | 9 |
"os" |
| 10 | 10 |
"os/exec" |
| 11 |
+ "strings" |
|
| 12 |
+ "time" |
|
| 11 | 13 |
"unicode" |
| 12 | 14 |
|
| 13 | 15 |
"github.com/go-check/check" |
| ... | ... |
@@ -51,3 +53,99 @@ func (s *DockerSuite) TestEventsRedirectStdout(c *check.C) {
|
| 51 | 51 |
} |
| 52 | 52 |
|
| 53 | 53 |
} |
| 54 |
+ |
|
| 55 |
+func (s *DockerSuite) TestEventsOOMDisableFalse(c *check.C) {
|
|
| 56 |
+ testRequires(c, DaemonIsLinux) |
|
| 57 |
+ testRequires(c, oomControl) |
|
| 58 |
+ |
|
| 59 |
+ errChan := make(chan error) |
|
| 60 |
+ go func() {
|
|
| 61 |
+ defer close(errChan) |
|
| 62 |
+ out, exitCode, _ := dockerCmdWithError("run", "--name", "oomFalse", "-m", "10MB", "busybox", "sh", "-c", "x=a; while true; do x=$x$x$x$x; done")
|
|
| 63 |
+ if expected := 137; exitCode != expected {
|
|
| 64 |
+ errChan <- fmt.Errorf("wrong exit code for OOM container: expected %d, got %d (output: %q)", expected, exitCode, out)
|
|
| 65 |
+ } |
|
| 66 |
+ }() |
|
| 67 |
+ select {
|
|
| 68 |
+ case err := <-errChan: |
|
| 69 |
+ c.Assert(err, check.IsNil) |
|
| 70 |
+ case <-time.After(30 * time.Second): |
|
| 71 |
+ c.Fatal("Timeout waiting for container to die on OOM")
|
|
| 72 |
+ } |
|
| 73 |
+ |
|
| 74 |
+ out, _ := dockerCmd(c, "events", "--since=0", "-f", "container=oomFalse", fmt.Sprintf("--until=%d", daemonTime(c).Unix()))
|
|
| 75 |
+ events := strings.Split(strings.TrimSuffix(out, "\n"), "\n") |
|
| 76 |
+ if len(events) < 5 {
|
|
| 77 |
+ c.Fatalf("Missing expected event")
|
|
| 78 |
+ } |
|
| 79 |
+ |
|
| 80 |
+ createEvent := strings.Fields(events[len(events)-5]) |
|
| 81 |
+ attachEvent := strings.Fields(events[len(events)-4]) |
|
| 82 |
+ startEvent := strings.Fields(events[len(events)-3]) |
|
| 83 |
+ oomEvent := strings.Fields(events[len(events)-2]) |
|
| 84 |
+ dieEvent := strings.Fields(events[len(events)-1]) |
|
| 85 |
+ if createEvent[len(createEvent)-1] != "create" {
|
|
| 86 |
+ c.Fatalf("event should be create, not %#v", createEvent)
|
|
| 87 |
+ } |
|
| 88 |
+ if attachEvent[len(attachEvent)-1] != "attach" {
|
|
| 89 |
+ c.Fatalf("event should be attach, not %#v", attachEvent)
|
|
| 90 |
+ } |
|
| 91 |
+ if startEvent[len(startEvent)-1] != "start" {
|
|
| 92 |
+ c.Fatalf("event should be start, not %#v", startEvent)
|
|
| 93 |
+ } |
|
| 94 |
+ if oomEvent[len(oomEvent)-1] != "oom" {
|
|
| 95 |
+ c.Fatalf("event should be oom, not %#v", oomEvent)
|
|
| 96 |
+ } |
|
| 97 |
+ if dieEvent[len(dieEvent)-1] != "die" {
|
|
| 98 |
+ c.Fatalf("event should be die, not %#v", dieEvent)
|
|
| 99 |
+ } |
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+func (s *DockerSuite) TestEventsOOMDisableTrue(c *check.C) {
|
|
| 103 |
+ testRequires(c, DaemonIsLinux) |
|
| 104 |
+ testRequires(c, oomControl) |
|
| 105 |
+ |
|
| 106 |
+ errChan := make(chan error) |
|
| 107 |
+ go func() {
|
|
| 108 |
+ defer close(errChan) |
|
| 109 |
+ out, exitCode, _ := dockerCmdWithError("run", "--oom-kill-disable=true", "--name", "oomTrue", "-m", "10MB", "busybox", "sh", "-c", "x=a; while true; do x=$x$x$x$x; done")
|
|
| 110 |
+ if expected := 137; exitCode != expected {
|
|
| 111 |
+ errChan <- fmt.Errorf("wrong exit code for OOM container: expected %d, got %d (output: %q)", expected, exitCode, out)
|
|
| 112 |
+ } |
|
| 113 |
+ }() |
|
| 114 |
+ select {
|
|
| 115 |
+ case err := <-errChan: |
|
| 116 |
+ c.Assert(err, check.IsNil) |
|
| 117 |
+ case <-time.After(20 * time.Second): |
|
| 118 |
+ defer dockerCmd(c, "kill", "oomTrue") |
|
| 119 |
+ |
|
| 120 |
+ out, _ := dockerCmd(c, "events", "--since=0", "-f", "container=oomTrue", fmt.Sprintf("--until=%d", daemonTime(c).Unix()))
|
|
| 121 |
+ events := strings.Split(strings.TrimSuffix(out, "\n"), "\n") |
|
| 122 |
+ if len(events) < 4 {
|
|
| 123 |
+ c.Fatalf("Missing expected event")
|
|
| 124 |
+ } |
|
| 125 |
+ |
|
| 126 |
+ createEvent := strings.Fields(events[len(events)-4]) |
|
| 127 |
+ attachEvent := strings.Fields(events[len(events)-3]) |
|
| 128 |
+ startEvent := strings.Fields(events[len(events)-2]) |
|
| 129 |
+ oomEvent := strings.Fields(events[len(events)-1]) |
|
| 130 |
+ |
|
| 131 |
+ if createEvent[len(createEvent)-1] != "create" {
|
|
| 132 |
+ c.Fatalf("event should be create, not %#v", createEvent)
|
|
| 133 |
+ } |
|
| 134 |
+ if attachEvent[len(attachEvent)-1] != "attach" {
|
|
| 135 |
+ c.Fatalf("event should be attach, not %#v", attachEvent)
|
|
| 136 |
+ } |
|
| 137 |
+ if startEvent[len(startEvent)-1] != "start" {
|
|
| 138 |
+ c.Fatalf("event should be start, not %#v", startEvent)
|
|
| 139 |
+ } |
|
| 140 |
+ if oomEvent[len(oomEvent)-1] != "oom" {
|
|
| 141 |
+ c.Fatalf("event should be oom, not %#v", oomEvent)
|
|
| 142 |
+ } |
|
| 143 |
+ |
|
| 144 |
+ out, _ = dockerCmd(c, "inspect", "-f", "{{.State.Status}}", "oomTrue")
|
|
| 145 |
+ if strings.TrimSpace(out) != "running" {
|
|
| 146 |
+ c.Fatalf("container should be still running, not %v", out)
|
|
| 147 |
+ } |
|
| 148 |
+ } |
|
| 149 |
+} |