Browse code

Merge pull request #8479 from vishh/OOM

Provide Out Of Memory information in container status

Michael Crosby authored on 2014/11/13 07:15:29
Showing 8 changed files
... ...
@@ -231,7 +231,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
231 231
 		log.Debugf("killing old running container %s", container.ID)
232 232
 
233 233
 		existingPid := container.Pid
234
-		container.SetStopped(0)
234
+		container.SetStopped(&execdriver.ExitStatus{0, false})
235 235
 
236 236
 		// We only have to handle this for lxc because the other drivers will ensure that
237 237
 		// no processes are left when docker dies
... ...
@@ -263,7 +263,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
263 263
 
264 264
 			log.Debugf("Marking as stopped")
265 265
 
266
-			container.SetStopped(-127)
266
+			container.SetStopped(&execdriver.ExitStatus{-127, false})
267 267
 			if err := container.ToDisk(); err != nil {
268 268
 				return err
269 269
 			}
... ...
@@ -991,7 +991,7 @@ func (daemon *Daemon) Diff(container *Container) (archive.Archive, error) {
991 991
 	return daemon.driver.Diff(container.ID, initID)
992 992
 }
993 993
 
994
-func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
994
+func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (execdriver.ExitStatus, error) {
995 995
 	return daemon.execDriver.Run(c.command, pipes, startCallback)
996 996
 }
997 997
 
... ...
@@ -40,9 +40,18 @@ type TtyTerminal interface {
40 40
 	Master() *os.File
41 41
 }
42 42
 
43
+// ExitStatus provides exit reasons for a container.
44
+type ExitStatus struct {
45
+	// The exit code with which the container exited.
46
+	ExitCode int
47
+
48
+	// Whether the container encountered an OOM.
49
+	OOMKilled bool
50
+}
51
+
43 52
 type Driver interface {
44
-	Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
45
-	// Exec executes the process in a running container, blocks until the process exits and returns the exit code
53
+	Run(c *Command, pipes *Pipes, startCallback StartCallback) (ExitStatus, error) // Run executes the process and blocks until the process exits and returns the exit code
54
+	// Exec executes the process in an existing container, blocks until the process exits and returns the exit code
46 55
 	Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error)
47 56
 	Kill(c *Command, sig int) error
48 57
 	Pause(c *Command) error
... ...
@@ -55,7 +55,7 @@ func (d *driver) Name() string {
55 55
 	return fmt.Sprintf("%s-%s", DriverName, version)
56 56
 }
57 57
 
58
-func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
58
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (execdriver.ExitStatus, error) {
59 59
 	var (
60 60
 		term execdriver.Terminal
61 61
 		err  error
... ...
@@ -76,11 +76,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
76 76
 	})
77 77
 
78 78
 	if err := d.generateEnvConfig(c); err != nil {
79
-		return -1, err
79
+		return execdriver.ExitStatus{-1, false}, err
80 80
 	}
81 81
 	configPath, err := d.generateLXCConfig(c)
82 82
 	if err != nil {
83
-		return -1, err
83
+		return execdriver.ExitStatus{-1, false}, err
84 84
 	}
85 85
 	params := []string{
86 86
 		"lxc-start",
... ...
@@ -155,11 +155,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
155 155
 	c.ProcessConfig.Args = append([]string{name}, arg...)
156 156
 
157 157
 	if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
158
-		return -1, err
158
+		return execdriver.ExitStatus{-1, false}, err
159 159
 	}
160 160
 
161 161
 	if err := c.ProcessConfig.Start(); err != nil {
162
-		return -1, err
162
+		return execdriver.ExitStatus{-1, false}, err
163 163
 	}
164 164
 
165 165
 	var (
... ...
@@ -183,7 +183,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
183 183
 			c.ProcessConfig.Process.Kill()
184 184
 			c.ProcessConfig.Wait()
185 185
 		}
186
-		return -1, err
186
+		return execdriver.ExitStatus{-1, false}, err
187 187
 	}
188 188
 
189 189
 	c.ContainerPid = pid
... ...
@@ -194,7 +194,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
194 194
 
195 195
 	<-waitLock
196 196
 
197
-	return getExitCode(c), waitErr
197
+	return execdriver.ExitStatus{getExitCode(c), false}, waitErr
198 198
 }
199 199
 
200 200
 // Return the exit code of the process
... ...
@@ -14,6 +14,7 @@ import (
14 14
 	"sync"
15 15
 	"syscall"
16 16
 
17
+	log "github.com/Sirupsen/logrus"
17 18
 	"github.com/docker/docker/daemon/execdriver"
18 19
 	"github.com/docker/docker/pkg/term"
19 20
 	"github.com/docker/libcontainer"
... ...
@@ -60,11 +61,20 @@ func NewDriver(root, initPath string) (*driver, error) {
60 60
 	}, nil
61 61
 }
62 62
 
63
-func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
63
+func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
64
+	return fs.NotifyOnOOM(config.Cgroups)
65
+}
66
+
67
+type execOutput struct {
68
+	exitCode int
69
+	err      error
70
+}
71
+
72
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (execdriver.ExitStatus, error) {
64 73
 	// take the Command and populate the libcontainer.Config from it
65 74
 	container, err := d.createContainer(c)
66 75
 	if err != nil {
67
-		return -1, err
76
+		return execdriver.ExitStatus{-1, false}, err
68 77
 	}
69 78
 
70 79
 	var term execdriver.Terminal
... ...
@@ -75,7 +85,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
75 75
 		term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
76 76
 	}
77 77
 	if err != nil {
78
-		return -1, err
78
+		return execdriver.ExitStatus{-1, false}, err
79 79
 	}
80 80
 	c.ProcessConfig.Terminal = term
81 81
 
... ...
@@ -92,40 +102,66 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
92 92
 	)
93 93
 
94 94
 	if err := d.createContainerRoot(c.ID); err != nil {
95
-		return -1, err
95
+		return execdriver.ExitStatus{-1, false}, err
96 96
 	}
97 97
 	defer d.cleanContainer(c.ID)
98 98
 
99 99
 	if err := d.writeContainerFile(container, c.ID); err != nil {
100
-		return -1, err
100
+		return execdriver.ExitStatus{-1, false}, err
101 101
 	}
102 102
 
103
-	return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
104
-		c.ProcessConfig.Path = d.initPath
105
-		c.ProcessConfig.Args = append([]string{
106
-			DriverName,
107
-			"-console", console,
108
-			"-pipe", "3",
109
-			"-root", filepath.Join(d.root, c.ID),
110
-			"--",
111
-		}, args...)
112
-
113
-		// set this to nil so that when we set the clone flags anything else is reset
114
-		c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
115
-			Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
116
-		}
117
-		c.ProcessConfig.ExtraFiles = []*os.File{child}
103
+	execOutputChan := make(chan execOutput, 1)
104
+	waitForStart := make(chan struct{})
118 105
 
119
-		c.ProcessConfig.Env = container.Env
120
-		c.ProcessConfig.Dir = container.RootFs
106
+	go func() {
107
+		exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
108
+			c.ProcessConfig.Path = d.initPath
109
+			c.ProcessConfig.Args = append([]string{
110
+				DriverName,
111
+				"-console", console,
112
+				"-pipe", "3",
113
+				"-root", filepath.Join(d.root, c.ID),
114
+				"--",
115
+			}, args...)
116
+
117
+			// set this to nil so that when we set the clone flags anything else is reset
118
+			c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
119
+				Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
120
+			}
121
+			c.ProcessConfig.ExtraFiles = []*os.File{child}
121 122
 
122
-		return &c.ProcessConfig.Cmd
123
-	}, func() {
124
-		if startCallback != nil {
125
-			c.ContainerPid = c.ProcessConfig.Process.Pid
126
-			startCallback(&c.ProcessConfig, c.ContainerPid)
127
-		}
128
-	})
123
+			c.ProcessConfig.Env = container.Env
124
+			c.ProcessConfig.Dir = container.RootFs
125
+
126
+			return &c.ProcessConfig.Cmd
127
+		}, func() {
128
+			close(waitForStart)
129
+			if startCallback != nil {
130
+				c.ContainerPid = c.ProcessConfig.Process.Pid
131
+				startCallback(&c.ProcessConfig, c.ContainerPid)
132
+			}
133
+		})
134
+		execOutputChan <- execOutput{exitCode, err}
135
+	}()
136
+
137
+	select {
138
+	case execOutput := <-execOutputChan:
139
+		return execdriver.ExitStatus{execOutput.exitCode, false}, execOutput.err
140
+	case <-waitForStart:
141
+		break
142
+	}
143
+
144
+	oomKill := false
145
+	oomKillNotification, err := d.notifyOnOOM(container)
146
+	if err == nil {
147
+		_, oomKill = <-oomKillNotification
148
+	} else {
149
+		log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
150
+	}
151
+	// wait for the container to exit.
152
+	execOutput := <-execOutputChan
153
+
154
+	return execdriver.ExitStatus{execOutput.exitCode, oomKill}, execOutput.err
129 155
 }
130 156
 
131 157
 func (d *driver) Kill(p *execdriver.Command, sig int) error {
... ...
@@ -100,7 +100,7 @@ func (m *containerMonitor) Close() error {
100 100
 func (m *containerMonitor) Start() error {
101 101
 	var (
102 102
 		err        error
103
-		exitStatus int
103
+		exitStatus execdriver.ExitStatus
104 104
 		// this variable indicates where we are in the execution flow:
105 105
 		// before Run or after
106 106
 		afterRun bool
... ...
@@ -110,7 +110,7 @@ func (m *containerMonitor) Start() error {
110 110
 	defer func() {
111 111
 		if afterRun {
112 112
 			m.container.Lock()
113
-			m.container.setStopped(exitStatus)
113
+			m.container.setStopped(&exitStatus)
114 114
 			defer m.container.Unlock()
115 115
 		}
116 116
 		m.Close()
... ...
@@ -138,7 +138,7 @@ func (m *containerMonitor) Start() error {
138 138
 			// if we receive an internal error from the initial start of a container then let's
139 139
 			// return it instead of entering the restart loop
140 140
 			if m.container.RestartCount == 0 {
141
-				m.container.ExitCode = exitStatus
141
+				m.container.ExitCode = -1
142 142
 				m.resetContainer(false)
143 143
 
144 144
 				return err
... ...
@@ -150,10 +150,10 @@ func (m *containerMonitor) Start() error {
150 150
 		// here container.Lock is already lost
151 151
 		afterRun = true
152 152
 
153
-		m.resetMonitor(err == nil && exitStatus == 0)
153
+		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
154 154
 
155
-		if m.shouldRestart(exitStatus) {
156
-			m.container.SetRestarting(exitStatus)
155
+		if m.shouldRestart(exitStatus.ExitCode) {
156
+			m.container.SetRestarting(&exitStatus)
157 157
 			m.container.LogEvent("die")
158 158
 			m.resetContainer(true)
159 159
 
... ...
@@ -164,12 +164,12 @@ func (m *containerMonitor) Start() error {
164 164
 			// we need to check this before reentering the loop because the waitForNextRestart could have
165 165
 			// been terminated by a request from a user
166 166
 			if m.shouldStop {
167
-				m.container.ExitCode = exitStatus
167
+				m.container.ExitCode = exitStatus.ExitCode
168 168
 				return err
169 169
 			}
170 170
 			continue
171 171
 		}
172
-		m.container.ExitCode = exitStatus
172
+		m.container.ExitCode = exitStatus.ExitCode
173 173
 		m.container.LogEvent("die")
174 174
 		m.resetContainer(true)
175 175
 		return err
... ...
@@ -209,7 +209,7 @@ func (m *containerMonitor) waitForNextRestart() {
209 209
 
210 210
 // shouldRestart checks the restart policy and applies the rules to determine if
211 211
 // the container's process should be restarted
212
-func (m *containerMonitor) shouldRestart(exitStatus int) bool {
212
+func (m *containerMonitor) shouldRestart(exitCode int) bool {
213 213
 	m.mux.Lock()
214 214
 	defer m.mux.Unlock()
215 215
 
... ...
@@ -228,7 +228,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool {
228 228
 			return false
229 229
 		}
230 230
 
231
-		return exitStatus != 0
231
+		return exitCode != 0
232 232
 	}
233 233
 
234 234
 	return false
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"sync"
6 6
 	"time"
7 7
 
8
+	"github.com/docker/docker/daemon/execdriver"
8 9
 	"github.com/docker/docker/pkg/units"
9 10
 )
10 11
 
... ...
@@ -13,6 +14,7 @@ type State struct {
13 13
 	Running    bool
14 14
 	Paused     bool
15 15
 	Restarting bool
16
+	OOMKilled  bool
16 17
 	Pid        int
17 18
 	ExitCode   int
18 19
 	Error      string // contains last known error when starting the container
... ...
@@ -149,25 +151,26 @@ func (s *State) setRunning(pid int) {
149 149
 	s.waitChan = make(chan struct{})
150 150
 }
151 151
 
152
-func (s *State) SetStopped(exitCode int) {
152
+func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
153 153
 	s.Lock()
154
-	s.setStopped(exitCode)
154
+	s.setStopped(exitStatus)
155 155
 	s.Unlock()
156 156
 }
157 157
 
158
-func (s *State) setStopped(exitCode int) {
158
+func (s *State) setStopped(exitStatus *execdriver.ExitStatus) {
159 159
 	s.Running = false
160 160
 	s.Restarting = false
161 161
 	s.Pid = 0
162 162
 	s.FinishedAt = time.Now().UTC()
163
-	s.ExitCode = exitCode
163
+	s.ExitCode = exitStatus.ExitCode
164
+	s.OOMKilled = exitStatus.OOMKilled
164 165
 	close(s.waitChan) // fire waiters for stop
165 166
 	s.waitChan = make(chan struct{})
166 167
 }
167 168
 
168 169
 // SetRestarting is when docker handles the auto restart of containers when they are
169 170
 // in the middle of a stop and being restarted again
170
-func (s *State) SetRestarting(exitCode int) {
171
+func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
171 172
 	s.Lock()
172 173
 	// we should consider the container running when it is restarting because of
173 174
 	// all the checks in docker around rm/stop/etc
... ...
@@ -175,7 +178,8 @@ func (s *State) SetRestarting(exitCode int) {
175 175
 	s.Restarting = true
176 176
 	s.Pid = 0
177 177
 	s.FinishedAt = time.Now().UTC()
178
-	s.ExitCode = exitCode
178
+	s.ExitCode = exitStatus.ExitCode
179
+	s.OOMKilled = exitStatus.OOMKilled
179 180
 	close(s.waitChan) // fire waiters for stop
180 181
 	s.waitChan = make(chan struct{})
181 182
 	s.Unlock()
... ...
@@ -4,6 +4,8 @@ import (
4 4
 	"sync/atomic"
5 5
 	"testing"
6 6
 	"time"
7
+
8
+	"github.com/docker/docker/daemon/execdriver"
7 9
 )
8 10
 
9 11
 func TestStateRunStop(t *testing.T) {
... ...
@@ -47,7 +49,7 @@ func TestStateRunStop(t *testing.T) {
47 47
 			atomic.StoreInt64(&exit, int64(exitCode))
48 48
 			close(stopped)
49 49
 		}()
50
-		s.SetStopped(i)
50
+		s.SetStopped(&execdriver.ExitStatus{i, false})
51 51
 		if s.IsRunning() {
52 52
 			t.Fatal("State is running")
53 53
 		}
... ...
@@ -18,6 +18,7 @@ import (
18 18
 
19 19
 	log "github.com/Sirupsen/logrus"
20 20
 	"github.com/docker/docker/daemon"
21
+	"github.com/docker/docker/daemon/execdriver"
21 22
 	"github.com/docker/docker/engine"
22 23
 	"github.com/docker/docker/image"
23 24
 	"github.com/docker/docker/nat"
... ...
@@ -652,7 +653,7 @@ func TestRestore(t *testing.T) {
652 652
 	if err := container3.Run(); err != nil {
653 653
 		t.Fatal(err)
654 654
 	}
655
-	container2.SetStopped(0)
655
+	container2.SetStopped(&execdriver.ExitStatus{0, false})
656 656
 }
657 657
 
658 658
 func TestDefaultContainerName(t *testing.T) {