Browse code

This patch adds the ability for Docker to detect out-of-memory (OOM) conditions in containers. Because containers can handle kernel OOM kills gracefully, Docker only reports the OOM information as additional metadata in the container status. Docker-DCO-1.1-Signed-off-by: Vishnu Kannan <vishnuk@google.com> (github: vishh)

Vishnu Kannan authored on 2014/10/09 02:03:57
Showing 8 changed files
... ...
@@ -231,7 +231,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
231 231
 		log.Debugf("killing old running container %s", container.ID)
232 232
 
233 233
 		existingPid := container.Pid
234
-		container.SetStopped(0)
234
+		container.SetStopped(&execdriver.ExitStatus{0, false})
235 235
 
236 236
 		// We only have to handle this for lxc because the other drivers will ensure that
237 237
 		// no processes are left when docker dies
... ...
@@ -263,7 +263,7 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool) err
263 263
 
264 264
 			log.Debugf("Marking as stopped")
265 265
 
266
-			container.SetStopped(-127)
266
+			container.SetStopped(&execdriver.ExitStatus{-127, false})
267 267
 			if err := container.ToDisk(); err != nil {
268 268
 				return err
269 269
 			}
... ...
@@ -991,7 +991,7 @@ func (daemon *Daemon) Diff(container *Container) (archive.Archive, error) {
991 991
 	return daemon.driver.Diff(container.ID, initID)
992 992
 }
993 993
 
994
-func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
994
+func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
995 995
 	return daemon.execDriver.Run(c.command, pipes, startCallback)
996 996
 }
997 997
 
... ...
@@ -40,9 +40,18 @@ type TtyTerminal interface {
40 40
 	Master() *os.File
41 41
 }
42 42
 
43
+// ExitStatus provides exit reasons for a container.
44
+type ExitStatus struct {
45
+	// The exit code with which the container exited.
46
+	ExitCode int
47
+
48
+	// Whether the container encountered an OOM.
49
+	OOMKilled bool
50
+}
51
+
43 52
 type Driver interface {
44 53
 	Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
45
-	// Exec executes the process in a running container, blocks until the process exits and returns the exit code
54
+	// Exec executes the process in an existing container, blocks until the process exits and returns the exit code
46 55
 	Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, startCallback StartCallback) (int, error)
47 56
 	Kill(c *Command, sig int) error
48 57
 	Pause(c *Command) error
... ...
@@ -55,7 +55,7 @@ func (d *driver) Name() string {
55 55
 	return fmt.Sprintf("%s-%s", DriverName, version)
56 56
 }
57 57
 
58
-func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
58
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
59 59
 	var (
60 60
 		term execdriver.Terminal
61 61
 		err  error
... ...
@@ -76,11 +76,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
76 76
 	})
77 77
 
78 78
 	if err := d.generateEnvConfig(c); err != nil {
79
-		return -1, err
79
+		return nil, err
80 80
 	}
81 81
 	configPath, err := d.generateLXCConfig(c)
82 82
 	if err != nil {
83
-		return -1, err
83
+		return nil, err
84 84
 	}
85 85
 	params := []string{
86 86
 		"lxc-start",
... ...
@@ -155,11 +155,11 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
155 155
 	c.ProcessConfig.Args = append([]string{name}, arg...)
156 156
 
157 157
 	if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
158
-		return -1, err
158
+		return nil, err
159 159
 	}
160 160
 
161 161
 	if err := c.ProcessConfig.Start(); err != nil {
162
-		return -1, err
162
+		return nil, err
163 163
 	}
164 164
 
165 165
 	var (
... ...
@@ -183,7 +183,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
183 183
 			c.ProcessConfig.Process.Kill()
184 184
 			c.ProcessConfig.Wait()
185 185
 		}
186
-		return -1, err
186
+		return nil, err
187 187
 	}
188 188
 
189 189
 	c.ContainerPid = pid
... ...
@@ -194,7 +194,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
194 194
 
195 195
 	<-waitLock
196 196
 
197
-	return getExitCode(c), waitErr
197
+	return &execdriver.ExitStatus{getExitCode(c), false}, waitErr
198 198
 }
199 199
 
200 200
 /// Return the exit code of the process
... ...
@@ -14,6 +14,7 @@ import (
14 14
 	"sync"
15 15
 	"syscall"
16 16
 
17
+	log "github.com/Sirupsen/logrus"
17 18
 	"github.com/docker/docker/daemon/execdriver"
18 19
 	"github.com/docker/docker/pkg/term"
19 20
 	"github.com/docker/libcontainer"
... ...
@@ -60,11 +61,20 @@ func NewDriver(root, initPath string) (*driver, error) {
60 60
 	}, nil
61 61
 }
62 62
 
63
-func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
63
+func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
64
+	return fs.NotifyOnOOM(config.Cgroups)
65
+}
66
+
67
+type execOutput struct {
68
+	exitCode int
69
+	err      error
70
+}
71
+
72
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (*execdriver.ExitStatus, error) {
64 73
 	// take the Command and populate the libcontainer.Config from it
65 74
 	container, err := d.createContainer(c)
66 75
 	if err != nil {
67
-		return -1, err
76
+		return nil, err
68 77
 	}
69 78
 
70 79
 	var term execdriver.Terminal
... ...
@@ -75,7 +85,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
75 75
 		term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
76 76
 	}
77 77
 	if err != nil {
78
-		return -1, err
78
+		return nil, err
79 79
 	}
80 80
 	c.ProcessConfig.Terminal = term
81 81
 
... ...
@@ -92,40 +102,70 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
92 92
 	)
93 93
 
94 94
 	if err := d.createContainerRoot(c.ID); err != nil {
95
-		return -1, err
95
+		return nil, err
96 96
 	}
97 97
 	defer d.cleanContainer(c.ID)
98 98
 
99 99
 	if err := d.writeContainerFile(container, c.ID); err != nil {
100
-		return -1, err
100
+		return nil, err
101 101
 	}
102 102
 
103
-	return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
104
-		c.ProcessConfig.Path = d.initPath
105
-		c.ProcessConfig.Args = append([]string{
106
-			DriverName,
107
-			"-console", console,
108
-			"-pipe", "3",
109
-			"-root", filepath.Join(d.root, c.ID),
110
-			"--",
111
-		}, args...)
112
-
113
-		// set this to nil so that when we set the clone flags anything else is reset
114
-		c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
115
-			Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
116
-		}
117
-		c.ProcessConfig.ExtraFiles = []*os.File{child}
103
+	execOutputChan := make(chan execOutput, 0)
104
+	waitForStart := make(chan struct{}, 0)
118 105
 
119
-		c.ProcessConfig.Env = container.Env
120
-		c.ProcessConfig.Dir = container.RootFs
106
+	go func() {
107
+		exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
108
+			c.ProcessConfig.Path = d.initPath
109
+			c.ProcessConfig.Args = append([]string{
110
+				DriverName,
111
+				"-console", console,
112
+				"-pipe", "3",
113
+				"-root", filepath.Join(d.root, c.ID),
114
+				"--",
115
+			}, args...)
116
+
117
+			// set this to nil so that when we set the clone flags anything else is reset
118
+			c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
119
+				Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
120
+			}
121
+			c.ProcessConfig.ExtraFiles = []*os.File{child}
121 122
 
122
-		return &c.ProcessConfig.Cmd
123
-	}, func() {
124
-		if startCallback != nil {
125
-			c.ContainerPid = c.ProcessConfig.Process.Pid
126
-			startCallback(&c.ProcessConfig, c.ContainerPid)
123
+			c.ProcessConfig.Env = container.Env
124
+			c.ProcessConfig.Dir = container.RootFs
125
+
126
+			return &c.ProcessConfig.Cmd
127
+		}, func() {
128
+			close(waitForStart)
129
+			if startCallback != nil {
130
+				c.ContainerPid = c.ProcessConfig.Process.Pid
131
+				startCallback(&c.ProcessConfig, c.ContainerPid)
132
+			}
133
+		})
134
+		execOutputChan <- execOutput{exitCode, err}
135
+	}()
136
+
137
+	select {
138
+	case execOutput := <-execOutputChan:
139
+		return &execdriver.ExitStatus{execOutput.exitCode, false}, execOutput.err
140
+	case <-waitForStart:
141
+		break
142
+	}
143
+
144
+	oomKill := false
145
+	go func() {
146
+		oomKillNotification, err := d.notifyOnOOM(container)
147
+		if err == nil {
148
+			if _, ok := <-oomKillNotification; ok {
149
+				oomKill = true
150
+			}
151
+		} else {
152
+			log.Infof("WARNING: Your kernel does not support OOM notifications: %s", err)
127 153
 		}
128
-	})
154
+	}()
155
+	// wait for the container to exit.
156
+	execOutput := <-execOutputChan
157
+
158
+	return &execdriver.ExitStatus{execOutput.exitCode, oomKill}, execOutput.err
129 159
 }
130 160
 
131 161
 func (d *driver) Kill(p *execdriver.Command, sig int) error {
... ...
@@ -100,7 +100,7 @@ func (m *containerMonitor) Close() error {
100 100
 func (m *containerMonitor) Start() error {
101 101
 	var (
102 102
 		err        error
103
-		exitStatus int
103
+		exitStatus *execdriver.ExitStatus
104 104
 		// this variable indicates where we are in the execution flow:
105 105
 		// before Run or after
106 106
 		afterRun bool
... ...
@@ -150,9 +150,9 @@ func (m *containerMonitor) Start() error {
150 150
 		// here container.Lock is already lost
151 151
 		afterRun = true
152 152
 
153
-		m.resetMonitor(err == nil && exitStatus == 0)
153
+		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
154 154
 
155
-		if m.shouldRestart(exitStatus) {
155
+		if m.shouldRestart(exitStatus.ExitCode) {
156 156
 			m.container.SetRestarting(exitStatus)
157 157
 			m.container.LogEvent("die")
158 158
 			m.resetContainer(true)
... ...
@@ -209,7 +209,7 @@ func (m *containerMonitor) waitForNextRestart() {
209 209
 
210 210
 // shouldRestart checks the restart policy and applies the rules to determine if
211 211
 // the container's process should be restarted
212
-func (m *containerMonitor) shouldRestart(exitStatus int) bool {
212
+func (m *containerMonitor) shouldRestart(exitCode int) bool {
213 213
 	m.mux.Lock()
214 214
 	defer m.mux.Unlock()
215 215
 
... ...
@@ -228,7 +228,7 @@ func (m *containerMonitor) shouldRestart(exitStatus int) bool {
228 228
 			return false
229 229
 		}
230 230
 
231
-		return exitStatus != 0
231
+		return exitCode != 0
232 232
 	}
233 233
 
234 234
 	return false
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"sync"
6 6
 	"time"
7 7
 
8
+	"github.com/docker/docker/daemon/execdriver"
8 9
 	"github.com/docker/docker/pkg/units"
9 10
 )
10 11
 
... ...
@@ -13,6 +14,7 @@ type State struct {
13 13
 	Running    bool
14 14
 	Paused     bool
15 15
 	Restarting bool
16
+	OOMKilled  bool
16 17
 	Pid        int
17 18
 	ExitCode   int
18 19
 	Error      string // contains last known error when starting the container
... ...
@@ -29,12 +31,16 @@ func NewState() *State {
29 29
 
30 30
 // String returns a human-readable description of the state
31 31
 func (s *State) String() string {
32
+	oomInfo := ""
33
+	if s.OOMKilled {
34
+		oomInfo = "possibly due to lack of memory"
35
+	}
32 36
 	if s.Running {
33 37
 		if s.Paused {
34 38
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
35 39
 		}
36 40
 		if s.Restarting {
37
-			return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
41
+			return fmt.Sprintf("Restarting (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
38 42
 		}
39 43
 
40 44
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
... ...
@@ -44,7 +50,7 @@ func (s *State) String() string {
44 44
 		return ""
45 45
 	}
46 46
 
47
-	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
47
+	return fmt.Sprintf("Exited (%d) %s ago %s", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)), oomInfo)
48 48
 }
49 49
 
50 50
 // StateString returns a single string to describe state
... ...
@@ -149,25 +155,29 @@ func (s *State) setRunning(pid int) {
149 149
 	s.waitChan = make(chan struct{})
150 150
 }
151 151
 
152
-func (s *State) SetStopped(exitCode int) {
152
+func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
153 153
 	s.Lock()
154
-	s.setStopped(exitCode)
154
+	s.setStopped(exitStatus)
155 155
 	s.Unlock()
156 156
 }
157 157
 
158
-func (s *State) setStopped(exitCode int) {
158
+func (s *State) setStopped(exitStatus *execdriver.ExitStatus) {
159 159
 	s.Running = false
160 160
 	s.Restarting = false
161 161
 	s.Pid = 0
162 162
 	s.FinishedAt = time.Now().UTC()
163
-	s.ExitCode = exitCode
163
+	s.ExitCode = exitStatus.ExitCode
164
+	s.OOMKilled = false
165
+	if exitStatus.OOMKilled {
166
+		s.OOMKilled = true
167
+	}
164 168
 	close(s.waitChan) // fire waiters for stop
165 169
 	s.waitChan = make(chan struct{})
166 170
 }
167 171
 
168 172
 // SetRestarting is when docker handles the auto restart of containers when they are
169 173
 // in the middle of a stop and being restarted again
170
-func (s *State) SetRestarting(exitCode int) {
174
+func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
171 175
 	s.Lock()
172 176
 	// we should consider the container running when it is restarting because of
173 177
 	// all the checks in docker around rm/stop/etc
... ...
@@ -175,7 +185,10 @@ func (s *State) SetRestarting(exitCode int) {
175 175
 	s.Restarting = true
176 176
 	s.Pid = 0
177 177
 	s.FinishedAt = time.Now().UTC()
178
-	s.ExitCode = exitCode
178
+	s.ExitCode = exitStatus.ExitCode
179
+	if exitStatus.OOMKilled {
180
+		s.OOMKilled = true
181
+	}
179 182
 	close(s.waitChan) // fire waiters for stop
180 183
 	s.waitChan = make(chan struct{})
181 184
 	s.Unlock()
... ...
@@ -4,6 +4,8 @@ import (
4 4
 	"sync/atomic"
5 5
 	"testing"
6 6
 	"time"
7
+
8
+	"github.com/docker/docker/daemon/execdriver"
7 9
 )
8 10
 
9 11
 func TestStateRunStop(t *testing.T) {
... ...
@@ -47,7 +49,7 @@ func TestStateRunStop(t *testing.T) {
47 47
 			atomic.StoreInt64(&exit, int64(exitCode))
48 48
 			close(stopped)
49 49
 		}()
50
-		s.SetStopped(i)
50
+		s.SetStopped(&execdriver.ExitStatus{i, false})
51 51
 		if s.IsRunning() {
52 52
 			t.Fatal("State is running")
53 53
 		}
... ...
@@ -18,6 +18,7 @@ import (
18 18
 
19 19
 	log "github.com/Sirupsen/logrus"
20 20
 	"github.com/docker/docker/daemon"
21
+	"github.com/docker/docker/daemon/execdriver"
21 22
 	"github.com/docker/docker/engine"
22 23
 	"github.com/docker/docker/image"
23 24
 	"github.com/docker/docker/nat"
... ...
@@ -652,7 +653,7 @@ func TestRestore(t *testing.T) {
652 652
 	if err := container3.Run(); err != nil {
653 653
 		t.Fatal(err)
654 654
 	}
655
-	container2.SetStopped(0)
655
+	container2.SetStopped(&execdriver.ExitStatus{0, false})
656 656
 }
657 657
 
658 658
 func TestDefaultContainerName(t *testing.T) {