Browse code

Add Restarting state when docker is handling the restart of containers

Signed-off-by: Michael Crosby <michael@docker.com>

Michael Crosby authored on 2014/08/12 03:07:37
Showing 2 changed files
... ...
@@ -74,66 +74,6 @@ func (m *containerMonitor) Close() error {
74 74
 	return nil
75 75
 }
76 76
 
77
-// reset resets the container's IO and ensures that the command is able to be executed again
78
-// by copying the data into a new struct
79
-func (m *containerMonitor) reset(successful bool) {
80
-	container := m.container
81
-
82
-	if container.Config.OpenStdin {
83
-		if err := container.stdin.Close(); err != nil {
84
-			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
85
-		}
86
-	}
87
-
88
-	if err := container.stdout.Clean(); err != nil {
89
-		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
90
-	}
91
-
92
-	if err := container.stderr.Clean(); err != nil {
93
-		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
94
-	}
95
-
96
-	if container.command != nil && container.command.Terminal != nil {
97
-		if err := container.command.Terminal.Close(); err != nil {
98
-			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
99
-		}
100
-	}
101
-
102
-	// Re-create a brand new stdin pipe once the container exited
103
-	if container.Config.OpenStdin {
104
-		container.stdin, container.stdinPipe = io.Pipe()
105
-	}
106
-
107
-	container.LogEvent("die")
108
-
109
-	c := container.command.Cmd
110
-
111
-	container.command.Cmd = exec.Cmd{
112
-		Stdin:       c.Stdin,
113
-		Stdout:      c.Stdout,
114
-		Stderr:      c.Stderr,
115
-		Path:        c.Path,
116
-		Env:         c.Env,
117
-		ExtraFiles:  c.ExtraFiles,
118
-		Args:        c.Args,
119
-		Dir:         c.Dir,
120
-		SysProcAttr: c.SysProcAttr,
121
-	}
122
-
123
-	// the container exited successfully so we need to reset the failure counter
124
-	// and the timeIncrement back to the default values
125
-	if successful {
126
-		m.failureCount = 0
127
-		m.timeIncrement = defaultTimeIncrement
128
-	} else {
129
-		// otherwise we need to increment the amount of time we wait before restarting
130
-		// the process.  We will build up by multiplying the increment by 2
131
-
132
-		m.failureCount++
133
-		m.timeIncrement *= 2
134
-	}
135
-}
136
-
137 77
 // Start starts the containers process and monitors it according to the restart policy
138 78
 func (m *containerMonitor) Start() error {
139 79
 	var (
... ...
@@ -151,7 +91,7 @@ func (m *containerMonitor) Start() error {
151 151
 		m.container.RestartCount++
152 152
 
153 153
 		if err := m.container.startLoggingToDisk(); err != nil {
154
-			m.reset(false)
154
+			m.resetContainer()
155 155
 
156 156
 			return err
157 157
 		}
... ...
@@ -164,18 +104,23 @@ func (m *containerMonitor) Start() error {
164 164
 			utils.Errorf("Error running container: %s", err)
165 165
 		}
166 166
 
167
-		// we still wait to set the state as stopped and ensure that the locks were released
168
-		m.container.State.SetStopped(exitStatus)
169
-
170
-		// pass if we exited successfully
171
-		m.reset(err == nil && exitStatus == 0)
167
+		m.resetMonitor(err == nil && exitStatus == 0)
172 168
 
173 169
 		if m.shouldRestart(exitStatus) {
170
+			m.container.State.SetRestarting(exitStatus)
171
+
172
+			m.resetContainer()
173
+
174 174
 			// sleep with a small time increment between each restart to help avoid issues caused by quickly
175 175
 			// restarting the container because of some types of errors ( networking cut out, etc... )
176 176
 			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
177 177
 
178 178
 			continue
179
+		} else {
180
+			// we still wait to set the state as stopped and ensure that the locks were released
181
+			m.container.State.SetStopped(exitStatus)
182
+
183
+			m.resetContainer()
179 184
 		}
180 185
 
181 186
 		break
... ...
@@ -184,6 +129,23 @@ func (m *containerMonitor) Start() error {
184 184
 	return err
185 185
 }
186 186
 
187
+// resetMonitor resets the stateful fields on the containerMonitor based on the
188
+// previous run's success or failure
189
+func (m *containerMonitor) resetMonitor(successful bool) {
190
+	// the container exited successfully so we need to reset the failure counter
191
+	// and the timeIncrement back to the default values
192
+	if successful {
193
+		m.failureCount = 0
194
+		m.timeIncrement = defaultTimeIncrement
195
+	} else {
196
+		// otherwise we need to increment the amount of time we wait before restarting
197
+		// the process.  We will build up by multiplying the increment by 2
198
+
199
+		m.failureCount++
200
+		m.timeIncrement *= 2
201
+	}
202
+}
203
+
187 204
 // shouldRestart checks the restart policy and applies the rules to determine if
188 205
 // the container's process should be restarted
189 206
 func (m *containerMonitor) shouldRestart(exitStatus int) bool {
... ...
@@ -229,3 +191,50 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
229 229
 		utils.Debugf("%s", err)
230 230
 	}
231 231
 }
232
+
233
+// resetContainer resets the container's IO and ensures that the command is able to be executed again
234
+// by copying the data into a new struct
235
+func (m *containerMonitor) resetContainer() {
236
+	container := m.container
237
+
238
+	if container.Config.OpenStdin {
239
+		if err := container.stdin.Close(); err != nil {
240
+			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
241
+		}
242
+	}
243
+
244
+	if err := container.stdout.Clean(); err != nil {
245
+		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
246
+	}
247
+
248
+	if err := container.stderr.Clean(); err != nil {
249
+		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
250
+	}
251
+
252
+	if container.command != nil && container.command.Terminal != nil {
253
+		if err := container.command.Terminal.Close(); err != nil {
254
+			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
255
+		}
256
+	}
257
+
258
+	// Re-create a brand new stdin pipe once the container exited
259
+	if container.Config.OpenStdin {
260
+		container.stdin, container.stdinPipe = io.Pipe()
261
+	}
262
+
263
+	container.LogEvent("die")
264
+
265
+	c := container.command.Cmd
266
+
267
+	container.command.Cmd = exec.Cmd{
268
+		Stdin:       c.Stdin,
269
+		Stdout:      c.Stdout,
270
+		Stderr:      c.Stderr,
271
+		Path:        c.Path,
272
+		Env:         c.Env,
273
+		ExtraFiles:  c.ExtraFiles,
274
+		Args:        c.Args,
275
+		Dir:         c.Dir,
276
+		SysProcAttr: c.SysProcAttr,
277
+	}
278
+}
... ...
@@ -12,6 +12,7 @@ type State struct {
12 12
 	sync.RWMutex
13 13
 	Running    bool
14 14
 	Paused     bool
15
+	Restarting bool
15 16
 	Pid        int
16 17
 	ExitCode   int
17 18
 	StartedAt  time.Time
... ...
@@ -30,15 +31,22 @@ func (s *State) String() string {
30 30
 	s.RLock()
31 31
 	defer s.RUnlock()
32 32
 
33
+	if s.Restarting {
34
+		return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
35
+	}
36
+
33 37
 	if s.Running {
34 38
 		if s.Paused {
35 39
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
36 40
 		}
41
+
37 42
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
38 43
 	}
44
+
39 45
 	if s.FinishedAt.IsZero() {
40 46
 		return ""
41 47
 	}
48
+
42 49
 	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
43 50
 }
44 51
 
... ...
@@ -135,6 +143,28 @@ func (s *State) SetStopped(exitCode int) {
135 135
 	s.Unlock()
136 136
 }
137 137
 
138
+// SetRestarting is when docker handles the auto restart of containers when they are
139
+// in the middle of a stop and being restarted again
140
+func (s *State) SetRestarting(exitCode int) {
141
+	s.Lock()
142
+	if s.Running {
143
+		s.Running = false
144
+		s.Pid = 0
145
+		s.FinishedAt = time.Now().UTC()
146
+		s.ExitCode = exitCode
147
+		close(s.waitChan) // fire waiters for stop
148
+		s.waitChan = make(chan struct{})
149
+	}
150
+	s.Unlock()
151
+}
152
+
153
+func (s *State) IsRestarting() bool {
154
+	s.RLock()
155
+	res := s.Restarting
156
+	s.RUnlock()
157
+	return res
158
+}
159
+
138 160
 func (s *State) SetPaused() {
139 161
 	s.Lock()
140 162
 	s.Paused = true