Signed-off-by: Michael Crosby <michael@docker.com>
@@ -74,66 +74,6 @@ func (m *containerMonitor) Close() error {
 	return nil
 }
 
-// reset resets the container's IO and ensures that the command is able to be executed again
-// by copying the data into a new struct
-func (m *containerMonitor) reset(successful bool) {
-	container := m.container
-
-	if container.Config.OpenStdin {
-		if err := container.stdin.Close(); err != nil {
-			utils.Errorf("%s: Error close stdin: %s", container.ID, err)
-		}
-	}
-
-	if err := container.stdout.Clean(); err != nil {
-		utils.Errorf("%s: Error close stdout: %s", container.ID, err)
-	}
-
-	if err := container.stderr.Clean(); err != nil {
-		utils.Errorf("%s: Error close stderr: %s", container.ID, err)
-	}
-
-	if container.command != nil && container.command.Terminal != nil {
-		if err := container.command.Terminal.Close(); err != nil {
-			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
-		}
-	}
-
-	// Re-create a brand new stdin pipe once the container exited
-	if container.Config.OpenStdin {
-		container.stdin, container.stdinPipe = io.Pipe()
-	}
-
-	container.LogEvent("die")
-
-	c := container.command.Cmd
-
-	container.command.Cmd = exec.Cmd{
-		Stdin:       c.Stdin,
-		Stdout:      c.Stdout,
-		Stderr:      c.Stderr,
-		Path:        c.Path,
-		Env:         c.Env,
-		ExtraFiles:  c.ExtraFiles,
-		Args:        c.Args,
-		Dir:         c.Dir,
-		SysProcAttr: c.SysProcAttr,
-	}
-
-	// the container exited successfully so we need to reset the failure counter
-	// and the timeIncrement back to the default values
-	if successful {
-		m.failureCount = 0
-		m.timeIncrement = defaultTimeIncrement
-	} else {
-		// otherwise we need to increment the amount of time we wait before restarting
-		// the process. We will build up by multiplying the increment by 2
-
-		m.failureCount++
-		m.timeIncrement *= 2
-	}
-}
-
 // Start starts the containers process and monitors it according to the restart policy
 func (m *containerMonitor) Start() error {
 	var (
@@ -151,7 +91,7 @@ func (m *containerMonitor) Start() error {
 		m.container.RestartCount++
 
 		if err := m.container.startLoggingToDisk(); err != nil {
-			m.reset(false)
+			m.resetContainer()
 
 			return err
 		}
@@ -164,18 +104,23 @@ func (m *containerMonitor) Start() error {
 			utils.Errorf("Error running container: %s", err)
 		}
 
-		// we still wait to set the state as stopped and ensure that the locks were released
-		m.container.State.SetStopped(exitStatus)
-
-		// pass if we exited successfully
-		m.reset(err == nil && exitStatus == 0)
+		m.resetMonitor(err == nil && exitStatus == 0)
 
 		if m.shouldRestart(exitStatus) {
+			m.container.State.SetRestarting(exitStatus)
+
+			m.resetContainer()
+
 			// sleep with a small time increment between each restart to help avoid issues cased by quickly
 			// restarting the container because of some types of errors ( networking cut out, etc... )
 			time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
 
 			continue
+		} else {
+			// we still wait to set the state as stopped and ensure that the locks were released
+			m.container.State.SetStopped(exitStatus)
+
+			m.resetContainer()
 		}
 
 		break
@@ -184,6 +129,23 @@ func (m *containerMonitor) Start() error {
 	return err
 }
 
+// resetMonitor resets the stateful fields on the containerMonitor based on the
+// previous run's success or failure
+func (m *containerMonitor) resetMonitor(successful bool) {
+	// the container exited successfully so we need to reset the failure counter
+	// and the timeIncrement back to the default values
+	if successful {
+		m.failureCount = 0
+		m.timeIncrement = defaultTimeIncrement
+	} else {
+		// otherwise we need to increment the amount of time we wait before restarting
+		// the process. We will build up by multiplying the increment by 2
+
+		m.failureCount++
+		m.timeIncrement *= 2
+	}
+}
+
 // shouldRestart checks the restart policy and applies the rules to determine if
 // the container's process should be restarted
 func (m *containerMonitor) shouldRestart(exitStatus int) bool {
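For a sense of the restart cadence this produces, below is a minimal standalone sketch of the doubling behavior. The starting value of 100 milliseconds is an assumption: the monitor's defaultTimeIncrement constant is defined elsewhere in this file and is not shown in the diff.

```go
package main

import (
	"fmt"
	"time"
)

// Assumed starting value; the real defaultTimeIncrement constant is defined
// elsewhere in monitor.go and is not shown in this diff.
const defaultTimeIncrement = 100

func main() {
	timeIncrement := defaultTimeIncrement

	// Each failed run calls resetMonitor(false), which doubles the increment,
	// so the sleep before the next restart grows geometrically.
	for failureCount := 1; failureCount <= 5; failureCount++ {
		timeIncrement *= 2
		fmt.Printf("failure %d: sleep %v\n", failureCount, time.Duration(timeIncrement)*time.Millisecond)
	}

	// A single successful run (resetMonitor(true)) resets the backoff:
	timeIncrement = defaultTimeIncrement
	fmt.Printf("after success: sleep %v\n", time.Duration(timeIncrement)*time.Millisecond)
}
```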
@@ -229,3 +191,50 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
 		utils.Debugf("%s", err)
 	}
 }
+
+// resetContainer resets the container's IO and ensures that the command is able to be executed again
+// by copying the data into a new struct
+func (m *containerMonitor) resetContainer() {
+	container := m.container
+
+	if container.Config.OpenStdin {
+		if err := container.stdin.Close(); err != nil {
+			utils.Errorf("%s: Error closing stdin: %s", container.ID, err)
+		}
+	}
+
+	if err := container.stdout.Clean(); err != nil {
+		utils.Errorf("%s: Error closing stdout: %s", container.ID, err)
+	}
+
+	if err := container.stderr.Clean(); err != nil {
+		utils.Errorf("%s: Error closing stderr: %s", container.ID, err)
+	}
+
+	if container.command != nil && container.command.Terminal != nil {
+		if err := container.command.Terminal.Close(); err != nil {
+			utils.Errorf("%s: Error closing terminal: %s", container.ID, err)
+		}
+	}
+
+	// Re-create a brand new stdin pipe once the container exited
+	if container.Config.OpenStdin {
+		container.stdin, container.stdinPipe = io.Pipe()
+	}
+
+	container.LogEvent("die")
+
+	c := container.command.Cmd
+
+	container.command.Cmd = exec.Cmd{
+		Stdin:       c.Stdin,
+		Stdout:      c.Stdout,
+		Stderr:      c.Stderr,
+		Path:        c.Path,
+		Env:         c.Env,
+		ExtraFiles:  c.ExtraFiles,
+		Args:        c.Args,
+		Dir:         c.Dir,
+		SysProcAttr: c.SysProcAttr,
+	}
+}
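The field-by-field copy at the end of resetContainer exists because an os/exec.Cmd cannot be started a second time once it has run: its Process and ProcessState fields stay populated after Wait. A self-contained sketch of that constraint and the copy workaround (the command "true" is illustrative, not from the diff):

```go
package main

import (
	"fmt"
	"os/exec"
)

func main() {
	cmd := exec.Command("true")
	if err := cmd.Run(); err != nil {
		fmt.Println("first run:", err)
	}

	// Reusing the same Cmd fails: it is already marked as started.
	if err := cmd.Run(); err != nil {
		fmt.Println("rerun of same Cmd:", err) // "exec: already started"
	}

	// Copying only the configuration fields into a fresh struct, as
	// resetContainer does, yields a Cmd that can be started again.
	fresh := exec.Cmd{
		Path: cmd.Path,
		Args: cmd.Args,
		Env:  cmd.Env,
		Dir:  cmd.Dir,
	}
	fmt.Println("fresh copy:", fresh.Run())
}
```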
@@ -12,6 +12,7 @@ type State struct {
 	sync.RWMutex
 	Running    bool
 	Paused     bool
+	Restarting bool
 	Pid        int
 	ExitCode   int
 	StartedAt  time.Time
@@ -30,15 +31,22 @@ func (s *State) String() string {
 	s.RLock()
 	defer s.RUnlock()
 
+	if s.Restarting {
+		return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
+	}
+
 	if s.Running {
 		if s.Paused {
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 		}
+
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
 	}
+
 	if s.FinishedAt.IsZero() {
 		return ""
 	}
+
 	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
 }
 
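The visible effect is a third status format in docker ps output while the monitor is in its backoff sleep. The sketch below only approximates the formatting: humanDuration stands in for units.HumanDuration, which lives in docker's pkg/units and is not shown in this diff.

```go
package main

import "fmt"

// humanDuration is a stand-in for units.HumanDuration, which renders a
// time span as text like "4 seconds" or "2 minutes".
func humanDuration(seconds int) string {
	return fmt.Sprintf("%d seconds", seconds)
}

func main() {
	exitCode, elapsed := 1, 4

	fmt.Printf("Restarting (%d) %s ago\n", exitCode, humanDuration(elapsed)) // new branch
	fmt.Printf("Up %s\n", humanDuration(elapsed))                           // running
	fmt.Printf("Exited (%d) %s ago\n", exitCode, humanDuration(elapsed))    // stopped
}
```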
@@ -135,6 +143,28 @@ func (s *State) SetStopped(exitCode int) {
 	s.Unlock()
 }
 
+// SetRestarting is used when docker handles the auto restart of containers when they are
+// in the middle of a stop and being restarted again
+func (s *State) SetRestarting(exitCode int) {
+	s.Lock()
+	if s.Running {
+		s.Running = false
+		s.Pid = 0
+		s.FinishedAt = time.Now().UTC()
+		s.ExitCode = exitCode
+		close(s.waitChan) // fire waiters for stop
+		s.waitChan = make(chan struct{})
+	}
+	s.Unlock()
+}
+
+func (s *State) IsRestarting() bool {
+	s.RLock()
+	res := s.Restarting
+	s.RUnlock()
+	return res
+}
+
 func (s *State) SetPaused() {
 	s.Lock()
 	s.Paused = true
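SetRestarting reuses the close-and-replace channel idiom from SetStopped: closing waitChan wakes every goroutine blocked waiting for the container to stop, and a fresh channel is swapped in for the next run. A simplified, self-contained sketch of that idiom (the notifier type and names are illustrative, not docker's State):

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// notifier is a stripped-down version of the waitChan idiom: closing the
// channel broadcasts to all current waiters, and a new channel is swapped
// in so later waiters block until the next event.
type notifier struct {
	mu sync.Mutex
	ch chan struct{}
}

func (n *notifier) wait() {
	n.mu.Lock()
	ch := n.ch
	n.mu.Unlock()
	<-ch // unblocked when fire() closes this channel
}

func (n *notifier) fire() {
	n.mu.Lock()
	close(n.ch)                // wake every goroutine blocked in wait()
	n.ch = make(chan struct{}) // arm for the next round of waiters
	n.mu.Unlock()
}

func main() {
	n := &notifier{ch: make(chan struct{})}

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			n.wait()
			fmt.Printf("waiter %d released\n", id)
		}(i)
	}

	time.Sleep(50 * time.Millisecond) // let the waiters block
	n.fire()                          // analogous to SetRestarting/SetStopped
	wg.Wait()
}
```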