package daemon

import (
"io"
"os/exec"
"sync"
"time"

"github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/runconfig"
)

const (
defaultTimeIncrement = 100
loggerCloseTimeout = 10 * time.Second
)

// containerMonitor monitors the execution of a container's main process.
// If a restart policy is specified for the container, the monitor will ensure that the
// process is restarted based on the rules of the policy. When the container is finally stopped
// the monitor will reset and clean up any of the container's resources, such as networking
// allocations and the rootfs.
type containerMonitor struct {
mux sync.Mutex

// container is the container being monitored
container *Container

// restartPolicy is the current policy being applied to the container monitor
restartPolicy runconfig.RestartPolicy

// failureCount is the number of times the container has failed to
// start in a row
failureCount int
// shouldStop signals the monitor that the next time the container exits it is
// either because docker or the user asked for the container to be stopped
shouldStop bool

// startSignal is a channel that is closed after the container initially starts
startSignal chan struct{}

// stopChan is used to signal to the monitor whenever there is a wait for the
// next restart so that the timeIncrement is not honored and the user is not
// left waiting for nothing to happen during this time
stopChan chan struct{}

// timeIncrement is the amount of time to wait between restarts
// this is in milliseconds
timeIncrement int

// lastStartTime is the time at which the monitor last exec'd the container's process
lastStartTime time.Time
}

// newContainerMonitor returns an initialized containerMonitor for the provided container
// honoring the provided restart policy
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
return &containerMonitor{
container: container,
restartPolicy: policy,
timeIncrement: defaultTimeIncrement,
stopChan: make(chan struct{}),
startSignal: make(chan struct{}),
}
}

// ExitOnNext signals to the container monitor that it should stop monitoring the container
// for exits the next time the process dies
func (m *containerMonitor) ExitOnNext() {
m.mux.Lock()

// we need to protect having a double close of the channel when stop is called
// twice or else we will get a panic
if !m.shouldStop {
m.shouldStop = true
close(m.stopChan)
}

m.mux.Unlock()
}

// Close closes the container's resources such as networking allocations and
// unmounts the container's root filesystem
func (m *containerMonitor) Close() error {
// Cleanup networking and mounts
m.container.cleanup()

// FIXME: here is a race condition between two RUN instructions in a Dockerfile
// because they share the same runconfig and change the image. Must be fixed
// in builder/builder.go
if err := m.container.toDisk(); err != nil {
logrus.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err)
return err
}
return nil
}

// Start starts the container's process and monitors it according to the restart policy
func (m *containerMonitor) Start() error {
var (
err error
exitStatus execdriver.ExitStatus
// this variable indicates where we are in the execution flow: before Run or after
afterRun bool
)

// ensure that when the monitor finally exits we release the networking and unmount the rootfs
defer func() {
if afterRun {
m.container.Lock()
m.container.setStopped(&exitStatus)
defer m.container.Unlock()
}
m.Close()
}()

// reset the restart count; it is incremented at the top of the restart loop,
// so the first run leaves it at 0
m.container.RestartCount = -1

for {
m.container.RestartCount++

if err := m.container.startLogging(); err != nil {
m.resetContainer(false)
return err
}
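
// wire the container's stdin, stdout and stderr streams (and whether stdin stays open)
// into the pipes handed to the exec driver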
pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)

m.container.LogEvent("start")

m.lastStartTime = time.Now()

if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
// if we receive an internal error from the initial start of a container then let's
// return it instead of entering the restart loop
if m.container.RestartCount == 0 {
m.container.ExitCode = -1
m.resetContainer(false)
return err
}

logrus.Errorf("Error running container: %s", err)
}

// at this point the container.Lock has already been released
afterRun = true

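// a run counts as successful only if the driver returned no error and the process exited with status 0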
m.resetMonitor(err == nil && exitStatus.ExitCode == 0)

if m.shouldRestart(exitStatus.ExitCode) {
m.container.SetRestarting(&exitStatus)
if exitStatus.OOMKilled {
m.container.LogEvent("oom")
}
m.container.LogEvent("die")
m.resetContainer(true)

// sleep with a small time increment between each restart to help avoid issues caused by quickly
// restarting the container because of some types of errors (networking cut out, etc...)
m.waitForNextRestart()
// we need to check this before reentering the loop because the waitForNextRestart could have
// been terminated by a request from a user
if m.shouldStop {
return err
}
continue
}

if exitStatus.OOMKilled {
m.container.LogEvent("oom")
}
m.container.LogEvent("die")
m.resetContainer(true)
return err
}
}

// resetMonitor resets the stateful fields on the containerMonitor based on the
// previous run's success or failure. Regardless of success, if the container had
// an execution time of more than 10s then reset the timer back to the default
func (m *containerMonitor) resetMonitor(successful bool) {
executionTime := time.Now().Sub(m.lastStartTime).Seconds()
if executionTime > 10 {
m.timeIncrement = defaultTimeIncrement
} else {
// otherwise we need to increment the amount of time we wait before restarting
// the process. We will build up by multiplying the increment by 2
m.timeIncrement *= 2
}

// the container exited successfully so we need to reset the failure counter
if successful {
m.failureCount = 0
} else {
m.failureCount++
}
}

// waitForNextRestart waits with the current time increment to restart the container unless
// a user or docker asks for the container to be stopped
func (m *containerMonitor) waitForNextRestart() {
select {
case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
case <-m.stopChan:
}
}

// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitCode int) bool {
m.mux.Lock()
defer m.mux.Unlock()
// do not restart if the user or docker has requested that this container be stopped
if m.shouldStop {
return false
}

switch {
case m.restartPolicy.IsAlways():
return true
case m.restartPolicy.IsOnFailure():
// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max {
logrus.Debugf("stopping restart of container %s because maximum failure count of %d has been reached",
stringid.TruncateID(m.container.ID), max)
return false
}

return exitCode != 0
}

return false
}

// callback ensures that the container's state is properly updated after we
// receive an ack from the execution drivers
func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int) {
if processConfig.Tty {
// The callback is called after the process Start()
// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
// which we close here.
if c, ok := processConfig.Stdout.(io.Closer); ok {
c.Close()
}
}
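
// record the process pid and transition the container to the running state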
m.container.setRunning(pid)

// signal that the process has started
// close channel only if not closed
select {
case <-m.startSignal:
default:
close(m.startSignal)
}

if err := m.container.ToDisk(); err != nil {
logrus.Errorf("Error saving container to disk: %v", err)
}
}

// resetContainer resets the container's IO and ensures that the command is able to be executed again
// by copying the data into a new struct
// if lock is true, the container is locked during the reset
func (m *containerMonitor) resetContainer(lock bool) {
container := m.container
if lock {
container.Lock()
defer container.Unlock()
}

if container.Config.OpenStdin {
if err := container.stdin.Close(); err != nil {
logrus.Errorf("%s: Error closing stdin: %s", container.ID, err)
}
}

if err := container.stdout.Clean(); err != nil {
logrus.Errorf("%s: Error closing stdout: %s", container.ID, err)
}

if err := container.stderr.Clean(); err != nil {
logrus.Errorf("%s: Error closing stderr: %s", container.ID, err)
}

if container.command != nil && container.command.ProcessConfig.Terminal != nil {
if err := container.command.ProcessConfig.Terminal.Close(); err != nil {
logrus.Errorf("%s: Error closing terminal: %s", container.ID, err)
}
}
// Re-create a brand new stdin pipe once the container exited
if container.Config.OpenStdin {
container.stdin, container.stdinPipe = io.Pipe()
}

if container.logDriver != nil {
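// give the log copier a bounded amount of time (loggerCloseTimeout) to finish draining output
// before the driver is closed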
if container.logCopier != nil {
exit := make(chan struct{})
go func() {
container.logCopier.Wait()
close(exit)
}()
select {
case <-time.After(loggerCloseTimeout):
logrus.Warnf("Logger didn't exit in time: logs may be truncated")
case <-exit:
}
}
container.logDriver.Close()
container.logCopier = nil
container.logDriver = nil
}

c := container.command.ProcessConfig.Cmd
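// an exec.Cmd cannot be reused once it has been run, so rebuild it,
// carrying over only the fields that are still valid for the next execution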
container.command.ProcessConfig.Cmd = exec.Cmd{
Stdin: c.Stdin,
Stdout: c.Stdout,
Stderr: c.Stderr,
Path: c.Path,
Env: c.Env,
ExtraFiles: c.ExtraFiles,
Args: c.Args,
Dir: c.Dir,
SysProcAttr: c.SysProcAttr,
}
}