package daemon

import (
"io"
"os/exec"
"sync"
"time"

"github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/runconfig"
)

const (
defaultTimeIncrement = 100
loggerCloseTimeout = 10 * time.Second
)

// containerMonitor monitors the execution of a container's main process.
// If a restart policy is specified for the container, the monitor will ensure that the
// process is restarted based on the rules of the policy. When the container is finally stopped
// the monitor will reset and clean up any of the container's resources, such as networking
// allocations and the rootfs.
type containerMonitor struct {
mux sync.Mutex

// container is the container being monitored
container *Container

// restartPolicy is the current policy being applied to the container monitor
restartPolicy runconfig.RestartPolicy

// failureCount is the number of times the container has failed to
// start in a row
failureCount int
// shouldStop signals the monitor that the next time the container exits it is
// either because docker or the user asked for the container to be stopped
shouldStop bool

// startSignal is a channel that is closed after the container initially starts
startSignal chan struct{}

// stopChan is used to signal to the monitor whenever there is a wait for the
// next restart so that the timeIncrement is not honored and the user is not
// left waiting for nothing to happen during this time
stopChan chan struct{}

// timeIncrement is the amount of time to wait between restarts
// this is in milliseconds
timeIncrement int

// lastStartTime is the time at which the monitor last exec'd the container's process
lastStartTime time.Time
}

// newContainerMonitor returns an initialized containerMonitor for the provided container
// honoring the provided restart policy
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
return &containerMonitor{
container: container,
restartPolicy: policy,
timeIncrement: defaultTimeIncrement,
stopChan: make(chan struct{}),
startSignal: make(chan struct{}),
}
}

// ExitOnNext signals to the container monitor that it should stop monitoring the container
// for exits the next time the process dies
func (m *containerMonitor) ExitOnNext() {
m.mux.Lock()

// we need to protect having a double close of the channel when stop is called
// twice or else we will get a panic
if !m.shouldStop {
m.shouldStop = true
close(m.stopChan)
}

m.mux.Unlock()
}

// Close closes the container's resources such as networking allocations and
// unmounts the container's root filesystem
func (m *containerMonitor) Close() error {
// Cleanup networking and mounts
m.container.cleanup()

// FIXME: here is a race condition between two RUN instructions in a Dockerfile
// because they share the same runconfig and change the image. Must be fixed
// in builder/builder.go
if err := m.container.toDisk(); err != nil {
logrus.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err)
return err
}
return nil
}

// Start starts the container's process and monitors it according to the restart policy
func (m *containerMonitor) Start() error {
var (
err error
exitStatus execdriver.ExitStatus
// this variable indicates where we are in the execution flow: before Run or after
afterRun bool
)

// ensure that when the monitor finally exits we release the networking and unmount the rootfs
defer func() {
if afterRun {
m.container.Lock()
m.container.setStopped(&exitStatus)
defer m.container.Unlock()
}
m.Close()
}()

// reset the restart count; it is incremented at the top of the restart loop,
// so the first run leaves it at 0
m.container.RestartCount = -1

for {
m.container.RestartCount++

if err := m.container.startLogging(); err != nil {
m.resetContainer(false)
return err
}
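
// wire the container's stdin, stdout and stderr streams (and whether stdin stays open)
// into the pipes handed to the exec driver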
pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)

m.container.LogEvent("start")

m.lastStartTime = time.Now()

if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
// if we receive an internal error from the initial start of a container then let's
// return it instead of entering the restart loop
if m.container.RestartCount == 0 {
m.container.ExitCode = -1
m.resetContainer(false)
return err
}

logrus.Errorf("Error running container: %s", err)
}

// at this point the container.Lock has already been released
afterRun = true

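// a run counts as successful only if the driver returned no error and the process exited with status 0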
m.resetMonitor(err == nil && exitStatus.ExitCode == 0)

if m.shouldRestart(exitStatus.ExitCode) {
m.container.SetRestarting(&exitStatus)
if exitStatus.OOMKilled {
m.container.LogEvent("oom")
}
m.container.LogEvent("die")
m.resetContainer(true)

// sleep with a small time increment between each restart to help avoid issues caused by quickly
// restarting the container because of some types of errors (networking cut out, etc...)
m.waitForNextRestart()
// we need to check this before reentering the loop because the waitForNextRestart could have
// been terminated by a request from a user
if m.shouldStop {
return err
}
continue
}

if exitStatus.OOMKilled {
m.container.LogEvent("oom")
}
m.container.LogEvent("die")
m.resetContainer(true)
return err
}
}

// resetMonitor resets the stateful fields on the containerMonitor based on the
// previous run's success or failure. Regardless of success, if the container had
// an execution time of more than 10s then reset the timer back to the default
func (m *containerMonitor) resetMonitor(successful bool) {
executionTime := time.Now().Sub(m.lastStartTime).Seconds()
if executionTime > 10 {
m.timeIncrement = defaultTimeIncrement
} else {
// otherwise we need to increment the amount of time we wait before restarting
// the process. We will build up by multiplying the increment by 2
m.timeIncrement *= 2
}

// the container exited successfully so we need to reset the failure counter
if successful {
m.failureCount = 0
} else {
m.failureCount++
}
}

// waitForNextRestart waits with the current time increment to restart the container unless
// a user or docker asks for the container to be stopped
func (m *containerMonitor) waitForNextRestart() {
select {
case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
case <-m.stopChan:
}
}

// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitCode int) bool {
m.mux.Lock()
defer m.mux.Unlock()
// do not restart if the user or docker has requested that this container be stopped
if m.shouldStop {
return false
}

switch {
case m.restartPolicy.IsAlways():
return true
case m.restartPolicy.IsOnFailure():
// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max {
logrus.Debugf("stopping restart of container %s because maximum failure count of %d has been reached",
stringid.TruncateID(m.container.ID), max)
return false
}

return exitCode != 0
}

return false
}

// callback ensures that the container's state is properly updated after we
// receive an ack from the execution drivers
func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int) {
if processConfig.Tty {
// The callback is called after the process Start()
// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
// which we close here.
if c, ok := processConfig.Stdout.(io.Closer); ok {
c.Close()
}
}
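
// record the process pid and transition the container to the running state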
m.container.setRunning(pid)

// signal that the process has started
// close channel only if not closed
select {
case <-m.startSignal:
default:
close(m.startSignal)
}

if err := m.container.ToDisk(); err != nil {
logrus.Errorf("Error saving container to disk: %v", err)
}
}

// resetContainer resets the container's IO and ensures that the command is able to be executed again
// by copying the data into a new struct
// if lock is true, the container is locked during the reset
func (m *containerMonitor) resetContainer(lock bool) {
container := m.container
if lock {
container.Lock()
defer container.Unlock()
}

if container.Config.OpenStdin {
if err := container.stdin.Close(); err != nil {
logrus.Errorf("%s: Error closing stdin: %s", container.ID, err)
}
}

if err := container.stdout.Clean(); err != nil {
logrus.Errorf("%s: Error closing stdout: %s", container.ID, err)
}

if err := container.stderr.Clean(); err != nil {
logrus.Errorf("%s: Error closing stderr: %s", container.ID, err)
}

if container.command != nil && container.command.ProcessConfig.Terminal != nil {
if err := container.command.ProcessConfig.Terminal.Close(); err != nil {
logrus.Errorf("%s: Error closing terminal: %s", container.ID, err)
}
}
// Re-create a brand new stdin pipe once the container exited
if container.Config.OpenStdin {
container.stdin, container.stdinPipe = io.Pipe()
}

if container.logDriver != nil {
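// give the log copier a bounded amount of time (loggerCloseTimeout) to finish draining output
// before the driver is closed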
if container.logCopier != nil {
exit := make(chan struct{})
go func() {
container.logCopier.Wait()
close(exit)
}()
select {
case <-time.After(loggerCloseTimeout):
logrus.Warnf("Logger didn't exit in time: logs may be truncated")
case <-exit:
}
}
container.logDriver.Close()
container.logCopier = nil
container.logDriver = nil
}

c := container.command.ProcessConfig.Cmd
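// an exec.Cmd cannot be reused once it has been run, so rebuild it,
// carrying over only the fields that are still valid for the next execution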
container.command.ProcessConfig.Cmd = exec.Cmd{
Stdin: c.Stdin,
Stdout: c.Stdout,
Stderr: c.Stderr,
Path: c.Path,
Env: c.Env,
ExtraFiles: c.ExtraFiles,
Args: c.Args,
Dir: c.Dir,
SysProcAttr: c.SysProcAttr,
}
}