package daemon
import (
"bytes"
"fmt"
"runtime"
"strings" |
c3319445 |
"sync" |
b6c7becb |
"time"
"golang.org/x/net/context"
|
91e197d6 |
"github.com/docker/docker/api/types" |
5f81cf11 |
containertypes "github.com/docker/docker/api/types/container" |
91e197d6 |
"github.com/docker/docker/api/types/strslice" |
b6c7becb |
"github.com/docker/docker/container"
"github.com/docker/docker/daemon/exec" |
1009e6a4 |
"github.com/sirupsen/logrus" |
b6c7becb |
)
const (
// Longest healthcheck probe output message to store. Longer messages will be truncated.
maxOutputLen = 4096
// Default interval between probe runs (from the end of the first to the start of the second).
// Also the time before the first probe.
defaultProbeInterval = 30 * time.Second
// The maximum length of time a single probe run should take. If the probe takes longer
// than this, the check is considered to have failed.
defaultProbeTimeout = 30 * time.Second

	// The time given for the container to start before the health check starts considering
	// the container unstable. Defaults to none.
	defaultStartPeriod = 0 * time.Second

	// Default number of consecutive failures of the health check
	// for the container to be considered unhealthy.
	defaultProbeRetries = 3

	// Maximum number of entries to record
	maxLogEntries = 5
)
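
// For reference, these defaults correspond to a Dockerfile healthcheck such as:
//
//	HEALTHCHECK --interval=30s --timeout=30s --start-period=0s --retries=3 \
//		CMD curl -f http://localhost/ || exit 1
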
const (
// Exit status codes that can be returned by the probe command.

	exitStatusHealthy = 0 // Container is healthy
)
// probe implementations know how to run a particular type of probe.
type probe interface {
// Perform one run of the check. Returns the exit code and an optional
// short diagnostic string.
run(context.Context, *Daemon, *container.Container) (*types.HealthcheckResult, error)
}
// cmdProbe implements the "CMD" probe type.
type cmdProbe struct {
// Run the command with the system's default shell instead of execing it directly.
shell bool
}
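
// For example, a Test of ["CMD", "curl", "-f", "http://localhost/"] is exec'd
// directly, while ["CMD-SHELL", "curl -f http://localhost/"] is wrapped in the
// shell returned by getShell (e.g. ["/bin/sh", "-c"] on Linux).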
// exec the healthcheck command in the container.
// Returns the exit code and probe output (if any).
func (p *cmdProbe) run(ctx context.Context, d *Daemon, cntr *container.Container) (*types.HealthcheckResult, error) {
	cmdSlice := strslice.StrSlice(cntr.Config.Healthcheck.Test)[1:]
	if p.shell {
		cmdSlice = append(getShell(cntr.Config), cmdSlice...)
}
entrypoint, args := d.getEntrypointAndArgs(strslice.StrSlice{}, cmdSlice)
execConfig := exec.NewConfig()
execConfig.OpenStdin = false
execConfig.OpenStdout = true
	execConfig.OpenStderr = true
	execConfig.ContainerID = cntr.ID
execConfig.DetachKeys = []byte{}
execConfig.Entrypoint = entrypoint
execConfig.Args = args
execConfig.Tty = false
	execConfig.Privileged = false
	execConfig.User = cntr.Config.User
	execConfig.WorkingDir = cntr.Config.WorkingDir
linkedEnv, err := d.setupLinkedContainers(cntr)
if err != nil {
return nil, err
}
	execConfig.Env = container.ReplaceOrAppendEnvValues(cntr.CreateDaemonEnvironment(execConfig.Tty, linkedEnv), execConfig.Env)

	d.registerExecCommand(cntr, execConfig)
attributes := map[string]string{
"execID": execConfig.ID,
}
	d.LogContainerEventWithAttributes(cntr, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "), attributes)

	output := &limitedBuffer{}
	err = d.ContainerExecStart(ctx, execConfig.ID, nil, output, output)
if err != nil {
return nil, err
}
info, err := d.getExecConfig(execConfig.ID)
if err != nil {
return nil, err
}
	if info.ExitCode == nil {
		return nil, fmt.Errorf("healthcheck for container %s has no exit code", cntr.ID)
}
// Note: Go's json package will handle invalid UTF-8 for us
out := output.String()
return &types.HealthcheckResult{
End: time.Now(),
ExitCode: *info.ExitCode,
Output: out,
}, nil
}
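
// Note that a failing probe is reported through HealthcheckResult.ExitCode; an
// error return is reserved for cases where the probe could not be run at all.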
// Update the container's Status.Health struct based on the latest probe's result.
func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult, done chan struct{}) {
c.Lock()
defer c.Unlock()

	// The probe may have been cancelled while we were waiting on the lock.
	// Ignore the result in that case.
select {
case <-done:
return
default:
}

	retries := c.Config.Healthcheck.Retries
	if retries <= 0 {
		retries = defaultProbeRetries
}
	h := c.State.Health
	oldStatus := h.Status()
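	// Append the new result, keeping at most maxLogEntries entries in the log.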
if len(h.Log) >= maxLogEntries {
h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
} else {
h.Log = append(h.Log, result)
}
if result.ExitCode == exitStatusHealthy {
		h.FailingStreak = 0
		h.SetStatus(types.Healthy)
} else { // Failure (including invalid exit code)
shouldIncrementStreak := true
// If the container is starting (i.e. we never had a successful health check)
// then we check if we are within the start period of the container in which
		// case we do not increment the failure streak.
		if h.Status() == types.Starting {
startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod)
timeSinceStart := result.Start.Sub(c.State.StartedAt)
// If still within the start period, then don't increment failing streak.
if timeSinceStart < startPeriod {
shouldIncrementStreak = false
}
}
if shouldIncrementStreak {
h.FailingStreak++
			if h.FailingStreak >= retries {
				h.SetStatus(types.Unhealthy)
			}
}
// Else we're starting or healthy. Stay in that state.
}

	// replicate Health status changes
	if err := c.CheckpointTo(d.containersReplica); err != nil {
		// queries will be inconsistent until the next probe runs or other
		// state mutations checkpoint the container
logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
}

	current := h.Status()
	if oldStatus != current {
		d.LogContainerEvent(c, "health_status: "+current)
}
}
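
// In summary, handleProbeResult marks the container healthy after any successful
// probe and unhealthy once FailingStreak reaches the retry limit; failures while
// a starting container is within its start period do not count against the streak.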
// Run the container's monitoring thread until notified via "stop".
// There is never more than one monitor thread running per container at a time.
func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) {
probeTimeout := timeoutWithDefault(c.Config.Healthcheck.Timeout, defaultProbeTimeout)
probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval)
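
	// Note: the first probe fires one probeInterval after monitoring starts;
	// each subsequent probe starts probeInterval after the previous run is handled.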
for {
select {
		case <-stop:
			logrus.Debugf("Stop healthcheck monitoring for container %s (received while idle)", c.ID)
return
		case <-time.After(probeInterval):
			logrus.Debugf("Running health check for container %s ...", c.ID)
startTime := time.Now()
			ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout)
			results := make(chan *types.HealthcheckResult, 1)
			go func() {
				healthChecksCounter.Inc()
result, err := probe.run(ctx, d, c)
				if err != nil {
					healthChecksFailedCounter.Inc()
					logrus.Warnf("Health check for container %s error: %v", c.ID, err)
results <- &types.HealthcheckResult{
ExitCode: -1,
Output: err.Error(),
Start: startTime,
End: time.Now(),
}
} else {
					result.Start = startTime
					logrus.Debugf("Health check for container %s done (exitCode=%d)", c.ID, result.ExitCode)
results <- result
}
close(results)
}()
select {
			case <-stop:
				logrus.Debugf("Stop healthcheck monitoring for container %s (received while probing)", c.ID)
				cancelProbe()
// Wait for probe to exit (it might take a while to respond to the TERM
// signal and we don't want dying probes to pile up).
				<-results
return
			case result := <-results:
				handleProbeResult(d, c, result, stop)
				// Stop the probe timeout
cancelProbe()
			case <-ctx.Done():
				logrus.Debugf("Health check for container %s taking too long", c.ID)
handleProbeResult(d, c, &types.HealthcheckResult{
ExitCode: -1,
Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout),
Start: startTime,
					End: time.Now(),
				}, stop)
cancelProbe()
// Wait for probe to exit (it might take a while to respond to the TERM
// signal and we don't want dying probes to pile up).
<-results
}
}
}
}

// Get a suitable probe implementation for the container's healthcheck configuration.
// Nil will be returned if no healthcheck was configured or NONE was set.
func getProbe(c *container.Container) probe {
config := c.Config.Healthcheck
if config == nil || len(config.Test) == 0 {
return nil
}
switch config.Test[0] {
case "CMD":
return &cmdProbe{shell: false}
case "CMD-SHELL":
		return &cmdProbe{shell: true}
	case "NONE":
		return nil
	default:
		logrus.Warnf("Unknown healthcheck type '%s' (expected 'CMD') in container %s", config.Test[0], c.ID)
return nil
}
}
// Ensure the health-check monitor is running or not, depending on the current
// state of the container.
// Called from monitor.go, with c locked.
func (d *Daemon) updateHealthMonitor(c *container.Container) {
h := c.State.Health
if h == nil {
return // No healthcheck configured
}
probe := getProbe(c)
wantRunning := c.Running && !c.Paused && probe != nil
if wantRunning {
if stop := h.OpenMonitorChannel(); stop != nil {
go monitor(d, c, stop, probe)
}
} else {
h.CloseMonitorChannel()
}
}
// Reset the health state for a newly-started, restarted or restored container.
// initHealthMonitor is called from monitor.go and we should never be running
// two instances at once.
// Called with c locked.
func (d *Daemon) initHealthMonitor(c *container.Container) {
	// If no healthcheck is set up then don't init the monitor
	if getProbe(c) == nil {
return
}
// This is needed in case we're auto-restarting
d.stopHealthchecks(c)

	if h := c.State.Health; h != nil {
		h.SetStatus(types.Starting)
h.FailingStreak = 0
	} else {
		h := &container.Health{}
		h.SetStatus(types.Starting)
c.State.Health = h
}
d.updateHealthMonitor(c)
}
// Called when the container is being stopped (whether because the health check is
// failing or for any other reason).
func (d *Daemon) stopHealthchecks(c *container.Container) {
h := c.State.Health
if h != nil {
h.CloseMonitorChannel()
}
}
// Buffer up to maxOutputLen bytes. Further data is discarded.
type limitedBuffer struct {
	buf       bytes.Buffer
	mu        sync.Mutex
	truncated bool // indicates that data has been lost
}
// Append to limitedBuffer while there is room.
func (b *limitedBuffer) Write(data []byte) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()

bufLen := b.buf.Len()
dataLen := len(data)
keep := min(maxOutputLen-bufLen, dataLen)
if keep > 0 {
b.buf.Write(data[:keep])
}
if keep < dataLen {
b.truncated = true
}
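	// Report the full input length so callers treat the write as successful
	// even when data beyond maxOutputLen has been discarded.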
return dataLen, nil
}
// The contents of the buffer, with "..." appended if it overflowed.
func (b *limitedBuffer) String() string {
	b.mu.Lock()
	defer b.mu.Unlock()

out := b.buf.String()
if b.truncated {
out = out + "..."
}
return out
}
// If configuredValue is zero, use defaultValue instead.
func timeoutWithDefault(configuredValue time.Duration, defaultValue time.Duration) time.Duration {
if configuredValue == 0 {
return defaultValue
}
return configuredValue
}
func min(x, y int) int {
if x < y {
return x
}
return y
}

func getShell(config *containertypes.Config) []string {
if len(config.Shell) != 0 {
return config.Shell
}
if runtime.GOOS != "windows" {
return []string{"/bin/sh", "-c"}
}
return []string{"cmd", "/S", "/C"}
}