This PR adds support for user-defined health-check probes for Docker
containers. It adds a `HEALTHCHECK` instruction to the Dockerfile syntax plus
some corresponding "docker run" options. It can be used with a restart policy
to automatically restart a container if the check fails.
The `HEALTHCHECK` instruction has two forms:
* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container)
* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image)
The `HEALTHCHECK` instruction tells Docker how to test a container to check that
it is still working. This can detect cases such as a web server that is stuck in
an infinite loop and unable to handle new connections, even though the server
process is still running.
When a container has a healthcheck specified, it has a _health status_ in
addition to its normal status. This status is initially `starting`. Whenever a
health check passes, it becomes `healthy` (whatever state it was previously in).
After a certain number of consecutive failures, it becomes `unhealthy`.
The options that can appear before `CMD` are:
* `--interval=DURATION` (default: `30s`)
* `--timeout=DURATION` (default: `30s`)
* `--retries=N` (default: `1`)
The health check will first run **interval** seconds after the container is
started, and then again **interval** seconds after each previous check completes.
If a single run of the check takes longer than **timeout** seconds then the check
is considered to have failed.
It takes **retries** consecutive failures of the health check for the container
to be considered `unhealthy`.
There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list
more than one then only the last `HEALTHCHECK` will take effect.
The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK
CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands;
see e.g. `ENTRYPOINT` for details).
The command's exit status indicates the health status of the container.
The possible values are:
- 0: success - the container is healthy and ready for use
- 1: unhealthy - the container is not working correctly
- 2: starting - the container is not ready for use yet, but is working correctly
If the probe returns 2 ("starting") when the container has already moved out of the
"starting" state then it is treated as "unhealthy" instead.
For example, to check every five minutes or so that a web-server is able to
serve the site's main page within three seconds:
HEALTHCHECK --interval=5m --timeout=3s \
CMD curl -f http://localhost/ || exit 1
To help debug failing probes, any output text (UTF-8 encoded) that the command writes
on stdout or stderr will be stored in the health status and can be queried with
`docker inspect`. Such output should be kept short (only the first 4096 bytes
are stored currently).
When the health status of a container changes, a `health_status` event is
generated with the new status. The health status is also displayed in the
`docker ps` output.
Signed-off-by: Thomas Leonard <thomas.leonard@docker.com>
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
... | ... |
@@ -17,7 +17,7 @@ type execBackend interface { |
17 | 17 |
ContainerExecCreate(name string, config *types.ExecConfig) (string, error) |
18 | 18 |
ContainerExecInspect(id string) (*backend.ExecInspect, error) |
19 | 19 |
ContainerExecResize(name string, height, width int) error |
20 |
- ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error |
|
20 |
+ ContainerExecStart(ctx context.Context, name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error |
|
21 | 21 |
ExecExists(name string) (bool, error) |
22 | 22 |
} |
23 | 23 |
|
... | ... |
@@ -106,7 +106,8 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res |
106 | 106 |
} |
107 | 107 |
|
108 | 108 |
// Now run the user process in container. |
109 |
- if err := s.backend.ContainerExecStart(execName, stdin, stdout, stderr); err != nil { |
|
109 |
+ // Maybe we should pass ctx here if we're not detaching? |
|
110 |
+ if err := s.backend.ContainerExecStart(context.Background(), execName, stdin, stdout, stderr); err != nil { |
|
110 | 111 |
if execStartCheck.Detach { |
111 | 112 |
return err |
112 | 113 |
} |
... | ... |
@@ -22,15 +22,16 @@ import ( |
22 | 22 |
) |
23 | 23 |
|
24 | 24 |
// validCommitCommands is the set of Dockerfile instructions that may be
// applied as a --change when committing a container. Kept in alphabetical
// order (the new "healthcheck" entry had been inserted out of order).
var validCommitCommands = map[string]bool{
	"cmd":         true,
	"entrypoint":  true,
	"env":         true,
	"expose":      true,
	"healthcheck": true,
	"label":       true,
	"onbuild":     true,
	"user":        true,
	"volume":      true,
	"workdir":     true,
}
35 | 36 |
|
36 | 37 |
// BuiltinAllowedBuildArgs is list of built-in allowed build args |
... | ... |
@@ -3,40 +3,42 @@ package command |
3 | 3 |
|
4 | 4 |
// Define constants for the command strings
const (
	Add         = "add"
	Arg         = "arg"
	Cmd         = "cmd"
	Copy        = "copy"
	Entrypoint  = "entrypoint"
	Env         = "env"
	Expose      = "expose"
	From        = "from"
	Healthcheck = "healthcheck"
	Label       = "label"
	Maintainer  = "maintainer"
	Onbuild     = "onbuild"
	Run         = "run"
	StopSignal  = "stopsignal"
	User        = "user"
	Volume      = "volume"
	Workdir     = "workdir"
)

// Commands is list of all Dockerfile commands
var Commands = map[string]struct{}{
	Add:         {},
	Arg:         {},
	Cmd:         {},
	Copy:        {},
	Entrypoint:  {},
	Env:         {},
	Expose:      {},
	From:        {},
	Healthcheck: {},
	Label:       {},
	Maintainer:  {},
	Onbuild:     {},
	Run:         {},
	StopSignal:  {},
	User:        {},
	Volume:      {},
	Workdir:     {},
}
... | ... |
@@ -12,7 +12,9 @@ import ( |
12 | 12 |
"regexp" |
13 | 13 |
"runtime" |
14 | 14 |
"sort" |
15 |
+ "strconv" |
|
15 | 16 |
"strings" |
17 |
+ "time" |
|
16 | 18 |
|
17 | 19 |
"github.com/Sirupsen/logrus" |
18 | 20 |
"github.com/docker/docker/api" |
... | ... |
@@ -426,6 +428,111 @@ func cmd(b *Builder, args []string, attributes map[string]bool, original string) |
426 | 426 |
return nil |
427 | 427 |
} |
428 | 428 |
|
429 |
+// parseOptInterval(flag) is the duration of flag.Value, or 0 if |
|
430 |
+// empty. An error is reported if the value is given and is not positive. |
|
431 |
+func parseOptInterval(f *Flag) (time.Duration, error) { |
|
432 |
+ s := f.Value |
|
433 |
+ if s == "" { |
|
434 |
+ return 0, nil |
|
435 |
+ } |
|
436 |
+ d, err := time.ParseDuration(s) |
|
437 |
+ if err != nil { |
|
438 |
+ return 0, err |
|
439 |
+ } |
|
440 |
+ if d <= 0 { |
|
441 |
+ return 0, fmt.Errorf("Interval %#v must be positive", f.name) |
|
442 |
+ } |
|
443 |
+ return d, nil |
|
444 |
+} |
|
445 |
+ |
|
446 |
+// HEALTHCHECK foo |
|
447 |
+// |
|
448 |
+// Set the default healthcheck command to run in the container (which may be empty). |
|
449 |
+// Argument handling is the same as RUN. |
|
450 |
+// |
|
451 |
+func healthcheck(b *Builder, args []string, attributes map[string]bool, original string) error { |
|
452 |
+ if len(args) == 0 { |
|
453 |
+ return fmt.Errorf("HEALTHCHECK requires an argument") |
|
454 |
+ } |
|
455 |
+ typ := strings.ToUpper(args[0]) |
|
456 |
+ args = args[1:] |
|
457 |
+ if typ == "NONE" { |
|
458 |
+ if len(args) != 0 { |
|
459 |
+ return fmt.Errorf("HEALTHCHECK NONE takes no arguments") |
|
460 |
+ } |
|
461 |
+ test := strslice.StrSlice{typ} |
|
462 |
+ b.runConfig.Healthcheck = &container.HealthConfig{ |
|
463 |
+ Test: test, |
|
464 |
+ } |
|
465 |
+ } else { |
|
466 |
+ if b.runConfig.Healthcheck != nil { |
|
467 |
+ oldCmd := b.runConfig.Healthcheck.Test |
|
468 |
+ if len(oldCmd) > 0 && oldCmd[0] != "NONE" { |
|
469 |
+ fmt.Fprintf(b.Stdout, "Note: overriding previous HEALTHCHECK: %v\n", oldCmd) |
|
470 |
+ } |
|
471 |
+ } |
|
472 |
+ |
|
473 |
+ healthcheck := container.HealthConfig{} |
|
474 |
+ |
|
475 |
+ flInterval := b.flags.AddString("interval", "") |
|
476 |
+ flTimeout := b.flags.AddString("timeout", "") |
|
477 |
+ flRetries := b.flags.AddString("retries", "") |
|
478 |
+ |
|
479 |
+ if err := b.flags.Parse(); err != nil { |
|
480 |
+ return err |
|
481 |
+ } |
|
482 |
+ |
|
483 |
+ switch typ { |
|
484 |
+ case "CMD": |
|
485 |
+ cmdSlice := handleJSONArgs(args, attributes) |
|
486 |
+ if len(cmdSlice) == 0 { |
|
487 |
+ return fmt.Errorf("Missing command after HEALTHCHECK CMD") |
|
488 |
+ } |
|
489 |
+ |
|
490 |
+ if !attributes["json"] { |
|
491 |
+ typ = "CMD-SHELL" |
|
492 |
+ } |
|
493 |
+ |
|
494 |
+ healthcheck.Test = strslice.StrSlice(append([]string{typ}, cmdSlice...)) |
|
495 |
+ default: |
|
496 |
+ return fmt.Errorf("Unknown type %#v in HEALTHCHECK (try CMD)", typ) |
|
497 |
+ } |
|
498 |
+ |
|
499 |
+ interval, err := parseOptInterval(flInterval) |
|
500 |
+ if err != nil { |
|
501 |
+ return err |
|
502 |
+ } |
|
503 |
+ healthcheck.Interval = interval |
|
504 |
+ |
|
505 |
+ timeout, err := parseOptInterval(flTimeout) |
|
506 |
+ if err != nil { |
|
507 |
+ return err |
|
508 |
+ } |
|
509 |
+ healthcheck.Timeout = timeout |
|
510 |
+ |
|
511 |
+ if flRetries.Value != "" { |
|
512 |
+ retries, err := strconv.ParseInt(flRetries.Value, 10, 32) |
|
513 |
+ if err != nil { |
|
514 |
+ return err |
|
515 |
+ } |
|
516 |
+ if retries < 1 { |
|
517 |
+ return fmt.Errorf("--retries must be at least 1 (not %d)", retries) |
|
518 |
+ } |
|
519 |
+ healthcheck.Retries = int(retries) |
|
520 |
+ } else { |
|
521 |
+ healthcheck.Retries = 0 |
|
522 |
+ } |
|
523 |
+ |
|
524 |
+ b.runConfig.Healthcheck = &healthcheck |
|
525 |
+ } |
|
526 |
+ |
|
527 |
+ if err := b.commit("", b.runConfig.Cmd, fmt.Sprintf("HEALTHCHECK %q", b.runConfig.Healthcheck)); err != nil { |
|
528 |
+ return err |
|
529 |
+ } |
|
530 |
+ |
|
531 |
+ return nil |
|
532 |
+} |
|
533 |
+ |
|
429 | 534 |
// ENTRYPOINT /usr/sbin/nginx |
430 | 535 |
// |
431 | 536 |
// Set the entrypoint (which defaults to sh -c on linux, or cmd /S /C on Windows) to |
... | ... |
@@ -58,22 +58,23 @@ var evaluateTable map[string]func(*Builder, []string, map[string]bool, string) e |
58 | 58 |
|
59 | 59 |
func init() {
	// evaluateTable maps each Dockerfile instruction name to the builder
	// dispatch function that implements it; dispatchFromDockerfile consults
	// this table for every parsed node.
	evaluateTable = map[string]func(*Builder, []string, map[string]bool, string) error{
		command.Env:         env,
		command.Label:       label,
		command.Maintainer:  maintainer,
		command.Add:         add,
		command.Copy:        dispatchCopy, // copy() is a go builtin
		command.From:        from,
		command.Onbuild:     onbuild,
		command.Workdir:     workdir,
		command.Run:         run,
		command.Cmd:         cmd,
		command.Entrypoint:  entrypoint,
		command.Expose:      expose,
		command.Volume:      volume,
		command.User:        user,
		command.StopSignal:  stopSignal,
		command.Arg:         arg,
		command.Healthcheck: healthcheck,
	}
}
79 | 80 |
|
... | ... |
@@ -329,3 +329,32 @@ func parseMaybeJSONToList(rest string) (*Node, map[string]bool, error) { |
329 | 329 |
|
330 | 330 |
return parseStringsWhitespaceDelimited(rest) |
331 | 331 |
} |
332 |
+ |
|
333 |
+// The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument. |
|
334 |
+func parseHealthConfig(rest string) (*Node, map[string]bool, error) { |
|
335 |
+ // Find end of first argument |
|
336 |
+ var sep int |
|
337 |
+ for ; sep < len(rest); sep++ { |
|
338 |
+ if unicode.IsSpace(rune(rest[sep])) { |
|
339 |
+ break |
|
340 |
+ } |
|
341 |
+ } |
|
342 |
+ next := sep |
|
343 |
+ for ; next < len(rest); next++ { |
|
344 |
+ if !unicode.IsSpace(rune(rest[next])) { |
|
345 |
+ break |
|
346 |
+ } |
|
347 |
+ } |
|
348 |
+ |
|
349 |
+ if sep == 0 { |
|
350 |
+ return nil, nil, nil |
|
351 |
+ } |
|
352 |
+ |
|
353 |
+ typ := rest[:sep] |
|
354 |
+ cmd, attrs, err := parseMaybeJSON(rest[next:]) |
|
355 |
+ if err != nil { |
|
356 |
+ return nil, nil, err |
|
357 |
+ } |
|
358 |
+ |
|
359 |
+ return &Node{Value: typ, Next: cmd, Attributes: attrs}, nil, err |
|
360 |
+} |
... | ... |
@@ -66,22 +66,23 @@ func init() { |
66 | 66 |
// functions. Errors are propagated up by Parse() and the resulting AST can |
67 | 67 |
// be incorporated directly into the existing AST as a next. |
68 | 68 |
dispatch = map[string]func(string) (*Node, map[string]bool, error){ |
69 |
- command.User: parseString, |
|
70 |
- command.Onbuild: parseSubCommand, |
|
71 |
- command.Workdir: parseString, |
|
72 |
- command.Env: parseEnv, |
|
73 |
- command.Label: parseLabel, |
|
74 |
- command.Maintainer: parseString, |
|
75 |
- command.From: parseString, |
|
76 |
- command.Add: parseMaybeJSONToList, |
|
77 |
- command.Copy: parseMaybeJSONToList, |
|
78 |
- command.Run: parseMaybeJSON, |
|
79 |
- command.Cmd: parseMaybeJSON, |
|
80 |
- command.Entrypoint: parseMaybeJSON, |
|
81 |
- command.Expose: parseStringsWhitespaceDelimited, |
|
82 |
- command.Volume: parseMaybeJSONToList, |
|
83 |
- command.StopSignal: parseString, |
|
84 |
- command.Arg: parseNameOrNameVal, |
|
69 |
+ command.User: parseString, |
|
70 |
+ command.Onbuild: parseSubCommand, |
|
71 |
+ command.Workdir: parseString, |
|
72 |
+ command.Env: parseEnv, |
|
73 |
+ command.Label: parseLabel, |
|
74 |
+ command.Maintainer: parseString, |
|
75 |
+ command.From: parseString, |
|
76 |
+ command.Add: parseMaybeJSONToList, |
|
77 |
+ command.Copy: parseMaybeJSONToList, |
|
78 |
+ command.Run: parseMaybeJSON, |
|
79 |
+ command.Cmd: parseMaybeJSON, |
|
80 |
+ command.Entrypoint: parseMaybeJSON, |
|
81 |
+ command.Expose: parseStringsWhitespaceDelimited, |
|
82 |
+ command.Volume: parseMaybeJSONToList, |
|
83 |
+ command.StopSignal: parseString, |
|
84 |
+ command.Arg: parseNameOrNameVal, |
|
85 |
+ command.Healthcheck: parseHealthConfig, |
|
85 | 86 |
} |
86 | 87 |
} |
87 | 88 |
|
88 | 89 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,10 @@ |
0 |
+FROM debian |
|
1 |
+ADD check.sh main.sh /app/ |
|
2 |
+CMD /app/main.sh |
|
3 |
+HEALTHCHECK |
|
4 |
+HEALTHCHECK --interval=5s --timeout=3s --retries=1 \ |
|
5 |
+ CMD /app/check.sh --quiet |
|
6 |
+HEALTHCHECK CMD |
|
7 |
+HEALTHCHECK CMD a b |
|
8 |
+HEALTHCHECK --timeout=3s CMD ["foo"] |
|
9 |
+HEALTHCHECK CONNECT TCP 7000 |
0 | 10 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,9 @@ |
0 |
+(from "debian") |
|
1 |
+(add "check.sh" "main.sh" "/app/") |
|
2 |
+(cmd "/app/main.sh") |
|
3 |
+(healthcheck) |
|
4 |
+(healthcheck ["--interval=5s" "--timeout=3s" "--retries=1"] "CMD" "/app/check.sh --quiet") |
|
5 |
+(healthcheck "CMD") |
|
6 |
+(healthcheck "CMD" "a b") |
|
7 |
+(healthcheck ["--timeout=3s"] "CMD" "foo") |
|
8 |
+(healthcheck "CONNECT" "TCP 7000") |
0 | 9 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,49 @@ |
0 |
+package container |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "github.com/Sirupsen/logrus" |
|
4 |
+ "github.com/docker/engine-api/types" |
|
5 |
+) |
|
6 |
+ |
|
7 |
// Health holds the current container health-check state
type Health struct {
	types.Health
	stop chan struct{} // Write struct{} to stop the monitor
}

// String returns a human-readable description of the health-check state
func (s *Health) String() string {
	// A nil stop channel means no monitor has been opened, i.e. there is
	// no healthcheck running for this container.
	if s.stop == nil {
		return "no healthcheck"
	}
	switch s.Status {
	case types.Starting:
		return "health: starting"
	default: // Healthy and Unhealthy are clear on their own
		return s.Status
	}
}

// OpenMonitorChannel creates and returns a new monitor channel. If there already is one,
// it returns nil.
func (s *Health) OpenMonitorChannel() chan struct{} {
	if s.stop == nil {
		logrus.Debugf("OpenMonitorChannel")
		// Unbuffered on purpose: CloseMonitorChannel relies on the send
		// blocking until the monitor has received the stop request.
		s.stop = make(chan struct{})
		return s.stop
	}
	return nil
}

// CloseMonitorChannel closes any existing monitor channel.
func (s *Health) CloseMonitorChannel() {
	if s.stop != nil {
		logrus.Debugf("CloseMonitorChannel: waiting for probe to stop")
		// This channel does not buffer. Once the write succeeds, the monitor
		// has read the stop request and will not make any further updates
		// to c.State.Health.
		s.stop <- struct{}{}
		s.stop = nil
		logrus.Debugf("CloseMonitorChannel done")
	}
}
... | ... |
@@ -27,6 +27,7 @@ type State struct { |
27 | 27 |
StartedAt time.Time |
28 | 28 |
FinishedAt time.Time |
29 | 29 |
waitChan chan struct{} |
30 |
+ Health *Health |
|
30 | 31 |
} |
31 | 32 |
|
32 | 33 |
// NewState creates a default state object with a fresh channel for state changes. |
... | ... |
@@ -46,6 +47,9 @@ func (s *State) String() string { |
46 | 46 |
return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt))) |
47 | 47 |
} |
48 | 48 |
|
49 |
+ if h := s.Health; h != nil { |
|
50 |
+ return fmt.Sprintf("Up %s (%s)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)), h.String()) |
|
51 |
+ } |
|
49 | 52 |
return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt))) |
50 | 53 |
} |
51 | 54 |
|
... | ... |
@@ -80,6 +80,25 @@ func merge(userConf, imageConf *containertypes.Config) error { |
80 | 80 |
userConf.Entrypoint = imageConf.Entrypoint |
81 | 81 |
} |
82 | 82 |
} |
83 |
+ if imageConf.Healthcheck != nil { |
|
84 |
+ if userConf.Healthcheck == nil { |
|
85 |
+ userConf.Healthcheck = imageConf.Healthcheck |
|
86 |
+ } else { |
|
87 |
+ if len(userConf.Healthcheck.Test) == 0 { |
|
88 |
+ userConf.Healthcheck.Test = imageConf.Healthcheck.Test |
|
89 |
+ } |
|
90 |
+ if userConf.Healthcheck.Interval == 0 { |
|
91 |
+ userConf.Healthcheck.Interval = imageConf.Healthcheck.Interval |
|
92 |
+ } |
|
93 |
+ if userConf.Healthcheck.Timeout == 0 { |
|
94 |
+ userConf.Healthcheck.Timeout = imageConf.Healthcheck.Timeout |
|
95 |
+ } |
|
96 |
+ if userConf.Healthcheck.Retries == 0 { |
|
97 |
+ userConf.Healthcheck.Retries = imageConf.Healthcheck.Retries |
|
98 |
+ } |
|
99 |
+ } |
|
100 |
+ } |
|
101 |
+ |
|
83 | 102 |
if userConf.WorkingDir == "" { |
84 | 103 |
userConf.WorkingDir = imageConf.WorkingDir |
85 | 104 |
} |
... | ... |
@@ -14,11 +14,15 @@ import ( |
14 | 14 |
"github.com/docker/docker/errors" |
15 | 15 |
"github.com/docker/docker/libcontainerd" |
16 | 16 |
"github.com/docker/docker/pkg/pools" |
17 |
+ "github.com/docker/docker/pkg/signal" |
|
17 | 18 |
"github.com/docker/docker/pkg/term" |
18 | 19 |
"github.com/docker/engine-api/types" |
19 | 20 |
"github.com/docker/engine-api/types/strslice" |
20 | 21 |
) |
21 | 22 |
|
23 |
+// Seconds to wait after sending TERM before trying KILL |
|
24 |
+const termProcessTimeout = 10 |
|
25 |
+ |
|
22 | 26 |
func (d *Daemon) registerExecCommand(container *container.Container, config *exec.Config) { |
23 | 27 |
// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed. |
24 | 28 |
container.ExecCommands.Add(config.ID, config) |
... | ... |
@@ -130,7 +134,8 @@ func (d *Daemon) ContainerExecCreate(name string, config *types.ExecConfig) (str |
130 | 130 |
|
131 | 131 |
// ContainerExecStart starts a previously set up exec instance. The |
132 | 132 |
// std streams are set up. |
133 |
-func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) { |
|
133 |
+// If ctx is cancelled, the process is terminated. |
|
134 |
+func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) { |
|
134 | 135 |
var ( |
135 | 136 |
cStdin io.ReadCloser |
136 | 137 |
cStdout, cStderr io.Writer |
... | ... |
@@ -197,15 +202,28 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io. |
197 | 197 |
return nil |
198 | 198 |
} |
199 | 199 |
|
200 |
- attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys) |
|
200 |
+ attachErr := container.AttachStreams(ctx, ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys) |
|
201 | 201 |
|
202 | 202 |
if err := d.containerd.AddProcess(c.ID, name, p); err != nil { |
203 | 203 |
return err |
204 | 204 |
} |
205 | 205 |
|
206 |
- err = <-attachErr |
|
207 |
- if err != nil { |
|
208 |
- return fmt.Errorf("attach failed with error: %v", err) |
|
206 |
+ select { |
|
207 |
+ case <-ctx.Done(): |
|
208 |
+ logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID) |
|
209 |
+ d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"])) |
|
210 |
+ select { |
|
211 |
+ case <-time.After(termProcessTimeout * time.Second): |
|
212 |
+ logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout) |
|
213 |
+ d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"])) |
|
214 |
+ case <-attachErr: |
|
215 |
+ // TERM signal worked |
|
216 |
+ } |
|
217 |
+ return fmt.Errorf("context cancelled") |
|
218 |
+ case err := <-attachErr: |
|
219 |
+ if err != nil { |
|
220 |
+ return fmt.Errorf("attach failed with error: %v", err) |
|
221 |
+ } |
|
209 | 222 |
} |
210 | 223 |
return nil |
211 | 224 |
} |
212 | 225 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,314 @@ |
0 |
+package daemon |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "bytes" |
|
4 |
+ "fmt" |
|
5 |
+ "runtime" |
|
6 |
+ "strings" |
|
7 |
+ "time" |
|
8 |
+ |
|
9 |
+ "golang.org/x/net/context" |
|
10 |
+ |
|
11 |
+ "github.com/Sirupsen/logrus" |
|
12 |
+ "github.com/docker/docker/container" |
|
13 |
+ "github.com/docker/docker/daemon/exec" |
|
14 |
+ "github.com/docker/engine-api/types" |
|
15 |
+ "github.com/docker/engine-api/types/strslice" |
|
16 |
+) |
|
17 |
+ |
|
18 |
const (
	// Longest healthcheck probe output message to store. Longer messages will be truncated.
	maxOutputLen = 4096

	// Default interval between probe runs (from the end of the first to the start of the second).
	// Also the time before the first probe.
	defaultProbeInterval = 30 * time.Second

	// The maximum length of time a single probe run should take. If the probe takes longer
	// than this, the check is considered to have failed.
	defaultProbeTimeout = 30 * time.Second

	// Shut down a container if it becomes Unhealthy.
	// NOTE(review): not referenced anywhere in this file — confirm it is
	// consumed elsewhere (e.g. by the restart policy) or remove it.
	defaultExitOnUnhealthy = true

	// Maximum number of entries to record
	maxLogEntries = 5
)

const (
	// Exit status codes that can be returned by the probe command.

	exitStatusHealthy   = 0 // Container is healthy
	exitStatusUnhealthy = 1 // Container is unhealthy
	exitStatusStarting  = 2 // Container needs more time to start
)
|
44 |
+ |
|
45 |
// probe implementations know how to run a particular type of probe.
type probe interface {
	// Perform one run of the check. Returns the exit code and an optional
	// short diagnostic string.
	run(context.Context, *Daemon, *container.Container) (*types.HealthcheckResult, error)
}

// cmdProbe implements the "CMD" probe type.
type cmdProbe struct {
	// Run the command with the system's default shell instead of execing it directly.
	shell bool
}

// exec the healthcheck command in the container.
// Returns the exit code and probe output (if any)
func (p *cmdProbe) run(ctx context.Context, d *Daemon, container *container.Container) (*types.HealthcheckResult, error) {
	// Test[0] is the probe type ("CMD" or "CMD-SHELL"); the remainder is the command.
	cmdSlice := strslice.StrSlice(container.Config.Healthcheck.Test)[1:]
	if p.shell {
		// Shell form: wrap the command in the platform's default shell.
		if runtime.GOOS != "windows" {
			cmdSlice = append([]string{"/bin/sh", "-c"}, cmdSlice...)
		} else {
			cmdSlice = append([]string{"cmd", "/S", "/C"}, cmdSlice...)
		}
	}
	entrypoint, args := d.getEntrypointAndArgs(strslice.StrSlice{}, cmdSlice)
	execConfig := exec.NewConfig()
	execConfig.OpenStdin = false
	execConfig.OpenStdout = true
	execConfig.OpenStderr = true
	execConfig.ContainerID = container.ID
	execConfig.DetachKeys = []byte{}
	execConfig.Entrypoint = entrypoint
	execConfig.Args = args
	execConfig.Tty = false
	execConfig.Privileged = false
	// Run the probe as the same user the container runs as.
	execConfig.User = container.Config.User

	d.registerExecCommand(container, execConfig)
	d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "))

	// Capture combined stdout/stderr; limitedBuffer truncates at maxOutputLen.
	output := &limitedBuffer{}
	err := d.ContainerExecStart(ctx, execConfig.ID, nil, output, output)
	if err != nil {
		return nil, err
	}
	info, err := d.getExecConfig(execConfig.ID)
	if err != nil {
		return nil, err
	}
	if info.ExitCode == nil {
		return nil, fmt.Errorf("Healthcheck has no exit code!")
	}
	// Note: Go's json package will handle invalid UTF-8 for us
	out := output.String()
	return &types.HealthcheckResult{
		End:      time.Now(),
		ExitCode: *info.ExitCode,
		Output:   out,
	}, nil
}
|
105 |
+ |
|
106 |
+// Update the container's Status.Health struct based on the latest probe's result. |
|
107 |
+func handleProbeResult(d *Daemon, c *container.Container, result *types.HealthcheckResult) { |
|
108 |
+ c.Lock() |
|
109 |
+ defer c.Unlock() |
|
110 |
+ |
|
111 |
+ retries := c.Config.Healthcheck.Retries |
|
112 |
+ if retries <= 0 { |
|
113 |
+ retries = 1 // Default if unset or set to an invalid value |
|
114 |
+ } |
|
115 |
+ |
|
116 |
+ h := c.State.Health |
|
117 |
+ oldStatus := h.Status |
|
118 |
+ |
|
119 |
+ if len(h.Log) >= maxLogEntries { |
|
120 |
+ h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result) |
|
121 |
+ } else { |
|
122 |
+ h.Log = append(h.Log, result) |
|
123 |
+ } |
|
124 |
+ |
|
125 |
+ if result.ExitCode == exitStatusHealthy { |
|
126 |
+ h.FailingStreak = 0 |
|
127 |
+ h.Status = types.Healthy |
|
128 |
+ } else if result.ExitCode == exitStatusStarting && c.State.Health.Status == types.Starting { |
|
129 |
+ // The container is not ready yet. Remain in the starting state. |
|
130 |
+ } else { |
|
131 |
+ // Failure (incuding invalid exit code) |
|
132 |
+ h.FailingStreak++ |
|
133 |
+ if c.State.Health.FailingStreak >= retries { |
|
134 |
+ h.Status = types.Unhealthy |
|
135 |
+ } |
|
136 |
+ // Else we're starting or healthy. Stay in that state. |
|
137 |
+ } |
|
138 |
+ |
|
139 |
+ if oldStatus != h.Status { |
|
140 |
+ d.LogContainerEvent(c, "health_status: "+h.Status) |
|
141 |
+ } |
|
142 |
+} |
|
143 |
+ |
|
144 |
+// Run the container's monitoring thread until notified via "stop". |
|
145 |
+// There is never more than one monitor thread running per container at a time. |
|
146 |
+func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) { |
|
147 |
+ probeTimeout := timeoutWithDefault(c.Config.Healthcheck.Timeout, defaultProbeTimeout) |
|
148 |
+ probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval) |
|
149 |
+ for { |
|
150 |
+ select { |
|
151 |
+ case <-stop: |
|
152 |
+ logrus.Debugf("Stop healthcheck monitoring (received while idle)") |
|
153 |
+ return |
|
154 |
+ case <-time.After(probeInterval): |
|
155 |
+ logrus.Debugf("Running health check...") |
|
156 |
+ startTime := time.Now() |
|
157 |
+ ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout) |
|
158 |
+ results := make(chan *types.HealthcheckResult) |
|
159 |
+ go func() { |
|
160 |
+ result, err := probe.run(ctx, d, c) |
|
161 |
+ if err != nil { |
|
162 |
+ logrus.Warnf("Health check error: %v", err) |
|
163 |
+ results <- &types.HealthcheckResult{ |
|
164 |
+ ExitCode: -1, |
|
165 |
+ Output: err.Error(), |
|
166 |
+ Start: startTime, |
|
167 |
+ End: time.Now(), |
|
168 |
+ } |
|
169 |
+ } else { |
|
170 |
+ result.Start = startTime |
|
171 |
+ logrus.Debugf("Health check done (exitCode=%d)", result.ExitCode) |
|
172 |
+ results <- result |
|
173 |
+ } |
|
174 |
+ close(results) |
|
175 |
+ }() |
|
176 |
+ select { |
|
177 |
+ case <-stop: |
|
178 |
+ logrus.Debugf("Stop healthcheck monitoring (received while probing)") |
|
179 |
+ // Stop timeout and kill probe, but don't wait for probe to exit. |
|
180 |
+ cancelProbe() |
|
181 |
+ return |
|
182 |
+ case result := <-results: |
|
183 |
+ handleProbeResult(d, c, result) |
|
184 |
+ // Stop timeout |
|
185 |
+ cancelProbe() |
|
186 |
+ case <-ctx.Done(): |
|
187 |
+ logrus.Debugf("Health check taking too long") |
|
188 |
+ handleProbeResult(d, c, &types.HealthcheckResult{ |
|
189 |
+ ExitCode: -1, |
|
190 |
+ Output: fmt.Sprintf("Health check exceeded timeout (%v)", probeTimeout), |
|
191 |
+ Start: startTime, |
|
192 |
+ End: time.Now(), |
|
193 |
+ }) |
|
194 |
+ cancelProbe() |
|
195 |
+ // Wait for probe to exit (it might take a while to respond to the TERM |
|
196 |
+ // signal and we don't want dying probes to pile up). |
|
197 |
+ <-results |
|
198 |
+ } |
|
199 |
+ } |
|
200 |
+ } |
|
201 |
+} |
|
202 |
+ |
|
203 |
+// Get a suitable probe implementation for the container's healthcheck configuration. |
|
204 |
+func getProbe(c *container.Container) probe { |
|
205 |
+ config := c.Config.Healthcheck |
|
206 |
+ if config == nil || len(config.Test) == 0 { |
|
207 |
+ return nil |
|
208 |
+ } |
|
209 |
+ switch config.Test[0] { |
|
210 |
+ case "CMD": |
|
211 |
+ return &cmdProbe{shell: false} |
|
212 |
+ case "CMD-SHELL": |
|
213 |
+ return &cmdProbe{shell: true} |
|
214 |
+ default: |
|
215 |
+ logrus.Warnf("Unknown healthcheck type '%s' (expected 'CMD')", config.Test[0]) |
|
216 |
+ return nil |
|
217 |
+ } |
|
218 |
+} |
|
219 |
+ |
|
220 |
// Ensure the health-check monitor is running or not, depending on the current
// state of the container.
// Called from monitor.go, with c locked.
func (d *Daemon) updateHealthMonitor(c *container.Container) {
	h := c.State.Health
	if h == nil {
		return // No healthcheck configured
	}

	probe := getProbe(c)
	// Only monitor a running, unpaused container with a recognized probe type.
	wantRunning := c.Running && !c.Paused && probe != nil
	if wantRunning {
		// OpenMonitorChannel returns nil when a monitor already exists,
		// so at most one monitor goroutine runs per container.
		if stop := h.OpenMonitorChannel(); stop != nil {
			go monitor(d, c, stop, probe)
		}
	} else {
		h.CloseMonitorChannel()
	}
}
|
239 |
+ |
|
240 |
// Reset the health state for a newly-started, restarted or restored container.
// initHealthMonitor is called from monitor.go and we should never be running
// two instances at once.
// Called with c locked.
func (d *Daemon) initHealthMonitor(c *container.Container) {
	if c.Config.Healthcheck == nil {
		return
	}

	// This is needed in case we're auto-restarting
	d.stopHealthchecks(c)

	// Only create fresh health state when none exists yet; an existing
	// Health value (and its log/streak) is kept as-is.
	if c.State.Health == nil {
		h := &container.Health{}
		h.Status = types.Starting
		h.FailingStreak = 0
		c.State.Health = h
	}

	d.updateHealthMonitor(c)
}
|
261 |
+ |
|
262 |
+// Called when the container is being stopped (whether because the health check is |
|
263 |
+// failing or for any other reason). |
|
264 |
+func (d *Daemon) stopHealthchecks(c *container.Container) { |
|
265 |
+ h := c.State.Health |
|
266 |
+ if h != nil { |
|
267 |
+ h.CloseMonitorChannel() |
|
268 |
+ } |
|
269 |
+} |
|
270 |
+ |
|
271 |
+// Buffer up to maxOutputLen bytes. Further data is discarded. |
|
272 |
+type limitedBuffer struct { |
|
273 |
+ buf bytes.Buffer |
|
274 |
+ truncated bool // indicates that data has been lost |
|
275 |
+} |
|
276 |
+ |
|
277 |
+// Append to limitedBuffer while there is room. |
|
278 |
+func (b *limitedBuffer) Write(data []byte) (int, error) { |
|
279 |
+ bufLen := b.buf.Len() |
|
280 |
+ dataLen := len(data) |
|
281 |
+ keep := min(maxOutputLen-bufLen, dataLen) |
|
282 |
+ if keep > 0 { |
|
283 |
+ b.buf.Write(data[:keep]) |
|
284 |
+ } |
|
285 |
+ if keep < dataLen { |
|
286 |
+ b.truncated = true |
|
287 |
+ } |
|
288 |
+ return dataLen, nil |
|
289 |
+} |
|
290 |
+ |
|
291 |
+// The contents of the buffer, with "..." appended if it overflowed. |
|
292 |
+func (b *limitedBuffer) String() string { |
|
293 |
+ out := b.buf.String() |
|
294 |
+ if b.truncated { |
|
295 |
+ out = out + "..." |
|
296 |
+ } |
|
297 |
+ return out |
|
298 |
+} |
|
299 |
+ |
|
300 |
+// If configuredValue is zero, use defaultValue instead. |
|
301 |
+func timeoutWithDefault(configuredValue time.Duration, defaultValue time.Duration) time.Duration { |
|
302 |
+ if configuredValue == 0 { |
|
303 |
+ return defaultValue |
|
304 |
+ } |
|
305 |
+ return configuredValue |
|
306 |
+} |
|
307 |
+ |
|
308 |
// min returns the smaller of x and y.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}
0 | 314 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,112 @@ |
0 |
+package daemon |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "testing" |
|
4 |
+ "time" |
|
5 |
+ |
|
6 |
+ "github.com/docker/docker/container" |
|
7 |
+ "github.com/docker/docker/daemon/events" |
|
8 |
+ "github.com/docker/engine-api/types" |
|
9 |
+ containertypes "github.com/docker/engine-api/types/container" |
|
10 |
+ eventtypes "github.com/docker/engine-api/types/events" |
|
11 |
+) |
|
12 |
+ |
|
13 |
+func reset(c *container.Container) { |
|
14 |
+ c.State = &container.State{} |
|
15 |
+ c.State.Health = &container.Health{} |
|
16 |
+ c.State.Health.Status = types.Starting |
|
17 |
+} |
|
18 |
+ |
|
19 |
// TestHealthStates feeds synthetic probe results into handleProbeResult and
// verifies the resulting health-state transitions and the health_status
// events they emit. Probe exit codes: 0 = healthy, 1 = unhealthy,
// 2 = still starting (treated as a failure once out of "starting").
func TestHealthStates(t *testing.T) {
	e := events.New()
	_, l, _ := e.Subscribe()
	defer e.Evict(l)

	// expect blocks until the next daemon event arrives and asserts its
	// status string; gives up after one second.
	expect := func(expected string) {
		select {
		case event := <-l:
			ev := event.(eventtypes.Message)
			if ev.Status != expected {
				t.Errorf("Expecting event %#v, but got %#v\n", expected, ev.Status)
			}
		case <-time.After(1 * time.Second):
			t.Errorf("Expecting event %#v, but got nothing\n", expected)
		}
	}

	// Minimal container and daemon: only the fields handleProbeResult and
	// event logging touch are populated.
	c := &container.Container{
		CommonContainer: container.CommonContainer{
			ID:   "container_id",
			Name: "container_name",
			Config: &containertypes.Config{
				Image: "image_name",
			},
		},
	}
	daemon := &Daemon{
		EventsService: e,
	}

	// A single failure is enough to become unhealthy.
	c.Config.Healthcheck = &containertypes.HealthConfig{
		Retries: 1,
	}

	reset(c)

	// handleResult reports one probe run (zero duration) with the given
	// exit code, timestamped relative to the container start.
	handleResult := func(startTime time.Time, exitCode int) {
		handleProbeResult(daemon, c, &types.HealthcheckResult{
			Start:    startTime,
			End:      startTime,
			ExitCode: exitCode,
		})
	}

	// starting -> failed -> success -> failed

	handleResult(c.State.StartedAt.Add(1*time.Second), 1)
	expect("health_status: unhealthy")

	handleResult(c.State.StartedAt.Add(2*time.Second), 0)
	expect("health_status: healthy")

	handleResult(c.State.StartedAt.Add(3*time.Second), 1)
	expect("health_status: unhealthy")

	// starting -> starting -> starting ->
	// healthy -> starting (invalid transition)

	reset(c)

	// Exit code 2 keeps the container in "starting" and emits no event.
	handleResult(c.State.StartedAt.Add(20*time.Second), 2)
	handleResult(c.State.StartedAt.Add(40*time.Second), 2)
	if c.State.Health.Status != types.Starting {
		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
	}

	handleResult(c.State.StartedAt.Add(50*time.Second), 0)
	expect("health_status: healthy")
	// Once healthy, a "starting" result (2) counts as a failure instead.
	handleResult(c.State.StartedAt.Add(60*time.Second), 2)
	expect("health_status: unhealthy")

	// Test retries

	reset(c)
	c.Config.Healthcheck.Retries = 3

	// Two failures: still "starting", but the streak is counted.
	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
	handleResult(c.State.StartedAt.Add(40*time.Second), 1)
	if c.State.Health.Status != types.Starting {
		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
	}
	if c.State.Health.FailingStreak != 2 {
		t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
	}
	// Third consecutive failure reaches Retries and flips to unhealthy.
	handleResult(c.State.StartedAt.Add(60*time.Second), 1)
	expect("health_status: unhealthy")

	// A success resets the failing streak to zero.
	handleResult(c.State.StartedAt.Add(80*time.Second), 0)
	expect("health_status: healthy")
	if c.State.Health.FailingStreak != 0 {
		t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
	}
}
... | ... |
@@ -108,6 +108,15 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool) |
108 | 108 |
hostConfig.Links = append(hostConfig.Links, fmt.Sprintf("%s:%s", child.Name, linkAlias)) |
109 | 109 |
} |
110 | 110 |
|
111 |
+ var containerHealth *types.Health |
|
112 |
+ if container.State.Health != nil { |
|
113 |
+ containerHealth = &types.Health{ |
|
114 |
+ Status: container.State.Health.Status, |
|
115 |
+ FailingStreak: container.State.Health.FailingStreak, |
|
116 |
+ Log: append([]*types.HealthcheckResult{}, container.State.Health.Log...), |
|
117 |
+ } |
|
118 |
+ } |
|
119 |
+ |
|
111 | 120 |
containerState := &types.ContainerState{ |
112 | 121 |
Status: container.State.StateString(), |
113 | 122 |
Running: container.State.Running, |
... | ... |
@@ -120,6 +129,7 @@ func (daemon *Daemon) getInspectData(container *container.Container, size bool) |
120 | 120 |
Error: container.State.Error, |
121 | 121 |
StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), |
122 | 122 |
FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), |
123 |
+ Health: containerHealth, |
|
123 | 124 |
} |
124 | 125 |
|
125 | 126 |
contJSONBase := &types.ContainerJSONBase{ |
... | ... |
@@ -25,6 +25,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { |
25 | 25 |
if runtime.GOOS == "windows" { |
26 | 26 |
return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.") |
27 | 27 |
} |
28 |
+ daemon.updateHealthMonitor(c) |
|
28 | 29 |
daemon.LogContainerEvent(c, "oom") |
29 | 30 |
case libcontainerd.StateExit: |
30 | 31 |
c.Lock() |
... | ... |
@@ -35,6 +36,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { |
35 | 35 |
attributes := map[string]string{ |
36 | 36 |
"exitCode": strconv.Itoa(int(e.ExitCode)), |
37 | 37 |
} |
38 |
+ daemon.updateHealthMonitor(c) |
|
38 | 39 |
daemon.LogContainerEventWithAttributes(c, "die", attributes) |
39 | 40 |
daemon.Cleanup(c) |
40 | 41 |
// FIXME: here is race condition between two RUN instructions in Dockerfile |
... | ... |
@@ -54,6 +56,7 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { |
54 | 54 |
"exitCode": strconv.Itoa(int(e.ExitCode)), |
55 | 55 |
} |
56 | 56 |
daemon.LogContainerEventWithAttributes(c, "die", attributes) |
57 |
+ daemon.updateHealthMonitor(c) |
|
57 | 58 |
return c.ToDisk() |
58 | 59 |
case libcontainerd.StateExitProcess: |
59 | 60 |
c.Lock() |
... | ... |
@@ -74,18 +77,24 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { |
74 | 74 |
logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e) |
75 | 75 |
} |
76 | 76 |
case libcontainerd.StateStart, libcontainerd.StateRestore: |
77 |
+ // Container is already locked in this case |
|
77 | 78 |
c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart) |
78 | 79 |
c.HasBeenManuallyStopped = false |
79 | 80 |
if err := c.ToDisk(); err != nil { |
80 | 81 |
c.Reset(false) |
81 | 82 |
return err |
82 | 83 |
} |
84 |
+ daemon.initHealthMonitor(c) |
|
83 | 85 |
daemon.LogContainerEvent(c, "start") |
84 | 86 |
case libcontainerd.StatePause: |
87 |
+ // Container is already locked in this case |
|
85 | 88 |
c.Paused = true |
89 |
+ daemon.updateHealthMonitor(c) |
|
86 | 90 |
daemon.LogContainerEvent(c, "pause") |
87 | 91 |
case libcontainerd.StateResume: |
92 |
+ // Container is already locked in this case |
|
88 | 93 |
c.Paused = false |
94 |
+ daemon.updateHealthMonitor(c) |
|
89 | 95 |
daemon.LogContainerEvent(c, "unpause") |
90 | 96 |
} |
91 | 97 |
|
... | ... |
@@ -41,6 +41,8 @@ func (daemon *Daemon) containerStop(container *container.Container, seconds int) |
41 | 41 |
return nil |
42 | 42 |
} |
43 | 43 |
|
44 |
+ daemon.stopHealthchecks(container) |
|
45 |
+ |
|
44 | 46 |
stopSignal := container.StopSignal() |
45 | 47 |
// 1. Send a stop signal |
46 | 48 |
if err := daemon.killPossiblyDeadProcess(container, stopSignal); err != nil { |
... | ... |
@@ -1470,6 +1470,73 @@ The `STOPSIGNAL` instruction sets the system call signal that will be sent to th |
1470 | 1470 |
This signal can be a valid unsigned number that matches a position in the kernel's syscall table, for instance 9, |
1471 | 1471 |
or a signal name in the format SIGNAME, for instance SIGKILL. |
1472 | 1472 |
|
1473 |
+## HEALTHCHECK |
|
1474 |
+ |
|
1475 |
+The `HEALTHCHECK` instruction has two forms: |
|
1476 |
+ |
|
1477 |
+* `HEALTHCHECK [OPTIONS] CMD command` (check container health by running a command inside the container) |
|
1478 |
+* `HEALTHCHECK NONE` (disable any healthcheck inherited from the base image) |
|
1479 |
+ |
|
1480 |
+The `HEALTHCHECK` instruction tells Docker how to test a container to check that |
|
1481 |
+it is still working. This can detect cases such as a web server that is stuck in |
|
1482 |
+an infinite loop and unable to handle new connections, even though the server |
|
1483 |
+process is still running. |
|
1484 |
+ |
|
1485 |
+When a container has a healthcheck specified, it has a _health status_ in |
|
1486 |
+addition to its normal status. This status is initially `starting`. Whenever a |
|
1487 |
+health check passes, it becomes `healthy` (whatever state it was previously in). |
|
1488 |
+After a certain number of consecutive failures, it becomes `unhealthy`. |
|
1489 |
+ |
|
1490 |
+The options that can appear before `CMD` are: |
|
1491 |
+ |
|
1492 |
+* `--interval=DURATION` (default: `30s`) |
|
1493 |
+* `--timeout=DURATION` (default: `30s`) |
|
1494 |
+* `--retries=N` (default: `1`) |
|
1495 |
+ |
|
1496 |
+The health check will first run **interval** seconds after the container is |
|
1497 |
+started, and then again **interval** seconds after each previous check completes. |
|
1498 |
+ |
|
1499 |
+If a single run of the check takes longer than **timeout** seconds then the check |
|
1500 |
+is considered to have failed. |
|
1501 |
+ |
|
1502 |
+It takes **retries** consecutive failures of the health check for the container |
|
1503 |
+to be considered `unhealthy`. |
|
1504 |
+ |
|
1505 |
+There can only be one `HEALTHCHECK` instruction in a Dockerfile. If you list |
|
1506 |
+more than one then only the last `HEALTHCHECK` will take effect. |
|
1507 |
+ |
|
1508 |
+The command after the `CMD` keyword can be either a shell command (e.g. `HEALTHCHECK |
|
1509 |
+CMD /bin/check-running`) or an _exec_ array (as with other Dockerfile commands; |
|
1510 |
+see e.g. `ENTRYPOINT` for details). |
|
1511 |
+ |
|
1512 |
+The command's exit status indicates the health status of the container. |
|
1513 |
+The possible values are: |
|
1514 |
+ |
|
1515 |
+- 0: success - the container is healthy and ready for use |
|
1516 |
+- 1: unhealthy - the container is not working correctly |
|
1517 |
+- 2: starting - the container is not ready for use yet, but is working correctly |
|
1518 |
+ |
|
1519 |
+If the probe returns 2 ("starting") when the container has already moved out of the |
|
1520 |
+"starting" state then it is treated as "unhealthy" instead. |
|
1521 |
+ |
|
1522 |
+For example, to check every five minutes or so that a web-server is able to |
|
1523 |
+serve the site's main page within three seconds: |
|
1524 |
+ |
|
1525 |
+ HEALTHCHECK --interval=5m --timeout=3s \ |
|
1526 |
+ CMD curl -f http://localhost/ || exit 1 |
|
1527 |
+ |
|
1528 |
+To help debug failing probes, any output text (UTF-8 encoded) that the command writes |
|
1529 |
+on stdout or stderr will be stored in the health status and can be queried with |
|
1530 |
+`docker inspect`. Such output should be kept short (only the first 4096 bytes |
|
1531 |
+are stored currently). |
|
1532 |
+ |
|
1533 |
+When the health status of a container changes, a `health_status` event is |
|
1534 |
+generated with the new status. |
|
1535 |
+ |
|
1536 |
+The `HEALTHCHECK` feature was added in Docker 1.12. |
|
1537 |
+ |
|
1538 |
+ |
|
1539 |
+ |
|
1473 | 1540 |
## Dockerfile examples |
1474 | 1541 |
|
1475 | 1542 |
Below you can see some examples of Dockerfile syntax. If you're interested in |
... | ... |
@@ -1250,6 +1250,7 @@ Dockerfile instruction and how the operator can override that setting. |
1250 | 1250 |
#entrypoint-default-command-to-execute-at-runtime) |
1251 | 1251 |
- [EXPOSE (Incoming Ports)](#expose-incoming-ports) |
1252 | 1252 |
- [ENV (Environment Variables)](#env-environment-variables) |
1253 |
+ - [HEALTHCHECK](#healthcheck) |
|
1253 | 1254 |
- [VOLUME (Shared Filesystems)](#volume-shared-filesystems) |
1254 | 1255 |
- [USER](#user) |
1255 | 1256 |
- [WORKDIR](#workdir) |
... | ... |
@@ -1398,6 +1399,65 @@ above, or already defined by the developer with a Dockerfile `ENV`: |
1398 | 1398 |
|
1399 | 1399 |
Similarly the operator can set the **hostname** with `-h`. |
1400 | 1400 |
|
1401 |
+### HEALTHCHECK |
|
1402 |
+ |
|
1403 |
+``` |
|
1404 |
+ --health-cmd Command to run to check health |
|
1405 |
+ --health-interval Time between running the check |
|
1406 |
+ --health-retries Consecutive failures needed to report unhealthy |
|
1407 |
+ --health-timeout Maximum time to allow one check to run |
|
1408 |
+ --no-healthcheck Disable any container-specified HEALTHCHECK |
|
1409 |
+``` |
|
1410 |
+ |
|
1411 |
+Example: |
|
1412 |
+ |
|
1413 |
+ $ docker run --name=test -d \ |
|
1414 |
+ --health-cmd='stat /etc/passwd || exit 1' \ |
|
1415 |
+ --health-interval=2s \ |
|
1416 |
+ busybox sleep 1d |
|
1417 |
+ $ sleep 2; docker inspect --format='{{.State.Health.Status}}' test |
|
1418 |
+ healthy |
|
1419 |
+ $ docker exec test rm /etc/passwd |
|
1420 |
+ $ sleep 2; docker inspect --format='{{json .State.Health}}' test |
|
1421 |
+ { |
|
1422 |
+ "Status": "unhealthy", |
|
1423 |
+ "FailingStreak": 3, |
|
1424 |
+ "Log": [ |
|
1425 |
+ { |
|
1426 |
+ "Start": "2016-05-25T17:22:04.635478668Z", |
|
1427 |
+ "End": "2016-05-25T17:22:04.7272552Z", |
|
1428 |
+ "ExitCode": 0, |
|
1429 |
+ "Output": " File: /etc/passwd\n Size: 334 \tBlocks: 8 IO Block: 4096 regular file\nDevice: 32h/50d\tInode: 12 Links: 1\nAccess: (0664/-rw-rw-r--) Uid: ( 0/ root) Gid: ( 0/ root)\nAccess: 2015-12-05 22:05:32.000000000\nModify: 2015..." |
|
1430 |
+ }, |
|
1431 |
+ { |
|
1432 |
+ "Start": "2016-05-25T17:22:06.732900633Z", |
|
1433 |
+ "End": "2016-05-25T17:22:06.822168935Z", |
|
1434 |
+ "ExitCode": 0, |
|
1435 |
+ "Output": " File: /etc/passwd\n Size: 334 \tBlocks: 8 IO Block: 4096 regular file\nDevice: 32h/50d\tInode: 12 Links: 1\nAccess: (0664/-rw-rw-r--) Uid: ( 0/ root) Gid: ( 0/ root)\nAccess: 2015-12-05 22:05:32.000000000\nModify: 2015..." |
|
1436 |
+ }, |
|
1437 |
+ { |
|
1438 |
+ "Start": "2016-05-25T17:22:08.823956535Z", |
|
1439 |
+ "End": "2016-05-25T17:22:08.897359124Z", |
|
1440 |
+ "ExitCode": 1, |
|
1441 |
+ "Output": "stat: can't stat '/etc/passwd': No such file or directory\n" |
|
1442 |
+ }, |
|
1443 |
+ { |
|
1444 |
+ "Start": "2016-05-25T17:22:10.898802931Z", |
|
1445 |
+ "End": "2016-05-25T17:22:10.969631866Z", |
|
1446 |
+ "ExitCode": 1, |
|
1447 |
+ "Output": "stat: can't stat '/etc/passwd': No such file or directory\n" |
|
1448 |
+ }, |
|
1449 |
+ { |
|
1450 |
+ "Start": "2016-05-25T17:22:12.971033523Z", |
|
1451 |
+ "End": "2016-05-25T17:22:13.082015516Z", |
|
1452 |
+ "ExitCode": 1, |
|
1453 |
+ "Output": "stat: can't stat '/etc/passwd': No such file or directory\n" |
|
1454 |
+ } |
|
1455 |
+ ] |
|
1456 |
+ } |
|
1457 |
+ |
|
1458 |
+The health status is also displayed in the `docker ps` output. |
|
1459 |
+ |
|
1401 | 1460 |
### TMPFS (mount tmpfs filesystems) |
1402 | 1461 |
|
1403 | 1462 |
```bash |
1404 | 1463 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,154 @@ |
0 |
+package main |
|
1 |
+ |
|
2 |
+import ( |
|
3 |
+ "encoding/json" |
|
4 |
+ "github.com/docker/docker/pkg/integration/checker" |
|
5 |
+ "github.com/docker/engine-api/types" |
|
6 |
+ "github.com/go-check/check" |
|
7 |
+ "strconv" |
|
8 |
+ "strings" |
|
9 |
+ "time" |
|
10 |
+) |
|
11 |
+ |
|
12 |
+func waitForStatus(c *check.C, name string, prev string, expected string) { |
|
13 |
+ prev = prev + "\n" |
|
14 |
+ expected = expected + "\n" |
|
15 |
+ for { |
|
16 |
+ out, _ := dockerCmd(c, "inspect", "--format={{.State.Status}}", name) |
|
17 |
+ if out == expected { |
|
18 |
+ return |
|
19 |
+ } |
|
20 |
+ c.Check(out, checker.Equals, prev) |
|
21 |
+ if out != prev { |
|
22 |
+ return |
|
23 |
+ } |
|
24 |
+ time.Sleep(100 * time.Millisecond) |
|
25 |
+ } |
|
26 |
+} |
|
27 |
+ |
|
28 |
+func waitForHealthStatus(c *check.C, name string, prev string, expected string) { |
|
29 |
+ prev = prev + "\n" |
|
30 |
+ expected = expected + "\n" |
|
31 |
+ for { |
|
32 |
+ out, _ := dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name) |
|
33 |
+ if out == expected { |
|
34 |
+ return |
|
35 |
+ } |
|
36 |
+ c.Check(out, checker.Equals, prev) |
|
37 |
+ if out != prev { |
|
38 |
+ return |
|
39 |
+ } |
|
40 |
+ time.Sleep(100 * time.Millisecond) |
|
41 |
+ } |
|
42 |
+} |
|
43 |
+ |
|
44 |
+func getHealth(c *check.C, name string) *types.Health { |
|
45 |
+ out, _ := dockerCmd(c, "inspect", "--format={{json .State.Health}}", name) |
|
46 |
+ var health types.Health |
|
47 |
+ err := json.Unmarshal([]byte(out), &health) |
|
48 |
+ c.Check(err, checker.Equals, nil) |
|
49 |
+ return &health |
|
50 |
+} |
|
51 |
+ |
|
52 |
// TestHealth is an end-to-end check of the HEALTHCHECK feature: it builds an
// image with a probe, watches the container move between health states as the
// probe passes and fails, disables the check via --no-healthcheck and
// HEALTHCHECK NONE, re-enables it from the CLI, and exercises --health-timeout.
func (s *DockerSuite) TestHealth(c *check.C) {
	testRequires(c, DaemonIsLinux) // busybox doesn't work on Windows

	// The probe ("cat /status") succeeds while /status exists; removing the
	// file later makes it fail.
	imageName := "testhealth"
	_, err := buildImage(imageName,
		`FROM busybox
		RUN echo OK > /status
		CMD ["/bin/sleep", "120"]
		STOPSIGNAL SIGKILL
		HEALTHCHECK --interval=1s --timeout=30s \
		  CMD cat /status`,
		true)

	c.Check(err, check.IsNil)

	// No health status before starting
	name := "test_health"
	dockerCmd(c, "create", "--name", name, imageName)
	out, _ := dockerCmd(c, "ps", "-a", "--format={{.Status}}")
	c.Check(out, checker.Equals, "Created\n")

	// Inspect the options
	out, _ = dockerCmd(c, "inspect",
		"--format='timeout={{.Config.Healthcheck.Timeout}} "+
			"interval={{.Config.Healthcheck.Interval}} "+
			"retries={{.Config.Healthcheck.Retries}} "+
			"test={{.Config.Healthcheck.Test}}'", name)
	c.Check(out, checker.Equals, "timeout=30s interval=1s retries=0 test=[CMD-SHELL cat /status]\n")

	// Start
	dockerCmd(c, "start", name)
	waitForHealthStatus(c, name, "starting", "healthy")

	// Make it fail
	dockerCmd(c, "exec", name, "rm", "/status")
	waitForHealthStatus(c, name, "healthy", "unhealthy")

	// Inspect the status
	out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", name)
	c.Check(out, checker.Equals, "unhealthy\n")

	// Make it healthy again
	dockerCmd(c, "exec", name, "touch", "/status")
	waitForHealthStatus(c, name, "unhealthy", "healthy")

	// Remove container
	dockerCmd(c, "rm", "-f", name)

	// Disable the check from the CLI
	out, _ = dockerCmd(c, "create", "--name=noh", "--no-healthcheck", imageName)
	out, _ = dockerCmd(c, "inspect", "--format={{.Config.Healthcheck.Test}}", "noh")
	c.Check(out, checker.Equals, "[NONE]\n")
	dockerCmd(c, "rm", "noh")

	// Disable the check with a new build
	_, err = buildImage("no_healthcheck",
		`FROM testhealth
		HEALTHCHECK NONE`, true)
	c.Check(err, check.IsNil)

	out, _ = dockerCmd(c, "inspect", "--format={{.ContainerConfig.Healthcheck.Test}}", "no_healthcheck")
	c.Check(out, checker.Equals, "[NONE]\n")

	// Enable the checks from the CLI
	_, _ = dockerCmd(c, "run", "-d", "--name=fatal_healthcheck",
		"--health-interval=0.5s",
		"--health-retries=3",
		"--health-cmd=cat /status",
		"no_healthcheck")
	waitForHealthStatus(c, "fatal_healthcheck", "starting", "healthy")
	health := getHealth(c, "fatal_healthcheck")
	c.Check(health.Status, checker.Equals, "healthy")
	c.Check(health.FailingStreak, checker.Equals, 0)
	// The most recent probe run should show the file's contents on stdout.
	last := health.Log[len(health.Log)-1]
	c.Check(last.ExitCode, checker.Equals, 0)
	c.Check(last.Output, checker.Equals, "OK\n")

	// Fail the check, which should now make it exit
	dockerCmd(c, "exec", "fatal_healthcheck", "rm", "/status")
	waitForStatus(c, "fatal_healthcheck", "running", "exited")

	out, _ = dockerCmd(c, "inspect", "--format={{.State.Health.Status}}", "fatal_healthcheck")
	c.Check(out, checker.Equals, "unhealthy\n")
	// --health-retries=3 means at least three consecutive failures were seen.
	failsStr, _ := dockerCmd(c, "inspect", "--format={{.State.Health.FailingStreak}}", "fatal_healthcheck")
	fails, err := strconv.Atoi(strings.TrimSpace(failsStr))
	c.Check(err, check.IsNil)
	c.Check(fails >= 3, checker.Equals, true)
	dockerCmd(c, "rm", "-f", "fatal_healthcheck")

	// Check timeout
	// Note: if the interval is too small, it seems that Docker spends all its time running health
	// checks and never gets around to killing it.
	// The probe ("sleep 5m") always exceeds the 1ms timeout; the log entry
	// below shows exit code -1 with the timeout message for such runs.
	_, _ = dockerCmd(c, "run", "-d", "--name=test",
		"--health-interval=1s", "--health-cmd=sleep 5m", "--health-timeout=1ms", imageName)
	waitForHealthStatus(c, "test", "starting", "unhealthy")
	health = getHealth(c, "test")
	last = health.Log[len(health.Log)-1]
	c.Check(health.Status, checker.Equals, "unhealthy")
	c.Check(last.ExitCode, checker.Equals, -1)
	c.Check(last.Output, checker.Equals, "Health check exceeded timeout (1ms)")
	dockerCmd(c, "rm", "-f", "test")
}
... | ... |
@@ -190,6 +190,17 @@ func (clnt *client) Signal(containerID string, sig int) error { |
190 | 190 |
return err |
191 | 191 |
} |
192 | 192 |
|
193 |
// SignalProcess sends signal sig to a process inside the container identified
// by containerID, via the containerd Signal API. pid is the string process
// identifier passed through to containerd's SignalRequest.Pid (presumably a
// process friendly name rather than a numeric OS pid — confirm against the
// containerd API).
func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
	// Serialize operations on this container, as the other client methods do.
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
		Id:     containerID,
		Pid:    pid,
		Signal: uint32(sig),
	})
	return err
}
|
203 |
+ |
|
193 | 204 |
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { |
194 | 205 |
clnt.lock(containerID) |
195 | 206 |
defer clnt.unlock(containerID) |
... | ... |
@@ -304,6 +304,25 @@ func (clnt *client) Signal(containerID string, sig int) error { |
304 | 304 |
return nil |
305 | 305 |
} |
306 | 306 |
|
307 |
// While Linux has support for the full range of signals, signals aren't really implemented on Windows.
// We try to terminate the specified process whatever signal is requested.
func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	cont, err := clnt.getContainer(containerID)
	if err != nil {
		return err
	}

	// Find the process by its friendly name and terminate it through HCS.
	// Note: sig is intentionally unused here — see the comment above.
	for _, p := range cont.processes {
		if p.friendlyName == processFriendlyName {
			return hcsshim.TerminateProcessInComputeSystem(containerID, p.systemPid)
		}
	}

	return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID)
}
|
325 |
+ |
|
307 | 326 |
// Resize handles a CLI event to resize an interactive docker run or docker exec |
308 | 327 |
// window. |
309 | 328 |
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { |
... | ... |
@@ -34,6 +34,7 @@ type Backend interface { |
34 | 34 |
type Client interface { |
35 | 35 |
Create(containerID string, spec Spec, options ...CreateOption) error |
36 | 36 |
Signal(containerID string, sig int) error |
37 |
+ SignalProcess(containerID string, processFriendlyName string, sig int) error |
|
37 | 38 |
AddProcess(containerID, processFriendlyName string, process Process) error |
38 | 39 |
Resize(containerID, processFriendlyName string, width, height int) error |
39 | 40 |
Pause(containerID string) error |
... | ... |
@@ -100,6 +100,12 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host |
100 | 100 |
flStopSignal = cmd.String([]string{"-stop-signal"}, signal.DefaultStopSignal, fmt.Sprintf("Signal to stop a container, %v by default", signal.DefaultStopSignal)) |
101 | 101 |
flIsolation = cmd.String([]string{"-isolation"}, "", "Container isolation technology") |
102 | 102 |
flShmSize = cmd.String([]string{"-shm-size"}, "", "Size of /dev/shm, default value is 64MB") |
103 |
+ // Healthcheck |
|
104 |
+ flNoHealthcheck = cmd.Bool([]string{"-no-healthcheck"}, false, "Disable any container-specified HEALTHCHECK") |
|
105 |
+ flHealthCmd = cmd.String([]string{"-health-cmd"}, "", "Command to run to check health") |
|
106 |
+ flHealthInterval = cmd.Duration([]string{"-health-interval"}, 0, "Time between running the check") |
|
107 |
+ flHealthTimeout = cmd.Duration([]string{"-health-timeout"}, 0, "Maximum time to allow one check to run") |
|
108 |
+ flHealthRetries = cmd.Int([]string{"-health-retries"}, 0, "Consecutive failures needed to report unhealthy") |
|
103 | 109 |
) |
104 | 110 |
|
105 | 111 |
cmd.Var(&flAttach, []string{"a", "-attach"}, "Attach to STDIN, STDOUT or STDERR") |
... | ... |
@@ -351,6 +357,39 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host |
351 | 351 |
return nil, nil, nil, cmd, err |
352 | 352 |
} |
353 | 353 |
|
354 |
+ // Healthcheck |
|
355 |
+ var healthConfig *container.HealthConfig |
|
356 |
+ haveHealthSettings := *flHealthCmd != "" || |
|
357 |
+ *flHealthInterval != 0 || |
|
358 |
+ *flHealthTimeout != 0 || |
|
359 |
+ *flHealthRetries != 0 |
|
360 |
+ if *flNoHealthcheck { |
|
361 |
+ if haveHealthSettings { |
|
362 |
+ return nil, nil, nil, cmd, fmt.Errorf("--no-healthcheck conflicts with --health-* options") |
|
363 |
+ } |
|
364 |
+ test := strslice.StrSlice{"NONE"} |
|
365 |
+ healthConfig = &container.HealthConfig{Test: test} |
|
366 |
+ } else if haveHealthSettings { |
|
367 |
+ var probe strslice.StrSlice |
|
368 |
+ if *flHealthCmd != "" { |
|
369 |
+ args := []string{"CMD-SHELL", *flHealthCmd} |
|
370 |
+ probe = strslice.StrSlice(args) |
|
371 |
+ } |
|
372 |
+ if *flHealthInterval < 0 { |
|
373 |
+ return nil, nil, nil, cmd, fmt.Errorf("--health-interval cannot be negative") |
|
374 |
+ } |
|
375 |
+ if *flHealthTimeout < 0 { |
|
376 |
+ return nil, nil, nil, cmd, fmt.Errorf("--health-timeout cannot be negative") |
|
377 |
+ } |
|
378 |
+ |
|
379 |
+ healthConfig = &container.HealthConfig{ |
|
380 |
+ Test: probe, |
|
381 |
+ Interval: *flHealthInterval, |
|
382 |
+ Timeout: *flHealthTimeout, |
|
383 |
+ Retries: *flHealthRetries, |
|
384 |
+ } |
|
385 |
+ } |
|
386 |
+ |
|
354 | 387 |
resources := container.Resources{ |
355 | 388 |
CgroupParent: *flCgroupParent, |
356 | 389 |
Memory: flMemory, |
... | ... |
@@ -399,6 +438,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*container.Config, *container.Host |
399 | 399 |
Entrypoint: entrypoint, |
400 | 400 |
WorkingDir: *flWorkingDir, |
401 | 401 |
Labels: ConvertKVStringsToMap(labels), |
402 |
+ Healthcheck: healthConfig, |
|
402 | 403 |
} |
403 | 404 |
if cmd.IsSet("-stop-signal") { |
404 | 405 |
config.StopSignal = *flStopSignal |
... | ... |
@@ -9,6 +9,7 @@ import ( |
9 | 9 |
"runtime" |
10 | 10 |
"strings" |
11 | 11 |
"testing" |
12 |
+ "time" |
|
12 | 13 |
|
13 | 14 |
flag "github.com/docker/docker/pkg/mflag" |
14 | 15 |
"github.com/docker/docker/runconfig" |
... | ... |
@@ -584,6 +585,45 @@ func TestParseRestartPolicy(t *testing.T) { |
584 | 584 |
} |
585 | 585 |
} |
586 | 586 |
|
587 |
+func TestParseHealth(t *testing.T) { |
|
588 |
+ checkOk := func(args ...string) *container.HealthConfig { |
|
589 |
+ config, _, _, _, err := parseRun(args) |
|
590 |
+ if err != nil { |
|
591 |
+ t.Fatalf("%#v: %v", args, err) |
|
592 |
+ } |
|
593 |
+ return config.Healthcheck |
|
594 |
+ } |
|
595 |
+ checkError := func(expected string, args ...string) { |
|
596 |
+ config, _, _, _, err := parseRun(args) |
|
597 |
+ if err == nil { |
|
598 |
+ t.Fatalf("Expected error, but got %#v", config) |
|
599 |
+ } |
|
600 |
+ if err.Error() != expected { |
|
601 |
+ t.Fatalf("Expected %#v, got %#v", expected, err) |
|
602 |
+ } |
|
603 |
+ } |
|
604 |
+ health := checkOk("--no-healthcheck", "img", "cmd") |
|
605 |
+ if health == nil || len(health.Test) != 1 || health.Test[0] != "NONE" { |
|
606 |
+ t.Fatalf("--no-healthcheck failed: %#v", health) |
|
607 |
+ } |
|
608 |
+ |
|
609 |
+ health = checkOk("--health-cmd=/check.sh -q", "img", "cmd") |
|
610 |
+ if len(health.Test) != 2 || health.Test[0] != "CMD-SHELL" || health.Test[1] != "/check.sh -q" { |
|
611 |
+ t.Fatalf("--health-cmd: got %#v", health.Test) |
|
612 |
+ } |
|
613 |
+ if health.Timeout != 0 { |
|
614 |
+ t.Fatalf("--health-cmd: timeout = %f", health.Timeout) |
|
615 |
+ } |
|
616 |
+ |
|
617 |
+ checkError("--no-healthcheck conflicts with --health-* options", |
|
618 |
+ "--no-healthcheck", "--health-cmd=/check.sh -q", "img", "cmd") |
|
619 |
+ |
|
620 |
+ health = checkOk("--health-timeout=2s", "--health-retries=3", "--health-interval=4.5s", "img", "cmd") |
|
621 |
+ if health.Timeout != 2*time.Second || health.Retries != 3 || health.Interval != 4500*time.Millisecond { |
|
622 |
+ t.Fatalf("--health-*: got %#v", health) |
|
623 |
+ } |
|
624 |
+} |
|
625 |
+ |
|
587 | 626 |
func TestParseLoggingOpts(t *testing.T) { |
588 | 627 |
// logging opts ko |
589 | 628 |
if _, _, _, _, err := parseRun([]string{"--log-driver=none", "--log-opt=anything", "img", "cmd"}); err == nil || err.Error() != "invalid logging opts for driver none" { |