Browse code

Add init process for zombie fighting

This adds a small C binary for fighting zombies. It is mounted under
`/dev/init` and is prepended to the args specified by the user. You
enable it via a daemon flag, `dockerd --init`, as it is disabled by
default for backwards compatibility.

You can also override the daemon option or specify this on a per
container basis with `docker run --init=true|false`.

You can test this by running a process like the one below as pid 1 in a
container and watching for the extra zombie that appears in the container
while it is running.

```c

int main(int argc, char ** argv) {
pid_t pid = fork();
if (pid == 0) {
pid = fork();
if (pid == 0) {
exit(0);
}
sleep(3);
exit(0);
}
printf("got pid %d and exited\n", pid);
sleep(20);
}
```

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>

Michael Crosby authored on 2016/06/28 06:38:47
Showing 22 changed files
... ...
@@ -255,6 +255,16 @@ RUN set -x \
255 255
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
256 256
 	&& rm -rf "$GOPATH"
257 257
 
258
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
259
+RUN set -x \
260
+    && export GOPATH="$(mktemp -d)" \
261
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
262
+	&& cd "$GOPATH/grimes" \
263
+	&& git checkout -q "$GRIMES_COMMIT" \
264
+	&& make \
265
+	&& cp init /usr/local/bin/docker-init \
266
+	&& rm -rf "$GOPATH"
267
+
258 268
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
259 269
 ENTRYPOINT ["hack/dind"]
260 270
 
... ...
@@ -198,6 +198,16 @@ RUN set -x \
198 198
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
199 199
 	&& rm -rf "$GOPATH"
200 200
 
201
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
202
+RUN set -x \
203
+    && export GOPATH="$(mktemp -d)" \
204
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
205
+	&& cd "$GOPATH/grimes" \
206
+	&& git checkout -q "$GRIMES_COMMIT" \
207
+	&& make \
208
+	&& cp init /usr/local/bin/docker-init \
209
+	&& rm -rf "$GOPATH"
210
+
201 211
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
202 212
 ENTRYPOINT ["hack/dind"]
203 213
 
... ...
@@ -196,6 +196,16 @@ RUN set -x \
196 196
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
197 197
 	&& rm -rf "$GOPATH"
198 198
 
199
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
200
+RUN set -x \
201
+    && export GOPATH="$(mktemp -d)" \
202
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
203
+	&& cd "$GOPATH/grimes" \
204
+	&& git checkout -q "$GRIMES_COMMIT" \
205
+	&& make \
206
+	&& cp init /usr/local/bin/docker-init \
207
+	&& rm -rf "$GOPATH"
208
+
199 209
 ENTRYPOINT ["hack/dind"]
200 210
 
201 211
 # Upload docker source
... ...
@@ -216,6 +216,16 @@ RUN set -x \
216 216
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
217 217
 	&& rm -rf "$GOPATH"
218 218
 
219
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
220
+RUN set -x \
221
+    && export GOPATH="$(mktemp -d)" \
222
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
223
+	&& cd "$GOPATH/grimes" \
224
+	&& git checkout -q "$GRIMES_COMMIT" \
225
+	&& make \
226
+	&& cp init /usr/local/bin/docker-init \
227
+	&& rm -rf "$GOPATH"
228
+
219 229
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
220 230
 ENTRYPOINT ["hack/dind"]
221 231
 
... ...
@@ -208,6 +208,16 @@ RUN set -x \
208 208
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
209 209
 	&& rm -rf "$GOPATH"
210 210
 
211
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
212
+RUN set -x \
213
+    && export GOPATH="$(mktemp -d)" \
214
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
215
+	&& cd "$GOPATH/grimes" \
216
+	&& git checkout -q "$GRIMES_COMMIT" \
217
+	&& make \
218
+	&& cp init /usr/local/bin/docker-init \
219
+	&& rm -rf "$GOPATH"
220
+
211 221
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
212 222
 ENTRYPOINT ["hack/dind"]
213 223
 
... ...
@@ -80,6 +80,16 @@ RUN set -x \
80 80
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
81 81
 	&& rm -rf "$GOPATH"
82 82
 
83
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
84
+RUN set -x \
85
+    && export GOPATH="$(mktemp -d)" \
86
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
87
+	&& cd "$GOPATH/grimes" \
88
+	&& git checkout -q "$GRIMES_COMMIT" \
89
+	&& make \
90
+	&& cp init /usr/local/bin/docker-init \
91
+	&& rm -rf "$GOPATH"
92
+
83 93
 ENV AUTO_GOPATH 1
84 94
 WORKDIR /usr/src/docker
85 95
 COPY . /usr/src/docker
... ...
@@ -321,6 +321,9 @@ type HostConfig struct {
321 321
 
322 322
 	// Mounts specs used by the container
323 323
 	Mounts []mount.Mount `json:",omitempty"`
324
+
325
+	// Run a custom init inside the container, if null, use the daemon's configured settings
326
+	Init *bool `json:",omitempty"`
324 327
 }
325 328
 
326 329
 // Box specifies height and width dimensions. Used for sizing of a console.
... ...
@@ -35,6 +35,7 @@ type Config struct {
35 35
 	Runtimes             map[string]types.Runtime `json:"runtimes,omitempty"`
36 36
 	DefaultRuntime       string                   `json:"default-runtime,omitempty"`
37 37
 	OOMScoreAdjust       int                      `json:"oom-score-adjust,omitempty"`
38
+	Init                 bool                     `json:"init,omitempty"`
38 39
 }
39 40
 
40 41
 // bridgeConfig stores all the bridge driver specific
... ...
@@ -91,6 +92,7 @@ func (config *Config) InstallFlags(flags *pflag.FlagSet) {
91 91
 	flags.Var(runconfigopts.NewNamedRuntimeOpt("runtimes", &config.Runtimes, stockRuntimeName), "add-runtime", "Register an additional OCI compatible runtime")
92 92
 	flags.StringVar(&config.DefaultRuntime, "default-runtime", stockRuntimeName, "Default OCI runtime for containers")
93 93
 	flags.IntVar(&config.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon")
94
+	flags.BoolVar(&config.Init, "init", false, "Run an init in the container to forward signals and reap processes")
94 95
 
95 96
 	config.attachExperimentalFlags(flags)
96 97
 }
... ...
@@ -4,6 +4,7 @@ import (
4 4
 	"fmt"
5 5
 	"io"
6 6
 	"os"
7
+	"os/exec"
7 8
 	"path/filepath"
8 9
 	"sort"
9 10
 	"strconv"
... ...
@@ -585,6 +586,26 @@ func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container)
585 585
 		cwd = "/"
586 586
 	}
587 587
 	s.Process.Args = append([]string{c.Path}, c.Args...)
588
+
589
+	// only add the custom init if it is specified and the container is running in its
590
+	// own private pid namespace.  It does not make sense to add if it is running in the
591
+	// host namespace or another container's pid namespace where we already have an init
592
+	if c.HostConfig.PidMode.IsPrivate() {
593
+		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
594
+			(c.HostConfig.Init == nil && daemon.configStore.Init) {
595
+			s.Process.Args = append([]string{"/dev/init", c.Path}, c.Args...)
596
+			path, err := exec.LookPath("docker-init")
597
+			if err != nil {
598
+				return err
599
+			}
600
+			s.Mounts = append(s.Mounts, specs.Mount{
601
+				Destination: "/dev/init",
602
+				Type:        "bind",
603
+				Source:      path,
604
+				Options:     []string{"bind", "ro"},
605
+			})
606
+		}
607
+	}
588 608
 	s.Process.Cwd = cwd
589 609
 	s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
590 610
 	s.Process.Terminal = c.Config.Tty
... ...
@@ -48,6 +48,7 @@ Options:
48 48
       -H, --host=[]                          Daemon socket(s) to connect to
49 49
       --help                                 Print usage
50 50
       --icc=true                             Enable inter-container communication
51
+      --init                                 Run an init inside containers to forward signals and reap processes
51 52
       --insecure-registry=[]                 Enable insecure registry communication
52 53
       --ip=0.0.0.0                           Default IP when binding container ports
53 54
       --ip-forward=true                      Enable net.ipv4.ip_forward
... ...
@@ -1140,6 +1141,7 @@ This is a full example of the allowed configuration options on Linux:
1140 1140
 	"group": "",
1141 1141
 	"cgroup-parent": "",
1142 1142
 	"default-ulimits": {},
1143
+	"init": false,
1143 1144
 	"ipv6": false,
1144 1145
 	"iptables": false,
1145 1146
 	"ip-forward": false,
... ...
@@ -255,7 +255,7 @@ bundle() {
255 255
 	source "$SCRIPTDIR/make/$bundle" "$@"
256 256
 }
257 257
 
258
-copy_containerd() {
258
+copy_binaries() {
259 259
 	dir="$1"
260 260
 	# Add nested executables to bundle dir so we have complete set of
261 261
 	# them available, but only if the native OS/ARCH is the same as the
... ...
@@ -263,7 +263,7 @@ copy_containerd() {
263 263
 	if [ "$(go env GOOS)/$(go env GOARCH)" == "$(go env GOHOSTOS)/$(go env GOHOSTARCH)" ]; then
264 264
 		if [ -x /usr/local/bin/docker-runc ]; then
265 265
 			echo "Copying nested executables into $dir"
266
-			for file in containerd containerd-shim containerd-ctr runc; do
266
+			for file in containerd containerd-shim containerd-ctr runc init; do
267 267
 				cp `which "docker-$file"` "$dir/"
268 268
 				if [ "$2" == "hash" ]; then
269 269
 					hash_files "$dir/docker-$file"
... ...
@@ -7,3 +7,4 @@ DOCKER_CONTAINERD_BINARY_NAME='docker-containerd'
7 7
 DOCKER_CONTAINERD_CTR_BINARY_NAME='docker-containerd-ctr'
8 8
 DOCKER_CONTAINERD_SHIM_BINARY_NAME='docker-containerd-shim'
9 9
 DOCKER_PROXY_BINARY_NAME='docker-proxy'
10
+DOCKER_INIT_BINARY_NAME='docker-init'
... ...
@@ -12,5 +12,5 @@ set -e
12 12
 	export BINARY_SHORT_NAME="$DOCKER_PROXY_BINARY_NAME"
13 13
 	export SOURCE_PATH='./vendor/src/github.com/docker/libnetwork/cmd/proxy'
14 14
 	source "${MAKEDIR}/.binary"
15
-	copy_containerd "$DEST" 'hash'
15
+	copy_binaries "$DEST" 'hash'
16 16
 )
... ...
@@ -12,4 +12,5 @@ rm -rf "$DEST"
12 12
 	install_binary "${DEST}/${DOCKER_CONTAINERD_CTR_BINARY_NAME}"
13 13
 	install_binary "${DEST}/${DOCKER_CONTAINERD_SHIM_BINARY_NAME}"
14 14
 	install_binary "${DEST}/${DOCKER_PROXY_BINARY_NAME}"
15
+	install_binary "${DEST}/${DOCKER_INIT_BINARY_NAME}"
15 16
 )
... ...
@@ -53,8 +53,8 @@ for d in "$CROSS/"*/*; do
53 53
 		cp -L "$d/$PROXY_BINARY_FULLNAME" "$TAR_PATH/${DOCKER_PROXY_BINARY_NAME}${BINARY_EXTENSION}"
54 54
 	fi
55 55
 
56
-	# copy over all the containerd binaries
57
-	copy_containerd $TAR_PATH
56
+	# copy over all the extra binaries
57
+	copy_binaries $TAR_PATH
58 58
 
59 59
 	if [ "$IS_TAR" == "true" ]; then
60 60
 		echo "Creating tgz from $BUILD_PATH and naming it $TGZ"
... ...
@@ -78,40 +78,6 @@ func (s *DockerSuite) TestEventsUntag(c *check.C) {
78 78
 	}
79 79
 }
80 80
 
81
-func (s *DockerSuite) TestEventsContainerFailStartDie(c *check.C) {
82
-	_, _, err := dockerCmdWithError("run", "--name", "testeventdie", "busybox", "blerg")
83
-	c.Assert(err, checker.NotNil, check.Commentf("Container run with command blerg should have failed, but it did not"))
84
-
85
-	out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
86
-	events := strings.Split(strings.TrimSpace(out), "\n")
87
-
88
-	nEvents := len(events)
89
-	c.Assert(nEvents, checker.GreaterOrEqualThan, 1) //Missing expected event
90
-
91
-	actions := eventActionsByIDAndType(c, events, "testeventdie", "container")
92
-
93
-	var startEvent bool
94
-	var dieEvent bool
95
-	for _, a := range actions {
96
-		switch a {
97
-		case "start":
98
-			startEvent = true
99
-		case "die":
100
-			dieEvent = true
101
-		}
102
-	}
103
-
104
-	// Windows platform is different from Linux, it will start container whatever
105
-	// so Windows can get start/die event but Linux can't
106
-	if daemonPlatform == "windows" {
107
-		c.Assert(startEvent, checker.True, check.Commentf("Start event not found: %v\n%v", actions, events))
108
-		c.Assert(dieEvent, checker.True, check.Commentf("Die event not found: %v\n%v", actions, events))
109
-	} else {
110
-		c.Assert(startEvent, checker.False, check.Commentf("Start event not expected: %v\n%v", actions, events))
111
-		c.Assert(dieEvent, checker.False, check.Commentf("Die event not expected: %v\n%v", actions, events))
112
-	}
113
-}
114
-
115 81
 func (s *DockerSuite) TestEventsLimit(c *check.C) {
116 82
 	var waitGroup sync.WaitGroup
117 83
 	errChan := make(chan error, 17)
... ...
@@ -2403,30 +2403,6 @@ func (s *DockerSuite) TestRunExposePort(c *check.C) {
2403 2403
 	c.Assert(out, checker.Contains, "invalid range format for --expose")
2404 2404
 }
2405 2405
 
2406
-func (s *DockerSuite) TestRunUnknownCommand(c *check.C) {
2407
-	out, _, _ := dockerCmdWithStdoutStderr(c, "create", "busybox", "/bin/nada")
2408
-
2409
-	cID := strings.TrimSpace(out)
2410
-	_, _, err := dockerCmdWithError("start", cID)
2411
-
2412
-	// Windows and Linux are different here by architectural design. Linux will
2413
-	// fail to start the container, so an error is expected. Windows will
2414
-	// successfully start the container, and once started attempt to execute
2415
-	// the command which will fail.
2416
-	if daemonPlatform == "windows" {
2417
-		// Wait for it to exit.
2418
-		waitExited(cID, 30*time.Second)
2419
-		c.Assert(err, check.IsNil)
2420
-	} else {
2421
-		c.Assert(err, check.NotNil)
2422
-	}
2423
-
2424
-	rc := inspectField(c, cID, "State.ExitCode")
2425
-	if rc == "0" {
2426
-		c.Fatalf("ExitCode(%v) cannot be 0", rc)
2427
-	}
2428
-}
2429
-
2430 2406
 func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
2431 2407
 	// Not applicable on Windows as uses Unix-specific capabilities
2432 2408
 	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)
... ...
@@ -1234,11 +1234,11 @@ func (s *DockerSuite) TestRunPidsLimit(c *check.C) {
1234 1234
 	testRequires(c, pidsLimit)
1235 1235
 
1236 1236
 	file := "/sys/fs/cgroup/pids/pids.max"
1237
-	out, _ := dockerCmd(c, "run", "--name", "skittles", "--pids-limit", "2", "busybox", "cat", file)
1238
-	c.Assert(strings.TrimSpace(out), checker.Equals, "2")
1237
+	out, _ := dockerCmd(c, "run", "--name", "skittles", "--pids-limit", "4", "busybox", "cat", file)
1238
+	c.Assert(strings.TrimSpace(out), checker.Equals, "4")
1239 1239
 
1240 1240
 	out = inspectField(c, "skittles", "HostConfig.PidsLimit")
1241
-	c.Assert(out, checker.Equals, "2", check.Commentf("setting the pids limit failed"))
1241
+	c.Assert(out, checker.Equals, "4", check.Commentf("setting the pids limit failed"))
1242 1242
 }
1243 1243
 
1244 1244
 func (s *DockerSuite) TestRunPrivilegedAllowedDevices(c *check.C) {
... ...
@@ -34,6 +34,7 @@ dockerd - Enable daemon mode
34 34
 [**-H**|**--host**[=*[]*]]
35 35
 [**--help**]
36 36
 [**--icc**[=*true*]]
37
+[**--init**[=*false*]]
37 38
 [**--insecure-registry**[=*[]*]]
38 39
 [**--ip**[=*0.0.0.0*]]
39 40
 [**--ip-forward**[=*true*]]
... ...
@@ -166,6 +167,9 @@ unix://[/path/to/socket] to use.
166 166
 **--icc**=*true*|*false*
167 167
   Allow unrestricted inter\-container and Docker daemon host communication. If disabled, containers can still be linked together using the **--link** option (see **docker-run(1)**). Default is true.
168 168
 
169
+**--init**
170
+Run an init process inside containers for signal forwarding and process reaping.
171
+
169 172
 **--insecure-registry**=[]
170 173
   Enable insecure registry communication, i.e., enable un-encrypted and/or untrusted communication.
171 174
 
... ...
@@ -59,7 +59,6 @@ func DefaultSpec() specs.Spec {
59 59
 			Options:     []string{"nosuid", "noexec", "nodev"},
60 60
 		},
61 61
 	}
62
-
63 62
 	s.Process.Capabilities = []string{
64 63
 		"CAP_CHOWN",
65 64
 		"CAP_DAC_OVERRIDE",
... ...
@@ -2,10 +2,11 @@ package opts
2 2
 
3 3
 import (
4 4
 	"fmt"
5
-	fopts "github.com/docker/docker/opts"
6 5
 	"net"
7 6
 	"os"
8 7
 	"strings"
8
+
9
+	fopts "github.com/docker/docker/opts"
9 10
 )
10 11
 
11 12
 // ValidateAttach validates that the specified string is a valid attach option.
... ...
@@ -103,6 +103,7 @@ type ContainerOptions struct {
103 103
 	healthRetries     int
104 104
 	runtime           string
105 105
 	autoRemove        bool
106
+	init              bool
106 107
 
107 108
 	Image string
108 109
 	Args  []string
... ...
@@ -243,6 +244,8 @@ func AddFlags(flags *pflag.FlagSet) *ContainerOptions {
243 243
 	flags.StringVar(&copts.shmSize, "shm-size", "", "Size of /dev/shm, default value is 64MB")
244 244
 	flags.StringVar(&copts.utsMode, "uts", "", "UTS namespace to use")
245 245
 	flags.StringVar(&copts.runtime, "runtime", "", "Runtime to use for this container")
246
+
247
+	flags.BoolVar(&copts.init, "init", false, "Run an init inside the container that forwards signals and reaps processes")
246 248
 	return copts
247 249
 }
248 250
 
... ...
@@ -593,6 +596,11 @@ func Parse(flags *pflag.FlagSet, copts *ContainerOptions) (*container.Config, *c
593 593
 		Runtime:        copts.runtime,
594 594
 	}
595 595
 
596
+	// only set this value if the user provided the flag, else it should default to nil
597
+	if flags.Changed("init") {
598
+		hostConfig.Init = &copts.init
599
+	}
600
+
596 601
 	// When allocating stdin in attached mode, close stdin at client disconnect
597 602
 	if config.OpenStdin && config.AttachStdin {
598 603
 		config.StdinOnce = true