Browse code

Fix container cleanup on daemon restart

When the daemon restores containers on daemon restart, it syncs up with
containerd to determine the existing state. For stopped containers it
then removes the container metadata from containerd.

In some cases this is not handled properly and causes an error when
someone attempts to start that container again.
In particular, this case is just a bad error check.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2018/02/09 02:57:38
Showing 2 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,84 @@
0
+package container
1
+
2
+import (
3
+	"context"
4
+	"fmt"
5
+	"io/ioutil"
6
+	"strconv"
7
+	"strings"
8
+	"testing"
9
+
10
+	"github.com/docker/docker/api/types"
11
+	"github.com/docker/docker/api/types/container"
12
+	"github.com/docker/docker/integration-cli/daemon"
13
+	"github.com/stretchr/testify/assert"
14
+	"golang.org/x/sys/unix"
15
+)
16
+
17
+// This is a regression test for #36145
18
+// It ensures that a container can be started when the daemon was improperly
19
+// shutdown when the daemon is brought back up.
20
+//
21
+// The regression is due to improper error handling preventing a container from
22
+// being restored and as such have the resources cleaned up.
23
+//
24
+// To test this, we need to kill dockerd, then kill both the containerd-shim and
25
+// the container process, then start dockerd back up and attempt to start the
26
+// container again.
27
+func TestContainerStartOnDaemonRestart(t *testing.T) {
28
+	t.Parallel()
29
+
30
+	d := daemon.New(t, "", "dockerd", daemon.Config{})
31
+	d.StartWithBusybox(t, "--iptables=false")
32
+	defer d.Stop(t)
33
+
34
+	client, err := d.NewClient()
35
+	assert.NoError(t, err, "error creating client")
36
+
37
+	ctx := context.Background()
38
+	c, err := client.ContainerCreate(ctx,
39
+		&container.Config{
40
+			Image: "busybox",
41
+			Cmd:   []string{"top"},
42
+		},
43
+		nil,
44
+		nil,
45
+		"",
46
+	)
47
+	assert.NoError(t, err, "error creating test container")
48
+	defer client.ContainerRemove(ctx, c.ID, types.ContainerRemoveOptions{Force: true})
49
+
50
+	err = client.ContainerStart(ctx, c.ID, types.ContainerStartOptions{})
51
+	assert.NoError(t, err, "error starting test container")
52
+
53
+	inspect, err := client.ContainerInspect(ctx, c.ID)
54
+	assert.NoError(t, err, "error getting inspect data")
55
+
56
+	ppid := getContainerdShimPid(t, inspect)
57
+
58
+	err = d.Kill()
59
+	assert.NoError(t, err, "failed to kill test daemon")
60
+
61
+	err = unix.Kill(inspect.State.Pid, unix.SIGKILL)
62
+	assert.NoError(t, err, "failed to kill container process")
63
+
64
+	err = unix.Kill(ppid, unix.SIGKILL)
65
+	assert.NoError(t, err, "failed to kill containerd-shim")
66
+
67
+	d.Start(t, "--iptables=false")
68
+
69
+	err = client.ContainerStart(ctx, c.ID, types.ContainerStartOptions{})
70
+	assert.NoError(t, err, "failed to start test container")
71
+}
72
+
73
+func getContainerdShimPid(t *testing.T, c types.ContainerJSON) int {
74
+	statB, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/stat", c.State.Pid))
75
+	assert.NoError(t, err, "error looking up containerd-shim pid")
76
+
77
+	// ppid is the 4th entry in `/proc/pid/stat`
78
+	ppid, err := strconv.Atoi(strings.Fields(string(statB))[3])
79
+	assert.NoError(t, err, "error converting ppid field to int")
80
+
81
+	assert.NotEqual(t, ppid, 1, "got unexpected ppid")
82
+	return ppid
83
+}
... ...
@@ -159,7 +159,7 @@ func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallba
159 159
 		return attachStdio(dio)
160 160
 	}
161 161
 	t, err := ctr.Task(ctx, attachIO)
162
-	if err != nil && !errdefs.IsNotFound(errors.Cause(err)) {
162
+	if err != nil && !containerderrors.IsNotFound(err) {
163 163
 		return false, -1, err
164 164
 	}
165 165