Browse code

Fix critical bug: can't restart a restarting container

When user try to restart a restarting container, docker client report
error: "container is already active", and container will be stopped
instead be restarted which is seriously wrong.

What's more critical is that when user try to start this container
again, it will always fail.

This error can also be reproduced with a `docker stop`+`docker start`.

And this commit will fix the bug.

Signed-off-by: Zhang Wei <zhangwei555@huawei.com>

Zhang Wei authored on 2016/04/07 23:05:29
Showing 5 changed files
... ...
@@ -3,6 +3,7 @@ package daemon
3 3
 import (
4 4
 	"fmt"
5 5
 	"runtime"
6
+	"strings"
6 7
 	"syscall"
7 8
 	"time"
8 9
 
... ...
@@ -81,7 +82,14 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er
81 81
 	}
82 82
 
83 83
 	if err := daemon.kill(container, sig); err != nil {
84
-		return fmt.Errorf("Cannot kill container %s: %s", container.ID, err)
84
+		err = fmt.Errorf("Cannot kill container %s: %s", container.ID, err)
85
+		// if container or process not exists, ignore the error
86
+		if strings.Contains(err.Error(), "container not found") ||
87
+			strings.Contains(err.Error(), "no such process") {
88
+			logrus.Warnf("%s", err.Error())
89
+		} else {
90
+			return err
91
+		}
85 92
 	}
86 93
 
87 94
 	attributes := map[string]string{
... ...
@@ -218,3 +218,32 @@ func (s *DockerSuite) TestRestartPolicyAfterRestart(c *check.C) {
218 218
 	err = waitInspect(id, "{{.State.Status}}", "running", 30*time.Second)
219 219
 	c.Assert(err, check.IsNil)
220 220
 }
221
+
222
+func (s *DockerSuite) TestRestartContainerwithRestartPolicy(c *check.C) {
223
+	out1, _ := dockerCmd(c, "run", "-d", "--restart=on-failure:3", "busybox", "false")
224
+	out2, _ := dockerCmd(c, "run", "-d", "--restart=always", "busybox", "false")
225
+
226
+	id1 := strings.TrimSpace(string(out1))
227
+	id2 := strings.TrimSpace(string(out2))
228
+	err := waitInspect(id1, "{{ .State.Restarting }} {{ .State.Running }}", "false false", 30*time.Second)
229
+	c.Assert(err, checker.IsNil)
230
+
231
+	// TODO: fix racey problem during restart:
232
+	// https://jenkins.dockerproject.org/job/Docker-PRs-Win2Lin/24665/console
233
+	// Error response from daemon: Cannot restart container 6655f620d90b390527db23c0a15b3e46d86a58ecec20a5697ab228d860174251: remove /var/run/docker/libcontainerd/6655f620d90b390527db23c0a15b3e46d86a58ecec20a5697ab228d860174251/rootfs: device or resource busy
234
+	if _, _, err := dockerCmdWithError("restart", id1); err != nil {
235
+		// if restart met racey problem, try again
236
+		time.Sleep(500 * time.Millisecond)
237
+		dockerCmd(c, "restart", id1)
238
+	}
239
+	if _, _, err := dockerCmdWithError("restart", id2); err != nil {
240
+		// if restart met racey problem, try again
241
+		time.Sleep(500 * time.Millisecond)
242
+		dockerCmd(c, "restart", id2)
243
+	}
244
+
245
+	dockerCmd(c, "stop", id1)
246
+	dockerCmd(c, "stop", id2)
247
+	dockerCmd(c, "start", id1)
248
+	dockerCmd(c, "start", id2)
249
+}
... ...
@@ -134,7 +134,7 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio
134 134
 	defer clnt.unlock(containerID)
135 135
 
136 136
 	if ctr, err := clnt.getContainer(containerID); err == nil {
137
-		if ctr.restarting { // docker doesn't actually call start if restart is on atm, but probably should in the future
137
+		if ctr.restarting {
138 138
 			ctr.restartManager.Cancel()
139 139
 			ctr.clean()
140 140
 		} else {
... ...
@@ -124,6 +124,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
124 124
 			} else if restart {
125 125
 				st.State = StateRestart
126 126
 				ctr.restarting = true
127
+				ctr.client.deleteContainer(e.Id)
127 128
 				go func() {
128 129
 					err := <-wait
129 130
 					ctr.restarting = false
... ...
@@ -51,7 +51,7 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
51 51
 	}()
52 52
 
53 53
 	if rm.canceled {
54
-		return false, nil, nil
54
+		return false, nil, fmt.Errorf("restartmanager canceled")
55 55
 	}
56 56
 
57 57
 	if rm.active {