Browse code

Fix ShouldRestart for on-failure handle

Currently if you restart docker daemon, all the containers with restart
policy `on-failure` regardless of its `RestartCount` will be started,
this will make daemon cost more extra time for restart.

This commit will stop these containers to do unnecessary start on
daemon's restart.

Signed-off-by: Zhang Wei <zhangwei555@huawei.com>

Zhang Wei authored on 2016/03/23 00:46:40
Showing 6 changed files
... ...
@@ -519,9 +519,8 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
519 519
 // ShouldRestart decides whether the daemon should restart the container or not.
520 520
 // This is based on the container's restart policy.
521 521
 func (container *Container) ShouldRestart() bool {
522
-	return container.HostConfig.RestartPolicy.Name == "always" ||
523
-		(container.HostConfig.RestartPolicy.Name == "unless-stopped" && !container.HasBeenManuallyStopped) ||
524
-		(container.HostConfig.RestartPolicy.Name == "on-failure" && container.ExitCode != 0)
522
+	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped)
523
+	return shouldRestart
525 524
 }
526 525
 
527 526
 // AddBindMountPoint adds a new bind mount point configuration to the container.
... ...
@@ -912,8 +911,9 @@ func (container *Container) RestartManager(reset bool) restartmanager.RestartMan
912 912
 		container.restartManager = nil
913 913
 	}
914 914
 	if container.restartManager == nil {
915
-		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
915
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount)
916 916
 	}
917
+
917 918
 	return container.restartManager
918 919
 }
919 920
 
... ...
@@ -291,13 +291,14 @@ func (daemon *Daemon) restore() error {
291 291
 		wg.Add(1)
292 292
 		go func(c *container.Container) {
293 293
 			defer wg.Done()
294
+			rm := c.RestartManager(false)
294 295
 			if c.IsRunning() || c.IsPaused() {
295 296
 				// Fix activityCount such that graph mounts can be unmounted later
296 297
 				if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
297 298
 					logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
298 299
 					return
299 300
 				}
300
-				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
301
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
301 302
 					logrus.Errorf("Failed to restore with containerd: %q", err)
302 303
 					return
303 304
 				}
... ...
@@ -150,6 +150,37 @@ func (s *DockerDaemonSuite) TestDaemonRestartUnlessStopped(c *check.C) {
150 150
 
151 151
 }
152 152
 
153
+func (s *DockerDaemonSuite) TestDaemonRestartOnFailure(c *check.C) {
154
+	err := s.d.StartWithBusybox()
155
+	c.Assert(err, check.IsNil)
156
+
157
+	out, err := s.d.Cmd("run", "-d", "--name", "test1", "--restart", "on-failure:3", "busybox:latest", "false")
158
+	c.Assert(err, check.IsNil, check.Commentf("run top1: %v", out))
159
+
160
+	// wait test1 to stop
161
+	hostArgs := []string{"--host", s.d.sock()}
162
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 10*time.Second, hostArgs...)
163
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
164
+
165
+	// record last start time
166
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
167
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
168
+	lastStartTime := out
169
+
170
+	err = s.d.Restart()
171
+	c.Assert(err, check.IsNil)
172
+
173
+	// test1 shouldn't restart at all
174
+	err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 0, hostArgs...)
175
+	c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
176
+
177
+	// make sure test1 isn't restarted when daemon restart
178
+	// if "StartAt" time updates, means test1 was once restarted.
179
+	out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
180
+	c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
181
+	c.Assert(out, checker.Equals, lastStartTime, check.Commentf("test1 shouldn't start after daemon restarts"))
182
+}
183
+
153 184
 func (s *DockerDaemonSuite) TestDaemonStartIptablesFalse(c *check.C) {
154 185
 	if err := s.d.Start("--iptables=false"); err != nil {
155 186
 		c.Fatalf("we should have been able to start the daemon with passing iptables=false: %v", err)
... ...
@@ -118,7 +118,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
118 118
 			st.State = StateExitProcess
119 119
 		}
120 120
 		if st.State == StateExit && ctr.restartManager != nil {
121
-			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
121
+			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false)
122 122
 			if err != nil {
123 123
 				logrus.Error(err)
124 124
 			} else if restart {
... ...
@@ -173,7 +173,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
173 173
 		defer ctr.client.unlock(ctr.containerID)
174 174
 
175 175
 		if si.State == StateExit && ctr.restartManager != nil {
176
-			restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode))
176
+			restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false)
177 177
 			if err != nil {
178 178
 				logrus.Error(err)
179 179
 			} else if restart {
... ...
@@ -16,14 +16,14 @@ const (
16 16
 // RestartManager defines object that controls container restarting rules.
17 17
 type RestartManager interface {
18 18
 	Cancel() error
19
-	ShouldRestart(exitCode uint32) (bool, chan error, error)
19
+	ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error)
20 20
 }
21 21
 
22 22
 type restartManager struct {
23 23
 	sync.Mutex
24 24
 	sync.Once
25 25
 	policy       container.RestartPolicy
26
-	failureCount int
26
+	restartCount int
27 27
 	timeout      time.Duration
28 28
 	active       bool
29 29
 	cancel       chan struct{}
... ...
@@ -31,8 +31,8 @@ type restartManager struct {
31 31
 }
32 32
 
33 33
 // New returns a new restartmanager based on a policy.
34
-func New(policy container.RestartPolicy) RestartManager {
35
-	return &restartManager{policy: policy, cancel: make(chan struct{})}
34
+func New(policy container.RestartPolicy, restartCount int) RestartManager {
35
+	return &restartManager{policy: policy, restartCount: restartCount, cancel: make(chan struct{})}
36 36
 }
37 37
 
38 38
 func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
... ...
@@ -41,7 +41,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
41 41
 	rm.Unlock()
42 42
 }
43 43
 
44
-func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
44
+func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
45 45
 	rm.Lock()
46 46
 	unlockOnExit := true
47 47
 	defer func() {
... ...
@@ -58,12 +58,6 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
58 58
 		return false, nil, fmt.Errorf("invalid call on active restartmanager")
59 59
 	}
60 60
 
61
-	if exitCode != 0 {
62
-		rm.failureCount++
63
-	} else {
64
-		rm.failureCount = 0
65
-	}
66
-
67 61
 	if rm.timeout == 0 {
68 62
 		rm.timeout = defaultTimeout
69 63
 	} else {
... ...
@@ -72,11 +66,13 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
72 72
 
73 73
 	var restart bool
74 74
 	switch {
75
-	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
75
+	case rm.policy.IsAlways():
76
+		restart = true
77
+	case rm.policy.IsUnlessStopped() && !hasBeenManuallyStopped:
76 78
 		restart = true
77 79
 	case rm.policy.IsOnFailure():
78 80
 		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
79
-		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
81
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.restartCount < max {
80 82
 			restart = exitCode != 0
81 83
 		}
82 84
 	}
... ...
@@ -86,6 +82,8 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro
86 86
 		return false, nil, nil
87 87
 	}
88 88
 
89
+	rm.restartCount++
90
+
89 91
 	unlockOnExit = false
90 92
 	rm.active = true
91 93
 	rm.Unlock()