Currently, if you restart the docker daemon, all containers with restart
policy `on-failure` — regardless of their `RestartCount` — will be started,
which makes the daemon take extra time to restart.
This commit prevents these containers from performing an unnecessary start
on the daemon's restart.
Signed-off-by: Zhang Wei <zhangwei555@huawei.com>
| ... | ... |
@@ -519,9 +519,8 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64 |
| 519 | 519 |
// ShouldRestart decides whether the daemon should restart the container or not. |
| 520 | 520 |
// This is based on the container's restart policy. |
| 521 | 521 |
func (container *Container) ShouldRestart() bool {
|
| 522 |
- return container.HostConfig.RestartPolicy.Name == "always" || |
|
| 523 |
- (container.HostConfig.RestartPolicy.Name == "unless-stopped" && !container.HasBeenManuallyStopped) || |
|
| 524 |
- (container.HostConfig.RestartPolicy.Name == "on-failure" && container.ExitCode != 0) |
|
| 522 |
+ shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped) |
|
| 523 |
+ return shouldRestart |
|
| 525 | 524 |
} |
| 526 | 525 |
|
| 527 | 526 |
// AddBindMountPoint adds a new bind mount point configuration to the container. |
| ... | ... |
@@ -912,8 +911,9 @@ func (container *Container) RestartManager(reset bool) restartmanager.RestartMan |
| 912 | 912 |
container.restartManager = nil |
| 913 | 913 |
} |
| 914 | 914 |
if container.restartManager == nil {
|
| 915 |
- container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy) |
|
| 915 |
+ container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy, container.RestartCount) |
|
| 916 | 916 |
} |
| 917 |
+ |
|
| 917 | 918 |
return container.restartManager |
| 918 | 919 |
} |
| 919 | 920 |
|
| ... | ... |
@@ -291,13 +291,14 @@ func (daemon *Daemon) restore() error {
|
| 291 | 291 |
wg.Add(1) |
| 292 | 292 |
go func(c *container.Container) {
|
| 293 | 293 |
defer wg.Done() |
| 294 |
+ rm := c.RestartManager(false) |
|
| 294 | 295 |
if c.IsRunning() || c.IsPaused() {
|
| 295 | 296 |
// Fix activityCount such that graph mounts can be unmounted later |
| 296 | 297 |
if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
|
| 297 | 298 |
logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
|
| 298 | 299 |
return |
| 299 | 300 |
} |
| 300 |
- if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
|
|
| 301 |
+ if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(rm)); err != nil {
|
|
| 301 | 302 |
logrus.Errorf("Failed to restore with containerd: %q", err)
|
| 302 | 303 |
return |
| 303 | 304 |
} |
| ... | ... |
@@ -150,6 +150,37 @@ func (s *DockerDaemonSuite) TestDaemonRestartUnlessStopped(c *check.C) {
|
| 150 | 150 |
|
| 151 | 151 |
} |
| 152 | 152 |
|
| 153 |
+func (s *DockerDaemonSuite) TestDaemonRestartOnFailure(c *check.C) {
|
|
| 154 |
+ err := s.d.StartWithBusybox() |
|
| 155 |
+ c.Assert(err, check.IsNil) |
|
| 156 |
+ |
|
| 157 |
+ out, err := s.d.Cmd("run", "-d", "--name", "test1", "--restart", "on-failure:3", "busybox:latest", "false")
|
|
| 158 |
+ c.Assert(err, check.IsNil, check.Commentf("run top1: %v", out))
|
|
| 159 |
+ |
|
| 160 |
+ // wait test1 to stop |
|
| 161 |
+ hostArgs := []string{"--host", s.d.sock()}
|
|
| 162 |
+ err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 10*time.Second, hostArgs...)
|
|
| 163 |
+ c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
|
|
| 164 |
+ |
|
| 165 |
+ // record last start time |
|
| 166 |
+ out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
|
|
| 167 |
+ c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
|
|
| 168 |
+ lastStartTime := out |
|
| 169 |
+ |
|
| 170 |
+ err = s.d.Restart() |
|
| 171 |
+ c.Assert(err, check.IsNil) |
|
| 172 |
+ |
|
| 173 |
+ // test1 shouldn't restart at all |
|
| 174 |
+ err = waitInspectWithArgs("test1", "{{.State.Running}} {{.State.Restarting}}", "false false", 0, hostArgs...)
|
|
| 175 |
+ c.Assert(err, checker.IsNil, check.Commentf("test1 should exit but not"))
|
|
| 176 |
+ |
|
| 177 |
+ // make sure test1 isn't restarted when daemon restart |
|
| 178 |
+ // if "StartAt" time updates, means test1 was once restarted. |
|
| 179 |
+ out, err = s.d.Cmd("inspect", "-f={{.State.StartedAt}}", "test1")
|
|
| 180 |
+ c.Assert(err, checker.IsNil, check.Commentf("out: %v", out))
|
|
| 181 |
+ c.Assert(out, checker.Equals, lastStartTime, check.Commentf("test1 shouldn't start after daemon restarts"))
|
|
| 182 |
+} |
|
| 183 |
+ |
|
| 153 | 184 |
func (s *DockerDaemonSuite) TestDaemonStartIptablesFalse(c *check.C) {
|
| 154 | 185 |
if err := s.d.Start("--iptables=false"); err != nil {
|
| 155 | 186 |
c.Fatalf("we should have been able to start the daemon with passing iptables=false: %v", err)
|
| ... | ... |
@@ -118,7 +118,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
| 118 | 118 |
st.State = StateExitProcess |
| 119 | 119 |
} |
| 120 | 120 |
if st.State == StateExit && ctr.restartManager != nil {
|
| 121 |
- restart, wait, err := ctr.restartManager.ShouldRestart(e.Status) |
|
| 121 |
+ restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false) |
|
| 122 | 122 |
if err != nil {
|
| 123 | 123 |
logrus.Error(err) |
| 124 | 124 |
} else if restart {
|
| ... | ... |
@@ -173,7 +173,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr |
| 173 | 173 |
defer ctr.client.unlock(ctr.containerID) |
| 174 | 174 |
|
| 175 | 175 |
if si.State == StateExit && ctr.restartManager != nil {
|
| 176 |
- restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode)) |
|
| 176 |
+ restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false) |
|
| 177 | 177 |
if err != nil {
|
| 178 | 178 |
logrus.Error(err) |
| 179 | 179 |
} else if restart {
|
| ... | ... |
@@ -16,14 +16,14 @@ const ( |
| 16 | 16 |
// RestartManager defines object that controls container restarting rules. |
| 17 | 17 |
type RestartManager interface {
|
| 18 | 18 |
Cancel() error |
| 19 |
- ShouldRestart(exitCode uint32) (bool, chan error, error) |
|
| 19 |
+ ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) |
|
| 20 | 20 |
} |
| 21 | 21 |
|
| 22 | 22 |
type restartManager struct {
|
| 23 | 23 |
sync.Mutex |
| 24 | 24 |
sync.Once |
| 25 | 25 |
policy container.RestartPolicy |
| 26 |
- failureCount int |
|
| 26 |
+ restartCount int |
|
| 27 | 27 |
timeout time.Duration |
| 28 | 28 |
active bool |
| 29 | 29 |
cancel chan struct{}
|
| ... | ... |
@@ -31,8 +31,8 @@ type restartManager struct {
|
| 31 | 31 |
} |
| 32 | 32 |
|
| 33 | 33 |
// New returns a new restartmanager based on a policy. |
| 34 |
-func New(policy container.RestartPolicy) RestartManager {
|
|
| 35 |
- return &restartManager{policy: policy, cancel: make(chan struct{})}
|
|
| 34 |
+func New(policy container.RestartPolicy, restartCount int) RestartManager {
|
|
| 35 |
+ return &restartManager{policy: policy, restartCount: restartCount, cancel: make(chan struct{})}
|
|
| 36 | 36 |
} |
| 37 | 37 |
|
| 38 | 38 |
func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
|
| ... | ... |
@@ -41,7 +41,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
|
| 41 | 41 |
rm.Unlock() |
| 42 | 42 |
} |
| 43 | 43 |
|
| 44 |
-func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
|
|
| 44 |
+func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
|
|
| 45 | 45 |
rm.Lock() |
| 46 | 46 |
unlockOnExit := true |
| 47 | 47 |
defer func() {
|
| ... | ... |
@@ -58,12 +58,6 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro |
| 58 | 58 |
return false, nil, fmt.Errorf("invalid call on active restartmanager")
|
| 59 | 59 |
} |
| 60 | 60 |
|
| 61 |
- if exitCode != 0 {
|
|
| 62 |
- rm.failureCount++ |
|
| 63 |
- } else {
|
|
| 64 |
- rm.failureCount = 0 |
|
| 65 |
- } |
|
| 66 |
- |
|
| 67 | 61 |
if rm.timeout == 0 {
|
| 68 | 62 |
rm.timeout = defaultTimeout |
| 69 | 63 |
} else {
|
| ... | ... |
@@ -72,11 +66,13 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro |
| 72 | 72 |
|
| 73 | 73 |
var restart bool |
| 74 | 74 |
switch {
|
| 75 |
- case rm.policy.IsAlways(), rm.policy.IsUnlessStopped(): |
|
| 75 |
+ case rm.policy.IsAlways(): |
|
| 76 |
+ restart = true |
|
| 77 |
+ case rm.policy.IsUnlessStopped() && !hasBeenManuallyStopped: |
|
| 76 | 78 |
restart = true |
| 77 | 79 |
case rm.policy.IsOnFailure(): |
| 78 | 80 |
// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count |
| 79 |
- if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
|
|
| 81 |
+ if max := rm.policy.MaximumRetryCount; max == 0 || rm.restartCount < max {
|
|
| 80 | 82 |
restart = exitCode != 0 |
| 81 | 83 |
} |
| 82 | 84 |
} |
| ... | ... |
@@ -86,6 +82,8 @@ func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, erro |
| 86 | 86 |
return false, nil, nil |
| 87 | 87 |
} |
| 88 | 88 |
|
| 89 |
+ rm.restartCount++ |
|
| 90 |
+ |
|
| 89 | 91 |
unlockOnExit = false |
| 90 | 92 |
rm.active = true |
| 91 | 93 |
rm.Unlock() |