Browse code

Merge pull request #22125 from crosbymichael/restart-timeout

Reset restart timeout if execution longer than 10s

Vincent Demeester authored on 2016/04/26 02:15:32
Showing 6 changed files
... ...
@@ -520,7 +520,7 @@ func copyEscapable(dst io.Writer, src io.ReadCloser, keys []byte) (written int64
520 520
 // ShouldRestart decides whether the daemon should restart the container or not.
521 521
 // This is based on the container's restart policy.
522 522
 func (container *Container) ShouldRestart() bool {
523
-	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped)
523
+	shouldRestart, _, _ := container.restartManager.ShouldRestart(uint32(container.ExitCode), container.HasBeenManuallyStopped, container.FinishedAt.Sub(container.StartedAt))
524 524
 	return shouldRestart
525 525
 }
526 526
 
... ...
@@ -2,6 +2,7 @@ package libcontainerd
2 2
 
3 3
 import (
4 4
 	"fmt"
5
+	"time"
5 6
 
6 7
 	"github.com/docker/docker/restartmanager"
7 8
 )
... ...
@@ -18,6 +19,7 @@ type containerCommon struct {
18 18
 	restartManager restartmanager.RestartManager
19 19
 	restarting     bool
20 20
 	processes      map[string]*process
21
+	startedAt      time.Time
21 22
 }
22 23
 
23 24
 // WithRestartManager sets the restartmanager to be used with the container.
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"os"
7 7
 	"path/filepath"
8 8
 	"syscall"
9
+	"time"
9 10
 
10 11
 	"github.com/Sirupsen/logrus"
11 12
 	containerd "github.com/docker/containerd/api/grpc/types"
... ...
@@ -90,6 +91,7 @@ func (ctr *container) start() error {
90 90
 		ctr.closeFifos(iopipe)
91 91
 		return err
92 92
 	}
93
+	ctr.startedAt = time.Now()
93 94
 
94 95
 	if err := ctr.client.backend.AttachStreams(ctr.containerID, *iopipe); err != nil {
95 96
 		return err
... ...
@@ -134,7 +136,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
134 134
 			st.State = StateExitProcess
135 135
 		}
136 136
 		if st.State == StateExit && ctr.restartManager != nil {
137
-			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false)
137
+			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status, false, time.Since(ctr.startedAt))
138 138
 			if err != nil {
139 139
 				logrus.Warnf("container %s %v", ctr.containerID, err)
140 140
 			} else if restart {
... ...
@@ -4,6 +4,7 @@ import (
4 4
 	"io"
5 5
 	"strings"
6 6
 	"syscall"
7
+	"time"
7 8
 
8 9
 	"github.com/Microsoft/hcsshim"
9 10
 	"github.com/Sirupsen/logrus"
... ...
@@ -78,6 +79,7 @@ func (ctr *container) start() error {
78 78
 		}
79 79
 		return err
80 80
 	}
81
+	ctr.startedAt = time.Now()
81 82
 
82 83
 	// Convert io.ReadClosers to io.Readers
83 84
 	if stdout != nil {
... ...
@@ -168,7 +170,7 @@ func (ctr *container) waitExit(pid uint32, processFriendlyName string, isFirstPr
168 168
 		}
169 169
 
170 170
 		if si.State == StateExit && ctr.restartManager != nil {
171
-			restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false)
171
+			restart, wait, err := ctr.restartManager.ShouldRestart(uint32(exitCode), false, time.Since(ctr.startedAt))
172 172
 			if err != nil {
173 173
 				logrus.Error(err)
174 174
 			} else if restart {
... ...
@@ -21,7 +21,7 @@ var ErrRestartCanceled = errors.New("restart canceled")
21 21
 // RestartManager defines object that controls container restarting rules.
22 22
 type RestartManager interface {
23 23
 	Cancel() error
24
-	ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error)
24
+	ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool, executionDuration time.Duration) (bool, chan error, error)
25 25
 }
26 26
 
27 27
 type restartManager struct {
... ...
@@ -46,7 +46,7 @@ func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
46 46
 	rm.Unlock()
47 47
 }
48 48
 
49
-func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool) (bool, chan error, error) {
49
+func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped bool, executionDuration time.Duration) (bool, chan error, error) {
50 50
 	if rm.policy.IsNone() {
51 51
 		return false, nil, nil
52 52
 	}
... ...
@@ -65,7 +65,11 @@ func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped
65 65
 	if rm.active {
66 66
 		return false, nil, fmt.Errorf("invalid call on active restartmanager")
67 67
 	}
68
-
68
+	// if the container ran for more than 10s, reguardless of status and policy reset the
69
+	// the timeout back to the default.
70
+	if executionDuration.Seconds() >= 10 {
71
+		rm.timeout = 0
72
+	}
69 73
 	if rm.timeout == 0 {
70 74
 		rm.timeout = defaultTimeout
71 75
 	} else {
... ...
@@ -1,3 +1,34 @@
1 1
 package restartmanager
2 2
 
3
-// FIXME
3
+import (
4
+	"testing"
5
+	"time"
6
+
7
+	"github.com/docker/engine-api/types/container"
8
+)
9
+
10
+func TestRestartManagerTimeout(t *testing.T) {
11
+	rm := New(container.RestartPolicy{Name: "always"}, 0).(*restartManager)
12
+	should, _, err := rm.ShouldRestart(0, false, 1*time.Second)
13
+	if err != nil {
14
+		t.Fatal(err)
15
+	}
16
+	if !should {
17
+		t.Fatal("container should be restarted")
18
+	}
19
+	if rm.timeout != 100*time.Millisecond {
20
+		t.Fatalf("restart manager should have a timeout of 100ms but has %s", rm.timeout)
21
+	}
22
+}
23
+
24
+func TestRestartManagerTimeoutReset(t *testing.T) {
25
+	rm := New(container.RestartPolicy{Name: "always"}, 0).(*restartManager)
26
+	rm.timeout = 5 * time.Second
27
+	_, _, err := rm.ShouldRestart(0, false, 10*time.Second)
28
+	if err != nil {
29
+		t.Fatal(err)
30
+	}
31
+	if rm.timeout != 100*time.Millisecond {
32
+		t.Fatalf("restart manager should have a timeout of 100ms but has %s", rm.timeout)
33
+	}
34
+}