Browse code

Add config parameter to change stop timeout during daemon shutdown. This fix adds a daemon config parameter `--shutdown-timeout` that specifies the timeout value (in seconds) to stop containers gracefully (before SIGKILL) when the daemon shuts down. The default value is 15s.

The `--shutdown-timeout` parameter is added to the daemon options and the
config file. Its value is also updated on daemon reload.

Additional test cases have been added to cover the change.

This fix fixes #22471.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

Yong Tang authored on 2016/05/27 06:07:30
Showing 6 changed files
... ...
@@ -37,6 +37,10 @@ const (
37 37
 	disableNetworkBridge = "none"
38 38
 )
39 39
 
40
+const (
41
+	defaultShutdownTimeout = 15
42
+)
43
+
40 44
 // flatOptions contains configuration keys
41 45
 // that MUST NOT be parsed as deep structures.
42 46
 // Use this to differentiate these options
... ...
@@ -123,6 +127,10 @@ type CommonConfig struct {
123 123
 	// may take place at a time for each push.
124 124
 	MaxConcurrentUploads *int `json:"max-concurrent-uploads,omitempty"`
125 125
 
126
+	// ShutdownTimeout is the timeout value (in seconds) the daemon will wait for the container
127
+	// to stop when the daemon is being shut down
128
+	ShutdownTimeout int `json:"shutdown-timeout,omitempty"`
129
+
126 130
 	Debug     bool     `json:"debug,omitempty"`
127 131
 	Hosts     []string `json:"hosts,omitempty"`
128 132
 	LogLevel  string   `json:"log-level,omitempty"`
... ...
@@ -176,6 +184,7 @@ func (config *Config) InstallCommonFlags(flags *pflag.FlagSet) {
176 176
 	flags.StringVar(&config.CorsHeaders, "api-cors-header", "", "Set CORS headers in the remote API")
177 177
 	flags.IntVar(&maxConcurrentDownloads, "max-concurrent-downloads", defaultMaxConcurrentDownloads, "Set the max concurrent downloads for each pull")
178 178
 	flags.IntVar(&maxConcurrentUploads, "max-concurrent-uploads", defaultMaxConcurrentUploads, "Set the max concurrent uploads for each push")
179
+	flags.IntVar(&config.ShutdownTimeout, "shutdown-timeout", defaultShutdownTimeout, "Set the default shutdown timeout")
179 180
 
180 181
 	flags.StringVar(&config.SwarmDefaultAdvertiseAddr, "swarm-default-advertise-addr", "", "Set default address or interface for swarm advertised address")
181 182
 
... ...
@@ -732,12 +732,13 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
732 732
 	return nil
733 733
 }
734 734
 
735
-// ShutdownTimeout returns the shutdown timeout based on the max stopTimeout of the containers
735
+// ShutdownTimeout returns the shutdown timeout based on the max stopTimeout of the containers,
736
+// and is limited by daemon's ShutdownTimeout.
736 737
 func (daemon *Daemon) ShutdownTimeout() int {
737
-	// By default we use container.DefaultStopTimeout + 5s, which is 15s.
738
-	// TODO (yongtang): Will need to allow shutdown-timeout once #23036 is in place.
738
+	// By default we use daemon's ShutdownTimeout.
739
+	shutdownTimeout := daemon.configStore.ShutdownTimeout
740
+
739 741
 	graceTimeout := 5
740
-	shutdownTimeout := container.DefaultStopTimeout + graceTimeout
741 742
 	if daemon.containers != nil {
742 743
 		for _, c := range daemon.containers.List() {
743 744
 			if shutdownTimeout >= 0 {
... ...
@@ -769,7 +770,7 @@ func (daemon *Daemon) Shutdown() error {
769 769
 	}
770 770
 
771 771
 	if daemon.containers != nil {
772
-		logrus.Debug("starting clean shutdown of all containers...")
772
+		logrus.Debugf("start clean shutdown of all containers with a %d seconds timeout...", daemon.configStore.ShutdownTimeout)
773 773
 		daemon.containers.ApplyAll(func(c *container.Container) {
774 774
 			if !c.IsRunning() {
775 775
 				return
... ...
@@ -995,6 +996,7 @@ func (daemon *Daemon) initDiscovery(config *Config) error {
995 995
 // - Daemon max concurrent uploads
996 996
 // - Cluster discovery (reconfigure and restart).
997 997
 // - Daemon live restore
998
+// - Daemon shutdown timeout (in seconds).
998 999
 func (daemon *Daemon) Reload(config *Config) (err error) {
999 1000
 
1000 1001
 	daemon.configStore.reloadLock.Lock()
... ...
@@ -1055,6 +1057,11 @@ func (daemon *Daemon) Reload(config *Config) (err error) {
1055 1055
 		daemon.uploadManager.SetConcurrency(*daemon.configStore.MaxConcurrentUploads)
1056 1056
 	}
1057 1057
 
1058
+	if config.IsValueSet("shutdown-timeout") {
1059
+		daemon.configStore.ShutdownTimeout = config.ShutdownTimeout
1060
+		logrus.Debugf("Reset Shutdown Timeout: %d", daemon.configStore.ShutdownTimeout)
1061
+	}
1062
+
1058 1063
 	// We emit daemon reload event here with updatable configurations
1059 1064
 	attributes["debug"] = fmt.Sprintf("%t", daemon.configStore.Debug)
1060 1065
 	attributes["live-restore"] = fmt.Sprintf("%t", daemon.configStore.LiveRestoreEnabled)
... ...
@@ -1074,6 +1081,7 @@ func (daemon *Daemon) Reload(config *Config) (err error) {
1074 1074
 	}
1075 1075
 	attributes["max-concurrent-downloads"] = fmt.Sprintf("%d", *daemon.configStore.MaxConcurrentDownloads)
1076 1076
 	attributes["max-concurrent-uploads"] = fmt.Sprintf("%d", *daemon.configStore.MaxConcurrentUploads)
1077
+	attributes["shutdown-timeout"] = fmt.Sprintf("%d", daemon.configStore.ShutdownTimeout)
1077 1078
 
1078 1079
 	return nil
1079 1080
 }
... ...
@@ -64,6 +64,7 @@ Options:
64 64
       --raw-logs                              Full timestamps without ANSI coloring
65 65
       --registry-mirror value                 Preferred Docker registry mirror (default [])
66 66
       --selinux-enabled                       Enable selinux support
67
+      --shutdown-timeout int                  Set the shutdown timeout value in seconds (default 15)
67 68
   -s, --storage-driver string                 Storage driver to use
68 69
       --storage-opt value                     Storage driver options (default [])
69 70
       --swarm-default-advertise-addr string   Set default address or interface for swarm advertised address
... ...
@@ -1118,6 +1119,7 @@ This is a full example of the allowed configuration options on Linux:
1118 1118
 	"cluster-advertise": "",
1119 1119
 	"max-concurrent-downloads": 3,
1120 1120
 	"max-concurrent-uploads": 5,
1121
+	"shutdown-timeout": 15,
1121 1122
 	"debug": true,
1122 1123
 	"hosts": [],
1123 1124
 	"log-level": "",
... ...
@@ -1194,6 +1196,7 @@ This is a full example of the allowed configuration options on Windows:
1194 1194
     "graph": "",
1195 1195
     "cluster-store": "",
1196 1196
     "cluster-advertise": "",
1197
+    "shutdown-timeout": 15,
1197 1198
     "debug": true,
1198 1199
     "hosts": [],
1199 1200
     "log-level": "",
... ...
@@ -2920,3 +2920,57 @@ func (s *DockerDaemonSuite) TestDaemonWithUserlandProxyPath(c *check.C) {
2920 2920
 	c.Assert(out, checker.Contains, "driver failed programming external connectivity on endpoint")
2921 2921
 	c.Assert(out, checker.Contains, "/does/not/exist: no such file or directory")
2922 2922
 }
2923
+
2924
+// Test case for #22471
2925
+func (s *DockerDaemonSuite) TestDaemonShutdownTimeout(c *check.C) {
2926
+	testRequires(c, SameHostDaemon)
2927
+
2928
+	c.Assert(s.d.StartWithBusybox("--shutdown-timeout=3"), check.IsNil)
2929
+
2930
+	_, err := s.d.Cmd("run", "-d", "busybox", "top")
2931
+	c.Assert(err, check.IsNil)
2932
+
2933
+	syscall.Kill(s.d.cmd.Process.Pid, syscall.SIGINT)
2934
+
2935
+	select {
2936
+	case <-s.d.wait:
2937
+	case <-time.After(5 * time.Second):
2938
+	}
2939
+
2940
+	expectedMessage := `level=debug msg="start clean shutdown of all containers with a 3 seconds timeout..."`
2941
+	content, _ := ioutil.ReadFile(s.d.logFile.Name())
2942
+	c.Assert(string(content), checker.Contains, expectedMessage)
2943
+}
2944
+
2945
+// Test case for #22471
2946
+func (s *DockerDaemonSuite) TestDaemonShutdownTimeoutWithConfigFile(c *check.C) {
2947
+	testRequires(c, SameHostDaemon)
2948
+
2949
+	// daemon config file
2950
+	configFilePath := "test.json"
2951
+	configFile, err := os.Create(configFilePath)
2952
+	c.Assert(err, checker.IsNil)
2953
+	defer os.Remove(configFilePath)
2954
+
2955
+	daemonConfig := `{ "shutdown-timeout" : 8 }`
2956
+	fmt.Fprintf(configFile, "%s", daemonConfig)
2957
+	configFile.Close()
2958
+	c.Assert(s.d.Start(fmt.Sprintf("--config-file=%s", configFilePath)), check.IsNil)
2959
+
2960
+	configFile, err = os.Create(configFilePath)
2961
+	c.Assert(err, checker.IsNil)
2962
+	daemonConfig = `{ "shutdown-timeout" : 5 }`
2963
+	fmt.Fprintf(configFile, "%s", daemonConfig)
2964
+	configFile.Close()
2965
+
2966
+	syscall.Kill(s.d.cmd.Process.Pid, syscall.SIGHUP)
2967
+
2968
+	select {
2969
+	case <-s.d.wait:
2970
+	case <-time.After(3 * time.Second):
2971
+	}
2972
+
2973
+	expectedMessage := `level=debug msg="Reset Shutdown Timeout: 5"`
2974
+	content, _ := ioutil.ReadFile(s.d.logFile.Name())
2975
+	c.Assert(string(content), checker.Contains, expectedMessage)
2976
+}
... ...
@@ -418,7 +418,7 @@ func (s *DockerDaemonSuite) TestDaemonEvents(c *check.C) {
418 418
 
419 419
 	configFile, err = os.Create(configFilePath)
420 420
 	c.Assert(err, checker.IsNil)
421
-	daemonConfig = `{"max-concurrent-downloads":1,"labels":["bar=foo"]}`
421
+	daemonConfig = `{"max-concurrent-downloads":1,"labels":["bar=foo"], "shutdown-timeout": 10}`
422 422
 	fmt.Fprintf(configFile, "%s", daemonConfig)
423 423
 	configFile.Close()
424 424
 
... ...
@@ -429,7 +429,7 @@ func (s *DockerDaemonSuite) TestDaemonEvents(c *check.C) {
429 429
 	out, err = s.d.Cmd("events", "--since=0", "--until", daemonUnixTime(c))
430 430
 	c.Assert(err, checker.IsNil)
431 431
 
432
-	c.Assert(out, checker.Contains, fmt.Sprintf("daemon reload %s (cluster-advertise=, cluster-store=, cluster-store-opts={}, debug=true, default-runtime=runc, labels=[\"bar=foo\"], live-restore=false, max-concurrent-downloads=1, max-concurrent-uploads=5, name=%s, runtimes=runc:{docker-runc []})", daemonID, daemonName))
432
+	c.Assert(out, checker.Contains, fmt.Sprintf("daemon reload %s (cluster-advertise=, cluster-store=, cluster-store-opts={}, debug=true, default-runtime=runc, labels=[\"bar=foo\"], live-restore=false, max-concurrent-downloads=1, max-concurrent-uploads=5, name=%s, runtimes=runc:{docker-runc []}, shutdown-timeout=10)", daemonID, daemonName))
433 433
 }
434 434
 
435 435
 func (s *DockerDaemonSuite) TestDaemonEventsWithFilters(c *check.C) {
... ...
@@ -56,6 +56,7 @@ dockerd - Enable daemon mode
56 56
 [**--registry-mirror**[=*[]*]]
57 57
 [**-s**|**--storage-driver**[=*STORAGE-DRIVER*]]
58 58
 [**--selinux-enabled**]
59
+[**--shutdown-timeout**[=*15*]]
59 60
 [**--storage-opt**[=*[]*]]
60 61
 [**--swarm-default-advertise-addr**[=*IP|INTERFACE*]]
61 62
 [**--tls**]
... ...
@@ -246,6 +247,9 @@ output otherwise.
246 246
 **--selinux-enabled**=*true*|*false*
247 247
   Enable selinux support. Default is false.
248 248
 
249
+**--shutdown-timeout**=*15*
250
+  Set the shutdown timeout value in seconds. Default is `15`.
251
+
249 252
 **--storage-opt**=[]
250 253
   Set storage driver options. See STORAGE DRIVER OPTIONS.
251 254