Browse code

Merge pull request #11320 from estesp/fix-daemon-startup

Fix daemon shutdown on error after rework of daemon startup

Jessie Frazelle authored on 2015/03/18 11:04:25
Showing 4 changed files
... ...
@@ -825,6 +825,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
825 825
 	}
826 826
 	config.DisableNetwork = config.BridgeIface == disableNetworkBridge
827 827
 
828
+	// register portallocator release on shutdown
829
+	eng.OnShutdown(func() {
830
+		if err := portallocator.ReleaseAll(); err != nil {
831
+			log.Errorf("portallocator.ReleaseAll(): %s", err)
832
+		}
833
+	})
828 834
 	// Claim the pidfile first, to avoid any and all unexpected race conditions.
829 835
 	// Some of the init doesn't need a pidfile lock - but let's not try to be smart.
830 836
 	if config.Pidfile != "" {
... ...
@@ -887,6 +893,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
887 887
 		return nil, fmt.Errorf("error intializing graphdriver: %v", err)
888 888
 	}
889 889
 	log.Debugf("Using graph driver %s", driver)
890
+	// register cleanup for graph driver
891
+	eng.OnShutdown(func() {
892
+		if err := driver.Cleanup(); err != nil {
893
+			log.Errorf("Error during graph storage driver.Cleanup(): %v", err)
894
+		}
895
+	})
890 896
 
891 897
 	// As Docker on btrfs and SELinux are incompatible at present, error on both being enabled
892 898
 	if selinuxEnabled() && config.EnableSelinuxSupport && driver.String() == "btrfs" {
... ...
@@ -964,6 +976,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
964 964
 	if err != nil {
965 965
 		return nil, err
966 966
 	}
967
+	// register graph close on shutdown
968
+	eng.OnShutdown(func() {
969
+		if err := graph.Close(); err != nil {
970
+			log.Errorf("Error during container graph.Close(): %v", err)
971
+		}
972
+	})
967 973
 
968 974
 	localCopy := path.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION))
969 975
 	sysInitPath := utils.DockerInitPath(localCopy)
... ...
@@ -1012,22 +1030,9 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error)
1012 1012
 		defaultLogConfig: config.LogConfig,
1013 1013
 	}
1014 1014
 
1015
-	// Setup shutdown handlers
1016
-	// FIXME: can these shutdown handlers be registered closer to their source?
1017 1015
 	eng.OnShutdown(func() {
1018
-		// FIXME: if these cleanup steps can be called concurrently, register
1019
-		// them as separate handlers to speed up total shutdown time
1020 1016
 		if err := daemon.shutdown(); err != nil {
1021
-			log.Errorf("daemon.shutdown(): %s", err)
1022
-		}
1023
-		if err := portallocator.ReleaseAll(); err != nil {
1024
-			log.Errorf("portallocator.ReleaseAll(): %s", err)
1025
-		}
1026
-		if err := daemon.driver.Cleanup(); err != nil {
1027
-			log.Errorf("daemon.driver.Cleanup(): %v", err)
1028
-		}
1029
-		if err := daemon.containerGraph.Close(); err != nil {
1030
-			log.Errorf("daemon.containerGraph.Close(): %v", err)
1017
+			log.Errorf("Error during daemon.shutdown(): %v", err)
1031 1018
 		}
1032 1019
 	})
1033 1020
 
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"io"
8 8
 	"os"
9 9
 	"path/filepath"
10
+	"strings"
10 11
 
11 12
 	log "github.com/Sirupsen/logrus"
12 13
 	"github.com/docker/docker/autogen/dockerversion"
... ...
@@ -101,13 +102,11 @@ func mainDaemon() {
101 101
 	// load the daemon in the background so we can immediately start
102 102
 	// the http api so that connections don't fail while the daemon
103 103
 	// is booting
104
-	daemonWait := make(chan struct{})
104
+	daemonInitWait := make(chan error)
105 105
 	go func() {
106
-		defer close(daemonWait)
107
-
108 106
 		d, err := daemon.NewDaemon(daemonCfg, eng)
109 107
 		if err != nil {
110
-			log.Error(err)
108
+			daemonInitWait <- err
111 109
 			return
112 110
 		}
113 111
 
... ...
@@ -119,7 +118,7 @@ func mainDaemon() {
119 119
 		)
120 120
 
121 121
 		if err := d.Install(eng); err != nil {
122
-			log.Error(err)
122
+			daemonInitWait <- err
123 123
 			return
124 124
 		}
125 125
 
... ...
@@ -129,11 +128,10 @@ func mainDaemon() {
129 129
 		// after the daemon is done setting up we can tell the api to start
130 130
 		// accepting connections
131 131
 		if err := eng.Job("acceptconnections").Run(); err != nil {
132
-			log.Error(err)
132
+			daemonInitWait <- err
133 133
 			return
134 134
 		}
135
-
136
-		log.Debugf("daemon finished")
135
+		daemonInitWait <- nil
137 136
 	}()
138 137
 
139 138
 	// Serve api
... ...
@@ -150,16 +148,46 @@ func mainDaemon() {
150 150
 	job.Setenv("TlsCert", *flCert)
151 151
 	job.Setenv("TlsKey", *flKey)
152 152
 	job.SetenvBool("BufferRequests", true)
153
-	err := job.Run()
153
+
154
+	// The serve API job never exits unless an error occurs
155
+	// We need to start it as a goroutine and wait on it so
156
+	// daemon doesn't exit
157
+	serveAPIWait := make(chan error)
158
+	go func() {
159
+		if err := job.Run(); err != nil {
160
+			log.Errorf("ServeAPI error: %v", err)
161
+			serveAPIWait <- err
162
+			return
163
+		}
164
+		serveAPIWait <- nil
165
+	}()
154 166
 
155 167
 	// Wait for the daemon startup goroutine to finish
156 168
 	// This makes sure we can actually cleanly shutdown the daemon
157
-	log.Infof("waiting for daemon to initialize")
158
-	<-daemonWait
159
-	eng.Shutdown()
160
-	if err != nil {
161
-		// log errors here so the log output looks more consistent
162
-		log.Fatalf("shutting down daemon due to errors: %v", err)
169
+	log.Debug("waiting for daemon to initialize")
170
+	errDaemon := <-daemonInitWait
171
+	if errDaemon != nil {
172
+		eng.Shutdown()
173
+		outStr := fmt.Sprintf("Shutting down daemon due to errors: %v", errDaemon)
174
+		if strings.Contains(errDaemon.Error(), "engine is shutdown") {
175
+			// if the error is "engine is shutdown", we've already reported (or
176
+			// will report below in API server errors) the error
177
+			outStr = "Shutting down daemon due to reported errors"
178
+		}
179
+		// we must "fatal" exit here as the API server may be happy to
180
+		// continue listening forever if the error had no impact to API
181
+		log.Fatal(outStr)
182
+	} else {
183
+		log.Info("Daemon has completed initialization")
163 184
 	}
164 185
 
186
+	// Daemon is fully initialized and handling API traffic
187
+	// Wait for serve API job to complete
188
+	errAPI := <-serveAPIWait
189
+	// If we have an error here it is unique to API (as daemonErr would have
190
+	// exited the daemon process above)
191
+	if errAPI != nil {
192
+		log.Errorf("Shutting down due to ServeAPI error: %v", errAPI)
193
+	}
194
+	eng.Shutdown()
165 195
 }
... ...
@@ -482,6 +482,33 @@ func TestDaemonUpgradeWithVolumes(t *testing.T) {
482 482
 	logDone("daemon - volumes from old(pre 1.3) daemon work")
483 483
 }
484 484
 
485
+// GH#11320 - verify that the daemon exits on failure properly
486
+// Note that this explicitly tests the conflict of {-b,--bridge} and {--bip} options as the means
487
+// to get a daemon init failure; no other tests for -b/--bip conflict are therefore required
488
+func TestDaemonExitOnFailure(t *testing.T) {
489
+	d := NewDaemon(t)
490
+	defer d.Stop()
491
+
492
+	//attempt to start daemon with incorrect flags (we know -b and --bip conflict)
493
+	if err := d.Start("--bridge", "nosuchbridge", "--bip", "1.1.1.1"); err != nil {
494
+		//verify we got the right error
495
+		if !strings.Contains(err.Error(), "Daemon exited and never started") {
496
+			t.Fatalf("Expected daemon not to start, got %v", err)
497
+		}
498
+		// look in the log and make sure we got the message that daemon is shutting down
499
+		runCmd := exec.Command("grep", "Shutting down daemon due to", d.LogfileName())
500
+		if out, _, err := runCommandWithOutput(runCmd); err != nil {
501
+			t.Fatalf("Expected 'shutting down daemon due to error' message; but doesn't exist in log: %q, err: %v", out, err)
502
+		}
503
+	} else {
504
+		//if we didn't get an error and the daemon is running, this is a failure
505
+		d.Stop()
506
+		t.Fatal("Conflicting options should cause the daemon to error out with a failure")
507
+	}
508
+
509
+	logDone("daemon - verify no start on daemon init errors")
510
+}
511
+
485 512
 func TestDaemonUlimitDefaults(t *testing.T) {
486 513
 	testRequires(t, NativeExecDriver)
487 514
 	d := NewDaemon(t)
... ...
@@ -267,6 +267,10 @@ func (d *Daemon) Cmd(name string, arg ...string) (string, error) {
267 267
 	return string(b), err
268 268
 }
269 269
 
270
+func (d *Daemon) LogfileName() string {
271
+	return d.logFile.Name()
272
+}
273
+
270 274
 func daemonHost() string {
271 275
 	daemonUrlStr := "unix://" + api.DEFAULTUNIXSOCKET
272 276
 	if daemonHostVar := os.Getenv("DOCKER_HOST"); daemonHostVar != "" {