Fix daemon shutdown on error after rework of daemon startup
Jessie Frazelle authored on 2015/03/18 11:04:25... | ... |
@@ -825,6 +825,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error) |
825 | 825 |
} |
826 | 826 |
config.DisableNetwork = config.BridgeIface == disableNetworkBridge |
827 | 827 |
|
828 |
+ // register portallocator release on shutdown |
|
829 |
+ eng.OnShutdown(func() { |
|
830 |
+ if err := portallocator.ReleaseAll(); err != nil { |
|
831 |
+ log.Errorf("portallocator.ReleaseAll(): %s", err) |
|
832 |
+ } |
|
833 |
+ }) |
|
828 | 834 |
// Claim the pidfile first, to avoid any and all unexpected race conditions. |
829 | 835 |
// Some of the init doesn't need a pidfile lock - but let's not try to be smart. |
830 | 836 |
if config.Pidfile != "" { |
... | ... |
@@ -887,6 +893,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error) |
887 | 887 |
return nil, fmt.Errorf("error intializing graphdriver: %v", err) |
888 | 888 |
} |
889 | 889 |
log.Debugf("Using graph driver %s", driver) |
890 |
+ // register cleanup for graph driver |
|
891 |
+ eng.OnShutdown(func() { |
|
892 |
+ if err := driver.Cleanup(); err != nil { |
|
893 |
+ log.Errorf("Error during graph storage driver.Cleanup(): %v", err) |
|
894 |
+ } |
|
895 |
+ }) |
|
890 | 896 |
|
891 | 897 |
// As Docker on btrfs and SELinux are incompatible at present, error on both being enabled |
892 | 898 |
if selinuxEnabled() && config.EnableSelinuxSupport && driver.String() == "btrfs" { |
... | ... |
@@ -964,6 +976,12 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error) |
964 | 964 |
if err != nil { |
965 | 965 |
return nil, err |
966 | 966 |
} |
967 |
+ // register graph close on shutdown |
|
968 |
+ eng.OnShutdown(func() { |
|
969 |
+ if err := graph.Close(); err != nil { |
|
970 |
+ log.Errorf("Error during container graph.Close(): %v", err) |
|
971 |
+ } |
|
972 |
+ }) |
|
967 | 973 |
|
968 | 974 |
localCopy := path.Join(config.Root, "init", fmt.Sprintf("dockerinit-%s", dockerversion.VERSION)) |
969 | 975 |
sysInitPath := utils.DockerInitPath(localCopy) |
... | ... |
@@ -1012,22 +1030,9 @@ func NewDaemonFromDirectory(config *Config, eng *engine.Engine) (*Daemon, error) |
1012 | 1012 |
defaultLogConfig: config.LogConfig, |
1013 | 1013 |
} |
1014 | 1014 |
|
1015 |
- // Setup shutdown handlers |
|
1016 |
- // FIXME: can these shutdown handlers be registered closer to their source? |
|
1017 | 1015 |
eng.OnShutdown(func() { |
1018 |
- // FIXME: if these cleanup steps can be called concurrently, register |
|
1019 |
- // them as separate handlers to speed up total shutdown time |
|
1020 | 1016 |
if err := daemon.shutdown(); err != nil { |
1021 |
- log.Errorf("daemon.shutdown(): %s", err) |
|
1022 |
- } |
|
1023 |
- if err := portallocator.ReleaseAll(); err != nil { |
|
1024 |
- log.Errorf("portallocator.ReleaseAll(): %s", err) |
|
1025 |
- } |
|
1026 |
- if err := daemon.driver.Cleanup(); err != nil { |
|
1027 |
- log.Errorf("daemon.driver.Cleanup(): %v", err) |
|
1028 |
- } |
|
1029 |
- if err := daemon.containerGraph.Close(); err != nil { |
|
1030 |
- log.Errorf("daemon.containerGraph.Close(): %v", err) |
|
1017 |
+ log.Errorf("Error during daemon.shutdown(): %v", err) |
|
1031 | 1018 |
} |
1032 | 1019 |
}) |
1033 | 1020 |
|
... | ... |
@@ -7,6 +7,7 @@ import ( |
7 | 7 |
"io" |
8 | 8 |
"os" |
9 | 9 |
"path/filepath" |
10 |
+ "strings" |
|
10 | 11 |
|
11 | 12 |
log "github.com/Sirupsen/logrus" |
12 | 13 |
"github.com/docker/docker/autogen/dockerversion" |
... | ... |
@@ -101,13 +102,11 @@ func mainDaemon() { |
101 | 101 |
// load the daemon in the background so we can immediately start |
102 | 102 |
// the http api so that connections don't fail while the daemon |
103 | 103 |
// is booting |
104 |
- daemonWait := make(chan struct{}) |
|
104 |
+ daemonInitWait := make(chan error) |
|
105 | 105 |
go func() { |
106 |
- defer close(daemonWait) |
|
107 |
- |
|
108 | 106 |
d, err := daemon.NewDaemon(daemonCfg, eng) |
109 | 107 |
if err != nil { |
110 |
- log.Error(err) |
|
108 |
+ daemonInitWait <- err |
|
111 | 109 |
return |
112 | 110 |
} |
113 | 111 |
|
... | ... |
@@ -119,7 +118,7 @@ func mainDaemon() { |
119 | 119 |
) |
120 | 120 |
|
121 | 121 |
if err := d.Install(eng); err != nil { |
122 |
- log.Error(err) |
|
122 |
+ daemonInitWait <- err |
|
123 | 123 |
return |
124 | 124 |
} |
125 | 125 |
|
... | ... |
@@ -129,11 +128,10 @@ func mainDaemon() { |
129 | 129 |
// after the daemon is done setting up we can tell the api to start |
130 | 130 |
// accepting connections |
131 | 131 |
if err := eng.Job("acceptconnections").Run(); err != nil { |
132 |
- log.Error(err) |
|
132 |
+ daemonInitWait <- err |
|
133 | 133 |
return |
134 | 134 |
} |
135 |
- |
|
136 |
- log.Debugf("daemon finished") |
|
135 |
+ daemonInitWait <- nil |
|
137 | 136 |
}() |
138 | 137 |
|
139 | 138 |
// Serve api |
... | ... |
@@ -150,16 +148,46 @@ func mainDaemon() { |
150 | 150 |
job.Setenv("TlsCert", *flCert) |
151 | 151 |
job.Setenv("TlsKey", *flKey) |
152 | 152 |
job.SetenvBool("BufferRequests", true) |
153 |
- err := job.Run() |
|
153 |
+ |
|
154 |
+ // The serve API job never exits unless an error occurs |
|
155 |
+ // We need to start it as a goroutine and wait on it so |
|
156 |
+ // daemon doesn't exit |
|
157 |
+ serveAPIWait := make(chan error) |
|
158 |
+ go func() { |
|
159 |
+ if err := job.Run(); err != nil { |
|
160 |
+ log.Errorf("ServeAPI error: %v", err) |
|
161 |
+ serveAPIWait <- err |
|
162 |
+ return |
|
163 |
+ } |
|
164 |
+ serveAPIWait <- nil |
|
165 |
+ }() |
|
154 | 166 |
|
155 | 167 |
// Wait for the daemon startup goroutine to finish |
156 | 168 |
// This makes sure we can actually cleanly shutdown the daemon |
157 |
- log.Infof("waiting for daemon to initialize") |
|
158 |
- <-daemonWait |
|
159 |
- eng.Shutdown() |
|
160 |
- if err != nil { |
|
161 |
- // log errors here so the log output looks more consistent |
|
162 |
- log.Fatalf("shutting down daemon due to errors: %v", err) |
|
169 |
+ log.Debug("waiting for daemon to initialize") |
|
170 |
+ errDaemon := <-daemonInitWait |
|
171 |
+ if errDaemon != nil { |
|
172 |
+ eng.Shutdown() |
|
173 |
+ outStr := fmt.Sprintf("Shutting down daemon due to errors: %v", errDaemon) |
|
174 |
+ if strings.Contains(errDaemon.Error(), "engine is shutdown") { |
|
175 |
+ // if the error is "engine is shutdown", we've already reported (or |
|
176 |
+ // will report below in API server errors) the error |
|
177 |
+ outStr = "Shutting down daemon due to reported errors" |
|
178 |
+ } |
|
179 |
+ // we must "fatal" exit here as the API server may be happy to |
|
180 |
+ // continue listening forever if the error had no impact to API |
|
181 |
+ log.Fatal(outStr) |
|
182 |
+ } else { |
|
183 |
+ log.Info("Daemon has completed initialization") |
|
163 | 184 |
} |
164 | 185 |
|
186 |
+ // Daemon is fully initialized and handling API traffic |
|
187 |
+ // Wait for serve API job to complete |
|
188 |
+ errAPI := <-serveAPIWait |
|
189 |
+ // If we have an error here it is unique to API (as daemonErr would have |
|
190 |
+ // exited the daemon process above) |
|
191 |
+ if errAPI != nil { |
|
192 |
+ log.Errorf("Shutting down due to ServeAPI error: %v", errAPI) |
|
193 |
+ } |
|
194 |
+ eng.Shutdown() |
|
165 | 195 |
} |
... | ... |
@@ -482,6 +482,33 @@ func TestDaemonUpgradeWithVolumes(t *testing.T) { |
482 | 482 |
logDone("daemon - volumes from old(pre 1.3) daemon work") |
483 | 483 |
} |
484 | 484 |
|
485 |
+// GH#11320 - verify that the daemon exits on failure properly |
|
486 |
+// Note that this explicitly tests the conflict of {-b,--bridge} and {--bip} options as the means |
|
487 |
+// to get a daemon init failure; no other tests for -b/--bip conflict are therefore required |
|
488 |
+func TestDaemonExitOnFailure(t *testing.T) { |
|
489 |
+ d := NewDaemon(t) |
|
490 |
+ defer d.Stop() |
|
491 |
+ |
|
492 |
+ //attempt to start daemon with incorrect flags (we know -b and --bip conflict) |
|
493 |
+ if err := d.Start("--bridge", "nosuchbridge", "--bip", "1.1.1.1"); err != nil { |
|
494 |
+ //verify we got the right error |
|
495 |
+ if !strings.Contains(err.Error(), "Daemon exited and never started") { |
|
496 |
+ t.Fatalf("Expected daemon not to start, got %v", err) |
|
497 |
+ } |
|
498 |
+ // look in the log and make sure we got the message that daemon is shutting down |
|
499 |
+ runCmd := exec.Command("grep", "Shutting down daemon due to", d.LogfileName()) |
|
500 |
+ if out, _, err := runCommandWithOutput(runCmd); err != nil { |
|
501 |
+ t.Fatalf("Expected 'shutting down daemon due to error' message; but doesn't exist in log: %q, err: %v", out, err) |
|
502 |
+ } |
|
503 |
+ } else { |
|
504 |
+ //if we didn't get an error and the daemon is running, this is a failure |
|
505 |
+ d.Stop() |
|
506 |
+ t.Fatal("Conflicting options should cause the daemon to error out with a failure") |
|
507 |
+ } |
|
508 |
+ |
|
509 |
+ logDone("daemon - verify no start on daemon init errors") |
|
510 |
+} |
|
511 |
+ |
|
485 | 512 |
func TestDaemonUlimitDefaults(t *testing.T) { |
486 | 513 |
testRequires(t, NativeExecDriver) |
487 | 514 |
d := NewDaemon(t) |
... | ... |
@@ -267,6 +267,10 @@ func (d *Daemon) Cmd(name string, arg ...string) (string, error) { |
267 | 267 |
return string(b), err |
268 | 268 |
} |
269 | 269 |
|
270 |
+func (d *Daemon) LogfileName() string { |
|
271 |
+ return d.logFile.Name() |
|
272 |
+} |
|
273 |
+ |
|
270 | 274 |
func daemonHost() string { |
271 | 275 |
daemonUrlStr := "unix://" + api.DEFAULTUNIXSOCKET |
272 | 276 |
if daemonHostVar := os.Getenv("DOCKER_HOST"); daemonHostVar != "" { |