Browse code

Cleanup: refactor shutdown and signal handling facility

This disentangles the following functions, which were previously all mixed together:

* 1) Waiting for jobs to terminate when shutting down
* 2) Handling signals in the Docker daemon
* 3) Per-subsystem cleanup handlers
* 4) pidfile management

Responsibilities are dispatched as follows:

* Signal traps are set in `main`, and trigger `engine.Shutdown`
* `engine.Shutdown` coordinates cleanup by waiting for jobs to complete, and calling shutdown handlers
* To perform cleanup at shutdown, each subsystem registers handlers with `engine.OnShutdown`
* `daemon` is one subsystem, so it registers cleanup via `engine.OnShutdown`.
* `daemon` owns the pidfile, which is used to lock access to `/var/lib/docker`. Part of its cleanup is to remove the pidfile.

Signed-off-by: Solomon Hykes <solomon@docker.com>

Solomon Hykes authored on 2014/08/06 17:12:22
Showing 4 changed files
... ...
@@ -667,6 +667,20 @@ func NewDaemon(config *daemonconfig.Config, eng *engine.Engine) (*Daemon, error)
667 667
 }
668 668
 
669 669
 func NewDaemonFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*Daemon, error) {
670
+	// Claim the pidfile first, to avoid any and all unexpected race conditions.
671
+	// Some of the init doesn't need a pidfile lock - but let's not try to be smart.
672
+	if config.Pidfile != "" {
673
+		if err := utils.CreatePidFile(config.Pidfile); err != nil {
674
+			return nil, err
675
+		}
676
+		eng.OnShutdown(func() {
677
+			// Always release the pidfile last, just in case
678
+			utils.RemovePidFile(config.Pidfile)
679
+		})
680
+	}
681
+
682
+	// Check that the system is supported and we have sufficient privileges
683
+	// FIXME: return errors instead of calling Fatal
670 684
 	if runtime.GOOS != "linux" {
671 685
 		log.Fatalf("The Docker daemon is only supported on linux")
672 686
 	}
... ...
@@ -819,13 +833,32 @@ func NewDaemonFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (*D
819 819
 		eng:            eng,
820 820
 		Sockets:        config.Sockets,
821 821
 	}
822
-
823 822
 	if err := daemon.checkLocaldns(); err != nil {
824 823
 		return nil, err
825 824
 	}
826 825
 	if err := daemon.restore(); err != nil {
827 826
 		return nil, err
828 827
 	}
828
+	// Setup shutdown handlers
829
+	// FIXME: can these shutdown handlers be registered closer to their source?
830
+	eng.OnShutdown(func() {
831
+		// FIXME: if these cleanup steps can be called concurrently, register
832
+		// them as separate handlers to speed up total shutdown time
833
+		// FIXME: use engine logging instead of utils.Errorf
834
+		if err := daemon.shutdown(); err != nil {
835
+			utils.Errorf("daemon.shutdown(): %s", err)
836
+		}
837
+		if err := portallocator.ReleaseAll(); err != nil {
838
+			utils.Errorf("portallocator.ReleaseAll(): %s", err)
839
+		}
840
+		if err := daemon.driver.Cleanup(); err != nil {
841
+			utils.Errorf("daemon.driver.Cleanup(): %s", err.Error())
842
+		}
843
+		if err := daemon.containerGraph.Close(); err != nil {
844
+			utils.Errorf("daemon.containerGraph.Close(): %s", err.Error())
845
+		}
846
+	})
847
+
829 848
 	return daemon, nil
830 849
 }
831 850
 
... ...
@@ -853,30 +886,6 @@ func (daemon *Daemon) shutdown() error {
853 853
 	return nil
854 854
 }
855 855
 
856
-func (daemon *Daemon) Close() error {
857
-	errorsStrings := []string{}
858
-	if err := daemon.shutdown(); err != nil {
859
-		utils.Errorf("daemon.shutdown(): %s", err)
860
-		errorsStrings = append(errorsStrings, err.Error())
861
-	}
862
-	if err := portallocator.ReleaseAll(); err != nil {
863
-		utils.Errorf("portallocator.ReleaseAll(): %s", err)
864
-		errorsStrings = append(errorsStrings, err.Error())
865
-	}
866
-	if err := daemon.driver.Cleanup(); err != nil {
867
-		utils.Errorf("daemon.driver.Cleanup(): %s", err.Error())
868
-		errorsStrings = append(errorsStrings, err.Error())
869
-	}
870
-	if err := daemon.containerGraph.Close(); err != nil {
871
-		utils.Errorf("daemon.containerGraph.Close(): %s", err.Error())
872
-		errorsStrings = append(errorsStrings, err.Error())
873
-	}
874
-	if len(errorsStrings) > 0 {
875
-		return fmt.Errorf("%s", strings.Join(errorsStrings, ", "))
876
-	}
877
-	return nil
878
-}
879
-
880 856
 func (daemon *Daemon) Mount(container *Container) error {
881 857
 	dir, err := daemon.driver.Get(container.ID, container.GetMountLabel())
882 858
 	if err != nil {
... ...
@@ -967,6 +976,8 @@ func (daemon *Daemon) Kill(c *Container, sig int) error {
967 967
 // from the content root, including images, volumes and
968 968
 // container filesystems.
969 969
 // Again: this will remove your entire docker daemon!
970
+// FIXME: this is deprecated, and only used in legacy
971
+// tests. Please remove.
970 972
 func (daemon *Daemon) Nuke() error {
971 973
 	var wg sync.WaitGroup
972 974
 	for _, container := range daemon.List() {
... ...
@@ -977,7 +988,6 @@ func (daemon *Daemon) Nuke() error {
977 977
 		}(container)
978 978
 	}
979 979
 	wg.Wait()
980
-	daemon.Close()
981 980
 
982 981
 	return os.RemoveAll(daemon.config.Root)
983 982
 }
... ...
@@ -10,6 +10,7 @@ import (
10 10
 	"github.com/docker/docker/dockerversion"
11 11
 	"github.com/docker/docker/engine"
12 12
 	flag "github.com/docker/docker/pkg/mflag"
13
+	"github.com/docker/docker/pkg/signal"
13 14
 	"github.com/docker/docker/sysinit"
14 15
 )
15 16
 
... ...
@@ -39,6 +40,7 @@ func mainDaemon() {
39 39
 	}
40 40
 
41 41
 	eng := engine.New()
42
+	signal.Trap(eng.Shutdown)
42 43
 	// Load builtins
43 44
 	if err := builtins.Register(eng); err != nil {
44 45
 		log.Fatal(err)
... ...
@@ -10,7 +10,6 @@ import (
10 10
 	"github.com/docker/docker/daemon"
11 11
 	"github.com/docker/docker/daemonconfig"
12 12
 	"github.com/docker/docker/engine"
13
-	"github.com/docker/docker/pkg/signal"
14 13
 	"github.com/docker/docker/utils"
15 14
 )
16 15
 
... ...
@@ -41,15 +40,11 @@ func InitPidfile(job *engine.Job) engine.Status {
41 41
 // The signals SIGINT, SIGQUIT and SIGTERM are intercepted for cleanup.
42 42
 func InitServer(job *engine.Job) engine.Status {
43 43
 	job.Logf("Creating server")
44
-	srv, err := NewServer(job.Eng, daemonconfig.ConfigFromJob(job))
44
+	cfg := daemonconfig.ConfigFromJob(job)
45
+	srv, err := NewServer(job.Eng, cfg)
45 46
 	if err != nil {
46 47
 		return job.Error(err)
47 48
 	}
48
-	job.Logf("Setting up signal traps")
49
-	signal.Trap(func() {
50
-		utils.RemovePidFile(srv.daemon.Config().Pidfile)
51
-		srv.Close()
52
-	})
53 49
 	job.Eng.Hack_SetGlobalVar("httpapi.server", srv)
54 50
 	job.Eng.Hack_SetGlobalVar("httpapi.daemon", srv.daemon)
55 51
 
... ...
@@ -23,7 +23,6 @@ package server
23 23
 
24 24
 import (
25 25
 	"sync"
26
-	"time"
27 26
 
28 27
 	"github.com/docker/docker/daemon"
29 28
 	"github.com/docker/docker/engine"
... ...
@@ -42,27 +41,6 @@ func (srv *Server) IsRunning() bool {
42 42
 	return srv.running
43 43
 }
44 44
 
45
-func (srv *Server) Close() error {
46
-	if srv == nil {
47
-		return nil
48
-	}
49
-	srv.SetRunning(false)
50
-	done := make(chan struct{})
51
-	go func() {
52
-		srv.tasks.Wait()
53
-		close(done)
54
-	}()
55
-	select {
56
-	// Waiting server jobs for 15 seconds, shutdown immediately after that time
57
-	case <-time.After(time.Second * 15):
58
-	case <-done:
59
-	}
60
-	if srv.daemon == nil {
61
-		return nil
62
-	}
63
-	return srv.daemon.Close()
64
-}
65
-
66 45
 type Server struct {
67 46
 	sync.RWMutex
68 47
 	daemon      *daemon.Daemon