Browse code

Add SIGUSR1 handler for dumping stack/goroutine traces

Add a handler for SIGUSR1 that dumps goroutine stacks, based on feedback
about when such dumps should be triggered. This change also dumps goroutine
stack traces on SIGQUIT, followed by a hard exit of the daemon.

Docker-DCO-1.1-Signed-off-by: Phil Estes <estesp@linux.vnet.ibm.com> (github: estesp)

Phil Estes authored on 2015/04/21 13:24:24
Showing 4 changed files
... ...
@@ -747,6 +747,9 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
747 747
 		return nil, err
748 748
 	}
749 749
 
750
+	// set up SIGUSR1 handler to dump goroutine stacks
751
+	setupSigusr1Trap()
752
+
750 753
 	// set up the tmpDir to use a canonical path
751 754
 	tmp, err := tempDir(config.Root)
752 755
 	if err != nil {
753 756
new file mode 100644
... ...
@@ -0,0 +1,21 @@
0
+// +build !windows
1
+
2
+package daemon
3
+
4
+import (
5
+	"os"
6
+	"os/signal"
7
+	"syscall"
8
+
9
+	psignal "github.com/docker/docker/pkg/signal"
10
+)
11
+
12
+func setupSigusr1Trap() {
13
+	c := make(chan os.Signal, 1)
14
+	signal.Notify(c, syscall.SIGUSR1)
15
+	go func() {
16
+		for range c {
17
+			psignal.DumpStacks()
18
+		}
19
+	}()
20
+}
0 21
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+// +build !linux,!darwin,!freebsd
1
+
2
+package signal
3
+
4
+func setupSigusr1Trap() {
5
+	return
6
+}
... ...
@@ -3,6 +3,7 @@ package signal
3 3
 import (
4 4
 	"os"
5 5
 	gosignal "os/signal"
6
+	"runtime"
6 7
 	"sync/atomic"
7 8
 	"syscall"
8 9
 
... ...
@@ -14,41 +15,50 @@ import (
14 14
 // (and the Docker engine in particular).
15 15
 //
16 16
 // * If SIGINT or SIGTERM are received, `cleanup` is called, then the process is terminated.
17
-// * If SIGINT or SIGTERM are repeated 3 times before cleanup is complete, then cleanup is
18
-// skipped and the process terminated directly.
19
-// * If "DEBUG" is set in the environment, SIGQUIT causes an exit without cleanup.
17
+// * If SIGINT or SIGTERM are received 3 times before cleanup is complete, then cleanup is
18
+//   skipped and the process is terminated immediately (allows force quit of a stuck daemon).
19
+// * A SIGQUIT always causes an exit without cleanup, with a goroutine dump preceding exit.
20 20
 //
21 21
 func Trap(cleanup func()) {
22 22
 	c := make(chan os.Signal, 1)
23
-	signals := []os.Signal{os.Interrupt, syscall.SIGTERM}
24
-	if os.Getenv("DEBUG") == "" {
25
-		signals = append(signals, syscall.SIGQUIT)
26
-	}
23
+	// we will handle INT, TERM, QUIT here
24
+	signals := []os.Signal{os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}
27 25
 	gosignal.Notify(c, signals...)
28 26
 	go func() {
29 27
 		interruptCount := uint32(0)
30 28
 		for sig := range c {
31 29
 			go func(sig os.Signal) {
32
-				logrus.Infof("Received signal '%v', starting shutdown of docker...", sig)
30
+				logrus.Infof("Processing signal '%v'", sig)
33 31
 				switch sig {
34 32
 				case os.Interrupt, syscall.SIGTERM:
35
-					// If the user really wants to interrupt, let him do so.
36 33
 					if atomic.LoadUint32(&interruptCount) < 3 {
37 34
 						// Initiate the cleanup only once
38 35
 						if atomic.AddUint32(&interruptCount, 1) == 1 {
39
-							// Call cleanup handler
36
+							// Call the provided cleanup handler
40 37
 							cleanup()
41 38
 							os.Exit(0)
42 39
 						} else {
43 40
 							return
44 41
 						}
45 42
 					} else {
46
-						logrus.Infof("Force shutdown of docker, interrupting cleanup")
43
+						// 3 SIGTERM/INT signals received; force exit without cleanup
44
+						logrus.Infof("Forcing docker daemon shutdown without cleanup; 3 interrupts received")
47 45
 					}
48 46
 				case syscall.SIGQUIT:
47
+					DumpStacks()
48
+					logrus.Infof("Forcing docker daemon shutdown without cleanup on SIGQUIT")
49 49
 				}
50
+				// for the SIGINT/TERM and SIGQUIT non-clean shutdown cases, exit with 128 + signal number
50 51
 				os.Exit(128 + int(sig.(syscall.Signal)))
51 52
 			}(sig)
52 53
 		}
53 54
 	}()
54 55
 }
56
+
57
+func DumpStacks() {
58
+	buf := make([]byte, 16384)
59
+	buf = buf[:runtime.Stack(buf, true)]
60
+	// Note that if the daemon is started with a less-verbose log-level than "info" (the default), the goroutine
61
+	// traces won't show up in the log.
62
+	logrus.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf)
63
+}