Browse code

Make test suites dump daemon stack on test timeout

Use `OnTimeout` callback on test timeouts to trigger a stack dump for
running daemons. This will help analyze stuck tests.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2016/07/28 03:17:44
Showing 5 changed files
... ...
@@ -34,6 +34,12 @@ func init() {
34 34
 type DockerSuite struct {
35 35
 }
36 36
 
37
+func (s *DockerSuite) OnTimeout(c *check.C) {
38
+	if daemonPid > 0 && isLocalDaemon {
39
+		signalDaemonDump(daemonPid)
40
+	}
41
+}
42
+
37 43
 func (s *DockerSuite) TearDownTest(c *check.C) {
38 44
 	unpauseAllContainers()
39 45
 	deleteAllContainers()
... ...
@@ -54,6 +60,10 @@ type DockerRegistrySuite struct {
54 54
 	d   *Daemon
55 55
 }
56 56
 
57
+func (s *DockerRegistrySuite) OnTimeout(c *check.C) {
58
+	s.d.DumpStackAndQuit()
59
+}
60
+
57 61
 func (s *DockerRegistrySuite) SetUpTest(c *check.C) {
58 62
 	testRequires(c, DaemonIsLinux, RegistryHosting)
59 63
 	s.reg = setupRegistry(c, false, "", "")
... ...
@@ -82,6 +92,10 @@ type DockerSchema1RegistrySuite struct {
82 82
 	d   *Daemon
83 83
 }
84 84
 
85
+func (s *DockerSchema1RegistrySuite) OnTimeout(c *check.C) {
86
+	s.d.DumpStackAndQuit()
87
+}
88
+
85 89
 func (s *DockerSchema1RegistrySuite) SetUpTest(c *check.C) {
86 90
 	testRequires(c, DaemonIsLinux, RegistryHosting, NotArm64)
87 91
 	s.reg = setupRegistry(c, true, "", "")
... ...
@@ -110,6 +124,10 @@ type DockerRegistryAuthHtpasswdSuite struct {
110 110
 	d   *Daemon
111 111
 }
112 112
 
113
+func (s *DockerRegistryAuthHtpasswdSuite) OnTimeout(c *check.C) {
114
+	s.d.DumpStackAndQuit()
115
+}
116
+
113 117
 func (s *DockerRegistryAuthHtpasswdSuite) SetUpTest(c *check.C) {
114 118
 	testRequires(c, DaemonIsLinux, RegistryHosting)
115 119
 	s.reg = setupRegistry(c, false, "htpasswd", "")
... ...
@@ -140,6 +158,10 @@ type DockerRegistryAuthTokenSuite struct {
140 140
 	d   *Daemon
141 141
 }
142 142
 
143
+func (s *DockerRegistryAuthTokenSuite) OnTimeout(c *check.C) {
144
+	s.d.DumpStackAndQuit()
145
+}
146
+
143 147
 func (s *DockerRegistryAuthTokenSuite) SetUpTest(c *check.C) {
144 148
 	testRequires(c, DaemonIsLinux, RegistryHosting)
145 149
 	s.d = NewDaemon(c)
... ...
@@ -175,6 +197,10 @@ type DockerDaemonSuite struct {
175 175
 	d  *Daemon
176 176
 }
177 177
 
178
+func (s *DockerDaemonSuite) OnTimeout(c *check.C) {
179
+	s.d.DumpStackAndQuit()
180
+}
181
+
178 182
 func (s *DockerDaemonSuite) SetUpTest(c *check.C) {
179 183
 	testRequires(c, DaemonIsLinux)
180 184
 	s.d = NewDaemon(c)
... ...
@@ -218,6 +244,14 @@ type DockerSwarmSuite struct {
218 218
 	portIndex   int
219 219
 }
220 220
 
221
+func (s *DockerSwarmSuite) OnTimeout(c *check.C) {
222
+	s.daemonsLock.Lock()
223
+	defer s.daemonsLock.Unlock()
224
+	for _, d := range s.daemons {
225
+		d.DumpStackAndQuit()
226
+	}
227
+}
228
+
221 229
 func (s *DockerSwarmSuite) SetUpTest(c *check.C) {
222 230
 	testRequires(c, DaemonIsLinux)
223 231
 }
... ...
@@ -273,6 +273,16 @@ func (d *Daemon) Kill() error {
273 273
 	return nil
274 274
 }
275 275
 
276
+// DumpStackAndQuit sends SIGQUIT to the daemon, which triggers it to dump its
277
+// stack to its log file and exit
278
+// This is used primarily for gathering debug information on test timeout
279
+func (d *Daemon) DumpStackAndQuit() {
280
+	if d.cmd == nil || d.cmd.Process == nil {
281
+		return
282
+	}
283
+	signalDaemonDump(d.cmd.Process.Pid)
284
+}
285
+
276 286
 // Stop will send a SIGINT every second and wait for the daemon to stop.
277 287
 // If it times out, a SIGKILL is sent.
278 288
 // Stop will not delete the daemon directory. If a purged daemon is needed,
279 289
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+// +build !windows
1
+
2
+package main
3
+
4
+import "syscall"
5
+
6
// signalDaemonDump sends SIGQUIT to the process identified by pid.
//
// Non-positive pids are ignored: kill(2) interprets 0 and negative values as
// process-group or broadcast targets, so forwarding them would deliver SIGQUIT
// to the test binary itself (and possibly unrelated processes).
func signalDaemonDump(pid int) {
	if pid <= 0 {
		return
	}
	// Best-effort: this runs while reporting a test timeout, and there is
	// nothing useful to do if the process is already gone.
	_ = syscall.Kill(pid, syscall.SIGQUIT)
}
0 9
new file mode 100644
... ...
@@ -0,0 +1,42 @@
0
+package main
1
+
2
+import (
3
+	"strconv"
4
+	"syscall"
5
+	"unsafe"
6
+)
7
+
8
+func openEvent(desiredAccess uint32, inheritHandle bool, name string, proc *syscall.LazyProc) (handle syscall.Handle, err error) {
9
+	namep, _ := syscall.UTF16PtrFromString(name)
10
+	var _p2 uint32
11
+	if inheritHandle {
12
+		_p2 = 1
13
+	}
14
+	r0, _, e1 := proc.Call(uintptr(desiredAccess), uintptr(_p2), uintptr(unsafe.Pointer(namep)))
15
+	handle = syscall.Handle(r0)
16
+	if handle == syscall.InvalidHandle {
17
+		err = e1
18
+	}
19
+	return
20
+}
21
+
22
+func pulseEvent(handle syscall.Handle, proc *syscall.LazyProc) (err error) {
23
+	r0, _, _ := proc.Call(uintptr(handle))
24
+	if r0 != 0 {
25
+		err = syscall.Errno(r0)
26
+	}
27
+	return
28
+}
29
+
30
+func signalDaemonDump(pid int) {
31
+	modkernel32 := syscall.NewLazyDLL("kernel32.dll")
32
+	procOpenEvent := modkernel32.NewProc("OpenEventW")
33
+	procPulseEvent := modkernel32.NewProc("PulseEvent")
34
+
35
+	ev := "Global\\docker-daemon-" + strconv.Itoa(pid)
36
+	h2, _ := openEvent(0x0002, false, ev, procOpenEvent)
37
+	if h2 == 0 {
38
+		return
39
+	}
40
+	pulseEvent(h2, procPulseEvent)
41
+}
... ...
@@ -3,8 +3,11 @@ package main
3 3
 import (
4 4
 	"encoding/json"
5 5
 	"fmt"
6
+	"io/ioutil"
6 7
 	"os"
7 8
 	"os/exec"
9
+	"path/filepath"
10
+	"strconv"
8 11
 
9 12
 	"github.com/docker/docker/pkg/reexec"
10 13
 )
... ...
@@ -65,6 +68,9 @@ var (
65 65
 	// WindowsBaseImage is the name of the base image for Windows testing
66 66
 	// Environment variable WINDOWS_BASE_IMAGE can override this
67 67
 	WindowsBaseImage = "windowsservercore"
68
+
69
+	// daemonPid is the pid of the main test daemon
70
+	daemonPid int
68 71
 )
69 72
 
70 73
 const (
... ...
@@ -134,4 +140,12 @@ func init() {
134 134
 		WindowsBaseImage = os.Getenv("WINDOWS_BASE_IMAGE")
135 135
 		fmt.Println("INFO: Windows Base image is ", WindowsBaseImage)
136 136
 	}
137
+
138
+	dest := os.Getenv("DEST")
139
+	b, err = ioutil.ReadFile(filepath.Join(dest, "docker.pid"))
140
+	if err == nil {
141
+		if p, err := strconv.ParseInt(string(b), 10, 32); err == nil {
142
+			daemonPid = int(p)
143
+		}
144
+	}
137 145
 }