Use `OnTimeout` callback on test timeouts to trigger a stack dump for
running daemons. This will help analyze stuck tests.
Signed-off-by: Brian Goff <cpuguy83@gmail.com>
| ... | ... |
@@ -34,6 +34,12 @@ func init() {
|
| 34 | 34 |
type DockerSuite struct {
|
| 35 | 35 |
} |
| 36 | 36 |
|
| 37 |
+func (s *DockerSuite) OnTimeout(c *check.C) {
|
|
| 38 |
+ if daemonPid > 0 && isLocalDaemon {
|
|
| 39 |
+ signalDaemonDump(daemonPid) |
|
| 40 |
+ } |
|
| 41 |
+} |
|
| 42 |
+ |
|
| 37 | 43 |
func (s *DockerSuite) TearDownTest(c *check.C) {
|
| 38 | 44 |
unpauseAllContainers() |
| 39 | 45 |
deleteAllContainers() |
| ... | ... |
@@ -54,6 +60,10 @@ type DockerRegistrySuite struct {
|
| 54 | 54 |
d *Daemon |
| 55 | 55 |
} |
| 56 | 56 |
|
| 57 |
+func (s *DockerRegistrySuite) OnTimeout(c *check.C) {
|
|
| 58 |
+ s.d.DumpStackAndQuit() |
|
| 59 |
+} |
|
| 60 |
+ |
|
| 57 | 61 |
func (s *DockerRegistrySuite) SetUpTest(c *check.C) {
|
| 58 | 62 |
testRequires(c, DaemonIsLinux, RegistryHosting) |
| 59 | 63 |
s.reg = setupRegistry(c, false, "", "") |
| ... | ... |
@@ -82,6 +92,10 @@ type DockerSchema1RegistrySuite struct {
|
| 82 | 82 |
d *Daemon |
| 83 | 83 |
} |
| 84 | 84 |
|
| 85 |
+func (s *DockerSchema1RegistrySuite) OnTimeout(c *check.C) {
|
|
| 86 |
+ s.d.DumpStackAndQuit() |
|
| 87 |
+} |
|
| 88 |
+ |
|
| 85 | 89 |
func (s *DockerSchema1RegistrySuite) SetUpTest(c *check.C) {
|
| 86 | 90 |
testRequires(c, DaemonIsLinux, RegistryHosting, NotArm64) |
| 87 | 91 |
s.reg = setupRegistry(c, true, "", "") |
| ... | ... |
@@ -110,6 +124,10 @@ type DockerRegistryAuthHtpasswdSuite struct {
|
| 110 | 110 |
d *Daemon |
| 111 | 111 |
} |
| 112 | 112 |
|
| 113 |
+func (s *DockerRegistryAuthHtpasswdSuite) OnTimeout(c *check.C) {
|
|
| 114 |
+ s.d.DumpStackAndQuit() |
|
| 115 |
+} |
|
| 116 |
+ |
|
| 113 | 117 |
func (s *DockerRegistryAuthHtpasswdSuite) SetUpTest(c *check.C) {
|
| 114 | 118 |
testRequires(c, DaemonIsLinux, RegistryHosting) |
| 115 | 119 |
s.reg = setupRegistry(c, false, "htpasswd", "") |
| ... | ... |
@@ -140,6 +158,10 @@ type DockerRegistryAuthTokenSuite struct {
|
| 140 | 140 |
d *Daemon |
| 141 | 141 |
} |
| 142 | 142 |
|
| 143 |
+func (s *DockerRegistryAuthTokenSuite) OnTimeout(c *check.C) {
|
|
| 144 |
+ s.d.DumpStackAndQuit() |
|
| 145 |
+} |
|
| 146 |
+ |
|
| 143 | 147 |
func (s *DockerRegistryAuthTokenSuite) SetUpTest(c *check.C) {
|
| 144 | 148 |
testRequires(c, DaemonIsLinux, RegistryHosting) |
| 145 | 149 |
s.d = NewDaemon(c) |
| ... | ... |
@@ -175,6 +197,10 @@ type DockerDaemonSuite struct {
|
| 175 | 175 |
d *Daemon |
| 176 | 176 |
} |
| 177 | 177 |
|
| 178 |
+func (s *DockerDaemonSuite) OnTimeout(c *check.C) {
|
|
| 179 |
+ s.d.DumpStackAndQuit() |
|
| 180 |
+} |
|
| 181 |
+ |
|
| 178 | 182 |
func (s *DockerDaemonSuite) SetUpTest(c *check.C) {
|
| 179 | 183 |
testRequires(c, DaemonIsLinux) |
| 180 | 184 |
s.d = NewDaemon(c) |
| ... | ... |
@@ -218,6 +244,14 @@ type DockerSwarmSuite struct {
|
| 218 | 218 |
portIndex int |
| 219 | 219 |
} |
| 220 | 220 |
|
| 221 |
+func (s *DockerSwarmSuite) OnTimeout(c *check.C) {
|
|
| 222 |
+ s.daemonsLock.Lock() |
|
| 223 |
+ defer s.daemonsLock.Unlock() |
|
| 224 |
+ for _, d := range s.daemons {
|
|
| 225 |
+ d.DumpStackAndQuit() |
|
| 226 |
+ } |
|
| 227 |
+} |
|
| 228 |
+ |
|
| 221 | 229 |
func (s *DockerSwarmSuite) SetUpTest(c *check.C) {
|
| 222 | 230 |
testRequires(c, DaemonIsLinux) |
| 223 | 231 |
} |
| ... | ... |
@@ -273,6 +273,16 @@ func (d *Daemon) Kill() error {
|
| 273 | 273 |
return nil |
| 274 | 274 |
} |
| 275 | 275 |
|
| 276 |
+// DumpStackAndQuit sends SIGQUIT to the daemon, which triggers it to dump its |
|
| 277 |
+// stack to its log file and exit |
|
| 278 |
+// This is used primarily for gathering debug information on test timeout |
|
| 279 |
+func (d *Daemon) DumpStackAndQuit() {
|
|
| 280 |
+ if d.cmd == nil || d.cmd.Process == nil {
|
|
| 281 |
+ return |
|
| 282 |
+ } |
|
| 283 |
+ signalDaemonDump(d.cmd.Process.Pid) |
|
| 284 |
+} |
|
| 285 |
+ |
|
| 276 | 286 |
// Stop will send a SIGINT every second and wait for the daemon to stop. |
| 277 | 287 |
// If it timeouts, a SIGKILL is sent. |
| 278 | 288 |
// Stop will not delete the daemon directory. If a purged daemon is needed, |
| 0 | 9 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,42 @@ |
| 0 |
+package main |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "strconv" |
|
| 4 |
+ "syscall" |
|
| 5 |
+ "unsafe" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
+func openEvent(desiredAccess uint32, inheritHandle bool, name string, proc *syscall.LazyProc) (handle syscall.Handle, err error) {
|
|
| 9 |
+ namep, _ := syscall.UTF16PtrFromString(name) |
|
| 10 |
+ var _p2 uint32 |
|
| 11 |
+ if inheritHandle {
|
|
| 12 |
+ _p2 = 1 |
|
| 13 |
+ } |
|
| 14 |
+ r0, _, e1 := proc.Call(uintptr(desiredAccess), uintptr(_p2), uintptr(unsafe.Pointer(namep))) |
|
| 15 |
+ handle = syscall.Handle(r0) |
|
| 16 |
+ if handle == syscall.InvalidHandle {
|
|
| 17 |
+ err = e1 |
|
| 18 |
+ } |
|
| 19 |
+ return |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func pulseEvent(handle syscall.Handle, proc *syscall.LazyProc) (err error) {
|
|
| 23 |
+ r0, _, _ := proc.Call(uintptr(handle)) |
|
| 24 |
+ if r0 != 0 {
|
|
| 25 |
+ err = syscall.Errno(r0) |
|
| 26 |
+ } |
|
| 27 |
+ return |
|
| 28 |
+} |
|
| 29 |
+ |
|
| 30 |
+func signalDaemonDump(pid int) {
|
|
| 31 |
+ modkernel32 := syscall.NewLazyDLL("kernel32.dll")
|
|
| 32 |
+ procOpenEvent := modkernel32.NewProc("OpenEventW")
|
|
| 33 |
+ procPulseEvent := modkernel32.NewProc("PulseEvent")
|
|
| 34 |
+ |
|
| 35 |
+ ev := "Global\\docker-daemon-" + strconv.Itoa(pid) |
|
| 36 |
+ h2, _ := openEvent(0x0002, false, ev, procOpenEvent) |
|
| 37 |
+ if h2 == 0 {
|
|
| 38 |
+ return |
|
| 39 |
+ } |
|
| 40 |
+ pulseEvent(h2, procPulseEvent) |
|
| 41 |
+} |
| ... | ... |
@@ -3,8 +3,11 @@ package main |
| 3 | 3 |
import ( |
| 4 | 4 |
"encoding/json" |
| 5 | 5 |
"fmt" |
| 6 |
+ "io/ioutil" |
|
| 6 | 7 |
"os" |
| 7 | 8 |
"os/exec" |
| 9 |
+ "path/filepath" |
|
| 10 |
+ "strconv" |
|
| 8 | 11 |
|
| 9 | 12 |
"github.com/docker/docker/pkg/reexec" |
| 10 | 13 |
) |
| ... | ... |
@@ -65,6 +68,9 @@ var ( |
| 65 | 65 |
// WindowsBaseImage is the name of the base image for Windows testing |
| 66 | 66 |
// Environment variable WINDOWS_BASE_IMAGE can override this |
| 67 | 67 |
WindowsBaseImage = "windowsservercore" |
| 68 |
+ |
|
| 69 |
+ // daemonPid is the pid of the main test daemon |
|
| 70 |
+ daemonPid int |
|
| 68 | 71 |
) |
| 69 | 72 |
|
| 70 | 73 |
const ( |
| ... | ... |
@@ -134,4 +140,12 @@ func init() {
|
| 134 | 134 |
WindowsBaseImage = os.Getenv("WINDOWS_BASE_IMAGE")
|
| 135 | 135 |
fmt.Println("INFO: Windows Base image is ", WindowsBaseImage)
|
| 136 | 136 |
} |
| 137 |
+ |
|
| 138 |
+ dest := os.Getenv("DEST")
|
|
| 139 |
+ b, err = ioutil.ReadFile(filepath.Join(dest, "docker.pid")) |
|
| 140 |
+ if err == nil {
|
|
| 141 |
+ if p, err := strconv.ParseInt(string(b), 10, 32); err == nil {
|
|
| 142 |
+ daemonPid = int(p) |
|
| 143 |
+ } |
|
| 144 |
+ } |
|
| 137 | 145 |
} |