Add stack dumps from the daemon(s) on test timeout
| ... | ... |
@@ -47,7 +47,7 @@ clone git github.com/Microsoft/hcsshim v0.3.6 |
| 47 | 47 |
clone git github.com/Microsoft/go-winio v0.3.4 |
| 48 | 48 |
clone git github.com/Sirupsen/logrus v0.10.0 # logrus is a common dependency among multiple deps |
| 49 | 49 |
clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a |
| 50 |
-clone git github.com/go-check/check 03a4d9dcf2f92eae8e90ed42aa2656f63fdd0b14 https://github.com/cpuguy83/check.git |
|
| 50 |
+clone git github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git |
|
| 51 | 51 |
clone git github.com/gorilla/context v1.1 |
| 52 | 52 |
clone git github.com/gorilla/mux v1.1 |
| 53 | 53 |
clone git github.com/kr/pty 5cf931ef8f |
| ... | ... |
@@ -33,6 +33,12 @@ func init() {
|
| 33 | 33 |
type DockerSuite struct {
|
| 34 | 34 |
} |
| 35 | 35 |
|
| 36 |
+func (s *DockerSuite) OnTimeout(c *check.C) {
|
|
| 37 |
+ if daemonPid > 0 && isLocalDaemon {
|
|
| 38 |
+ signalDaemonDump(daemonPid) |
|
| 39 |
+ } |
|
| 40 |
+} |
|
| 41 |
+ |
|
| 36 | 42 |
func (s *DockerSuite) TearDownTest(c *check.C) {
|
| 37 | 43 |
unpauseAllContainers() |
| 38 | 44 |
deleteAllContainers() |
| ... | ... |
@@ -53,6 +59,10 @@ type DockerRegistrySuite struct {
|
| 53 | 53 |
d *Daemon |
| 54 | 54 |
} |
| 55 | 55 |
|
| 56 |
+func (s *DockerRegistrySuite) OnTimeout(c *check.C) {
|
|
| 57 |
+ s.d.DumpStackAndQuit() |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 56 | 60 |
func (s *DockerRegistrySuite) SetUpTest(c *check.C) {
|
| 57 | 61 |
testRequires(c, DaemonIsLinux, RegistryHosting) |
| 58 | 62 |
s.reg = setupRegistry(c, false, "", "") |
| ... | ... |
@@ -81,6 +91,10 @@ type DockerSchema1RegistrySuite struct {
|
| 81 | 81 |
d *Daemon |
| 82 | 82 |
} |
| 83 | 83 |
|
| 84 |
+func (s *DockerSchema1RegistrySuite) OnTimeout(c *check.C) {
|
|
| 85 |
+ s.d.DumpStackAndQuit() |
|
| 86 |
+} |
|
| 87 |
+ |
|
| 84 | 88 |
func (s *DockerSchema1RegistrySuite) SetUpTest(c *check.C) {
|
| 85 | 89 |
testRequires(c, DaemonIsLinux, RegistryHosting, NotArm64) |
| 86 | 90 |
s.reg = setupRegistry(c, true, "", "") |
| ... | ... |
@@ -109,6 +123,10 @@ type DockerRegistryAuthHtpasswdSuite struct {
|
| 109 | 109 |
d *Daemon |
| 110 | 110 |
} |
| 111 | 111 |
|
| 112 |
+func (s *DockerRegistryAuthHtpasswdSuite) OnTimeout(c *check.C) {
|
|
| 113 |
+ s.d.DumpStackAndQuit() |
|
| 114 |
+} |
|
| 115 |
+ |
|
| 112 | 116 |
func (s *DockerRegistryAuthHtpasswdSuite) SetUpTest(c *check.C) {
|
| 113 | 117 |
testRequires(c, DaemonIsLinux, RegistryHosting) |
| 114 | 118 |
s.reg = setupRegistry(c, false, "htpasswd", "") |
| ... | ... |
@@ -139,6 +157,10 @@ type DockerRegistryAuthTokenSuite struct {
|
| 139 | 139 |
d *Daemon |
| 140 | 140 |
} |
| 141 | 141 |
|
| 142 |
+func (s *DockerRegistryAuthTokenSuite) OnTimeout(c *check.C) {
|
|
| 143 |
+ s.d.DumpStackAndQuit() |
|
| 144 |
+} |
|
| 145 |
+ |
|
| 142 | 146 |
func (s *DockerRegistryAuthTokenSuite) SetUpTest(c *check.C) {
|
| 143 | 147 |
testRequires(c, DaemonIsLinux, RegistryHosting) |
| 144 | 148 |
s.d = NewDaemon(c) |
| ... | ... |
@@ -174,6 +196,10 @@ type DockerDaemonSuite struct {
|
| 174 | 174 |
d *Daemon |
| 175 | 175 |
} |
| 176 | 176 |
|
| 177 |
+func (s *DockerDaemonSuite) OnTimeout(c *check.C) {
|
|
| 178 |
+ s.d.DumpStackAndQuit() |
|
| 179 |
+} |
|
| 180 |
+ |
|
| 177 | 181 |
func (s *DockerDaemonSuite) SetUpTest(c *check.C) {
|
| 178 | 182 |
testRequires(c, DaemonIsLinux) |
| 179 | 183 |
s.d = NewDaemon(c) |
| ... | ... |
@@ -217,6 +243,14 @@ type DockerSwarmSuite struct {
|
| 217 | 217 |
portIndex int |
| 218 | 218 |
} |
| 219 | 219 |
|
| 220 |
+func (s *DockerSwarmSuite) OnTimeout(c *check.C) {
|
|
| 221 |
+ s.daemonsLock.Lock() |
|
| 222 |
+ defer s.daemonsLock.Unlock() |
|
| 223 |
+ for _, d := range s.daemons {
|
|
| 224 |
+ d.DumpStackAndQuit() |
|
| 225 |
+ } |
|
| 226 |
+} |
|
| 227 |
+ |
|
| 220 | 228 |
func (s *DockerSwarmSuite) SetUpTest(c *check.C) {
|
| 221 | 229 |
testRequires(c, DaemonIsLinux) |
| 222 | 230 |
} |
| ... | ... |
@@ -273,6 +273,16 @@ func (d *Daemon) Kill() error {
|
| 273 | 273 |
return nil |
| 274 | 274 |
} |
| 275 | 275 |
|
| 276 |
+// DumpStackAndQuit sends SIGQUIT to the daemon, which triggers it to dump its |
|
| 277 |
+// stack to its log file and exit |
|
| 278 |
+// This is used primarily for gathering debug information on test timeout |
|
| 279 |
+func (d *Daemon) DumpStackAndQuit() {
|
|
| 280 |
+ if d.cmd == nil || d.cmd.Process == nil {
|
|
| 281 |
+ return |
|
| 282 |
+ } |
|
| 283 |
+ signalDaemonDump(d.cmd.Process.Pid) |
|
| 284 |
+} |
|
| 285 |
+ |
|
| 276 | 286 |
// Stop will send a SIGINT every second and wait for the daemon to stop. |
| 277 | 287 |
// If it timeouts, a SIGKILL is sent. |
| 278 | 288 |
// Stop will not delete the daemon directory. If a purged daemon is needed, |
| 0 | 9 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,42 @@ |
| 0 |
+package main |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "strconv" |
|
| 4 |
+ "syscall" |
|
| 5 |
+ "unsafe" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
+func openEvent(desiredAccess uint32, inheritHandle bool, name string, proc *syscall.LazyProc) (handle syscall.Handle, err error) {
|
|
| 9 |
+ namep, _ := syscall.UTF16PtrFromString(name) |
|
| 10 |
+ var _p2 uint32 |
|
| 11 |
+ if inheritHandle {
|
|
| 12 |
+ _p2 = 1 |
|
| 13 |
+ } |
|
| 14 |
+ r0, _, e1 := proc.Call(uintptr(desiredAccess), uintptr(_p2), uintptr(unsafe.Pointer(namep))) |
|
| 15 |
+ handle = syscall.Handle(r0) |
|
| 16 |
+ if handle == syscall.InvalidHandle {
|
|
| 17 |
+ err = e1 |
|
| 18 |
+ } |
|
| 19 |
+ return |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func pulseEvent(handle syscall.Handle, proc *syscall.LazyProc) (err error) {
|
|
| 23 |
+ r0, _, _ := proc.Call(uintptr(handle)) |
|
| 24 |
+ if r0 != 0 {
|
|
| 25 |
+ err = syscall.Errno(r0) |
|
| 26 |
+ } |
|
| 27 |
+ return |
|
| 28 |
+} |
|
| 29 |
+ |
|
| 30 |
+func signalDaemonDump(pid int) {
|
|
| 31 |
+ modkernel32 := syscall.NewLazyDLL("kernel32.dll")
|
|
| 32 |
+ procOpenEvent := modkernel32.NewProc("OpenEventW")
|
|
| 33 |
+ procPulseEvent := modkernel32.NewProc("PulseEvent")
|
|
| 34 |
+ |
|
| 35 |
+ ev := "Global\\docker-daemon-" + strconv.Itoa(pid) |
|
| 36 |
+ h2, _ := openEvent(0x0002, false, ev, procOpenEvent) |
|
| 37 |
+ if h2 == 0 {
|
|
| 38 |
+ return |
|
| 39 |
+ } |
|
| 40 |
+ pulseEvent(h2, procPulseEvent) |
|
| 41 |
+} |
| ... | ... |
@@ -3,8 +3,11 @@ package main |
| 3 | 3 |
import ( |
| 4 | 4 |
"encoding/json" |
| 5 | 5 |
"fmt" |
| 6 |
+ "io/ioutil" |
|
| 6 | 7 |
"os" |
| 7 | 8 |
"os/exec" |
| 9 |
+ "path/filepath" |
|
| 10 |
+ "strconv" |
|
| 8 | 11 |
|
| 9 | 12 |
"github.com/docker/docker/pkg/reexec" |
| 10 | 13 |
) |
| ... | ... |
@@ -65,6 +68,9 @@ var ( |
| 65 | 65 |
// WindowsBaseImage is the name of the base image for Windows testing |
| 66 | 66 |
// Environment variable WINDOWS_BASE_IMAGE can override this |
| 67 | 67 |
WindowsBaseImage = "windowsservercore" |
| 68 |
+ |
|
| 69 |
+ // daemonPid is the pid of the main test daemon |
|
| 70 |
+ daemonPid int |
|
| 68 | 71 |
) |
| 69 | 72 |
|
| 70 | 73 |
const ( |
| ... | ... |
@@ -134,4 +140,12 @@ func init() {
|
| 134 | 134 |
WindowsBaseImage = os.Getenv("WINDOWS_BASE_IMAGE")
|
| 135 | 135 |
fmt.Println("INFO: Windows Base image is ", WindowsBaseImage)
|
| 136 | 136 |
} |
| 137 |
+ |
|
| 138 |
+ dest := os.Getenv("DEST")
|
|
| 139 |
+ b, err = ioutil.ReadFile(filepath.Join(dest, "docker.pid")) |
|
| 140 |
+ if err == nil {
|
|
| 141 |
+ if p, err := strconv.ParseInt(string(b), 10, 32); err == nil {
|
|
| 142 |
+ daemonPid = int(p) |
|
| 143 |
+ } |
|
| 144 |
+ } |
|
| 137 | 145 |
} |
| ... | ... |
@@ -514,6 +514,7 @@ type suiteRunner struct {
|
| 514 | 514 |
suite interface{}
|
| 515 | 515 |
setUpSuite, tearDownSuite *methodType |
| 516 | 516 |
setUpTest, tearDownTest *methodType |
| 517 |
+ onTimeout *methodType |
|
| 517 | 518 |
tests []*methodType |
| 518 | 519 |
tracker *resultTracker |
| 519 | 520 |
tempDir *tempDir |
| ... | ... |
@@ -591,6 +592,8 @@ func newSuiteRunner(suite interface{}, runConf *RunConf) *suiteRunner {
|
| 591 | 591 |
runner.setUpTest = method |
| 592 | 592 |
case "TearDownTest": |
| 593 | 593 |
runner.tearDownTest = method |
| 594 |
+ case "OnTimeout": |
|
| 595 |
+ runner.onTimeout = method |
|
| 594 | 596 |
default: |
| 595 | 597 |
prefix := "Test" |
| 596 | 598 |
if conf.Benchmark {
|
| ... | ... |
@@ -671,6 +674,23 @@ func (runner *suiteRunner) forkCall(method *methodType, kind funcKind, testName |
| 671 | 671 |
return c |
| 672 | 672 |
} |
| 673 | 673 |
|
| 674 |
+type timeoutErr struct {
|
|
| 675 |
+ method *methodType |
|
| 676 |
+ t time.Duration |
|
| 677 |
+} |
|
| 678 |
+ |
|
| 679 |
+func (e timeoutErr) Error() string {
|
|
| 680 |
+ return fmt.Sprintf("%s test timed out after %v", e.method.String(), e.t)
|
|
| 681 |
+} |
|
| 682 |
+ |
|
| 683 |
+func isTimeout(e error) bool {
|
|
| 684 |
+ if e == nil {
|
|
| 685 |
+ return false |
|
| 686 |
+ } |
|
| 687 |
+ _, ok := e.(timeoutErr) |
|
| 688 |
+ return ok |
|
| 689 |
+} |
|
| 690 |
+ |
|
| 674 | 691 |
// Same as forkCall(), but wait for call to finish before returning. |
| 675 | 692 |
func (runner *suiteRunner) runFunc(method *methodType, kind funcKind, testName string, logb *logger, dispatcher func(c *C)) *C {
|
| 676 | 693 |
var timeout <-chan time.Time |
| ... | ... |
@@ -681,7 +701,19 @@ func (runner *suiteRunner) runFunc(method *methodType, kind funcKind, testName s |
| 681 | 681 |
select {
|
| 682 | 682 |
case <-c.done: |
| 683 | 683 |
case <-timeout: |
| 684 |
- panic(fmt.Sprintf("%s test timed out after %v", method.String(), runner.checkTimeout))
|
|
| 684 |
+ if runner.onTimeout != nil {
|
|
| 685 |
+ // run the OnTimeout callback, allowing the suite to collect any sort of debug information it can |
|
| 686 |
+ // `runFixture` is syncronous, so run this in a separate goroutine with a timeout |
|
| 687 |
+ cChan := make(chan *C) |
|
| 688 |
+ go func() {
|
|
| 689 |
+ cChan <- runner.runFixture(runner.onTimeout, c.testName, c.logb) |
|
| 690 |
+ }() |
|
| 691 |
+ select {
|
|
| 692 |
+ case <-cChan: |
|
| 693 |
+ case <-time.After(runner.checkTimeout): |
|
| 694 |
+ } |
|
| 695 |
+ } |
|
| 696 |
+ panic(timeoutErr{method, runner.checkTimeout})
|
|
| 685 | 697 |
} |
| 686 | 698 |
return c |
| 687 | 699 |
} |
| ... | ... |
@@ -777,12 +809,14 @@ func (runner *suiteRunner) forkTest(method *methodType) *C {
|
| 777 | 777 |
c.logArgPanic(c.method, "*check.C") |
| 778 | 778 |
return |
| 779 | 779 |
} |
| 780 |
+ |
|
| 780 | 781 |
if strings.HasPrefix(c.method.Info.Name, "Test") {
|
| 781 | 782 |
c.ResetTimer() |
| 782 | 783 |
c.StartTimer() |
| 783 | 784 |
c.method.Call([]reflect.Value{reflect.ValueOf(c)})
|
| 784 | 785 |
return |
| 785 | 786 |
} |
| 787 |
+ |
|
| 786 | 788 |
if !strings.HasPrefix(c.method.Info.Name, "Benchmark") {
|
| 787 | 789 |
panic("unexpected method prefix: " + c.method.Info.Name)
|
| 788 | 790 |
} |
| ... | ... |
@@ -791,6 +825,7 @@ func (runner *suiteRunner) forkTest(method *methodType) *C {
|
| 791 | 791 |
c.N = benchN |
| 792 | 792 |
c.ResetTimer() |
| 793 | 793 |
c.StartTimer() |
| 794 |
+ |
|
| 794 | 795 |
c.method.Call([]reflect.Value{reflect.ValueOf(c)})
|
| 795 | 796 |
c.StopTimer() |
| 796 | 797 |
if c.status() != succeededSt || c.duration >= c.benchTime || benchN >= 1e9 {
|
| ... | ... |
@@ -825,7 +860,19 @@ func (runner *suiteRunner) runTest(method *methodType) *C {
|
| 825 | 825 |
select {
|
| 826 | 826 |
case <-c.done: |
| 827 | 827 |
case <-timeout: |
| 828 |
- panic(fmt.Sprintf("%s test timed out after %v", method.String(), runner.checkTimeout))
|
|
| 828 |
+ if runner.onTimeout != nil {
|
|
| 829 |
+ // run the OnTimeout callback, allowing the suite to collect any sort of debug information it can |
|
| 830 |
+ // `runFixture` is syncronous, so run this in a separate goroutine with a timeout |
|
| 831 |
+ cChan := make(chan *C) |
|
| 832 |
+ go func() {
|
|
| 833 |
+ cChan <- runner.runFixture(runner.onTimeout, c.testName, c.logb) |
|
| 834 |
+ }() |
|
| 835 |
+ select {
|
|
| 836 |
+ case <-cChan: |
|
| 837 |
+ case <-time.After(runner.checkTimeout): |
|
| 838 |
+ } |
|
| 839 |
+ } |
|
| 840 |
+ panic(timeoutErr{method, runner.checkTimeout})
|
|
| 829 | 841 |
} |
| 830 | 842 |
return c |
| 831 | 843 |
} |
| ... | ... |
@@ -846,7 +893,7 @@ func (runner *suiteRunner) skipTests(status funcStatus, methods []*methodType) {
|
| 846 | 846 |
func (runner *suiteRunner) checkFixtureArgs() bool {
|
| 847 | 847 |
succeeded := true |
| 848 | 848 |
argType := reflect.TypeOf(&C{})
|
| 849 |
- for _, method := range []*methodType{runner.setUpSuite, runner.tearDownSuite, runner.setUpTest, runner.tearDownTest} {
|
|
| 849 |
+ for _, method := range []*methodType{runner.setUpSuite, runner.tearDownSuite, runner.setUpTest, runner.tearDownTest, runner.onTimeout} {
|
|
| 850 | 850 |
if method != nil {
|
| 851 | 851 |
mt := method.Type() |
| 852 | 852 |
if mt.NumIn() != 1 || mt.In(0) != argType {
|