Browse code

Merge pull request #25143 from cpuguy83/update_go-check

Add stack dumps from the daemon(s) on test timeout

John Howard authored on 2016/08/16 14:09:20
Showing 7 changed files
... ...
@@ -47,7 +47,7 @@ clone git github.com/Microsoft/hcsshim v0.3.6
47 47
 clone git github.com/Microsoft/go-winio v0.3.4
48 48
 clone git github.com/Sirupsen/logrus v0.10.0 # logrus is a common dependency among multiple deps
49 49
 clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
50
-clone git github.com/go-check/check 03a4d9dcf2f92eae8e90ed42aa2656f63fdd0b14 https://github.com/cpuguy83/check.git
50
+clone git github.com/go-check/check 4ed411733c5785b40214c70bce814c3a3a689609 https://github.com/cpuguy83/check.git
51 51
 clone git github.com/gorilla/context v1.1
52 52
 clone git github.com/gorilla/mux v1.1
53 53
 clone git github.com/kr/pty 5cf931ef8f
... ...
@@ -33,6 +33,12 @@ func init() {
33 33
 type DockerSuite struct {
34 34
 }
35 35
 
36
+func (s *DockerSuite) OnTimeout(c *check.C) {
37
+	if daemonPid > 0 && isLocalDaemon {
38
+		signalDaemonDump(daemonPid)
39
+	}
40
+}
41
+
36 42
 func (s *DockerSuite) TearDownTest(c *check.C) {
37 43
 	unpauseAllContainers()
38 44
 	deleteAllContainers()
... ...
@@ -53,6 +59,10 @@ type DockerRegistrySuite struct {
53 53
 	d   *Daemon
54 54
 }
55 55
 
56
+func (s *DockerRegistrySuite) OnTimeout(c *check.C) {
57
+	s.d.DumpStackAndQuit()
58
+}
59
+
56 60
 func (s *DockerRegistrySuite) SetUpTest(c *check.C) {
57 61
 	testRequires(c, DaemonIsLinux, RegistryHosting)
58 62
 	s.reg = setupRegistry(c, false, "", "")
... ...
@@ -81,6 +91,10 @@ type DockerSchema1RegistrySuite struct {
81 81
 	d   *Daemon
82 82
 }
83 83
 
84
+func (s *DockerSchema1RegistrySuite) OnTimeout(c *check.C) {
85
+	s.d.DumpStackAndQuit()
86
+}
87
+
84 88
 func (s *DockerSchema1RegistrySuite) SetUpTest(c *check.C) {
85 89
 	testRequires(c, DaemonIsLinux, RegistryHosting, NotArm64)
86 90
 	s.reg = setupRegistry(c, true, "", "")
... ...
@@ -109,6 +123,10 @@ type DockerRegistryAuthHtpasswdSuite struct {
109 109
 	d   *Daemon
110 110
 }
111 111
 
112
+func (s *DockerRegistryAuthHtpasswdSuite) OnTimeout(c *check.C) {
113
+	s.d.DumpStackAndQuit()
114
+}
115
+
112 116
 func (s *DockerRegistryAuthHtpasswdSuite) SetUpTest(c *check.C) {
113 117
 	testRequires(c, DaemonIsLinux, RegistryHosting)
114 118
 	s.reg = setupRegistry(c, false, "htpasswd", "")
... ...
@@ -139,6 +157,10 @@ type DockerRegistryAuthTokenSuite struct {
139 139
 	d   *Daemon
140 140
 }
141 141
 
142
+func (s *DockerRegistryAuthTokenSuite) OnTimeout(c *check.C) {
143
+	s.d.DumpStackAndQuit()
144
+}
145
+
142 146
 func (s *DockerRegistryAuthTokenSuite) SetUpTest(c *check.C) {
143 147
 	testRequires(c, DaemonIsLinux, RegistryHosting)
144 148
 	s.d = NewDaemon(c)
... ...
@@ -174,6 +196,10 @@ type DockerDaemonSuite struct {
174 174
 	d  *Daemon
175 175
 }
176 176
 
177
+func (s *DockerDaemonSuite) OnTimeout(c *check.C) {
178
+	s.d.DumpStackAndQuit()
179
+}
180
+
177 181
 func (s *DockerDaemonSuite) SetUpTest(c *check.C) {
178 182
 	testRequires(c, DaemonIsLinux)
179 183
 	s.d = NewDaemon(c)
... ...
@@ -217,6 +243,14 @@ type DockerSwarmSuite struct {
217 217
 	portIndex   int
218 218
 }
219 219
 
220
+func (s *DockerSwarmSuite) OnTimeout(c *check.C) {
221
+	s.daemonsLock.Lock()
222
+	defer s.daemonsLock.Unlock()
223
+	for _, d := range s.daemons {
224
+		d.DumpStackAndQuit()
225
+	}
226
+}
227
+
220 228
 func (s *DockerSwarmSuite) SetUpTest(c *check.C) {
221 229
 	testRequires(c, DaemonIsLinux)
222 230
 }
... ...
@@ -273,6 +273,16 @@ func (d *Daemon) Kill() error {
273 273
 	return nil
274 274
 }
275 275
 
276
+// DumpStackAndQuit sends SIGQUIT to the daemon, which triggers it to dump its
277
+// stack to its log file and exit
278
+// This is used primarily for gathering debug information on test timeout
279
+func (d *Daemon) DumpStackAndQuit() {
280
+	if d.cmd == nil || d.cmd.Process == nil {
281
+		return
282
+	}
283
+	signalDaemonDump(d.cmd.Process.Pid)
284
+}
285
+
276 286
 // Stop will send a SIGINT every second and wait for the daemon to stop.
277 287
 // If it timeouts, a SIGKILL is sent.
278 288
 // Stop will not delete the daemon directory. If a purged daemon is needed,
279 289
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+// +build !windows
1
+
2
+package main
3
+
4
+import "syscall"
5
+
6
+func signalDaemonDump(pid int) {
7
+	syscall.Kill(pid, syscall.SIGQUIT)
8
+}
0 9
new file mode 100644
... ...
@@ -0,0 +1,42 @@
0
+package main
1
+
2
+import (
3
+	"strconv"
4
+	"syscall"
5
+	"unsafe"
6
+)
7
+
8
+func openEvent(desiredAccess uint32, inheritHandle bool, name string, proc *syscall.LazyProc) (handle syscall.Handle, err error) {
9
+	namep, _ := syscall.UTF16PtrFromString(name)
10
+	var _p2 uint32
11
+	if inheritHandle {
12
+		_p2 = 1
13
+	}
14
+	r0, _, e1 := proc.Call(uintptr(desiredAccess), uintptr(_p2), uintptr(unsafe.Pointer(namep)))
15
+	handle = syscall.Handle(r0)
16
+	if handle == syscall.InvalidHandle {
17
+		err = e1
18
+	}
19
+	return
20
+}
21
+
22
+func pulseEvent(handle syscall.Handle, proc *syscall.LazyProc) (err error) {
23
+	r0, _, _ := proc.Call(uintptr(handle))
24
+	if r0 != 0 {
25
+		err = syscall.Errno(r0)
26
+	}
27
+	return
28
+}
29
+
30
+func signalDaemonDump(pid int) {
31
+	modkernel32 := syscall.NewLazyDLL("kernel32.dll")
32
+	procOpenEvent := modkernel32.NewProc("OpenEventW")
33
+	procPulseEvent := modkernel32.NewProc("PulseEvent")
34
+
35
+	ev := "Global\\docker-daemon-" + strconv.Itoa(pid)
36
+	h2, _ := openEvent(0x0002, false, ev, procOpenEvent)
37
+	if h2 == 0 {
38
+		return
39
+	}
40
+	pulseEvent(h2, procPulseEvent)
41
+}
... ...
@@ -3,8 +3,11 @@ package main
3 3
 import (
4 4
 	"encoding/json"
5 5
 	"fmt"
6
+	"io/ioutil"
6 7
 	"os"
7 8
 	"os/exec"
9
+	"path/filepath"
10
+	"strconv"
8 11
 
9 12
 	"github.com/docker/docker/pkg/reexec"
10 13
 )
... ...
@@ -65,6 +68,9 @@ var (
65 65
 	// WindowsBaseImage is the name of the base image for Windows testing
66 66
 	// Environment variable WINDOWS_BASE_IMAGE can override this
67 67
 	WindowsBaseImage = "windowsservercore"
68
+
69
+	// daemonPid is the pid of the main test daemon
70
+	daemonPid int
68 71
 )
69 72
 
70 73
 const (
... ...
@@ -134,4 +140,12 @@ func init() {
134 134
 		WindowsBaseImage = os.Getenv("WINDOWS_BASE_IMAGE")
135 135
 		fmt.Println("INFO: Windows Base image is ", WindowsBaseImage)
136 136
 	}
137
+
138
+	dest := os.Getenv("DEST")
139
+	b, err = ioutil.ReadFile(filepath.Join(dest, "docker.pid"))
140
+	if err == nil {
141
+		if p, err := strconv.ParseInt(string(b), 10, 32); err == nil {
142
+			daemonPid = int(p)
143
+		}
144
+	}
137 145
 }
... ...
@@ -514,6 +514,7 @@ type suiteRunner struct {
514 514
 	suite                     interface{}
515 515
 	setUpSuite, tearDownSuite *methodType
516 516
 	setUpTest, tearDownTest   *methodType
517
+	onTimeout                 *methodType
517 518
 	tests                     []*methodType
518 519
 	tracker                   *resultTracker
519 520
 	tempDir                   *tempDir
... ...
@@ -591,6 +592,8 @@ func newSuiteRunner(suite interface{}, runConf *RunConf) *suiteRunner {
591 591
 			runner.setUpTest = method
592 592
 		case "TearDownTest":
593 593
 			runner.tearDownTest = method
594
+		case "OnTimeout":
595
+			runner.onTimeout = method
594 596
 		default:
595 597
 			prefix := "Test"
596 598
 			if conf.Benchmark {
... ...
@@ -671,6 +674,23 @@ func (runner *suiteRunner) forkCall(method *methodType, kind funcKind, testName
671 671
 	return c
672 672
 }
673 673
 
674
+type timeoutErr struct {
675
+	method *methodType
676
+	t      time.Duration
677
+}
678
+
679
+func (e timeoutErr) Error() string {
680
+	return fmt.Sprintf("%s test timed out after %v", e.method.String(), e.t)
681
+}
682
+
683
+func isTimeout(e error) bool {
684
+	if e == nil {
685
+		return false
686
+	}
687
+	_, ok := e.(timeoutErr)
688
+	return ok
689
+}
690
+
674 691
 // Same as forkCall(), but wait for call to finish before returning.
675 692
 func (runner *suiteRunner) runFunc(method *methodType, kind funcKind, testName string, logb *logger, dispatcher func(c *C)) *C {
676 693
 	var timeout <-chan time.Time
... ...
@@ -681,7 +701,19 @@ func (runner *suiteRunner) runFunc(method *methodType, kind funcKind, testName s
681 681
 	select {
682 682
 	case <-c.done:
683 683
 	case <-timeout:
684
-		panic(fmt.Sprintf("%s test timed out after %v", method.String(), runner.checkTimeout))
684
+		if runner.onTimeout != nil {
685
+			// run the OnTimeout callback, allowing the suite to collect any sort of debug information it can
686
+			// `runFixture` is syncronous, so run this in a separate goroutine with a timeout
687
+			cChan := make(chan *C)
688
+			go func() {
689
+				cChan <- runner.runFixture(runner.onTimeout, c.testName, c.logb)
690
+			}()
691
+			select {
692
+			case <-cChan:
693
+			case <-time.After(runner.checkTimeout):
694
+			}
695
+		}
696
+		panic(timeoutErr{method, runner.checkTimeout})
685 697
 	}
686 698
 	return c
687 699
 }
... ...
@@ -777,12 +809,14 @@ func (runner *suiteRunner) forkTest(method *methodType) *C {
777 777
 				c.logArgPanic(c.method, "*check.C")
778 778
 				return
779 779
 			}
780
+
780 781
 			if strings.HasPrefix(c.method.Info.Name, "Test") {
781 782
 				c.ResetTimer()
782 783
 				c.StartTimer()
783 784
 				c.method.Call([]reflect.Value{reflect.ValueOf(c)})
784 785
 				return
785 786
 			}
787
+
786 788
 			if !strings.HasPrefix(c.method.Info.Name, "Benchmark") {
787 789
 				panic("unexpected method prefix: " + c.method.Info.Name)
788 790
 			}
... ...
@@ -791,6 +825,7 @@ func (runner *suiteRunner) forkTest(method *methodType) *C {
791 791
 			c.N = benchN
792 792
 			c.ResetTimer()
793 793
 			c.StartTimer()
794
+
794 795
 			c.method.Call([]reflect.Value{reflect.ValueOf(c)})
795 796
 			c.StopTimer()
796 797
 			if c.status() != succeededSt || c.duration >= c.benchTime || benchN >= 1e9 {
... ...
@@ -825,7 +860,19 @@ func (runner *suiteRunner) runTest(method *methodType) *C {
825 825
 	select {
826 826
 	case <-c.done:
827 827
 	case <-timeout:
828
-		panic(fmt.Sprintf("%s test timed out after %v", method.String(), runner.checkTimeout))
828
+		if runner.onTimeout != nil {
829
+			// run the OnTimeout callback, allowing the suite to collect any sort of debug information it can
830
+			// `runFixture` is syncronous, so run this in a separate goroutine with a timeout
831
+			cChan := make(chan *C)
832
+			go func() {
833
+				cChan <- runner.runFixture(runner.onTimeout, c.testName, c.logb)
834
+			}()
835
+			select {
836
+			case <-cChan:
837
+			case <-time.After(runner.checkTimeout):
838
+			}
839
+		}
840
+		panic(timeoutErr{method, runner.checkTimeout})
829 841
 	}
830 842
 	return c
831 843
 }
... ...
@@ -846,7 +893,7 @@ func (runner *suiteRunner) skipTests(status funcStatus, methods []*methodType) {
846 846
 func (runner *suiteRunner) checkFixtureArgs() bool {
847 847
 	succeeded := true
848 848
 	argType := reflect.TypeOf(&C{})
849
-	for _, method := range []*methodType{runner.setUpSuite, runner.tearDownSuite, runner.setUpTest, runner.tearDownTest} {
849
+	for _, method := range []*methodType{runner.setUpSuite, runner.tearDownSuite, runner.setUpTest, runner.tearDownTest, runner.onTimeout} {
850 850
 		if method != nil {
851 851
 			mt := method.Type()
852 852
 			if mt.NumIn() != 1 || mt.In(0) != argType {