Browse code

Fix race/deadlock in v1 plugin handlers

When a plugin is activated, and then `plugins.Handle` is called to
register a new handler for a given plugin type, a deadlock occurs when
for anything which calls `waitActive`, including `Get`, and `GetAll`.

This happens because `Handle()` is setting `activated` to `false` to
ensure that plugin handlers are run on next activation.
Maybe these handlers should be called immediately for any plugins which
are already registered... but to preserve the existing behavior while
fixing the deadlock, track if handlers have been run on plugins and
reset when a new handler is registered.

The simplest way to reproduce the deadlock with Docker is to add a `-v
/foo` to the test container created for the external graphdriver tests.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
(cherry picked from commit 2938dce794be7559ba73b4e9630015020a7fa937)
Signed-off-by: Victor Vieux <vieux@docker.com>
Signed-off-by: Vincent Demeester <vincent@sbr.pm>

Brian Goff authored on 2016/12/28 01:07:22
Showing 3 changed files
... ...
@@ -54,6 +54,10 @@ func (s *DockerExternalGraphdriverSuite) SetUpTest(c *check.C) {
54 54
 	s.d = NewDaemon(c)
55 55
 }
56 56
 
57
+func (s *DockerExternalGraphdriverSuite) OnTimeout(c *check.C) {
58
+	s.d.DumpStackAndQuit()
59
+}
60
+
57 61
 func (s *DockerExternalGraphdriverSuite) TearDownTest(c *check.C) {
58 62
 	s.d.Stop()
59 63
 	s.ds.TearDownTest(c)
60 64
new file mode 100644
... ...
@@ -0,0 +1,37 @@
0
+package plugins
1
+
2
+import (
3
+	"path/filepath"
4
+	"runtime"
5
+	"sync"
6
+	"testing"
7
+	"time"
8
+)
9
+
10
+// regression test for deadlock in handlers
11
+func TestPluginAddHandler(t *testing.T) {
12
+	// make a plugin which is pre-activated
13
+	p := &Plugin{activateWait: sync.NewCond(&sync.Mutex{})}
14
+	p.Manifest = &Manifest{Implements: []string{"bananas"}}
15
+	storage.plugins["qwerty"] = p
16
+
17
+	testActive(t, p)
18
+	Handle("bananas", func(_ string, _ *Client) {})
19
+	testActive(t, p)
20
+}
21
+
22
+func testActive(t *testing.T, p *Plugin) {
23
+	done := make(chan struct{})
24
+	go func() {
25
+		p.waitActive()
26
+		close(done)
27
+	}()
28
+
29
+	select {
30
+	case <-time.After(100 * time.Millisecond):
31
+		_, f, l, _ := runtime.Caller(1)
32
+		t.Fatalf("%s:%d: deadlock in waitActive", filepath.Base(f), l)
33
+	case <-done:
34
+	}
35
+
36
+}
... ...
@@ -70,12 +70,12 @@ type Plugin struct {
70 70
 	// Manifest of the plugin (see above)
71 71
 	Manifest *Manifest `json:"-"`
72 72
 
73
-	// error produced by activation
74
-	activateErr error
75
-	// specifies if the activation sequence is completed (not if it is successful or not)
76
-	activated bool
77 73
 	// wait for activation to finish
78 74
 	activateWait *sync.Cond
75
+	// error produced by activation
76
+	activateErr error
77
+	// keeps track of callback handlers run against this plugin
78
+	handlersRun bool
79 79
 }
80 80
 
81 81
 // BasePath returns the path to which all paths returned by the plugin are relative to.
... ...
@@ -112,19 +112,51 @@ func NewLocalPlugin(name, addr string) *Plugin {
112 112
 
113 113
 func (p *Plugin) activate() error {
114 114
 	p.activateWait.L.Lock()
115
-	if p.activated {
115
+
116
+	if p.activated() {
117
+		p.runHandlers()
116 118
 		p.activateWait.L.Unlock()
117 119
 		return p.activateErr
118 120
 	}
119 121
 
120 122
 	p.activateErr = p.activateWithLock()
121
-	p.activated = true
122 123
 
124
+	p.runHandlers()
123 125
 	p.activateWait.L.Unlock()
124 126
 	p.activateWait.Broadcast()
125 127
 	return p.activateErr
126 128
 }
127 129
 
130
+// runHandlers runs the registered handlers for the implemented plugin types
131
+// This should only be run after activation, and while the activation lock is held.
132
+func (p *Plugin) runHandlers() {
133
+	if !p.activated() {
134
+		return
135
+	}
136
+
137
+	handlers.RLock()
138
+	if !p.handlersRun {
139
+		for _, iface := range p.Manifest.Implements {
140
+			hdlrs, handled := handlers.extpointHandlers[iface]
141
+			if !handled {
142
+				continue
143
+			}
144
+			for _, handler := range hdlrs {
145
+				handler(p.name, p.client)
146
+			}
147
+		}
148
+		p.handlersRun = true
149
+	}
150
+	handlers.RUnlock()
151
+
152
+}
153
+
154
+// activated returns if the plugin has already been activated.
155
+// This should only be called with the activation lock held
156
+func (p *Plugin) activated() bool {
157
+	return p.Manifest != nil
158
+}
159
+
128 160
 func (p *Plugin) activateWithLock() error {
129 161
 	c, err := NewClient(p.Addr, p.TLSConfig)
130 162
 	if err != nil {
... ...
@@ -138,24 +170,12 @@ func (p *Plugin) activateWithLock() error {
138 138
 	}
139 139
 
140 140
 	p.Manifest = m
141
-
142
-	handlers.RLock()
143
-	for _, iface := range m.Implements {
144
-		hdlrs, handled := handlers.extpointHandlers[iface]
145
-		if !handled {
146
-			continue
147
-		}
148
-		for _, handler := range hdlrs {
149
-			handler(p.name, p.client)
150
-		}
151
-	}
152
-	handlers.RUnlock()
153 141
 	return nil
154 142
 }
155 143
 
156 144
 func (p *Plugin) waitActive() error {
157 145
 	p.activateWait.L.Lock()
158
-	for !p.activated {
146
+	for !p.activated() {
159 147
 		p.activateWait.Wait()
160 148
 	}
161 149
 	p.activateWait.L.Unlock()
... ...
@@ -163,7 +183,7 @@ func (p *Plugin) waitActive() error {
163 163
 }
164 164
 
165 165
 func (p *Plugin) implements(kind string) bool {
166
-	if err := p.waitActive(); err != nil {
166
+	if p.Manifest == nil {
167 167
 		return false
168 168
 	}
169 169
 	for _, driver := range p.Manifest.Implements {
... ...
@@ -232,7 +252,7 @@ func Get(name, imp string) (*Plugin, error) {
232 232
 	if err != nil {
233 233
 		return nil, err
234 234
 	}
235
-	if pl.implements(imp) {
235
+	if err := pl.waitActive(); err == nil && pl.implements(imp) {
236 236
 		logrus.Debugf("%s implements: %s", name, imp)
237 237
 		return pl, nil
238 238
 	}
... ...
@@ -249,9 +269,17 @@ func Handle(iface string, fn func(string, *Client)) {
249 249
 
250 250
 	hdlrs = append(hdlrs, fn)
251 251
 	handlers.extpointHandlers[iface] = hdlrs
252
+
253
+	storage.Lock()
252 254
 	for _, p := range storage.plugins {
253
-		p.activated = false
255
+		p.activateWait.L.Lock()
256
+		if p.activated() && p.implements(iface) {
257
+			p.handlersRun = false
258
+		}
259
+		p.activateWait.L.Unlock()
254 260
 	}
261
+	storage.Unlock()
262
+
255 263
 	handlers.Unlock()
256 264
 }
257 265
 
... ...
@@ -292,7 +320,7 @@ func GetAll(imp string) ([]*Plugin, error) {
292 292
 			logrus.Error(pl.err)
293 293
 			continue
294 294
 		}
295
-		if pl.pl.implements(imp) {
295
+		if err := pl.pl.waitActive(); err == nil && pl.pl.implements(imp) {
296 296
 			out = append(out, pl.pl)
297 297
 		}
298 298
 	}