Browse code

Fix panic in loading plugins

When a plugin is first found, it is loaded into the available plugins
even though it's not activated yet.
If activation fails it is taken out of the list.
While it is in the list, other callers may see it and try to check it's
manifest. If it is not fully activated yet, the manifest will be nil and
cause a panic.

This is especially problematic for drivers that are down and have not
been activated yet.

We could just not load the plugin into the available list until it's
fully active, however that will just cause multiple of the same plugin
to attemp to be loaded.

We could check if the manifest is nil and return early (instead of
panicing on a nil manifest), but this will cause a 2nd caller to receive
a response while the first caller is still waiting, which can be
awkward.

This change uses a condition variable to handle activation (instead of
sync.Once). If the plugin is not activated, callers will all wait until
it is activated and receive a broadcast from the condition variable
signaling that it's ok to proceed, in which case we'll check if their
was an error in activation and proceed accordingly.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2016/03/24 04:25:15
Showing 2 changed files
... ...
@@ -130,7 +130,7 @@ func (c *Client) callWithRetry(serviceMethod string, data io.Reader, retry bool)
130 130
 				return nil, err
131 131
 			}
132 132
 			retries++
133
-			logrus.Warnf("Unable to connect to plugin: %s, retrying in %v", req.URL, timeOff)
133
+			logrus.Warnf("Unable to connect to plugin: %s:%s, retrying in %v", req.URL.Host, req.URL.Path, timeOff)
134 134
 			time.Sleep(timeOff)
135 135
 			continue
136 136
 		}
... ...
@@ -65,23 +65,36 @@ type Plugin struct {
65 65
 	// Manifest of the plugin (see above)
66 66
 	Manifest *Manifest `json:"-"`
67 67
 
68
-	activatErr   error
69
-	activateOnce sync.Once
68
+	// error produced by activation
69
+	activateErr error
70
+	// specifies if the activation sequence is completed (not if it is sucessful or not)
71
+	activated bool
72
+	// wait for activation to finish
73
+	activateWait *sync.Cond
70 74
 }
71 75
 
72 76
 func newLocalPlugin(name, addr string) *Plugin {
73 77
 	return &Plugin{
74
-		Name:      name,
75
-		Addr:      addr,
76
-		TLSConfig: tlsconfig.Options{InsecureSkipVerify: true},
78
+		Name:         name,
79
+		Addr:         addr,
80
+		TLSConfig:    tlsconfig.Options{InsecureSkipVerify: true},
81
+		activateWait: sync.NewCond(&sync.Mutex{}),
77 82
 	}
78 83
 }
79 84
 
80 85
 func (p *Plugin) activate() error {
81
-	p.activateOnce.Do(func() {
82
-		p.activatErr = p.activateWithLock()
83
-	})
84
-	return p.activatErr
86
+	p.activateWait.L.Lock()
87
+	if p.activated {
88
+		p.activateWait.L.Unlock()
89
+		return p.activateErr
90
+	}
91
+
92
+	p.activateErr = p.activateWithLock()
93
+	p.activated = true
94
+
95
+	p.activateWait.L.Unlock()
96
+	p.activateWait.Broadcast()
97
+	return p.activateErr
85 98
 }
86 99
 
87 100
 func (p *Plugin) activateWithLock() error {
... ...
@@ -108,7 +121,19 @@ func (p *Plugin) activateWithLock() error {
108 108
 	return nil
109 109
 }
110 110
 
111
+func (p *Plugin) waitActive() error {
112
+	p.activateWait.L.Lock()
113
+	for !p.activated {
114
+		p.activateWait.Wait()
115
+	}
116
+	p.activateWait.L.Unlock()
117
+	return p.activateErr
118
+}
119
+
111 120
 func (p *Plugin) implements(kind string) bool {
121
+	if err := p.waitActive(); err != nil {
122
+		return false
123
+	}
112 124
 	for _, driver := range p.Manifest.Implements {
113 125
 		if driver == kind {
114 126
 			return true
... ...
@@ -221,7 +246,7 @@ func GetAll(imp string) ([]*Plugin, error) {
221 221
 	var out []*Plugin
222 222
 	for pl := range chPl {
223 223
 		if pl.err != nil {
224
-			logrus.Error(err)
224
+			logrus.Error(pl.err)
225 225
 			continue
226 226
 		}
227 227
 		if pl.pl.implements(imp) {