Browse code

Use containerd client `Reconnect()` API.

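This fixes an issue where the containerd client is cached in a container
object in libcontainerd and becomes stale after containerd is restarted.
Reconnecting the existing client in place, instead of closing it and
creating a new one, keeps those cached references valid.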

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2018/03/24 03:25:53
Showing 3 changed files
... ...
@@ -114,6 +114,13 @@ type client struct {
114 114
 	containers map[string]*container
115 115
 }
116 116
 
117
+func (c *client) reconnect() error {
118
+	c.Lock()
119
+	err := c.remote.Reconnect()
120
+	c.Unlock()
121
+	return err
122
+}
123
+
117 124
 func (c *client) setRemote(remote *containerd.Client) {
118 125
 	c.Lock()
119 126
 	c.remote = remote
... ...
@@ -309,20 +309,17 @@ func (r *remote) monitorConnection(monitor *containerd.Client) {
309 309
 		}
310 310
 		<-r.daemonWaitCh
311 311
 
312
-		monitor.Close()
313 312
 		os.Remove(r.GRPC.Address)
314 313
 		if err := r.startContainerd(); err != nil {
315 314
 			r.logger.WithError(err).Error("failed restarting containerd")
316 315
 			continue
317 316
 		}
318 317
 
319
-		newMonitor, err := containerd.New(r.GRPC.Address)
320
-		if err != nil {
318
+		if err := monitor.Reconnect(); err != nil {
321 319
 			r.logger.WithError(err).Error("failed connect to containerd")
322 320
 			continue
323 321
 		}
324 322
 
325
-		monitor = newMonitor
326 323
 		var wg sync.WaitGroup
327 324
 
328 325
 		for _, c := range r.clients {
... ...
@@ -331,18 +328,12 @@ func (r *remote) monitorConnection(monitor *containerd.Client) {
331 331
 			go func(c *client) {
332 332
 				defer wg.Done()
333 333
 				c.logger.WithField("namespace", c.namespace).Debug("creating new containerd remote client")
334
-				c.remote.Close()
335
-
336
-				remote, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(c.namespace))
337
-				if err != nil {
334
+				if err := c.reconnect(); err != nil {
338 335
 					r.logger.WithError(err).Error("failed to connect to containerd")
339 336
 					// TODO: Better way to handle this?
340 337
 					// This *shouldn't* happen, but this could wind up where the daemon
341 338
 					// is not able to communicate with an eventually up containerd
342
-					return
343 339
 				}
344
-
345
-				c.setRemote(remote)
346 340
 			}(c)
347 341
 
348 342
 			wg.Wait()
... ...
@@ -16,19 +16,3 @@ func (o oomScore) Apply(r Remote) error {
16 16
 	}
17 17
 	return fmt.Errorf("WithOOMScore option not supported for this remote")
18 18
 }
19
-
20
-// WithSubreaper sets whether containerd should register itself as a
21
-// subreaper
22
-func WithSubreaper(reap bool) RemoteOption {
23
-	return subreaper(reap)
24
-}
25
-
26
-type subreaper bool
27
-
28
-func (s subreaper) Apply(r Remote) error {
29
-	if remote, ok := r.(*remote); ok {
30
-		remote.NoSubreaper = !bool(s)
31
-		return nil
32
-	}
33
-	return fmt.Errorf("WithSubreaper option not supported for this remote")
34
-}