This fixes an issue where the containerd client is cached in a container
object in libcontainerd and becomes stale after containerd is restarted.
Signed-off-by: Brian Goff <cpuguy83@gmail.com>
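
For context, here is a minimal, self-contained sketch of the pattern the patch moves to. It is not the moby code itself: `fakeRemote` and the demo in `main` are illustrative stand-ins for `*containerd.Client`, which in the real code exposes a `Reconnect` method that re-dials the existing address. The point is that the cached remote is reconnected in place under the client's lock, so everything already holding the pointer keeps a usable handle after containerd restarts, instead of being left with a closed, replaced client.

```go
package main

import (
	"fmt"
	"sync"
)

// fakeRemote stands in for *containerd.Client in this sketch.
type fakeRemote struct {
	address   string
	connected bool
}

// Reconnect re-dials the address the remote was originally created with.
func (r *fakeRemote) Reconnect() error {
	r.connected = true
	return nil
}

// client mirrors the shape of the libcontainerd client in the patch:
// one cached remote per namespace, guarded by a mutex.
type client struct {
	sync.Mutex
	namespace string
	remote    *fakeRemote
}

// reconnect restores the cached remote in place instead of replacing it,
// so callers that copied the pointer earlier do not end up with a stale,
// closed connection.
func (c *client) reconnect() error {
	c.Lock()
	err := c.remote.Reconnect()
	c.Unlock()
	return err
}

func main() {
	c := &client{
		namespace: "moby",
		remote:    &fakeRemote{address: "/run/containerd/containerd.sock"},
	}

	c.remote.connected = false // simulate containerd restarting underneath us

	if err := c.reconnect(); err != nil {
		fmt.Println("reconnect failed:", err)
		return
	}
	fmt.Println("cached remote usable again:", c.remote.connected)
}
```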
@@ -114,6 +114,13 @@ type client struct {
 	containers map[string]*container
 }
 
+func (c *client) reconnect() error {
+	c.Lock()
+	err := c.remote.Reconnect()
+	c.Unlock()
+	return err
+}
+
 func (c *client) setRemote(remote *containerd.Client) {
 	c.Lock()
 	c.remote = remote
@@ -309,20 +309,17 @@ func (r *remote) monitorConnection(monitor *containerd.Client) {
 		}
 		<-r.daemonWaitCh
 
-		monitor.Close()
 		os.Remove(r.GRPC.Address)
 		if err := r.startContainerd(); err != nil {
 			r.logger.WithError(err).Error("failed restarting containerd")
 			continue
 		}
 
-		newMonitor, err := containerd.New(r.GRPC.Address)
-		if err != nil {
+		if err := monitor.Reconnect(); err != nil {
 			r.logger.WithError(err).Error("failed connect to containerd")
 			continue
 		}
 
-		monitor = newMonitor
 		var wg sync.WaitGroup
 
 		for _, c := range r.clients {
@@ -331,18 +328,12 @@ func (r *remote) monitorConnection(monitor *containerd.Client) {
 			go func(c *client) {
 				defer wg.Done()
 				c.logger.WithField("namespace", c.namespace).Debug("creating new containerd remote client")
-				c.remote.Close()
-
-				remote, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(c.namespace))
-				if err != nil {
+				if err := c.reconnect(); err != nil {
 					r.logger.WithError(err).Error("failed to connect to containerd")
 					// TODO: Better way to handle this?
 					// This *shouldn't* happen, but this could wind up where the daemon
 					// is not able to communicate with an eventually up containerd
-					return
 				}
-
-				c.setRemote(remote)
 			}(c)
 
 			wg.Wait()
@@ -16,19 +16,3 @@ func (o oomScore) Apply(r Remote) error {
 	}
 	return fmt.Errorf("WithOOMScore option not supported for this remote")
 }
-
-// WithSubreaper sets whether containerd should register itself as a
-// subreaper
-func WithSubreaper(reap bool) RemoteOption {
-	return subreaper(reap)
-}
-
-type subreaper bool
-
-func (s subreaper) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.NoSubreaper = !bool(s)
-		return nil
-	}
-	return fmt.Errorf("WithSubreaper option not supported for this remote")
-}