Browse code

Merge pull request #41800 from thaJeztah/daemon_improve_logging

daemon: improve log messages during startup / shutdown

Akihiro Suda authored on 2020/12/16 09:58:32
Showing 1 changed files
... ...
@@ -239,31 +239,36 @@ func (daemon *Daemon) restore() error {
239 239
 			_ = sem.Acquire(context.Background(), 1)
240 240
 			defer sem.Release(1)
241 241
 
242
-			container, err := daemon.load(id)
242
+			log := logrus.WithField("container", id)
243
+
244
+			c, err := daemon.load(id)
243 245
 			if err != nil {
244
-				logrus.Errorf("Failed to load container %v: %v", id, err)
246
+				log.WithError(err).Error("failed to load container")
245 247
 				return
246 248
 			}
247
-			if !system.IsOSSupported(container.OS) {
248
-				logrus.Errorf("Failed to load container %v: %s (%q)", id, system.ErrNotSupportedOperatingSystem, container.OS)
249
+			if !system.IsOSSupported(c.OS) {
250
+				log.Errorf("failed to load container: %s (%q)", system.ErrNotSupportedOperatingSystem, c.OS)
249 251
 				return
250 252
 			}
251 253
 			// Ignore the container if it does not support the current driver being used by the graph
252
-			currentDriverForContainerOS := daemon.graphDrivers[container.OS]
253
-			if (container.Driver == "" && currentDriverForContainerOS == "aufs") || container.Driver == currentDriverForContainerOS {
254
-				rwlayer, err := daemon.imageService.GetLayerByID(container.ID, container.OS)
254
+			currentDriverForContainerOS := daemon.graphDrivers[c.OS]
255
+			if (c.Driver == "" && currentDriverForContainerOS == "aufs") || c.Driver == currentDriverForContainerOS {
256
+				rwlayer, err := daemon.imageService.GetLayerByID(c.ID, c.OS)
255 257
 				if err != nil {
256
-					logrus.Errorf("Failed to load container mount %v: %v", id, err)
258
+					log.WithError(err).Error("failed to load container mount")
257 259
 					return
258 260
 				}
259
-				container.RWLayer = rwlayer
260
-				logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning())
261
+				c.RWLayer = rwlayer
262
+				log.WithFields(logrus.Fields{
263
+					"running": c.IsRunning(),
264
+					"paused":  c.IsPaused(),
265
+				}).Debug("loaded container")
261 266
 
262 267
 				mapLock.Lock()
263
-				containers[container.ID] = container
268
+				containers[c.ID] = c
264 269
 				mapLock.Unlock()
265 270
 			} else {
266
-				logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
271
+				log.Debugf("cannot load container because it was created with another storage driver")
267 272
 			}
268 273
 		}(v.Name())
269 274
 	}
... ...
@@ -280,15 +285,17 @@ func (daemon *Daemon) restore() error {
280 280
 			_ = sem.Acquire(context.Background(), 1)
281 281
 			defer sem.Release(1)
282 282
 
283
+			log := logrus.WithField("container", c.ID)
284
+
283 285
 			if err := daemon.registerName(c); err != nil {
284
-				logrus.Errorf("Failed to register container name %s: %s", c.ID, err)
286
+				log.WithError(err).Errorf("failed to register container name: %s", c.Name)
285 287
 				mapLock.Lock()
286 288
 				delete(containers, c.ID)
287 289
 				mapLock.Unlock()
288 290
 				return
289 291
 			}
290 292
 			if err := daemon.Register(c); err != nil {
291
-				logrus.Errorf("Failed to register container %s: %s", c.ID, err)
293
+				log.WithError(err).Error("failed to register container")
292 294
 				mapLock.Lock()
293 295
 				delete(containers, c.ID)
294 296
 				mapLock.Unlock()
... ...
@@ -300,7 +307,7 @@ func (daemon *Daemon) restore() error {
300 300
 			// Fixes https://github.com/docker/docker/issues/22536
301 301
 			if c.HostConfig.LogConfig.Type == "" {
302 302
 				if err := daemon.mergeAndVerifyLogConfig(&c.HostConfig.LogConfig); err != nil {
303
-					logrus.Errorf("Failed to verify log config for container %s: %q", c.ID, err)
303
+					log.WithError(err).Error("failed to verify log config for container")
304 304
 				}
305 305
 			}
306 306
 		}(c)
... ...
@@ -314,17 +321,18 @@ func (daemon *Daemon) restore() error {
314 314
 			_ = sem.Acquire(context.Background(), 1)
315 315
 			defer sem.Release(1)
316 316
 
317
+			log := logrus.WithField("container", c.ID)
318
+
317 319
 			daemon.backportMountSpec(c)
318 320
 			if err := daemon.checkpointAndSave(c); err != nil {
319
-				logrus.WithError(err).WithField("container", c.ID).Error("error saving backported mountspec to disk")
321
+				log.WithError(err).Error("error saving backported mountspec to disk")
320 322
 			}
321 323
 
322 324
 			daemon.setStateCounter(c)
323 325
 
324
-			logrus.WithFields(logrus.Fields{
325
-				"container": c.ID,
326
-				"running":   c.IsRunning(),
327
-				"paused":    c.IsPaused(),
326
+			log.WithFields(logrus.Fields{
327
+				"running": c.IsRunning(),
328
+				"paused":  c.IsPaused(),
328 329
 			}).Debug("restoring container")
329 330
 
330 331
 			var (
... ...
@@ -337,18 +345,18 @@ func (daemon *Daemon) restore() error {
337 337
 
338 338
 			alive, _, process, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
339 339
 			if err != nil && !errdefs.IsNotFound(err) {
340
-				logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err)
340
+				log.WithError(err).Error("failed to restore container with containerd")
341 341
 				return
342 342
 			}
343 343
 			if !alive && process != nil {
344 344
 				ec, exitedAt, err = process.Delete(context.Background())
345 345
 				if err != nil && !errdefs.IsNotFound(err) {
346
-					logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
346
+					log.WithError(err).Error("failed to delete container from containerd")
347 347
 					return
348 348
 				}
349 349
 			} else if !daemon.configStore.LiveRestoreEnabled {
350 350
 				if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
351
-					logrus.WithError(err).WithField("container", c.ID).Error("error shutting down container")
351
+					log.WithError(err).Error("error shutting down container")
352 352
 					return
353 353
 				}
354 354
 				c.ResetRestartManager(false)
... ...
@@ -360,27 +368,23 @@ func (daemon *Daemon) restore() error {
360 360
 				if c.IsPaused() && alive {
361 361
 					s, err := daemon.containerd.Status(context.Background(), c.ID)
362 362
 					if err != nil {
363
-						logrus.WithError(err).WithField("container", c.ID).
364
-							Errorf("Failed to get container status")
363
+						log.WithError(err).Error("failed to get container status")
365 364
 					} else {
366
-						logrus.WithField("container", c.ID).WithField("state", s).
367
-							Info("restored container paused")
365
+						log.WithField("state", s).Info("restored container paused")
368 366
 						switch s {
369 367
 						case containerd.Paused, containerd.Pausing:
370 368
 							// nothing to do
371 369
 						case containerd.Stopped:
372 370
 							alive = false
373 371
 						case containerd.Unknown:
374
-							logrus.WithField("container", c.ID).
375
-								Error("Unknown status for container during restore")
372
+							log.Error("unknown status for paused container during restore")
376 373
 						default:
377 374
 							// running
378 375
 							c.Lock()
379 376
 							c.Paused = false
380 377
 							daemon.setStateCounter(c)
381 378
 							if err := c.CheckpointTo(daemon.containersReplica); err != nil {
382
-								logrus.WithError(err).WithField("container", c.ID).
383
-									Error("Failed to update stopped container state")
379
+								log.WithError(err).Error("failed to update paused container state")
384 380
 							}
385 381
 							c.Unlock()
386 382
 						}
... ...
@@ -392,7 +396,7 @@ func (daemon *Daemon) restore() error {
392 392
 					c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
393 393
 					daemon.Cleanup(c)
394 394
 					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
395
-						logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err)
395
+						log.WithError(err).Error("failed to update stopped container state")
396 396
 					}
397 397
 					c.Unlock()
398 398
 				}
... ...
@@ -405,10 +409,10 @@ func (daemon *Daemon) restore() error {
405 405
 					// stopped/restarted/removed.
406 406
 					// See #29365 for related information.
407 407
 					// The error is only logged here.
408
-					logrus.Warnf("Failed to mount container on getting BaseFs path %v: %v", c.ID, err)
408
+					log.WithError(err).Warn("failed to mount container to get BaseFs path")
409 409
 				} else {
410 410
 					if err := daemon.Unmount(c); err != nil {
411
-						logrus.Warnf("Failed to umount container on getting BaseFs path %v: %v", c.ID, err)
411
+						log.WithError(err).Warn("failed to umount container to get BaseFs path")
412 412
 					}
413 413
 				}
414 414
 
... ...
@@ -416,7 +420,7 @@ func (daemon *Daemon) restore() error {
416 416
 				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
417 417
 					options, err := daemon.buildSandboxOptions(c)
418 418
 					if err != nil {
419
-						logrus.Warnf("Failed build sandbox option to restore container %s: %v", c.ID, err)
419
+						log.WithError(err).Warn("failed to build sandbox option to restore container")
420 420
 					}
421 421
 					mapLock.Lock()
422 422
 					activeSandboxes[c.NetworkSettings.SandboxID] = options
... ...
@@ -452,11 +456,12 @@ func (daemon *Daemon) restore() error {
452 452
 				// associated volumes, network links or both to also
453 453
 				// be removed. So we put the container in the "dead"
454 454
 				// state and leave further processing up to them.
455
-				logrus.Debugf("Resetting RemovalInProgress flag from %v", c.ID)
456 455
 				c.RemovalInProgress = false
457 456
 				c.Dead = true
458 457
 				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
459
-					logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err)
458
+					log.WithError(err).Error("failed to update RemovalInProgress container state")
459
+				} else {
460
+					log.Debugf("reset RemovalInProgress state for container")
460 461
 				}
461 462
 			}
462 463
 			c.Unlock()
... ...
@@ -476,7 +481,7 @@ func (daemon *Daemon) restore() error {
476 476
 			_ = sem.Acquire(context.Background(), 1)
477 477
 
478 478
 			if err := daemon.registerLinks(c, c.HostConfig); err != nil {
479
-				logrus.Errorf("failed to register link for container %s: %v", c.ID, err)
479
+				logrus.WithField("container", c.ID).WithError(err).Error("failed to register link for container")
480 480
 			}
481 481
 
482 482
 			sem.Release(1)
... ...
@@ -489,7 +494,10 @@ func (daemon *Daemon) restore() error {
489 489
 		group.Add(1)
490 490
 		go func(c *container.Container, chNotify chan struct{}) {
491 491
 			_ = sem.Acquire(context.Background(), 1)
492
-			logrus.Debugf("Starting container %s", c.ID)
492
+
493
+			log := logrus.WithField("container", c.ID)
494
+
495
+			log.Debug("starting container")
493 496
 
494 497
 			// ignore errors here as this is a best effort to wait for children to be
495 498
 			//   running before we try to start the container
... ...
@@ -509,7 +517,7 @@ func (daemon *Daemon) restore() error {
509 509
 			// Make sure networks are available before starting
510 510
 			daemon.waitForNetworks(c)
511 511
 			if err := daemon.containerStart(c, "", "", true); err != nil {
512
-				logrus.Errorf("Failed to start container %s: %s", c.ID, err)
512
+				log.WithError(err).Error("failed to start container")
513 513
 			}
514 514
 			close(chNotify)
515 515
 
... ...
@@ -525,7 +533,7 @@ func (daemon *Daemon) restore() error {
525 525
 			_ = sem.Acquire(context.Background(), 1)
526 526
 
527 527
 			if err := daemon.ContainerRm(cid, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
528
-				logrus.Errorf("Failed to remove container %s: %s", cid, err)
528
+				logrus.WithField("container", cid).WithError(err).Error("failed to remove container")
529 529
 			}
530 530
 
531 531
 			sem.Release(1)
... ...
@@ -556,7 +564,7 @@ func (daemon *Daemon) restore() error {
556 556
 			_ = sem.Acquire(context.Background(), 1)
557 557
 
558 558
 			if err := daemon.prepareMountPoints(c); err != nil {
559
-				logrus.Error(err)
559
+				logrus.WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
560 560
 			}
561 561
 
562 562
 			sem.Release(1)
... ...
@@ -600,7 +608,7 @@ func (daemon *Daemon) RestartSwarmContainers() {
600 600
 					}
601 601
 
602 602
 					if err := daemon.containerStart(c, "", "", true); err != nil {
603
-						logrus.Error(err)
603
+						logrus.WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
604 604
 					}
605 605
 
606 606
 					sem.Release(1)
... ...
@@ -634,7 +642,7 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
634 634
 			dur := 60 * time.Second
635 635
 			timer := time.NewTimer(dur)
636 636
 
637
-			logrus.Debugf("Container %s waiting for network to be ready", c.Name)
637
+			logrus.WithField("container", c.ID).Debugf("Container %s waiting for network to be ready", c.Name)
638 638
 			select {
639 639
 			case <-daemon.discoveryWatcher.ReadyCh():
640 640
 			case <-timer.C:
... ...
@@ -1228,15 +1236,16 @@ func (daemon *Daemon) Shutdown() error {
1228 1228
 			if !c.IsRunning() {
1229 1229
 				return
1230 1230
 			}
1231
-			logrus.Debugf("stopping %s", c.ID)
1231
+			log := logrus.WithField("container", c.ID)
1232
+			log.Debug("shutting down container")
1232 1233
 			if err := daemon.shutdownContainer(c); err != nil {
1233
-				logrus.Errorf("Stop container error: %v", err)
1234
+				log.WithError(err).Error("failed to shut down container")
1234 1235
 				return
1235 1236
 			}
1236 1237
 			if mountid, err := daemon.imageService.GetLayerMountID(c.ID, c.OS); err == nil {
1237 1238
 				daemon.cleanupMountsByID(mountid)
1238 1239
 			}
1239
-			logrus.Debugf("container stopped %s", c.ID)
1240
+			log.Debugf("shut down container")
1240 1241
 		})
1241 1242
 	}
1242 1243
 
... ...
@@ -1287,7 +1296,7 @@ func (daemon *Daemon) Mount(container *container.Container) error {
1287 1287
 	if err != nil {
1288 1288
 		return err
1289 1289
 	}
1290
-	logrus.Debugf("container mounted via layerStore: %v", dir)
1290
+	logrus.WithField("container", container.ID).Debugf("container mounted via layerStore: %v", dir)
1291 1291
 
1292 1292
 	if container.BaseFS != nil && container.BaseFS.Path() != dir.Path() {
1293 1293
 		// The mount path reported by the graph driver should always be trusted on Windows, since the
... ...
@@ -1309,7 +1318,7 @@ func (daemon *Daemon) Unmount(container *container.Container) error {
1309 1309
 		return errors.New("RWLayer of container " + container.ID + " is unexpectedly nil")
1310 1310
 	}
1311 1311
 	if err := container.RWLayer.Unmount(); err != nil {
1312
-		logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
1312
+		logrus.WithField("container", container.ID).WithError(err).Error("error unmounting container")
1313 1313
 		return err
1314 1314
 	}
1315 1315