Browse code

Windows: Fix handle leaks/logging if init proc start fails

Signed-off-by: John Howard <jhoward@microsoft.com>

Fixes #38719

Fixes some subtle bugs on Windows

- Fixes https://github.com/moby/moby/issues/38719. This one is the most important
as failure to start the init process in a Windows container will cause leaked
handles. (ie where the `ctr.hcsContainer.CreateProcess(...)` call fails).
The solution to the leak is to split out the `reapContainer` part of `reapProcess`
into a separate function. This ensures HCS resources are cleaned up correctly and
not leaked.

- Ensuring the reapProcess goroutine is started immediately the process
is actually started, so we don't leak in the case of failures such as
from `newIOFromProcess` or `attachStdio`

- libcontainerd on Windows (local, not containerd) was not sending the EventCreate
back to the monitor on Windows. Just LCOW. This was just an oversight from
refactoring a couple of years ago by Mikael as far as I can tell. Technically
not needed for functionality except for the logging being missing, but is correct.

John Howard authored on 2019/02/13 05:19:50
Showing 2 changed files
... ...
@@ -133,7 +133,8 @@ func translateContainerdStartErr(cmd string, setExitCode func(int), err error) e
133 133
 	if contains(errDesc, cmd) &&
134 134
 		(contains(errDesc, "executable file not found") ||
135 135
 			contains(errDesc, "no such file or directory") ||
136
-			contains(errDesc, "system cannot find the file specified")) {
136
+			contains(errDesc, "system cannot find the file specified") ||
137
+			contains(errDesc, "failed to run runc create/exec call")) {
137 138
 		setExitCode(127)
138 139
 		retErr = startInvalidConfigError(errDesc)
139 140
 	}
... ...
@@ -157,17 +157,32 @@ func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeO
157 157
 		return errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
158 158
 	}
159 159
 
160
-	// spec.Linux must be nil for Windows containers, but spec.Windows
161
-	// will be filled in regardless of container platform.  This is a
162
-	// temporary workaround due to LCOW requiring layer folder paths,
163
-	// which are stored under spec.Windows.
164
-	//
165
-	// TODO: @darrenstahlmsft fix this once the OCI spec is updated to
166
-	// support layer folder paths for LCOW
160
+	var err error
167 161
 	if spec.Linux == nil {
168
-		return c.createWindows(id, spec, runtimeOptions)
162
+		err = c.createWindows(id, spec, runtimeOptions)
163
+	} else {
164
+		err = c.createLinux(id, spec, runtimeOptions)
169 165
 	}
170
-	return c.createLinux(id, spec, runtimeOptions)
166
+
167
+	if err == nil {
168
+		c.eventQ.Append(id, func() {
169
+			ei := libcontainerdtypes.EventInfo{
170
+				ContainerID: id,
171
+			}
172
+			c.logger.WithFields(logrus.Fields{
173
+				"container": id,
174
+				"event":     libcontainerdtypes.EventCreate,
175
+			}).Info("sending event")
176
+			err := c.backend.ProcessEvent(id, libcontainerdtypes.EventCreate, ei)
177
+			if err != nil {
178
+				c.logger.WithError(err).WithFields(logrus.Fields{
179
+					"container": id,
180
+					"event":     libcontainerdtypes.EventCreate,
181
+				}).Error("failed to process event")
182
+			}
183
+		})
184
+	}
185
+	return err
171 186
 }
172 187
 
173 188
 func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
... ...
@@ -561,23 +576,6 @@ func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interfa
561 561
 	c.containers[id] = ctr
562 562
 	c.Unlock()
563 563
 
564
-	c.eventQ.Append(id, func() {
565
-		ei := libcontainerdtypes.EventInfo{
566
-			ContainerID: id,
567
-		}
568
-		c.logger.WithFields(logrus.Fields{
569
-			"container": ctr.id,
570
-			"event":     libcontainerdtypes.EventCreate,
571
-		}).Info("sending event")
572
-		err := c.backend.ProcessEvent(id, libcontainerdtypes.EventCreate, ei)
573
-		if err != nil {
574
-			c.logger.WithError(err).WithFields(logrus.Fields{
575
-				"container": id,
576
-				"event":     libcontainerdtypes.EventCreate,
577
-			}).Error("failed to process event")
578
-		}
579
-	})
580
-
581 564
 	logger.Debug("createLinux() completed successfully")
582 565
 	return nil
583 566
 }
... ...
@@ -655,7 +653,9 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
655 655
 
656 656
 	// Configure the CommandLine/CommandArgs
657 657
 	setCommandLineAndArgs(ctr.isWindows, ctr.ociSpec.Process, createProcessParms)
658
-	logger.Debugf("start commandLine: %s", createProcessParms.CommandLine)
658
+	if ctr.isWindows {
659
+		logger.Debugf("start commandLine: %s", createProcessParms.CommandLine)
660
+	}
659 661
 
660 662
 	createProcessParms.User = ctr.ociSpec.Process.User.Username
661 663
 
... ...
@@ -671,14 +671,31 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
671 671
 	}
672 672
 
673 673
 	ctr.Lock()
674
-	defer ctr.Unlock()
675 674
 
676 675
 	// Start the command running in the container.
677 676
 	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
678 677
 	if err != nil {
679 678
 		logger.WithError(err).Error("CreateProcess() failed")
679
+		// Fix for https://github.com/moby/moby/issues/38719.
680
+		// If the init process failed to launch, we still need to reap the
681
+		// container to avoid leaking it.
682
+		//
683
+		// Note we use the explicit exit code of 127 which is the
684
+		// Linux shell equivalent of "command not found". Windows cannot
685
+		// know ahead of time whether or not the command exists, especially
686
+		// in the case of Hyper-V containers.
687
+		ctr.Unlock()
688
+		exitedAt := time.Now()
689
+		p := &process{
690
+			id:  libcontainerdtypes.InitProcessName,
691
+			pid: 0,
692
+		}
693
+		c.reapContainer(ctr, p, 127, exitedAt, nil, logger)
680 694
 		return -1, err
681 695
 	}
696
+
697
+	defer ctr.Unlock()
698
+
682 699
 	defer func() {
683 700
 		if err != nil {
684 701
 			if err := newProcess.Kill(); err != nil {
... ...
@@ -701,6 +718,12 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
701 701
 	}
702 702
 	logger.WithField("pid", p.pid).Debug("init process started")
703 703
 
704
+	ctr.status = libcontainerdtypes.StatusRunning
705
+	ctr.init = p
706
+
707
+	// Spin up a go routine waiting for exit to handle cleanup
708
+	go c.reapProcess(ctr, p)
709
+
704 710
 	dio, err := newIOFromProcess(newProcess, ctr.ociSpec.Process.Terminal)
705 711
 	if err != nil {
706 712
 		logger.WithError(err).Error("failed to get stdio pipes")
... ...
@@ -708,14 +731,9 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
708 708
 	}
709 709
 	_, err = attachStdio(dio)
710 710
 	if err != nil {
711
-		logger.WithError(err).Error("failed to attache stdio")
711
+		logger.WithError(err).Error("failed to attach stdio")
712 712
 		return -1, err
713 713
 	}
714
-	ctr.status = libcontainerdtypes.StatusRunning
715
-	ctr.init = p
716
-
717
-	// Spin up a go routine waiting for exit to handle cleanup
718
-	go c.reapProcess(ctr, p)
719 714
 
720 715
 	// Generate the associated event
721 716
 	c.eventQ.Append(id, func() {
... ...
@@ -1326,37 +1344,7 @@ func (c *client) reapProcess(ctr *container, p *process) int {
1326 1326
 	}
1327 1327
 
1328 1328
 	if p.id == libcontainerdtypes.InitProcessName {
1329
-		// Update container status
1330
-		ctr.Lock()
1331
-		ctr.status = libcontainerdtypes.StatusStopped
1332
-		ctr.exitedAt = exitedAt
1333
-		ctr.exitCode = uint32(exitCode)
1334
-		close(ctr.waitCh)
1335
-
1336
-		if err := c.shutdownContainer(ctr); err != nil {
1337
-			exitCode = -1
1338
-			logger.WithError(err).Warn("failed to shutdown container")
1339
-			thisErr := fmt.Errorf("failed to shutdown container: %s", err)
1340
-			if eventErr != nil {
1341
-				eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
1342
-			} else {
1343
-				eventErr = thisErr
1344
-			}
1345
-		} else {
1346
-			logger.Debug("completed container shutdown")
1347
-		}
1348
-		ctr.Unlock()
1349
-
1350
-		if err := ctr.hcsContainer.Close(); err != nil {
1351
-			exitCode = -1
1352
-			logger.WithError(err).Error("failed to clean hcs container resources")
1353
-			thisErr := fmt.Errorf("failed to terminate container: %s", err)
1354
-			if eventErr != nil {
1355
-				eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
1356
-			} else {
1357
-				eventErr = thisErr
1358
-			}
1359
-		}
1329
+		exitCode, eventErr = c.reapContainer(ctr, p, exitCode, exitedAt, eventErr, logger)
1360 1330
 	}
1361 1331
 
1362 1332
 	c.eventQ.Append(ctr.id, func() {
... ...
@@ -1390,3 +1378,40 @@ func (c *client) reapProcess(ctr *container, p *process) int {
1390 1390
 
1391 1391
 	return exitCode
1392 1392
 }
1393
+
1394
+// reapContainer shuts down the container and releases associated resources. It returns
1395
+// the error to be logged in the eventInfo sent back to the monitor.
1396
+func (c *client) reapContainer(ctr *container, p *process, exitCode int, exitedAt time.Time, eventErr error, logger *logrus.Entry) (int, error) {
1397
+	// Update container status
1398
+	ctr.Lock()
1399
+	ctr.status = libcontainerdtypes.StatusStopped
1400
+	ctr.exitedAt = exitedAt
1401
+	ctr.exitCode = uint32(exitCode)
1402
+	close(ctr.waitCh)
1403
+
1404
+	if err := c.shutdownContainer(ctr); err != nil {
1405
+		exitCode = -1
1406
+		logger.WithError(err).Warn("failed to shutdown container")
1407
+		thisErr := errors.Wrap(err, "failed to shutdown container")
1408
+		if eventErr != nil {
1409
+			eventErr = errors.Wrap(eventErr, thisErr.Error())
1410
+		} else {
1411
+			eventErr = thisErr
1412
+		}
1413
+	} else {
1414
+		logger.Debug("completed container shutdown")
1415
+	}
1416
+	ctr.Unlock()
1417
+
1418
+	if err := ctr.hcsContainer.Close(); err != nil {
1419
+		exitCode = -1
1420
+		logger.WithError(err).Error("failed to clean hcs container resources")
1421
+		thisErr := errors.Wrap(err, "failed to terminate container")
1422
+		if eventErr != nil {
1423
+			eventErr = errors.Wrap(eventErr, thisErr.Error())
1424
+		} else {
1425
+			eventErr = thisErr
1426
+		}
1427
+	}
1428
+	return exitCode, eventErr
1429
+}