Signed-off-by: John Howard <jhoward@microsoft.com>
Fixes #38719
Fixes some subtle bugs on Windows
- Fixes https://github.com/moby/moby/issues/38719. This one is the most important
as failure to start the init process in a Windows container will cause leaked
handles. (ie where the `ctr.hcsContainer.CreateProcess(...)` call fails).
The solution to the leak is to split out the `reapContainer` part of `reapProcess`
into a separate function. This ensures HCS resources are cleaned up correctly and
not leaked.
- Ensuring the reapProcess goroutine is started immediately the process
is actually started, so we don't leak in the case of failures such as
from `newIOFromProcess` or `attachStdio`
- libcontainerd on Windows (local, not containerd) was not sending the EventCreate
back to the monitor on Windows. Just LCOW. This was just an oversight from
refactoring a couple of years ago by Mikael as far as I can tell. Technically
not needed for functionality except for the logging being missing, but is correct.
| ... | ... |
@@ -133,7 +133,8 @@ func translateContainerdStartErr(cmd string, setExitCode func(int), err error) e |
| 133 | 133 |
if contains(errDesc, cmd) && |
| 134 | 134 |
(contains(errDesc, "executable file not found") || |
| 135 | 135 |
contains(errDesc, "no such file or directory") || |
| 136 |
- contains(errDesc, "system cannot find the file specified")) {
|
|
| 136 |
+ contains(errDesc, "system cannot find the file specified") || |
|
| 137 |
+ contains(errDesc, "failed to run runc create/exec call")) {
|
|
| 137 | 138 |
setExitCode(127) |
| 138 | 139 |
retErr = startInvalidConfigError(errDesc) |
| 139 | 140 |
} |
| ... | ... |
@@ -157,17 +157,32 @@ func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeO |
| 157 | 157 |
return errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
|
| 158 | 158 |
} |
| 159 | 159 |
|
| 160 |
- // spec.Linux must be nil for Windows containers, but spec.Windows |
|
| 161 |
- // will be filled in regardless of container platform. This is a |
|
| 162 |
- // temporary workaround due to LCOW requiring layer folder paths, |
|
| 163 |
- // which are stored under spec.Windows. |
|
| 164 |
- // |
|
| 165 |
- // TODO: @darrenstahlmsft fix this once the OCI spec is updated to |
|
| 166 |
- // support layer folder paths for LCOW |
|
| 160 |
+ var err error |
|
| 167 | 161 |
if spec.Linux == nil {
|
| 168 |
- return c.createWindows(id, spec, runtimeOptions) |
|
| 162 |
+ err = c.createWindows(id, spec, runtimeOptions) |
|
| 163 |
+ } else {
|
|
| 164 |
+ err = c.createLinux(id, spec, runtimeOptions) |
|
| 169 | 165 |
} |
| 170 |
- return c.createLinux(id, spec, runtimeOptions) |
|
| 166 |
+ |
|
| 167 |
+ if err == nil {
|
|
| 168 |
+ c.eventQ.Append(id, func() {
|
|
| 169 |
+ ei := libcontainerdtypes.EventInfo{
|
|
| 170 |
+ ContainerID: id, |
|
| 171 |
+ } |
|
| 172 |
+ c.logger.WithFields(logrus.Fields{
|
|
| 173 |
+ "container": id, |
|
| 174 |
+ "event": libcontainerdtypes.EventCreate, |
|
| 175 |
+ }).Info("sending event")
|
|
| 176 |
+ err := c.backend.ProcessEvent(id, libcontainerdtypes.EventCreate, ei) |
|
| 177 |
+ if err != nil {
|
|
| 178 |
+ c.logger.WithError(err).WithFields(logrus.Fields{
|
|
| 179 |
+ "container": id, |
|
| 180 |
+ "event": libcontainerdtypes.EventCreate, |
|
| 181 |
+ }).Error("failed to process event")
|
|
| 182 |
+ } |
|
| 183 |
+ }) |
|
| 184 |
+ } |
|
| 185 |
+ return err |
|
| 171 | 186 |
} |
| 172 | 187 |
|
| 173 | 188 |
func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
|
| ... | ... |
@@ -561,23 +576,6 @@ func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interfa |
| 561 | 561 |
c.containers[id] = ctr |
| 562 | 562 |
c.Unlock() |
| 563 | 563 |
|
| 564 |
- c.eventQ.Append(id, func() {
|
|
| 565 |
- ei := libcontainerdtypes.EventInfo{
|
|
| 566 |
- ContainerID: id, |
|
| 567 |
- } |
|
| 568 |
- c.logger.WithFields(logrus.Fields{
|
|
| 569 |
- "container": ctr.id, |
|
| 570 |
- "event": libcontainerdtypes.EventCreate, |
|
| 571 |
- }).Info("sending event")
|
|
| 572 |
- err := c.backend.ProcessEvent(id, libcontainerdtypes.EventCreate, ei) |
|
| 573 |
- if err != nil {
|
|
| 574 |
- c.logger.WithError(err).WithFields(logrus.Fields{
|
|
| 575 |
- "container": id, |
|
| 576 |
- "event": libcontainerdtypes.EventCreate, |
|
| 577 |
- }).Error("failed to process event")
|
|
| 578 |
- } |
|
| 579 |
- }) |
|
| 580 |
- |
|
| 581 | 564 |
logger.Debug("createLinux() completed successfully")
|
| 582 | 565 |
return nil |
| 583 | 566 |
} |
| ... | ... |
@@ -655,7 +653,9 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt |
| 655 | 655 |
|
| 656 | 656 |
// Configure the CommandLine/CommandArgs |
| 657 | 657 |
setCommandLineAndArgs(ctr.isWindows, ctr.ociSpec.Process, createProcessParms) |
| 658 |
- logger.Debugf("start commandLine: %s", createProcessParms.CommandLine)
|
|
| 658 |
+ if ctr.isWindows {
|
|
| 659 |
+ logger.Debugf("start commandLine: %s", createProcessParms.CommandLine)
|
|
| 660 |
+ } |
|
| 659 | 661 |
|
| 660 | 662 |
createProcessParms.User = ctr.ociSpec.Process.User.Username |
| 661 | 663 |
|
| ... | ... |
@@ -671,14 +671,31 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt |
| 671 | 671 |
} |
| 672 | 672 |
|
| 673 | 673 |
ctr.Lock() |
| 674 |
- defer ctr.Unlock() |
|
| 675 | 674 |
|
| 676 | 675 |
// Start the command running in the container. |
| 677 | 676 |
newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms) |
| 678 | 677 |
if err != nil {
|
| 679 | 678 |
logger.WithError(err).Error("CreateProcess() failed")
|
| 679 |
+ // Fix for https://github.com/moby/moby/issues/38719. |
|
| 680 |
+ // If the init process failed to launch, we still need to reap the |
|
| 681 |
+ // container to avoid leaking it. |
|
| 682 |
+ // |
|
| 683 |
+ // Note we use the explicit exit code of 127 which is the |
|
| 684 |
+ // Linux shell equivalent of "command not found". Windows cannot |
|
| 685 |
+ // know ahead of time whether or not the command exists, especially |
|
| 686 |
+ // in the case of Hyper-V containers. |
|
| 687 |
+ ctr.Unlock() |
|
| 688 |
+ exitedAt := time.Now() |
|
| 689 |
+ p := &process{
|
|
| 690 |
+ id: libcontainerdtypes.InitProcessName, |
|
| 691 |
+ pid: 0, |
|
| 692 |
+ } |
|
| 693 |
+ c.reapContainer(ctr, p, 127, exitedAt, nil, logger) |
|
| 680 | 694 |
return -1, err |
| 681 | 695 |
} |
| 696 |
+ |
|
| 697 |
+ defer ctr.Unlock() |
|
| 698 |
+ |
|
| 682 | 699 |
defer func() {
|
| 683 | 700 |
if err != nil {
|
| 684 | 701 |
if err := newProcess.Kill(); err != nil {
|
| ... | ... |
@@ -701,6 +718,12 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt |
| 701 | 701 |
} |
| 702 | 702 |
logger.WithField("pid", p.pid).Debug("init process started")
|
| 703 | 703 |
|
| 704 |
+ ctr.status = libcontainerdtypes.StatusRunning |
|
| 705 |
+ ctr.init = p |
|
| 706 |
+ |
|
| 707 |
+ // Spin up a go routine waiting for exit to handle cleanup |
|
| 708 |
+ go c.reapProcess(ctr, p) |
|
| 709 |
+ |
|
| 704 | 710 |
dio, err := newIOFromProcess(newProcess, ctr.ociSpec.Process.Terminal) |
| 705 | 711 |
if err != nil {
|
| 706 | 712 |
logger.WithError(err).Error("failed to get stdio pipes")
|
| ... | ... |
@@ -708,14 +731,9 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt |
| 708 | 708 |
} |
| 709 | 709 |
_, err = attachStdio(dio) |
| 710 | 710 |
if err != nil {
|
| 711 |
- logger.WithError(err).Error("failed to attache stdio")
|
|
| 711 |
+ logger.WithError(err).Error("failed to attach stdio")
|
|
| 712 | 712 |
return -1, err |
| 713 | 713 |
} |
| 714 |
- ctr.status = libcontainerdtypes.StatusRunning |
|
| 715 |
- ctr.init = p |
|
| 716 |
- |
|
| 717 |
- // Spin up a go routine waiting for exit to handle cleanup |
|
| 718 |
- go c.reapProcess(ctr, p) |
|
| 719 | 714 |
|
| 720 | 715 |
// Generate the associated event |
| 721 | 716 |
c.eventQ.Append(id, func() {
|
| ... | ... |
@@ -1326,37 +1344,7 @@ func (c *client) reapProcess(ctr *container, p *process) int {
|
| 1326 | 1326 |
} |
| 1327 | 1327 |
|
| 1328 | 1328 |
if p.id == libcontainerdtypes.InitProcessName {
|
| 1329 |
- // Update container status |
|
| 1330 |
- ctr.Lock() |
|
| 1331 |
- ctr.status = libcontainerdtypes.StatusStopped |
|
| 1332 |
- ctr.exitedAt = exitedAt |
|
| 1333 |
- ctr.exitCode = uint32(exitCode) |
|
| 1334 |
- close(ctr.waitCh) |
|
| 1335 |
- |
|
| 1336 |
- if err := c.shutdownContainer(ctr); err != nil {
|
|
| 1337 |
- exitCode = -1 |
|
| 1338 |
- logger.WithError(err).Warn("failed to shutdown container")
|
|
| 1339 |
- thisErr := fmt.Errorf("failed to shutdown container: %s", err)
|
|
| 1340 |
- if eventErr != nil {
|
|
| 1341 |
- eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
|
|
| 1342 |
- } else {
|
|
| 1343 |
- eventErr = thisErr |
|
| 1344 |
- } |
|
| 1345 |
- } else {
|
|
| 1346 |
- logger.Debug("completed container shutdown")
|
|
| 1347 |
- } |
|
| 1348 |
- ctr.Unlock() |
|
| 1349 |
- |
|
| 1350 |
- if err := ctr.hcsContainer.Close(); err != nil {
|
|
| 1351 |
- exitCode = -1 |
|
| 1352 |
- logger.WithError(err).Error("failed to clean hcs container resources")
|
|
| 1353 |
- thisErr := fmt.Errorf("failed to terminate container: %s", err)
|
|
| 1354 |
- if eventErr != nil {
|
|
| 1355 |
- eventErr = fmt.Errorf("%s: %s", eventErr, thisErr)
|
|
| 1356 |
- } else {
|
|
| 1357 |
- eventErr = thisErr |
|
| 1358 |
- } |
|
| 1359 |
- } |
|
| 1329 |
+ exitCode, eventErr = c.reapContainer(ctr, p, exitCode, exitedAt, eventErr, logger) |
|
| 1360 | 1330 |
} |
| 1361 | 1331 |
|
| 1362 | 1332 |
c.eventQ.Append(ctr.id, func() {
|
| ... | ... |
@@ -1390,3 +1378,40 @@ func (c *client) reapProcess(ctr *container, p *process) int {
|
| 1390 | 1390 |
|
| 1391 | 1391 |
return exitCode |
| 1392 | 1392 |
} |
| 1393 |
+ |
|
| 1394 |
+// reapContainer shuts down the container and releases associated resources. It returns |
|
| 1395 |
+// the error to be logged in the eventInfo sent back to the monitor. |
|
| 1396 |
+func (c *client) reapContainer(ctr *container, p *process, exitCode int, exitedAt time.Time, eventErr error, logger *logrus.Entry) (int, error) {
|
|
| 1397 |
+ // Update container status |
|
| 1398 |
+ ctr.Lock() |
|
| 1399 |
+ ctr.status = libcontainerdtypes.StatusStopped |
|
| 1400 |
+ ctr.exitedAt = exitedAt |
|
| 1401 |
+ ctr.exitCode = uint32(exitCode) |
|
| 1402 |
+ close(ctr.waitCh) |
|
| 1403 |
+ |
|
| 1404 |
+ if err := c.shutdownContainer(ctr); err != nil {
|
|
| 1405 |
+ exitCode = -1 |
|
| 1406 |
+ logger.WithError(err).Warn("failed to shutdown container")
|
|
| 1407 |
+ thisErr := errors.Wrap(err, "failed to shutdown container") |
|
| 1408 |
+ if eventErr != nil {
|
|
| 1409 |
+ eventErr = errors.Wrap(eventErr, thisErr.Error()) |
|
| 1410 |
+ } else {
|
|
| 1411 |
+ eventErr = thisErr |
|
| 1412 |
+ } |
|
| 1413 |
+ } else {
|
|
| 1414 |
+ logger.Debug("completed container shutdown")
|
|
| 1415 |
+ } |
|
| 1416 |
+ ctr.Unlock() |
|
| 1417 |
+ |
|
| 1418 |
+ if err := ctr.hcsContainer.Close(); err != nil {
|
|
| 1419 |
+ exitCode = -1 |
|
| 1420 |
+ logger.WithError(err).Error("failed to clean hcs container resources")
|
|
| 1421 |
+ thisErr := errors.Wrap(err, "failed to terminate container") |
|
| 1422 |
+ if eventErr != nil {
|
|
| 1423 |
+ eventErr = errors.Wrap(eventErr, thisErr.Error()) |
|
| 1424 |
+ } else {
|
|
| 1425 |
+ eventErr = thisErr |
|
| 1426 |
+ } |
|
| 1427 |
+ } |
|
| 1428 |
+ return exitCode, eventErr |
|
| 1429 |
+} |