Signed-off-by: John Howard <jhoward@microsoft.com>
This is a fix for a few related scenarios where it's impossible to remove layers or containers
until the host is rebooted. Generally (or at least easiest to repro) through a forced daemon kill
while a container is running.
Possibly slightly worse than that, as following a host reboot, the scratch layer would possibly be leaked and
left on disk under the dataroot\windowsfilter directory after the container is removed.
One such example of a failure:
1. run a long running container with the --rm flag
docker run --rm -d --name test microsoft/windowsservercore powershell sleep 30
2. Force kill the daemon not allowing it to cleanup. Simulates a crash or a host power-cycle.
3. (re-)Start daemon
4. docker ps -a
PS C:\control> docker ps -a
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
7aff773d782b malloc "powershell start-sl…" 11 seconds ago Removal In Progress malloc
5. Try to remove
PS C:\control> docker rm 7aff
Error response from daemon: container 7aff773d782bbf35d95095369ffcb170b7b8f0e6f8f65d5aff42abf61234855d: driver "windowsfilter" failed to remove root filesystem: rename C:\control\windowsfilter\7aff773d782bbf35d95095369ffcb170b7b8f0e6f8f65d5aff42abf61234855d C:\control\windowsfilter\7aff773d782bbf35d95095369ffcb170b7b8f0e6f8f65d5aff42abf61234855d-removing: Access is denied.
PS C:\control>
Step 5 fails.
| ... | ... |
@@ -6,7 +6,6 @@ import ( |
| 6 | 6 |
"bufio" |
| 7 | 7 |
"bytes" |
| 8 | 8 |
"encoding/json" |
| 9 |
- "errors" |
|
| 10 | 9 |
"fmt" |
| 11 | 10 |
"io" |
| 12 | 11 |
"io/ioutil" |
| ... | ... |
@@ -23,6 +22,7 @@ import ( |
| 23 | 23 |
"github.com/Microsoft/go-winio" |
| 24 | 24 |
"github.com/Microsoft/go-winio/archive/tar" |
| 25 | 25 |
"github.com/Microsoft/go-winio/backuptar" |
| 26 |
+ "github.com/Microsoft/go-winio/vhd" |
|
| 26 | 27 |
"github.com/Microsoft/hcsshim" |
| 27 | 28 |
"github.com/docker/docker/daemon/graphdriver" |
| 28 | 29 |
"github.com/docker/docker/pkg/archive" |
| ... | ... |
@@ -33,6 +33,7 @@ import ( |
| 33 | 33 |
"github.com/docker/docker/pkg/reexec" |
| 34 | 34 |
"github.com/docker/docker/pkg/system" |
| 35 | 35 |
units "github.com/docker/go-units" |
| 36 |
+ "github.com/pkg/errors" |
|
| 36 | 37 |
"github.com/sirupsen/logrus" |
| 37 | 38 |
"golang.org/x/sys/windows" |
| 38 | 39 |
) |
| ... | ... |
@@ -331,7 +332,18 @@ func (d *Driver) Remove(id string) error {
|
| 331 | 331 |
tmpID := fmt.Sprintf("%s-removing", rID)
|
| 332 | 332 |
tmpLayerPath := filepath.Join(d.info.HomeDir, tmpID) |
| 333 | 333 |
if err := os.Rename(layerPath, tmpLayerPath); err != nil && !os.IsNotExist(err) {
|
| 334 |
- return err |
|
| 334 |
+ if !os.IsPermission(err) {
|
|
| 335 |
+ return err |
|
| 336 |
+ } |
|
| 337 |
+ // If permission denied, it's possible that the scratch is still mounted, an |
|
| 338 |
+ // artifact after a hard daemon crash for example. Worth a shot to try detaching it |
|
| 339 |
+ // before retrying the rename. |
|
| 340 |
+ if detachErr := vhd.DetachVhd(filepath.Join(layerPath, "sandbox.vhdx")); detachErr != nil {
|
|
| 341 |
+ return errors.Wrapf(err, "failed to detach VHD: %s", detachErr) |
|
| 342 |
+ } |
|
| 343 |
+ if renameErr := os.Rename(layerPath, tmpLayerPath); renameErr != nil && !os.IsNotExist(renameErr) {
|
|
| 344 |
+ return errors.Wrapf(err, "second rename attempt following detach failed: %s", renameErr) |
|
| 345 |
+ } |
|
| 335 | 346 |
} |
| 336 | 347 |
if err := hcsshim.DestroyLayer(d.info, tmpID); err != nil {
|
| 337 | 348 |
logrus.Errorf("Failed to DestroyLayer %s: %s", id, err)
|