Currently, if a container removal has failed for some reason,
any client waiting for removal (e.g. `docker run --rm`) is
stuck, waiting for removal to succeed while it has failed already.
For more details and the reproducer, please check
https://github.com/moby/moby/issues/34945
This commit addresses that by allowing `ContainerWait()` with
`container.WaitCondition == "removed"` argument to return an
error in case of removal failure. The `ContainerWaitOKBody`
stucture returned to a client is amended with a pointer to `struct Error`,
containing an error message string, and the `Client.ContainerWait()`
is modified to return the error, if any, to the client.
Note that this feature is only available for API version >= 1.34.
In order for the old clients to be unstuck, we just close the connection
without writing anything -- this causes client's error.
Now, docker-cli would need a separate commit to bump the API to 1.34
and to show an error returned, if any.
[v2: recreate the waitRemove channel after closing]
[v3: document; keep legacy behavior for older clients]
[v4: convert Error from string to pointer to a struct]
[v5: don't emulate old behavior, send empty response in error case]
[v6: rename legacy* vars to include version suffix]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
... | ... |
@@ -280,11 +280,12 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons |
280 | 280 |
// Behavior changed in version 1.30 to handle wait condition and to |
281 | 281 |
// return headers immediately. |
282 | 282 |
version := httputils.VersionFromContext(ctx) |
283 |
- legacyBehavior := versions.LessThan(version, "1.30") |
|
283 |
+ legacyBehaviorPre130 := versions.LessThan(version, "1.30") |
|
284 |
+ legacyRemovalWaitPre134 := false |
|
284 | 285 |
|
285 | 286 |
// The wait condition defaults to "not-running". |
286 | 287 |
waitCondition := containerpkg.WaitConditionNotRunning |
287 |
- if !legacyBehavior { |
|
288 |
+ if !legacyBehaviorPre130 { |
|
288 | 289 |
if err := httputils.ParseForm(r); err != nil { |
289 | 290 |
return err |
290 | 291 |
} |
... | ... |
@@ -293,6 +294,7 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons |
293 | 293 |
waitCondition = containerpkg.WaitConditionNextExit |
294 | 294 |
case container.WaitConditionRemoved: |
295 | 295 |
waitCondition = containerpkg.WaitConditionRemoved |
296 |
+ legacyRemovalWaitPre134 = versions.LessThan(version, "1.34") |
|
296 | 297 |
} |
297 | 298 |
} |
298 | 299 |
|
... | ... |
@@ -306,7 +308,7 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons |
306 | 306 |
|
307 | 307 |
w.Header().Set("Content-Type", "application/json") |
308 | 308 |
|
309 |
- if !legacyBehavior { |
|
309 |
+ if !legacyBehaviorPre130 { |
|
310 | 310 |
// Write response header immediately. |
311 | 311 |
w.WriteHeader(http.StatusOK) |
312 | 312 |
if flusher, ok := w.(http.Flusher); ok { |
... | ... |
@@ -317,8 +319,22 @@ func (s *containerRouter) postContainersWait(ctx context.Context, w http.Respons |
317 | 317 |
// Block on the result of the wait operation. |
318 | 318 |
status := <-waitC |
319 | 319 |
|
320 |
+ // With API < 1.34, wait on WaitConditionRemoved did not return |
|
321 |
+ // in case container removal failed. The only way to report an |
|
322 |
+ // error back to the client is to not write anything (i.e. send |
|
323 |
+ // an empty response which will be treated as an error). |
|
324 |
+ if legacyRemovalWaitPre134 && status.Err() != nil { |
|
325 |
+ return nil |
|
326 |
+ } |
|
327 |
+ |
|
328 |
+ var waitError *container.ContainerWaitOKBodyError |
|
329 |
+ if status.Err() != nil { |
|
330 |
+ waitError = &container.ContainerWaitOKBodyError{Message: status.Err().Error()} |
|
331 |
+ } |
|
332 |
+ |
|
320 | 333 |
return json.NewEncoder(w).Encode(&container.ContainerWaitOKBody{ |
321 | 334 |
StatusCode: int64(status.ExitCode()), |
335 |
+ Error: waitError, |
|
322 | 336 |
}) |
323 | 337 |
} |
324 | 338 |
|
... | ... |
@@ -5723,6 +5723,13 @@ paths: |
5723 | 5723 |
description: "Exit code of the container" |
5724 | 5724 |
type: "integer" |
5725 | 5725 |
x-nullable: false |
5726 |
+ Error: |
|
5727 |
+ description: "container waiting error, if any" |
|
5728 |
+ type: "object" |
|
5729 |
+ properties: |
|
5730 |
+ Message: |
|
5731 |
+ description: "Details of an error" |
|
5732 |
+ type: "string" |
|
5726 | 5733 |
404: |
5727 | 5734 |
description: "no such container" |
5728 | 5735 |
schema: |
... | ... |
@@ -7,10 +7,22 @@ package container |
7 | 7 |
// See hack/generate-swagger-api.sh |
8 | 8 |
// ---------------------------------------------------------------------------- |
9 | 9 |
|
10 |
+// ContainerWaitOKBodyError container waiting error, if any |
|
11 |
+// swagger:model ContainerWaitOKBodyError |
|
12 |
+type ContainerWaitOKBodyError struct { |
|
13 |
+ |
|
14 |
+ // Details of an error |
|
15 |
+ Message string `json:"Message,omitempty"` |
|
16 |
+} |
|
17 |
+ |
|
10 | 18 |
// ContainerWaitOKBody container wait o k body |
11 | 19 |
// swagger:model ContainerWaitOKBody |
12 | 20 |
type ContainerWaitOKBody struct { |
13 | 21 |
|
22 |
+ // error |
|
23 |
+ // Required: true |
|
24 |
+ Error *ContainerWaitOKBodyError `json:"Error"` |
|
25 |
+ |
|
14 | 26 |
// Exit code of the container |
15 | 27 |
// Required: true |
16 | 28 |
StatusCode int64 `json:"StatusCode"` |
... | ... |
@@ -29,7 +29,7 @@ type State struct { |
29 | 29 |
Dead bool |
30 | 30 |
Pid int |
31 | 31 |
ExitCodeValue int `json:"ExitCode"` |
32 |
- ErrorMsg string `json:"Error"` // contains last known error when starting the container |
|
32 |
+ ErrorMsg string `json:"Error"` // contains last known error during container start or remove |
|
33 | 33 |
StartedAt time.Time |
34 | 34 |
FinishedAt time.Time |
35 | 35 |
Health *Health |
... | ... |
@@ -319,7 +319,10 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) { |
319 | 319 |
// know the error that occurred when container transits to another state |
320 | 320 |
// when inspecting it |
321 | 321 |
func (s *State) SetError(err error) { |
322 |
- s.ErrorMsg = err.Error() |
|
322 |
+ s.ErrorMsg = "" |
|
323 |
+ if err != nil { |
|
324 |
+ s.ErrorMsg = err.Error() |
|
325 |
+ } |
|
323 | 326 |
} |
324 | 327 |
|
325 | 328 |
// IsPaused returns whether the container is paused or not. |
... | ... |
@@ -385,8 +388,18 @@ func (s *State) IsDead() bool { |
385 | 385 |
// closes the internal waitRemove channel to unblock callers waiting for a |
386 | 386 |
// container to be removed. |
387 | 387 |
func (s *State) SetRemoved() { |
388 |
+ s.SetRemovalError(nil) |
|
389 |
+} |
|
390 |
+ |
|
391 |
+// SetRemovalError is to be called in case a container remove failed. |
|
392 |
+// It sets an error and closes the internal waitRemove channel to unblock |
|
393 |
+// callers waiting for the container to be removed. |
|
394 |
+func (s *State) SetRemovalError(err error) { |
|
395 |
+ s.SetError(err) |
|
388 | 396 |
s.Lock() |
389 | 397 |
close(s.waitRemove) // Unblock those waiting on remove. |
398 |
+ // Recreate the channel so next ContainerWait will work |
|
399 |
+ s.waitRemove = make(chan struct{}) |
|
390 | 400 |
s.Unlock() |
391 | 401 |
} |
392 | 402 |
|
... | ... |
@@ -120,12 +120,16 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo |
120 | 120 |
metadata, err := daemon.stores[container.OS].layerStore.ReleaseRWLayer(container.RWLayer) |
121 | 121 |
layer.LogReleaseMetadata(metadata) |
122 | 122 |
if err != nil && err != layer.ErrMountDoesNotExist && !os.IsNotExist(errors.Cause(err)) { |
123 |
- return errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID) |
|
123 |
+ e := errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID) |
|
124 |
+ container.SetRemovalError(e) |
|
125 |
+ return e |
|
124 | 126 |
} |
125 | 127 |
} |
126 | 128 |
|
127 | 129 |
if err := system.EnsureRemoveAll(container.Root); err != nil { |
128 |
- return errors.Wrapf(err, "unable to remove filesystem for %s", container.ID) |
|
130 |
+ e := errors.Wrapf(err, "unable to remove filesystem for %s", container.ID) |
|
131 |
+ container.SetRemovalError(e) |
|
132 |
+ return e |
|
129 | 133 |
} |
130 | 134 |
|
131 | 135 |
linkNames := daemon.linkIndex.delete(container) |
... | ... |
@@ -17,12 +17,19 @@ keywords: "API, Docker, rcli, REST, documentation" |
17 | 17 |
|
18 | 18 |
[Docker Engine API v1.34](https://docs.docker.com/engine/api/v1.34/) documentation |
19 | 19 |
|
20 |
+* `POST /containers/(name)/wait?condition=removed` now also also returns |
|
21 |
+ in case of container removal failure. A pointer to a structure named |
|
22 |
+ `Error` added to the response JSON in order to indicate a failure. |
|
23 |
+ If `Error` is `null`, container removal has succeeded, otherwise |
|
24 |
+ the test of an error message indicating why container removal has failed |
|
25 |
+ is available from `Error.Message` field. |
|
26 |
+ |
|
20 | 27 |
## v1.33 API changes |
21 | 28 |
|
22 | 29 |
[Docker Engine API v1.33](https://docs.docker.com/engine/api/v1.33/) documentation |
23 | 30 |
|
24 | 31 |
* `GET /events` now supports filtering 4 more kinds of events: `config`, `node`, |
25 |
-`secret` and `service`. |
|
32 |
+`secret` and `service`. |
|
26 | 33 |
|
27 | 34 |
## v1.32 API changes |
28 | 35 |
|