Commit 8b7af1d0f added some code to update the DNSNames of all
endpoints attached to a sandbox by loading a new instance of each
affected endpoints from the datastore through a call to
`Network.EndpointByID()`.
This method then calls `Network.getEndpointFromStore()`, that in
turn calls `store.GetObject()`, which then calls `cache.get()`,
which calls `o.CopyTo(kvObject)`. This effectively creates a fresh
new instance of an Endpoint. However, endpoints are already kept in
memory by Sandbox, meaning we now have two in-memory instances of
the same Endpoint.
As it turns out, libnetwork is built around the idea that no two objects
representing the same thing should leave in-memory, otherwise breaking
mutex locking and optimistic locking (as both instances will have a drifting
version tracking ID -- dbIndex in libnetwork parliance).
In this specific case, this bug materializes by container rename failing
when applied a second time for a given container. An integration test is
added to make sure this won't happen again.
Signed-off-by: Albin Kerouanton <albinker@gmail.com>
| ... | ... |
@@ -2,6 +2,7 @@ package daemon // import "github.com/docker/docker/daemon" |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"context" |
| 5 |
+ "fmt" |
|
| 5 | 6 |
"strings" |
| 6 | 7 |
|
| 7 | 8 |
"github.com/containerd/log" |
| ... | ... |
@@ -127,9 +128,9 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) {
|
| 127 | 127 |
return err |
| 128 | 128 |
} |
| 129 | 129 |
|
| 130 |
- ep, err := nw.EndpointByID(epConfig.EndpointID) |
|
| 131 |
- if err != nil {
|
|
| 132 |
- return err |
|
| 130 |
+ ep := sb.GetEndpoint(epConfig.EndpointID) |
|
| 131 |
+ if ep == nil {
|
|
| 132 |
+ return fmt.Errorf("no endpoint attached to network %s found", nw.Name())
|
|
| 133 | 133 |
} |
| 134 | 134 |
|
| 135 | 135 |
oldDNSNames := make([]string, len(epConfig.DNSNames)) |
| ... | ... |
@@ -192,3 +192,25 @@ func TestRenameContainerWithLinkedContainer(t *testing.T) {
|
| 192 | 192 |
assert.NilError(t, err) |
| 193 | 193 |
assert.Check(t, is.Equal(db1ID, inspect.ID)) |
| 194 | 194 |
} |
| 195 |
+ |
|
| 196 |
+// Regression test for https://github.com/moby/moby/issues/47186 |
|
| 197 |
+func TestRenameContainerTwice(t *testing.T) {
|
|
| 198 |
+ ctx := setupTest(t) |
|
| 199 |
+ apiClient := testEnv.APIClient() |
|
| 200 |
+ |
|
| 201 |
+ ctrName := "c0" |
|
| 202 |
+ container.Run(ctx, t, apiClient, container.WithName("c0"))
|
|
| 203 |
+ defer func() {
|
|
| 204 |
+ container.Remove(ctx, t, apiClient, ctrName, containertypes.RemoveOptions{
|
|
| 205 |
+ Force: true, |
|
| 206 |
+ }) |
|
| 207 |
+ }() |
|
| 208 |
+ |
|
| 209 |
+ err := apiClient.ContainerRename(ctx, "c0", "c1") |
|
| 210 |
+ assert.NilError(t, err) |
|
| 211 |
+ ctrName = "c1" |
|
| 212 |
+ |
|
| 213 |
+ err = apiClient.ContainerRename(ctx, "c1", "c2") |
|
| 214 |
+ assert.NilError(t, err) |
|
| 215 |
+ ctrName = "c2" |
|
| 216 |
+} |
| ... | ... |
@@ -623,7 +623,7 @@ func (ep *Endpoint) addServiceInfoToCluster(sb *Sandbox) error {
|
| 623 | 623 |
// In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is |
| 624 | 624 |
// removed from the list, in this situation the delete will bail out not finding any data to cleanup |
| 625 | 625 |
// and the add will bail out not finding the endpoint on the sandbox. |
| 626 |
- if err := sb.getEndpoint(ep.ID()); err == nil {
|
|
| 626 |
+ if err := sb.GetEndpoint(ep.ID()); err == nil {
|
|
| 627 | 627 |
log.G(context.TODO()).Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
|
| 628 | 628 |
return nil |
| 629 | 629 |
} |
| ... | ... |
@@ -692,7 +692,7 @@ func (ep *Endpoint) deleteServiceInfoFromCluster(sb *Sandbox, fullRemove bool, m |
| 692 | 692 |
// get caught in disableServceInNetworkDB, but we check here to make the |
| 693 | 693 |
// nature of the condition more clear. |
| 694 | 694 |
// See comment in addServiceInfoToCluster() |
| 695 |
- if err := sb.getEndpoint(ep.ID()); err == nil {
|
|
| 695 |
+ if err := sb.GetEndpoint(ep.ID()); err == nil {
|
|
| 696 | 696 |
log.G(context.TODO()).Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
|
| 697 | 697 |
return nil |
| 698 | 698 |
} |
| ... | ... |
@@ -1276,8 +1276,8 @@ func (n *Network) EndpointByName(name string) (*Endpoint, error) {
|
| 1276 | 1276 |
return e, nil |
| 1277 | 1277 |
} |
| 1278 | 1278 |
|
| 1279 |
-// EndpointByID returns the Endpoint which has the passed id. If not found, |
|
| 1280 |
-// the error ErrNoSuchEndpoint is returned. |
|
| 1279 |
+// EndpointByID should *never* be called as it's going to create a 2nd instance of an Endpoint. The first one lives in |
|
| 1280 |
+// the Sandbox the endpoint is attached to. Instead, the endpoint should be retrieved by calling [Sandbox.Endpoints()]. |
|
| 1281 | 1281 |
func (n *Network) EndpointByID(id string) (*Endpoint, error) {
|
| 1282 | 1282 |
if id == "" {
|
| 1283 | 1283 |
return nil, ErrInvalidID(id) |
| ... | ... |
@@ -334,7 +334,7 @@ func (sb *Sandbox) removeEndpointRaw(ep *Endpoint) {
|
| 334 | 334 |
} |
| 335 | 335 |
} |
| 336 | 336 |
|
| 337 |
-func (sb *Sandbox) getEndpoint(id string) *Endpoint {
|
|
| 337 |
+func (sb *Sandbox) GetEndpoint(id string) *Endpoint {
|
|
| 338 | 338 |
sb.mu.Lock() |
| 339 | 339 |
defer sb.mu.Unlock() |
| 340 | 340 |
|
| ... | ... |
@@ -554,7 +554,7 @@ func (sb *Sandbox) DisableService() (err error) {
|
| 554 | 554 |
} |
| 555 | 555 |
|
| 556 | 556 |
func (sb *Sandbox) clearNetworkResources(origEp *Endpoint) error {
|
| 557 |
- ep := sb.getEndpoint(origEp.id) |
|
| 557 |
+ ep := sb.GetEndpoint(origEp.id) |
|
| 558 | 558 |
if ep == nil {
|
| 559 | 559 |
return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s",
|
| 560 | 560 |
origEp.id) |
| ... | ... |
@@ -57,7 +57,7 @@ func (n *Network) findLBEndpointSandbox() (*Endpoint, *Sandbox, error) {
|
| 57 | 57 |
if !ok {
|
| 58 | 58 |
return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
|
| 59 | 59 |
} |
| 60 |
- sep := sb.getEndpoint(ep.ID()) |
|
| 60 |
+ sep := sb.GetEndpoint(ep.ID()) |
|
| 61 | 61 |
if sep == nil {
|
| 62 | 62 |
return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
|
| 63 | 63 |
} |