Browse code

Acquire Sandbox.joinLeaveMu for Endpoint force-Delete

If an endpoint is still attached to a Sandbox when
Endpoint.Delete is called with force=true, sbLeave is
called. It may change the Sandbox's gateway, which may
conflict with a concurrent Join.

So, acquire the Sandbox's joinLeaveMu to do that, and
clarify the purpose of that mutex in struct Sandbox
comments.

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2025/09/11 18:18:57
Showing 2 changed files
... ...
@@ -914,15 +914,20 @@ func (ep *Endpoint) Delete(ctx context.Context, force bool) error {
914 914
 	sbid := ep.sandboxID
915 915
 	ep.mu.Unlock()
916 916
 
917
-	sb, _ := n.getController().SandboxByID(sbid)
918
-	if sb != nil && !force {
919
-		return &ActiveContainerError{name: name, id: epid}
920
-	}
921
-
922
-	if sb != nil {
923
-		if e := ep.sbLeave(context.WithoutCancel(ctx), sb, n, force); e != nil {
924
-			log.G(ctx).Warnf("failed to leave sandbox for endpoint %s : %v", name, e)
917
+	if sb, _ := n.getController().SandboxByID(sbid); sb != nil {
918
+		if !force {
919
+			return &ActiveContainerError{name: name, id: epid}
925 920
 		}
921
+		func() {
922
+			// Make sure this Delete isn't racing a Join/Leave/Delete that might also be
923
+			// updating the Sandbox's selection of gateway endpoints.
924
+			sb.joinLeaveMu.Lock()
925
+			defer sb.joinLeaveMu.Unlock()
926
+
927
+			if e := ep.sbLeave(context.WithoutCancel(ctx), sb, n, force); e != nil {
928
+				log.G(ctx).Warnf("failed to leave sandbox for endpoint %s : %v", name, e)
929
+			}
930
+		}()
926 931
 	}
927 932
 
928 933
 	if err = n.getController().deleteStoredEndpoint(ep); err != nil {
... ...
@@ -36,27 +36,34 @@ func (sb *Sandbox) processOptions(options ...SandboxOption) {
36 36
 // Sandbox provides the control over the network container entity.
37 37
 // It is a one to one mapping with the container.
38 38
 type Sandbox struct {
39
-	id                 string
40
-	containerID        string
41
-	config             containerConfig
42
-	extDNS             []extDNSEntry
43
-	osSbox             *osl.Namespace
44
-	controller         *Controller
45
-	resolver           *Resolver
46
-	resolverOnce       sync.Once
39
+	id              string
40
+	containerID     string
41
+	config          containerConfig
42
+	extDNS          []extDNSEntry
43
+	osSbox          *osl.Namespace
44
+	controller      *Controller
45
+	resolver        *Resolver
46
+	resolverOnce    sync.Once
47
+	dbIndex         uint64
48
+	dbExists        bool
49
+	isStub          bool
50
+	inDelete        bool
51
+	ingress         bool
52
+	ndotsSet        bool
53
+	oslTypes        []osl.SandboxType // slice of properties of this sandbox
54
+	loadBalancerNID string            // NID that this SB is a load balancer for
55
+	mu              sync.Mutex
56
+
57
+	// joinLeaveMu is required as well as mu to modify the following fields,
58
+	// acquire joinLeaveMu first, and keep it at-least until gateway changes
59
+	// have been applied following updates to endpoints.
60
+	//
61
+	// mu is required to access these fields.
62
+	joinLeaveMu        sync.Mutex
47 63
 	endpoints          []*Endpoint
48 64
 	epPriority         map[string]int
49 65
 	populatedEndpoints map[string]struct{}
50
-	joinLeaveMu        sync.Mutex
51
-	dbIndex            uint64
52
-	dbExists           bool
53
-	isStub             bool
54
-	inDelete           bool
55
-	ingress            bool
56
-	ndotsSet           bool
57
-	oslTypes           []osl.SandboxType // slice of properties of this sandbox
58
-	loadBalancerNID    string            // NID that this SB is a load balancer for
59
-	mu                 sync.Mutex
66
+
60 67
 	// This mutex is used to serialize service related operation for an endpoint
61 68
 	// The lock is here because the endpoint is saved into the store so is not unique
62 69
 	service sync.Mutex