Browse code

Merge pull request #50945 from robmry/cleanup_network_settings_on_join_err

Tidy up when endpoint join fails

Rob Murray authored on 2025/09/12 21:05:10
Showing 5 changed files
... ...
@@ -731,10 +731,14 @@ func (daemon *Daemon) connectToNetwork(ctx context.Context, cfg *config.Config,
731 731
 			}
732 732
 		}
733 733
 	}()
734
-	ctr.NetworkSettings.Networks[nwName] = endpointConfig
735 734
 
736 735
 	delete(ctr.NetworkSettings.Networks, n.ID())
737
-
736
+	ctr.NetworkSettings.Networks[nwName] = endpointConfig
737
+	defer func() {
738
+		if retErr != nil {
739
+			delete(ctr.NetworkSettings.Networks, nwName)
740
+		}
741
+	}()
738 742
 	if err := daemon.updateEndpointNetworkSettings(cfg, ctr, n, ep); err != nil {
739 743
 		return err
740 744
 	}
... ...
@@ -100,7 +100,7 @@ func (sb *Sandbox) clearDefaultGW() error {
100 100
 	if ep = sb.getEndpointInGWNetwork(); ep == nil {
101 101
 		return nil
102 102
 	}
103
-	if err := ep.sbLeave(context.TODO(), sb, false); err != nil {
103
+	if err := ep.sbLeave(context.TODO(), sb, ep.getNetwork(), false); err != nil {
104 104
 		return fmt.Errorf("container %s: endpoint leaving GW Network failed: %v", sb.containerID, err)
105 105
 	}
106 106
 	if err := ep.Delete(context.TODO(), false); err != nil {
... ...
@@ -176,17 +176,16 @@ func (c *Controller) defaultGwNetwork() (*Network, error) {
176 176
 	return n, err
177 177
 }
178 178
 
179
+func (sb *Sandbox) isGatewayEndpoint(epId string) bool {
180
+	gw4, gw6 := sb.getGatewayEndpoint()
181
+	return (gw4 != nil && epId == gw4.ID()) || (gw6 != nil && epId == gw6.ID())
182
+}
183
+
179 184
 // getGatewayEndpoint returns the endpoints providing external connectivity to
180 185
 // the sandbox. If the gateway is dual-stack, ep4 and ep6 will point at the same
181 186
 // endpoint. If there is no IPv4/IPv6 connectivity, nil pointers will be returned.
182 187
 func (sb *Sandbox) getGatewayEndpoint() (ep4, ep6 *Endpoint) {
183
-	return selectGatewayEndpoint(sb.Endpoints())
184
-}
185
-
186
-// selectGatewayEndpoint is like getGatewayEndpoint, but selects only from
187
-// endpoints.
188
-func selectGatewayEndpoint(endpoints []*Endpoint) (ep4, ep6 *Endpoint) {
189
-	for _, ep := range endpoints {
188
+	for _, ep := range sb.Endpoints() {
190 189
 		if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
191 190
 			continue
192 191
 		}
... ...
@@ -514,9 +514,12 @@ func (ep *Endpoint) sbJoin(ctx context.Context, sb *Sandbox, options ...Endpoint
514 514
 	ep.mu.Unlock()
515 515
 	defer func() {
516 516
 		if retErr != nil {
517
-			ep.mu.Lock()
518
-			ep.sandboxID = ""
519
-			ep.mu.Unlock()
517
+			if err := ep.sbLeave(ctx, sb, n, true); err != nil {
518
+				log.G(ctx).WithFields(log.Fields{
519
+					"error":  err,
520
+					"retErr": retErr,
521
+				}).Warn("Failed to remove endpoint after join error")
522
+			}
520 523
 		}
521 524
 	}()
522 525
 
... ...
@@ -561,11 +564,6 @@ func (ep *Endpoint) sbJoin(ctx context.Context, sb *Sandbox, options ...Endpoint
561 561
 	gwepBefore4, gwepBefore6 := sb.getGatewayEndpoint()
562 562
 
563 563
 	sb.addEndpoint(ep)
564
-	defer func() {
565
-		if retErr != nil {
566
-			sb.removeEndpoint(ep)
567
-		}
568
-	}()
569 564
 
570 565
 	if err := sb.populateNetworkResources(ctx, ep); err != nil {
571 566
 		return err
... ...
@@ -753,20 +751,20 @@ func (ep *Endpoint) Leave(ctx context.Context, sb *Sandbox) error {
753 753
 	sb.joinLeaveMu.Lock()
754 754
 	defer sb.joinLeaveMu.Unlock()
755 755
 
756
-	return ep.sbLeave(ctx, sb, false)
757
-}
758
-
759
-func (ep *Endpoint) sbLeave(ctx context.Context, sb *Sandbox, force bool) error {
760 756
 	n, err := ep.getNetworkFromStore()
761 757
 	if err != nil {
762 758
 		return fmt.Errorf("failed to get network from store during leave: %v", err)
763 759
 	}
764 760
 
765
-	ep, err = n.getEndpointFromStore(ep.ID())
761
+	storedEp, err := n.getEndpointFromStore(ep.ID())
766 762
 	if err != nil {
767 763
 		return fmt.Errorf("failed to get endpoint from store during leave: %v", err)
768 764
 	}
769 765
 
766
+	return storedEp.sbLeave(ctx, sb, n, false)
767
+}
768
+
769
+func (ep *Endpoint) sbLeave(ctx context.Context, sb *Sandbox, n *Network, force bool) error {
770 770
 	ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{
771 771
 		"nid": n.ID(),
772 772
 		"net": n.Name(),
... ...
@@ -820,17 +818,38 @@ func (ep *Endpoint) sbLeave(ctx context.Context, sb *Sandbox, force bool) error
820 820
 	// Capture the addresses that were added to the container's /etc/hosts here,
821 821
 	// before the endpoint is deleted, so that they can be removed from /etc/hosts.
822 822
 	etcHostsAddrs := ep.getEtcHostsAddrs()
823
-
824
-	if err := sb.clearNetworkResources(ep); err != nil {
825
-		log.G(ctx).WithError(err).Warn("Failed to clean up network resources on container disconnect")
823
+	// Before removing the Endpoint from the Sandbox's list of endpoints, check whether
824
+	// it's acting as a gateway so that new gateways can be selected if it is.
825
+	wasGwEp := sb.isGatewayEndpoint(ep.id)
826
+
827
+	// Remove the sb's references to ep.
828
+	sb.mu.Lock()
829
+	osSbox := sb.osSbox
830
+	delete(sb.populatedEndpoints, ep.id)
831
+	delete(sb.epPriority, ep.id)
832
+	sb.endpoints = slices.DeleteFunc(sb.endpoints, func(other *Endpoint) bool { return other.id == ep.id })
833
+	sb.mu.Unlock()
834
+
835
+	// Delete interfaces, routes etc. from the OS.
836
+	if osSbox != nil {
837
+		releaseOSSboxResources(osSbox, ep)
838
+
839
+		// Even if the interface was initially created in the container's namespace, it's
840
+		// now been moved out. When a legacy link is deleted, the Endpoint is removed and
841
+		// then re-added to the Sandbox. So, to make sure the re-add works, note that the
842
+		// interface is now outside the container's netns.
843
+		ep.iface.createdInContainer = false
844
+	}
845
+
846
+	// Update gateway / static routes if the ep was the gateway.
847
+	var gwepAfter4, gwepAfter6 *Endpoint
848
+	if wasGwEp {
849
+		gwepAfter4, gwepAfter6 = sb.getGatewayEndpoint()
850
+		if err := sb.updateGateway(gwepAfter4, gwepAfter6); err != nil {
851
+			return fmt.Errorf("updating gateway endpoint: %w", err)
852
+		}
826 853
 	}
827 854
 
828
-	// Even if the interface was initially created in the container's namespace, it's
829
-	// now been moved out. When a legacy link is deleted, the Endpoint is removed and
830
-	// then re-added to the Sandbox. So, to make sure the re-add works, note that the
831
-	// interface is now outside the container's netns.
832
-	ep.iface.createdInContainer = false
833
-
834 855
 	// Update the store about the sandbox detach only after we
835 856
 	// have completed sb.clearNetworkResources above to avoid
836 857
 	// spurious logs when cleaning up the sandbox when the daemon
... ...
@@ -869,16 +888,17 @@ func (ep *Endpoint) sbLeave(ctx context.Context, sb *Sandbox, force bool) error
869 869
 		sb.resolver.SetForwardingPolicy(sb.hasExternalAccess())
870 870
 	}
871 871
 
872
-	// Find new endpoint(s) to provide external connectivity for the sandbox.
873
-	gwepAfter4, gwepAfter6 := sb.getGatewayEndpoint()
874
-	if gwepAfter4 != nil {
875
-		if err := gwepAfter4.programExternalConnectivity(ctx, gwepAfter4, gwepAfter6); err != nil {
876
-			log.G(ctx).WithError(err).Error("Failed to set IPv4 gateway")
872
+	// Configure the endpoints that now provide external connectivity for the sandbox.
873
+	if wasGwEp {
874
+		if gwepAfter4 != nil {
875
+			if err := gwepAfter4.programExternalConnectivity(ctx, gwepAfter4, gwepAfter6); err != nil {
876
+				log.G(ctx).WithError(err).Error("Failed to set IPv4 gateway")
877
+			}
877 878
 		}
878
-	}
879
-	if gwepAfter6 != nil && gwepAfter6 != gwepAfter4 {
880
-		if err := gwepAfter6.programExternalConnectivity(ctx, gwepAfter4, gwepAfter6); err != nil {
881
-			log.G(ctx).WithError(err).Error("Failed to set IPv6 gateway")
879
+		if gwepAfter6 != nil && gwepAfter6 != gwepAfter4 {
880
+			if err := gwepAfter6.programExternalConnectivity(ctx, gwepAfter4, gwepAfter6); err != nil {
881
+				log.G(ctx).WithError(err).Error("Failed to set IPv6 gateway")
882
+			}
882 883
 		}
883 884
 	}
884 885
 
... ...
@@ -914,15 +934,20 @@ func (ep *Endpoint) Delete(ctx context.Context, force bool) error {
914 914
 	sbid := ep.sandboxID
915 915
 	ep.mu.Unlock()
916 916
 
917
-	sb, _ := n.getController().SandboxByID(sbid)
918
-	if sb != nil && !force {
919
-		return &ActiveContainerError{name: name, id: epid}
920
-	}
921
-
922
-	if sb != nil {
923
-		if e := ep.sbLeave(context.WithoutCancel(ctx), sb, force); e != nil {
924
-			log.G(ctx).Warnf("failed to leave sandbox for endpoint %s : %v", name, e)
917
+	if sb, _ := n.getController().SandboxByID(sbid); sb != nil {
918
+		if !force {
919
+			return &ActiveContainerError{name: name, id: epid}
925 920
 		}
921
+		func() {
922
+			// Make sure this Delete isn't racing a Join/Leave/Delete that might also be
923
+			// updating the Sandbox's selection of gateway endpoints.
924
+			sb.joinLeaveMu.Lock()
925
+			defer sb.joinLeaveMu.Unlock()
926
+
927
+			if e := ep.sbLeave(context.WithoutCancel(ctx), sb, n, force); e != nil {
928
+				log.G(ctx).Warnf("failed to leave sandbox for endpoint %s : %v", name, e)
929
+			}
930
+		}()
926 931
 	}
927 932
 
928 933
 	if err = n.getController().deleteStoredEndpoint(ep); err != nil {
... ...
@@ -36,27 +36,34 @@ func (sb *Sandbox) processOptions(options ...SandboxOption) {
36 36
 // Sandbox provides the control over the network container entity.
37 37
 // It is a one to one mapping with the container.
38 38
 type Sandbox struct {
39
-	id                 string
40
-	containerID        string
41
-	config             containerConfig
42
-	extDNS             []extDNSEntry
43
-	osSbox             *osl.Namespace
44
-	controller         *Controller
45
-	resolver           *Resolver
46
-	resolverOnce       sync.Once
39
+	id              string
40
+	containerID     string
41
+	config          containerConfig
42
+	extDNS          []extDNSEntry
43
+	osSbox          *osl.Namespace
44
+	controller      *Controller
45
+	resolver        *Resolver
46
+	resolverOnce    sync.Once
47
+	dbIndex         uint64
48
+	dbExists        bool
49
+	isStub          bool
50
+	inDelete        bool
51
+	ingress         bool
52
+	ndotsSet        bool
53
+	oslTypes        []osl.SandboxType // slice of properties of this sandbox
54
+	loadBalancerNID string            // NID that this SB is a load balancer for
55
+	mu              sync.Mutex
56
+
57
+	// joinLeaveMu is required as well as mu to modify the following fields,
58
+	// acquire joinLeaveMu first, and keep it at-least until gateway changes
59
+	// have been applied following updates to endpoints.
60
+	//
61
+	// mu is required to access these fields.
62
+	joinLeaveMu        sync.Mutex
47 63
 	endpoints          []*Endpoint
48 64
 	epPriority         map[string]int
49 65
 	populatedEndpoints map[string]struct{}
50
-	joinLeaveMu        sync.Mutex
51
-	dbIndex            uint64
52
-	dbExists           bool
53
-	isStub             bool
54
-	inDelete           bool
55
-	ingress            bool
56
-	ndotsSet           bool
57
-	oslTypes           []osl.SandboxType // slice of properties of this sandbox
58
-	loadBalancerNID    string            // NID that this SB is a load balancer for
59
-	mu                 sync.Mutex
66
+
60 67
 	// This mutex is used to serialize service related operation for an endpoint
61 68
 	// The lock is here because the endpoint is saved into the store so is not unique
62 69
 	service sync.Mutex
... ...
@@ -312,30 +319,10 @@ func (sb *Sandbox) addEndpoint(ep *Endpoint) {
312 312
 	sb.mu.Lock()
313 313
 	defer sb.mu.Unlock()
314 314
 
315
-	l := len(sb.endpoints)
316
-	i := sort.Search(l, func(j int) bool {
315
+	i := sort.Search(len(sb.endpoints), func(j int) bool {
317 316
 		return ep.Less(sb.endpoints[j])
318 317
 	})
319
-
320
-	sb.endpoints = append(sb.endpoints, nil)
321
-	copy(sb.endpoints[i+1:], sb.endpoints[i:])
322
-	sb.endpoints[i] = ep
323
-}
324
-
325
-func (sb *Sandbox) removeEndpoint(ep *Endpoint) {
326
-	sb.mu.Lock()
327
-	defer sb.mu.Unlock()
328
-
329
-	sb.removeEndpointRaw(ep)
330
-}
331
-
332
-func (sb *Sandbox) removeEndpointRaw(ep *Endpoint) {
333
-	for i, e := range sb.endpoints {
334
-		if e == ep {
335
-			sb.endpoints = append(sb.endpoints[:i], sb.endpoints[i+1:]...)
336
-			return
337
-		}
338
-	}
318
+	sb.endpoints = slices.Insert(sb.endpoints, i, ep)
339 319
 }
340 320
 
341 321
 func (sb *Sandbox) GetEndpoint(id string) *Endpoint {
... ...
@@ -567,62 +554,6 @@ func (sb *Sandbox) DisableService() error {
567 567
 	return nil
568 568
 }
569 569
 
570
-func (sb *Sandbox) clearNetworkResources(origEp *Endpoint) error {
571
-	ep := sb.GetEndpoint(origEp.id)
572
-	if ep == nil {
573
-		return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s",
574
-			origEp.id)
575
-	}
576
-
577
-	sb.mu.Lock()
578
-	osSbox := sb.osSbox
579
-	inDelete := sb.inDelete
580
-	sb.mu.Unlock()
581
-	if osSbox != nil {
582
-		releaseOSSboxResources(osSbox, ep)
583
-	}
584
-
585
-	sb.mu.Lock()
586
-	delete(sb.populatedEndpoints, ep.ID())
587
-
588
-	if len(sb.endpoints) == 0 {
589
-		// sb.endpoints should never be empty and this is unexpected error condition
590
-		// We log an error message to note this down for debugging purposes.
591
-		log.G(context.TODO()).Errorf("No endpoints in sandbox while trying to remove endpoint %s", ep.Name())
592
-		sb.mu.Unlock()
593
-		return nil
594
-	}
595
-
596
-	if !slices.Contains(sb.endpoints, ep) {
597
-		log.G(context.TODO()).Warnf("Endpoint %s has already been deleted", ep.Name())
598
-		sb.mu.Unlock()
599
-		return nil
600
-	}
601
-
602
-	gwepBefore4, gwepBefore6 := selectGatewayEndpoint(sb.endpoints)
603
-	sb.removeEndpointRaw(ep)
604
-	gwepAfter4, gwepAfter6 := selectGatewayEndpoint(sb.endpoints)
605
-	delete(sb.epPriority, ep.ID())
606
-
607
-	sb.mu.Unlock()
608
-
609
-	if (gwepAfter4 != nil && gwepBefore4 != gwepAfter4) || (gwepAfter6 != nil && gwepBefore6 != gwepAfter6) {
610
-		if err := sb.updateGateway(gwepAfter4, gwepAfter6); err != nil {
611
-			return fmt.Errorf("updating gateway endpoint: %w", err)
612
-		}
613
-	}
614
-
615
-	// Only update the store if we did not come here as part of
616
-	// sandbox delete. If we came here as part of delete then do
617
-	// not bother updating the store. The sandbox object will be
618
-	// deleted anyway
619
-	if !inDelete {
620
-		return sb.storeUpdate(context.TODO())
621
-	}
622
-
623
-	return nil
624
-}
625
-
626 570
 // Less defines an ordering over endpoints, with better candidates for the default
627 571
 // gateway sorted first.
628 572
 //
... ...
@@ -1075,7 +1075,8 @@ func TestBridgeIPAMStatus(t *testing.T) {
1075 1075
 	c := d.NewClientT(t, client.WithVersion("1.52"))
1076 1076
 
1077 1077
 	checkSubnets := func(
1078
-		netName string, want networktypes.SubnetStatuses) bool {
1078
+		netName string, want networktypes.SubnetStatuses,
1079
+	) bool {
1079 1080
 		t.Helper()
1080 1081
 		nw, err := c.NetworkInspect(ctx, netName, client.NetworkInspectOptions{})
1081 1082
 		if assert.Check(t, err) && assert.Check(t, nw.Status != nil) {
... ...
@@ -1114,7 +1115,8 @@ func TestBridgeIPAMStatus(t *testing.T) {
1114 1114
 				AuxAddress: map[string]string{
1115 1115
 					"reserved":   auxIPv4FromRange,
1116 1116
 					"reserved_1": auxIPv4OutOfRange,
1117
-				}}),
1117
+				},
1118
+			}),
1118 1119
 			network.WithIPv6(),
1119 1120
 			network.WithIPAMConfig(networktypes.IPAMConfig{
1120 1121
 				Subnet:  cidrv6.String(),
... ...
@@ -1216,3 +1218,76 @@ func TestBridgeIPAMStatus(t *testing.T) {
1216 1216
 		})
1217 1217
 	})
1218 1218
 }
1219
+
1220
+// TestJoinError checks that if network connection fails late in the process, it's
1221
+// rolled back properly - the failed connection should not show up in container
1222
+// or network inspect, and the container should not gain a network interface.
1223
+func TestJoinError(t *testing.T) {
1224
+	ctx := setupTest(t)
1225
+	d := daemon.New(t)
1226
+	d.StartWithBusybox(ctx, t)
1227
+	defer d.Stop(t)
1228
+	c := d.NewClientT(t)
1229
+
1230
+	const intNet = "intnet"
1231
+	const gwAddr = "192.168.123.1"
1232
+	network.CreateNoError(ctx, t, c, intNet,
1233
+		network.WithInternal(),
1234
+		network.WithIPAM("192.168.123.0/24", gwAddr),
1235
+	)
1236
+	defer network.RemoveNoError(ctx, t, c, intNet)
1237
+
1238
+	const extNet = "extnet"
1239
+	network.CreateNoError(ctx, t, c, extNet)
1240
+	defer network.RemoveNoError(ctx, t, c, extNet)
1241
+
1242
+	cid := ctr.Run(ctx, t, c,
1243
+		ctr.WithNetworkMode(intNet),
1244
+		ctr.WithPrivileged(true),
1245
+	)
1246
+	defer c.ContainerRemove(ctx, cid, client.ContainerRemoveOptions{Force: true})
1247
+
1248
+	// Add a default route to the container, so that connecting extNet will fail to
1249
+	// set up its own default route.
1250
+	res := ctr.ExecT(ctx, t, c, cid, []string{"ip", "route", "add", "default", "via", gwAddr})
1251
+	assert.Equal(t, res.ExitCode, 0)
1252
+
1253
+	// Expect an error when connecting extNet.
1254
+	err := c.NetworkConnect(ctx, extNet, cid, &networktypes.EndpointSettings{})
1255
+	assert.Check(t, is.ErrorContains(err, "failed to set gateway: file exists"))
1256
+
1257
+	// Only intNet should show up in container inspect.
1258
+	ctrInsp := ctr.Inspect(ctx, t, c, cid)
1259
+	assert.Check(t, is.Len(ctrInsp.NetworkSettings.Networks, 1))
1260
+	assert.Check(t, is.Contains(ctrInsp.NetworkSettings.Networks, intNet))
1261
+
1262
+	// extNet should not report any attached containers
1263
+	extNetInsp, err := c.NetworkInspect(ctx, extNet, client.NetworkInspectOptions{})
1264
+	assert.Check(t, err)
1265
+	assert.Check(t, is.Len(extNetInsp.Containers, 0))
1266
+
1267
+	// The container should have an eth0, but no eth1.
1268
+	res = ctr.ExecT(ctx, t, c, cid, []string{"ip", "link", "show", "eth0"})
1269
+	assert.Check(t, is.Equal(res.ExitCode, 0), "container should have an eth0")
1270
+	res = ctr.ExecT(ctx, t, c, cid, []string{"ip", "link", "show", "eth1"})
1271
+	assert.Check(t, is.Contains(res.Stderr(), "can't find device"), "container should not have an eth1")
1272
+
1273
+	// Remove the dodgy route.
1274
+	res = ctr.ExecT(ctx, t, c, cid, []string{"ip", "route", "del", "default", "via", gwAddr})
1275
+	assert.Equal(t, res.ExitCode, 0)
1276
+
1277
+	// Check network connect now succeeds.
1278
+	err = c.NetworkConnect(ctx, extNet, cid, &networktypes.EndpointSettings{})
1279
+	assert.Check(t, err)
1280
+	ctrInsp = ctr.Inspect(ctx, t, c, cid)
1281
+	assert.Check(t, is.Len(ctrInsp.NetworkSettings.Networks, 2))
1282
+	assert.Check(t, is.Contains(ctrInsp.NetworkSettings.Networks, intNet))
1283
+	assert.Check(t, is.Contains(ctrInsp.NetworkSettings.Networks, extNet))
1284
+	extNetInsp, err = c.NetworkInspect(ctx, extNet, client.NetworkInspectOptions{})
1285
+	assert.Check(t, err)
1286
+	assert.Check(t, is.Len(extNetInsp.Containers, 1))
1287
+	res = ctr.ExecT(ctx, t, c, cid, []string{"ip", "link", "show", "eth0"})
1288
+	assert.Check(t, is.Equal(res.ExitCode, 0), "container should have an eth0")
1289
+	res = ctr.ExecT(ctx, t, c, cid, []string{"ip", "link", "show", "eth1"})
1290
+	assert.Check(t, is.Equal(res.ExitCode, 0), "container should have an eth1")
1291
+}