Browse code

Add option --bridge-accept-fwmark

Packets with the given firewall mark are accepted by the bridge
driver's filter-FORWARD rules.

The value can either be an integer mark, or it can include a
mask in the format "<mark>/<mask>".

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2025/07/22 20:17:20
Showing 11 changed files
... ...
@@ -39,6 +39,7 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
39 39
 	flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic")
40 40
 	flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", conf.BridgeConfig.UserlandProxyPath, "Path to the userland proxy binary")
41 41
 	flags.BoolVar(&conf.BridgeConfig.AllowDirectRouting, "allow-direct-routing", false, "Allow remote access to published ports on container IP addresses")
42
+	flags.StringVar(&conf.BridgeConfig.BridgeAcceptFwMark, "bridge-accept-fwmark", "", "In bridge networks, accept packets with this firewall mark/mask")
42 43
 	flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers")
43 44
 	flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces")
44 45
 	flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running")
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"net"
7 7
 	"os/exec"
8 8
 	"path/filepath"
9
+	"strconv"
9 10
 	"strings"
10 11
 
11 12
 	"github.com/containerd/cgroups/v3"
... ...
@@ -49,6 +50,7 @@ type BridgeConfig struct {
49 49
 	EnableUserlandProxy      bool   `json:"userland-proxy,omitempty"`
50 50
 	UserlandProxyPath        string `json:"userland-proxy-path,omitempty"`
51 51
 	AllowDirectRouting       bool   `json:"allow-direct-routing,omitempty"`
52
+	BridgeAcceptFwMark       string `json:"bridge-accept-fwmark,omitempty"`
52 53
 }
53 54
 
54 55
 // DefaultBridgeConfig stores all the parameters for the default bridge network.
... ...
@@ -243,15 +245,15 @@ func validatePlatformConfig(conf *Config) error {
243 243
 	if err := verifyDefaultIpcMode(conf.IpcMode); err != nil {
244 244
 		return err
245 245
 	}
246
-
247 246
 	if err := bridge.ValidateFixedCIDRV6(conf.FixedCIDRv6); err != nil {
248 247
 		return errors.Wrap(err, "invalid fixed-cidr-v6")
249 248
 	}
250
-
251 249
 	if err := validateFirewallBackend(conf.FirewallBackend); err != nil {
252 250
 		return errors.Wrap(err, "invalid firewall-backend")
253 251
 	}
254
-
252
+	if err := validateFwMarkMask(conf.BridgeAcceptFwMark); err != nil {
253
+		return errors.Wrap(err, "invalid bridge-accept-fwmark")
254
+	}
255 255
 	return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
256 256
 }
257 257
 
... ...
@@ -311,6 +313,22 @@ func validateFirewallBackend(val string) error {
311 311
 	return errors.New(`allowed values are "iptables" and "nftables"`)
312 312
 }
313 313
 
314
+func validateFwMarkMask(val string) error {
315
+	if val == "" {
316
+		return nil
317
+	}
318
+	mark, mask, haveMask := strings.Cut(val, "/")
319
+	if _, err := strconv.ParseUint(mark, 0, 32); err != nil {
320
+		return fmt.Errorf("invalid firewall mark %q: %w", val, err)
321
+	}
322
+	if haveMask {
323
+		if _, err := strconv.ParseUint(mask, 0, 32); err != nil {
324
+			return fmt.Errorf("invalid firewall mask %q: %w", val, err)
325
+		}
326
+	}
327
+	return nil
328
+}
329
+
314 330
 func verifyDefaultCgroupNsMode(mode string) error {
315 331
 	cm := container.CgroupnsMode(mode)
316 332
 	if !cm.Valid() {
... ...
@@ -396,3 +396,71 @@ func TestDaemonLegacyOptions(t *testing.T) {
396 396
 		})
397 397
 	}
398 398
 }
399
+
400
+func TestValidateAcceptFwMarkMark(t *testing.T) {
401
+	tests := []struct {
402
+		name   string
403
+		val    string
404
+		expErr string
405
+	}{
406
+		{
407
+			name: "empty",
408
+			val:  "",
409
+		},
410
+		{
411
+			name: "dec/no-mask",
412
+			val:  "1",
413
+		},
414
+		{
415
+			name: "hex/no-mask",
416
+			val:  "0x1",
417
+		},
418
+		{
419
+			name: "dec/mask",
420
+			val:  "1/2",
421
+		},
422
+		{
423
+			name: "hex/mask",
424
+			val:  "0x1/0x2",
425
+		},
426
+		{
427
+			name: "octal/mask",
428
+			val:  "010/0xff",
429
+		},
430
+		{
431
+			name:   "bad/mark",
432
+			val:    "hello/0x2",
433
+			expErr: `invalid firewall mark "hello/0x2": strconv.ParseUint: parsing "hello": invalid syntax`,
434
+		},
435
+		{
436
+			name:   "bad/mask",
437
+			val:    "1/hello",
438
+			expErr: `invalid firewall mask "1/hello": strconv.ParseUint: parsing "hello": invalid syntax`,
439
+		},
440
+		{
441
+			name:   "bad/sep",
442
+			val:    "1+hello",
443
+			expErr: `invalid firewall mark "1+hello": strconv.ParseUint: parsing "1+hello": invalid syntax`,
444
+		},
445
+		{
446
+			name:   "bad/no-mask",
447
+			val:    "1/",
448
+			expErr: `invalid firewall mask "1/": strconv.ParseUint: parsing "": invalid syntax`,
449
+		},
450
+		{
451
+			name:   "bad/negative",
452
+			val:    "-1",
453
+			expErr: `invalid firewall mark "-1": strconv.ParseUint: parsing "-1": invalid syntax`,
454
+		},
455
+	}
456
+	for _, tc := range tests {
457
+		t.Run(tc.name, func(t *testing.T) {
458
+			err := validateFwMarkMask(tc.val)
459
+			if tc.expErr == "" {
460
+				assert.NilError(t, err)
461
+			} else {
462
+				assert.Check(t, is.ErrorContains(err, tc.expErr))
463
+			}
464
+		})
465
+	}
466
+}
... ...
@@ -938,6 +938,7 @@ func networkPlatformOptions(conf *config.Config) []nwconfig.Option {
938 938
 				"EnableIP6Tables":          conf.BridgeConfig.EnableIP6Tables,
939 939
 				"Hairpin":                  !conf.EnableUserlandProxy || conf.UserlandProxyPath == "",
940 940
 				"AllowDirectRouting":       conf.BridgeConfig.AllowDirectRouting,
941
+				"AcceptFwMark":             conf.BridgeConfig.BridgeAcceptFwMark,
941 942
 			},
942 943
 		}),
943 944
 	}
... ...
@@ -77,6 +77,7 @@ type configuration struct {
77 77
 	// hairpinned.
78 78
 	Hairpin            bool
79 79
 	AllowDirectRouting bool
80
+	AcceptFwMark       string
80 81
 }
81 82
 
82 83
 // networkConfiguration for network specific configuration
... ...
@@ -429,6 +430,7 @@ func (n *bridgeNetwork) newFirewallerNetwork(ctx context.Context) (_ firewaller.
429 429
 		ICC:                   n.config.EnableICC,
430 430
 		Masquerade:            n.config.EnableIPMasquerade,
431 431
 		TrustedHostInterfaces: n.config.TrustedHostInterfaces,
432
+		AcceptFwMark:          n.driver.config.AcceptFwMark,
432 433
 		Config4:               config4,
433 434
 		Config6:               config6,
434 435
 	})
... ...
@@ -48,6 +48,10 @@ type NetworkConfig struct {
48 48
 	// bridge itself). In particular, these are not external interfaces for the purpose of
49 49
 	// blocking direct-routing to a container's IP address.
50 50
 	TrustedHostInterfaces []string
51
+	// AcceptFwMark is a firewall mark/mask. Packets with this mark will not be dropped by
52
+	// per-port blocking rules. So, packets with this mark have access to unpublished
53
+	// container ports.
54
+	AcceptFwMark string
51 55
 	// Config4 contains IPv4-specific configuration for the network.
52 56
 	Config4 NetworkConfigFam
53 57
 	// Config6 contains IPv6-specific configuration for the network.
... ...
@@ -7,6 +7,8 @@ import (
7 7
 	"errors"
8 8
 	"fmt"
9 9
 	"net/netip"
10
+	"strconv"
11
+	"strings"
10 12
 
11 13
 	"github.com/containerd/log"
12 14
 	"github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller"
... ...
@@ -263,6 +265,18 @@ func setDefaultForwardRule(ipVersion iptables.IPVersion, ifName string, unprotec
263 263
 }
264 264
 
265 265
 func (n *network) setupNonInternalNetworkRules(ctx context.Context, ipVer iptables.IPVersion, config firewaller.NetworkConfigFam, enable bool) error {
266
+	if n.config.AcceptFwMark != "" {
267
+		fwm, err := iptablesFwMark(n.config.AcceptFwMark)
268
+		if err != nil {
269
+			return err
270
+		}
271
+		if err := programChainRule(iptables.Rule{IPVer: ipVer, Table: iptables.Filter, Chain: DockerForwardChain, Args: []string{
272
+			"-m", "mark", "--mark", fwm, "-j", "ACCEPT",
273
+		}}, "ALLOW FW MARK", enable); err != nil {
274
+			return err
275
+		}
276
+	}
277
+
266 278
 	var natArgs, hpNatArgs []string
267 279
 	if config.HostIP.IsValid() {
268 280
 		// The user wants IPv4/IPv6 SNAT with the given address.
... ...
@@ -459,3 +473,23 @@ func setupInternalNetworkRules(ctx context.Context, bridgeIface string, prefix n
459 459
 	// Set Inter Container Communication.
460 460
 	return setIcc(ctx, version, bridgeIface, icc, true, insert)
461 461
 }
462
+
463
+// iptablesFwMark takes a string representing a firewall mark with an optional
464
+// "/mask", parses the mark and mask, and returns the same "mark/mask" with the
465
+// numbers converted to decimal, because strconv.ParseUint accepts more integer
466
+// formats than iptables.
467
+func iptablesFwMark(val string) (string, error) {
468
+	markStr, maskStr, haveMask := strings.Cut(val, "/")
469
+	mark, err := strconv.ParseUint(markStr, 0, 32)
470
+	if err != nil {
471
+		return "", fmt.Errorf("invalid firewall mark %q: %w", val, err)
472
+	}
473
+	if haveMask {
474
+		mask, err := strconv.ParseUint(maskStr, 0, 32)
475
+		if err != nil {
476
+			return "", fmt.Errorf("invalid firewall mask %q: %w", val, err)
477
+		}
478
+		return fmt.Sprintf("%d/%d", mark, mask), nil
479
+	}
480
+	return strconv.FormatUint(mark, 10), nil
481
+}
... ...
@@ -5,6 +5,8 @@ package nftabler
5 5
 import (
6 6
 	"context"
7 7
 	"fmt"
8
+	"strconv"
9
+	"strings"
8 10
 
9 11
 	"github.com/containerd/log"
10 12
 	"github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller"
... ...
@@ -157,6 +159,20 @@ func (n *network) configure(ctx context.Context, table nftables.TableRef, conf f
157 157
 		}
158 158
 		cleanup.Add(cf)
159 159
 	} else {
160
+		// AcceptFwMark
161
+		if n.config.AcceptFwMark != "" {
162
+			fwm, err := nftFwMark(n.config.AcceptFwMark)
163
+			if err != nil {
164
+				return nil, fmt.Errorf("adding fwmark %q for %q: %w", n.config.AcceptFwMark, n.config.IfName, err)
165
+			}
166
+			cf, err = fwdInChain.AppendRuleCf(ctx, fwdInAcceptFwMarkRuleGroup,
167
+				`meta mark %s counter accept comment "ALLOW FW MARK"`, fwm)
168
+			if err != nil {
169
+				return nil, fmt.Errorf("adding ALLOW FW MARK rule for %q: %w", n.config.IfName, err)
170
+			}
171
+			cleanup.Add(cf)
172
+		}
173
+
160 174
 		// Inter-Container Communication
161 175
 		cf, err = fwdInChain.AppendRuleCf(ctx, fwdInICCRuleGroup, "iifname == %s counter %s comment ICC",
162 176
 			n.config.IfName, iccVerdict)
... ...
@@ -270,3 +286,23 @@ func chainNatPostRtOut(ifName string) string {
270 270
 func chainNatPostRtIn(ifName string) string {
271 271
 	return "nat-postrouting-in__" + ifName
272 272
 }
273
+
274
+// nftFwMark takes a string representing a firewall mark with an optional
275
+// "/mask", parses the mark and mask, and returns an nftables expression
276
+// representing the same mark/mask. Numbers are converted to decimal, because
277
+// strconv.ParseUint accepts more integer formats than nft.
278
+func nftFwMark(val string) (string, error) {
279
+	markStr, maskStr, haveMask := strings.Cut(val, "/")
280
+	mark, err := strconv.ParseUint(markStr, 0, 32)
281
+	if err != nil {
282
+		return "", fmt.Errorf("invalid firewall mark %q: %w", val, err)
283
+	}
284
+	if haveMask {
285
+		mask, err := strconv.ParseUint(maskStr, 0, 32)
286
+		if err != nil {
287
+			return "", fmt.Errorf("invalid firewall mask %q: %w", val, err)
288
+		}
289
+		return fmt.Sprintf("and %d == %d", mask, mark), nil
290
+	}
291
+	return strconv.FormatUint(mark, 10), nil
292
+}
... ...
@@ -34,7 +34,8 @@ const (
34 34
 )
35 35
 
36 36
 const (
37
-	fwdInLegacyLinksRuleGroup = iota + initialRuleGroup + 1
37
+	fwdInAcceptFwMarkRuleGroup = iota + initialRuleGroup + 1
38
+	fwdInLegacyLinksRuleGroup
38 39
 	fwdInICCRuleGroup
39 40
 	fwdInPortsRuleGroup
40 41
 	fwdInFinalRuleGroup
... ...
@@ -798,6 +798,124 @@ func TestDirectRoutingOpenPorts(t *testing.T) {
798 798
 	}
799 799
 }
800 800
 
801
+func TestAcceptFwMark(t *testing.T) {
802
+	skip.If(t, testEnv.IsRootless())
803
+	ctx := setupTest(t)
804
+
805
+	d := daemon.New(t)
806
+	d.StartWithBusybox(ctx, t, "--bridge-accept-fwmark=2/3")
807
+	t.Cleanup(func() { d.Stop(t) })
808
+
809
+	c := d.NewClientT(t)
810
+	t.Cleanup(func() { c.Close() })
811
+
812
+	// Simulate the remote host.
813
+
814
+	l3 := networking.NewL3Segment(t, "test-routed-open-ports",
815
+		netip.MustParsePrefix("192.168.124.1/24"),
816
+		netip.MustParsePrefix("fdc0:36dc:a4dd::1/64"))
817
+	t.Cleanup(func() { l3.Destroy(t) })
818
+
819
+	// "docker" is the host where dockerd is running.
820
+	l3.AddHost(t, "docker", networking.CurrentNetns, "eth-test",
821
+		netip.MustParsePrefix("192.168.124.2/24"),
822
+		netip.MustParsePrefix("fdc0:36dc:a4dd::2/64"))
823
+	// "remote" simulates the remote host.
824
+	l3.AddHost(t, "remote", "test-remote-host", "eth0",
825
+		netip.MustParsePrefix("192.168.124.3/24"),
826
+		netip.MustParsePrefix("fdc0:36dc:a4dd::3/64"))
827
+	// Add default routes to the "docker" Host from the "remote" Host.
828
+	l3.Hosts["remote"].MustRun(t, "ip", "route", "add", "default", "via", "192.168.124.2")
829
+	l3.Hosts["remote"].MustRun(t, "ip", "-6", "route", "add", "default", "via", "fdc0:36dc:a4dd::2")
830
+
831
+	// Create a network and run a container on it.
832
+	// Don't publish any ports.
833
+	const netName = "test-acceptfwmark"
834
+	network.CreateNoError(ctx, t, c, netName,
835
+		network.WithOption(bridge.BridgeName, "br-acceptfwmark"),
836
+		network.WithOption(bridge.TrustedHostInterfaces, "eth-test"),
837
+		network.WithIPv6(),
838
+	)
839
+	t.Cleanup(func() {
840
+		network.RemoveNoError(ctx, t, c, netName)
841
+	})
842
+
843
+	ctrId := container.Run(ctx, t, c,
844
+		container.WithNetworkMode(netName),
845
+		container.WithCmd("httpd", "-f"),
846
+	)
847
+	t.Cleanup(func() {
848
+		c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
849
+	})
850
+
851
+	insp := container.Inspect(ctx, t, c, ctrId)
852
+	ctrIPv4 := insp.NetworkSettings.Networks[netName].IPAddress
853
+	ctrIPv6 := insp.NetworkSettings.Networks[netName].GlobalIPv6Address
854
+
855
+	const (
856
+		httpSuccess = "404 Not Found"
857
+		httpFail    = "Connection timed out"
858
+		pingSuccess = 0
859
+		pingFail    = 1
860
+	)
861
+
862
+	testPing := func(t *testing.T, cmd, addr string, expExit int) {
863
+		t.Helper()
864
+		t.Parallel()
865
+		l3.Hosts["remote"].Do(t, func() {
866
+			t.Helper()
867
+			pingRes := icmd.RunCommand(cmd, "--numeric", "--count=1", "--timeout=3", addr)
868
+			assert.Check(t, pingRes.ExitCode == expExit, "%s %s -> out:%s err:%s",
869
+				cmd, addr, pingRes.Stdout(), pingRes.Stderr())
870
+		})
871
+	}
872
+	testHttp := func(t *testing.T, addr, port, expOut string) {
873
+		t.Helper()
874
+		t.Parallel()
875
+		l3.Hosts["remote"].Do(t, func() {
876
+			t.Helper()
877
+			u := "http://" + net.JoinHostPort(addr, port)
878
+			res := icmd.RunCommand("curl", "--max-time", "3", "--show-error", "--silent", u)
879
+			assert.Check(t, is.Contains(res.Combined(), expOut), "url:%s", u)
880
+		})
881
+	}
882
+
883
+	test := func(name string, expPing int, expHttp string) {
884
+		t.Run(name, func(t *testing.T) {
885
+			t.Run("v4/ping", func(t *testing.T) {
886
+				testPing(t, "ping", ctrIPv4, expPing)
887
+			})
888
+			t.Run("v6/ping", func(t *testing.T) {
889
+				testPing(t, "ping6", ctrIPv6, expPing)
890
+			})
891
+			t.Run("v4/http", func(t *testing.T) {
892
+				testHttp(t, ctrIPv4, "80", expHttp)
893
+			})
894
+			t.Run("v6/http", func(t *testing.T) {
895
+				testHttp(t, ctrIPv6, "80", expHttp)
896
+			})
897
+		})
898
+	}
899
+	test("nofwmark", pingFail, httpFail)
900
+
901
+	// This nftables ruleset will also work with --firewall-backend=iptables, as long as it's iptables-nft.
902
+	cmd := icmd.Command("nft", "-f", "-")
903
+	res := icmd.RunCmd(cmd, icmd.WithStdin(strings.NewReader(`
904
+		table inet test-acceptfwmark {
905
+		  chain raw-PREROUTING {
906
+			type filter hook prerouting priority raw
907
+			iifname "eth-test" counter mark set 0xe
908
+		  }
909
+		}
910
+	`)))
911
+	res.Assert(t, icmd.Success)
912
+	defer func() {
913
+		icmd.RunCommand("nft", "delete table inet test-acceptfwmark").Assert(t, icmd.Success)
914
+	}()
915
+
916
+	test("fwmark", pingSuccess, httpSuccess)
917
+}
918
+
801 919
 // TestRoutedNonGateway checks whether a published container port on an endpoint in a
802 920
 // gateway mode "routed" network is accessible when the routed network is not providing
803 921
 // the container's default gateway.
... ...
@@ -10,6 +10,7 @@ dockerd - Enable daemon mode
10 10
 [**-b**|**--bridge**[=*BRIDGE*]]
11 11
 [**--bip**[=*BIP*]]
12 12
 [**--bip6**[=*BIP*]]
13
+[**--bridge-accept-fwmark**[=*[]*]]
13 14
 [**--cgroup-parent**[=*[]*]]
14 15
 [**--config-file**[=*path*]]
15 16
 [**--containerd**[=*SOCKET-PATH*]]
... ...
@@ -140,6 +141,9 @@ $ sudo dockerd --add-runtime runc=runc --add-runtime custom=/usr/local/bin/my-ru
140 140
   Use the provided CIDR notation IPv6 address for the default bridge network;
141 141
   Mutually exclusive of \-b
142 142
 
143
+**--bridge-accept-fwmark**=""
144
+  Bridge networks will accept packets with this firewall mark/mask. The value is either an integer mark, or a mark and mask in the form MARK/MASK.
145
+
143 146
 **--cgroup-parent**=""
144 147
   Set parent cgroup for all containers. Default is "/docker" for fs cgroup
145 148
   driver and "system.slice" for systemd cgroup driver.