Port the firewall ruleset for encrypted overlay networks to nftables.
Maximize compatibility across distros by using only nftables
features that are widely available. Use the deprecated 'meta secpath
exists' expression instead of the more modern 'meta ipsec exists'.
Extract the VNI from VXLAN packets using the more widely available '@th'
raw payload expressions instead of '@ih' or 'vxlan vni' expressions.
Signed-off-by: Cory Snider <csnider@mirantis.com>
| ... | ... |
@@ -18,6 +18,7 @@ import ( |
| 18 | 18 |
"github.com/containerd/log" |
| 19 | 19 |
"github.com/moby/moby/v2/daemon/libnetwork/discoverapi" |
| 20 | 20 |
"github.com/moby/moby/v2/daemon/libnetwork/drivers/overlay/overlayutils" |
| 21 |
+ "github.com/moby/moby/v2/daemon/libnetwork/internal/nftables" |
|
| 21 | 22 |
"github.com/moby/moby/v2/daemon/libnetwork/iptables" |
| 22 | 23 |
"github.com/moby/moby/v2/daemon/libnetwork/ns" |
| 23 | 24 |
"github.com/moby/moby/v2/daemon/libnetwork/types" |
| ... | ... |
@@ -277,6 +278,22 @@ func (d *driver) programInput(vni uint32, add bool) error {
|
| 277 | 277 |
return nil |
| 278 | 278 |
} |
| 279 | 279 |
|
| 280 |
+func (d *driver) programOverlayEncryptionFirewall(ctx context.Context, vni uint32, encrypted bool) error {
|
|
| 281 |
+ if nftables.Enabled() {
|
|
| 282 |
+ return d.programOverlayEncVNINft(ctx, vni, encrypted) |
|
| 283 |
+ } |
|
| 284 |
+ |
|
| 285 |
+ mangleErr := d.programMangle(vni, encrypted) |
|
| 286 |
+ if mangleErr != nil && encrypted {
|
|
| 287 |
+ return mangleErr |
|
| 288 |
+ } |
|
| 289 |
+ err := d.programInput(vni, encrypted) |
|
| 290 |
+ if err != nil && encrypted {
|
|
| 291 |
+ return errors.Join(err, d.programMangle(vni, false)) |
|
| 292 |
+ } |
|
| 293 |
+ return errors.Join(mangleErr, err) |
|
| 294 |
+} |
|
| 295 |
+ |
|
| 280 | 296 |
func programSA(localIP, remoteIP net.IP, spi spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, lastErr error) {
|
| 281 | 297 |
var ( |
| 282 | 298 |
action = "Removing" |
| 283 | 299 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,113 @@ |
| 0 |
+//go:build linux |
|
| 1 |
+ |
|
| 2 |
+package overlay |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "context" |
|
| 6 |
+ "fmt" |
|
| 7 |
+ "strconv" |
|
| 8 |
+ |
|
| 9 |
+ "github.com/moby/moby/v2/daemon/libnetwork/drivers/overlay/overlayutils" |
|
| 10 |
+ "github.com/moby/moby/v2/daemon/libnetwork/internal/nftables" |
|
| 11 |
+) |
|
| 12 |
+ |
|
| 13 |
// Names and expressions used to build the nftables ruleset for encrypted
// overlay networks.
const (
	// nftOverlayTable is the name of the nftables table holding the ruleset.
	nftOverlayTable = "docker-overlay"

	// nftEncOutChainName is the name of the base chain on the output hook.
	nftEncOutChainName = "enc-out"

	// nftEncInChainName is the name of the base chain on the input hook.
	nftEncInChainName = "enc-in"

	// nftEncVNSetName is the name of the set holding the VNIs of encrypted
	// networks.
	nftEncVNSetName = "encrypted-vnis"

	// nftEncVNIExpr is a raw payload expression selecting 24 bits at bit
	// offset 96 from the start of the transport header. It is used to
	// extract the VNI from VXLAN packets.
	nftEncVNIExpr = "@th,96,24"
)
|
| 20 |
+ |
|
| 21 |
+// ensureOverlayEncNftTable returns the overlay encryption nft table, running one-time setup on first use. |
|
| 22 |
+func (d *driver) ensureOverlayEncNftTable(ctx context.Context) (nftables.Table, error) {
|
|
| 23 |
+ d.overlayEncNftInitMu.Lock() |
|
| 24 |
+ defer d.overlayEncNftInitMu.Unlock() |
|
| 25 |
+ if d.overlayEncNftTable.IsValid() {
|
|
| 26 |
+ return d.overlayEncNftTable, nil |
|
| 27 |
+ } |
|
| 28 |
+ |
|
| 29 |
+ v6, err := d.isIPv6Transport() |
|
| 30 |
+ if err != nil {
|
|
| 31 |
+ return nftables.Table{}, err
|
|
| 32 |
+ } |
|
| 33 |
+ fam := nftables.IPv4 |
|
| 34 |
+ if v6 {
|
|
| 35 |
+ fam = nftables.IPv6 |
|
| 36 |
+ } |
|
| 37 |
+ t, err := nftables.NewTable(fam, nftOverlayTable) |
|
| 38 |
+ if err != nil {
|
|
| 39 |
+ return nftables.Table{}, err
|
|
| 40 |
+ } |
|
| 41 |
+ |
|
| 42 |
+ tm := nftables.Modifier{}
|
|
| 43 |
+ tm.Create(nftables.Set{
|
|
| 44 |
+ Name: nftEncVNSetName, |
|
| 45 |
+ ElementType: nftables.Typeof(nftEncVNIExpr), |
|
| 46 |
+ }) |
|
| 47 |
+ tm.Create(nftables.BaseChain{
|
|
| 48 |
+ Name: nftEncOutChainName, |
|
| 49 |
+ ChainType: nftables.BaseChainTypeRoute, |
|
| 50 |
+ Hook: nftables.BaseChainHookOutput, |
|
| 51 |
+ Priority: nftables.BaseChainPriorityMangle, |
|
| 52 |
+ Policy: nftables.BaseChainPolicyAccept, |
|
| 53 |
+ }) |
|
| 54 |
+ tm.Create(nftables.BaseChain{
|
|
| 55 |
+ Name: nftEncInChainName, |
|
| 56 |
+ ChainType: nftables.BaseChainTypeFilter, |
|
| 57 |
+ Hook: nftables.BaseChainHookInput, |
|
| 58 |
+ Priority: nftables.BaseChainPriorityRaw, |
|
| 59 |
+ Policy: nftables.BaseChainPolicyAccept, |
|
| 60 |
+ }) |
|
| 61 |
+ |
|
| 62 |
+ port := strconv.FormatUint(uint64(overlayutils.VXLANUDPPort()), 10) |
|
| 63 |
+ tm.Create(nftables.Rule{
|
|
| 64 |
+ Chain: nftEncOutChainName, |
|
| 65 |
+ Rule: []string{
|
|
| 66 |
+ "udp dport", port, |
|
| 67 |
+ nftEncVNIExpr, |
|
| 68 |
+ "@" + nftEncVNSetName, |
|
| 69 |
+ "counter", |
|
| 70 |
+ "meta mark set", fmt.Sprintf("0x%x", mark),
|
|
| 71 |
+ }, |
|
| 72 |
+ }) |
|
| 73 |
+ tm.Create(nftables.Rule{
|
|
| 74 |
+ Chain: nftEncInChainName, |
|
| 75 |
+ Rule: []string{
|
|
| 76 |
+ "meta secpath missing", |
|
| 77 |
+ "udp dport", port, |
|
| 78 |
+ nftEncVNIExpr, |
|
| 79 |
+ "@" + nftEncVNSetName, |
|
| 80 |
+ "counter", |
|
| 81 |
+ "drop", |
|
| 82 |
+ }, |
|
| 83 |
+ }) |
|
| 84 |
+ |
|
| 85 |
+ if err := t.Apply(ctx, tm); err != nil {
|
|
| 86 |
+ _ = t.Close() |
|
| 87 |
+ return nftables.Table{}, err
|
|
| 88 |
+ } |
|
| 89 |
+ |
|
| 90 |
+ d.overlayEncNftTable = t |
|
| 91 |
+ return t, nil |
|
| 92 |
+} |
|
| 93 |
+ |
|
| 94 |
+func (d *driver) programOverlayEncVNINft(ctx context.Context, vni uint32, encrypted bool) error {
|
|
| 95 |
+ t, err := d.ensureOverlayEncNftTable(ctx) |
|
| 96 |
+ if err != nil {
|
|
| 97 |
+ return err |
|
| 98 |
+ } |
|
| 99 |
+ |
|
| 100 |
+ tm := nftables.Modifier{}
|
|
| 101 |
+ se := nftables.SetElement{
|
|
| 102 |
+ SetName: nftEncVNSetName, |
|
| 103 |
+ Element: fmt.Sprintf("0x%06x", vni&0xffffff),
|
|
| 104 |
+ Idempotent: true, |
|
| 105 |
+ } |
|
| 106 |
+ if encrypted {
|
|
| 107 |
+ tm.Create(se) |
|
| 108 |
+ } else {
|
|
| 109 |
+ tm.Delete(se) |
|
| 110 |
+ } |
|
| 111 |
+ return t.Apply(ctx, tm) |
|
| 112 |
+} |
| ... | ... |
@@ -182,8 +182,7 @@ func (d *driver) CreateNetwork(ctx context.Context, id string, option map[string |
| 182 | 182 |
// Make sure no rule is on the way from any stale secure network |
| 183 | 183 |
if !n.secure {
|
| 184 | 184 |
for _, vni := range vnis {
|
| 185 |
- d.programMangle(vni, false) |
|
| 186 |
- d.programInput(vni, false) |
|
| 185 |
+ _ = d.programOverlayEncryptionFirewall(ctx, vni, false) |
|
| 187 | 186 |
} |
| 188 | 187 |
} |
| 189 | 188 |
|
| ... | ... |
@@ -227,19 +226,12 @@ func (d *driver) DeleteNetwork(nid string) error {
|
| 227 | 227 |
|
| 228 | 228 |
if n.secure {
|
| 229 | 229 |
for _, s := range n.subnets {
|
| 230 |
- if err := d.programMangle(s.vni, false); err != nil {
|
|
| 230 |
+ if err := d.programOverlayEncryptionFirewall(context.TODO(), s.vni, false); err != nil {
|
|
| 231 | 231 |
log.G(context.TODO()).WithFields(log.Fields{
|
| 232 | 232 |
"error": err, |
| 233 | 233 |
"network_id": n.id, |
| 234 | 234 |
"subnet": s.subnetIP, |
| 235 |
- }).Warn("Failed to clean up iptables rules during overlay network deletion")
|
|
| 236 |
- } |
|
| 237 |
- if err := d.programInput(s.vni, false); err != nil {
|
|
| 238 |
- log.G(context.TODO()).WithFields(log.Fields{
|
|
| 239 |
- "error": err, |
|
| 240 |
- "network_id": n.id, |
|
| 241 |
- "subnet": s.subnetIP, |
|
| 242 |
- }).Warn("Failed to clean up iptables rules during overlay network deletion")
|
|
| 235 |
+ }).Warn("Failed to clean up overlay encryption firewall rules during overlay network deletion")
|
|
| 243 | 236 |
} |
| 244 | 237 |
} |
| 245 | 238 |
} |
| ... | ... |
@@ -529,14 +521,9 @@ func (n *network) initSubnetSandbox(s *subnet) error {
|
| 529 | 529 |
// Program firewall rules (iptables or nftables) for mandatory encryption of the secure |
| 530 | 530 |
// network, or clean up leftover rules for a stale secure network which |
| 531 | 531 |
// was previously assigned the same VNI. |
| 532 |
- if err := n.driver.programMangle(s.vni, n.secure); err != nil {
|
|
| 532 |
+ if err := n.driver.programOverlayEncryptionFirewall(context.TODO(), s.vni, n.secure); err != nil {
|
|
| 533 | 533 |
return err |
| 534 | 534 |
} |
| 535 |
- if err := n.driver.programInput(s.vni, n.secure); err != nil {
|
|
| 536 |
- if n.secure {
|
|
| 537 |
- return errors.Join(err, n.driver.programMangle(s.vni, false)) |
|
| 538 |
- } |
|
| 539 |
- } |
|
| 540 | 535 |
|
| 541 | 536 |
if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil {
|
| 542 | 537 |
return err |
| ... | ... |
@@ -13,6 +13,7 @@ import ( |
| 13 | 13 |
|
| 14 | 14 |
"github.com/moby/moby/v2/daemon/libnetwork/discoverapi" |
| 15 | 15 |
"github.com/moby/moby/v2/daemon/libnetwork/driverapi" |
| 16 |
+ "github.com/moby/moby/v2/daemon/libnetwork/internal/nftables" |
|
| 16 | 17 |
"github.com/moby/moby/v2/daemon/libnetwork/scope" |
| 17 | 18 |
) |
| 18 | 19 |
|
| ... | ... |
@@ -43,6 +44,9 @@ type driver struct {
|
| 43 | 43 |
secMap encrMap |
| 44 | 44 |
keys []*key |
| 45 | 45 |
|
| 46 |
+ overlayEncNftInitMu sync.Mutex |
|
| 47 |
+ overlayEncNftTable nftables.Table |
|
| 48 |
+ |
|
| 46 | 49 |
// mu must be held when accessing the fields which follow it |
| 47 | 50 |
// in the struct definition. |
| 48 | 51 |
// |
| ... | ... |
@@ -1015,6 +1015,9 @@ func (sd Set) delete(ctx context.Context, t *table) (bool, error) {
|
| 1015 | 1015 |
type SetElement struct {
	// SetName is the name of the set the element belongs to.
	// Element is the element itself.
	SetName, Element string

	// If true, deleting an element that does not exist or creating an
	// element that already exists will succeed.
	Idempotent bool
}
| 1019 | 1022 |
|
| 1020 | 1023 |
func (se SetElement) create(ctx context.Context, t *table) (bool, error) {
|
| ... | ... |
@@ -1026,6 +1029,9 @@ func (se SetElement) create(ctx context.Context, t *table) (bool, error) {
|
| 1026 | 1026 |
return false, fmt.Errorf("cannot add to set '%s', element not specified", se.SetName)
|
| 1027 | 1027 |
} |
| 1028 | 1028 |
if _, ok := s.Elements[se.Element]; ok {
|
| 1029 |
+ if se.Idempotent {
|
|
| 1030 |
+ return false, nil |
|
| 1031 |
+ } |
|
| 1029 | 1032 |
return false, fmt.Errorf("set '%s' already contains element '%s'", s.Name, se.Element)
|
| 1030 | 1033 |
} |
| 1031 | 1034 |
s.Elements[se.Element] = struct{}{}
|
| ... | ... |
@@ -1046,6 +1052,9 @@ func (se SetElement) delete(ctx context.Context, t *table) (bool, error) {
|
| 1046 | 1046 |
return false, fmt.Errorf("cannot delete from set '%s', it does not exist", se.SetName)
|
| 1047 | 1047 |
} |
| 1048 | 1048 |
if _, ok := s.Elements[se.Element]; !ok {
|
| 1049 |
+ if se.Idempotent {
|
|
| 1050 |
+ return false, nil |
|
| 1051 |
+ } |
|
| 1049 | 1052 |
return false, fmt.Errorf("cannot delete '%s' from set '%s', it does not exist", se.Element, s.Name)
|
| 1050 | 1053 |
} |
| 1051 | 1054 |
delete(s.Elements, se.Element) |