Browse code

Merge pull request #32505 from fcrisciani/conntrack_test

Conntrack flush support

Sebastiaan van Stijn authored on 2017/04/11 23:40:56
Showing 20 changed files
... ...
@@ -12,6 +12,7 @@ import (
12 12
 	"os"
13 13
 	"path/filepath"
14 14
 	"strings"
15
+	"syscall"
15 16
 	"time"
16 17
 
17 18
 	"github.com/docker/docker/api/types"
... ...
@@ -1789,3 +1790,56 @@ func (s *DockerNetworkSuite) TestDockerNetworkDisconnectFromBridge(c *check.C) {
1789 1789
 	_, _, err := dockerCmdWithError("network", "disconnect", network, name)
1790 1790
 	c.Assert(err, check.IsNil)
1791 1791
 }
1792
+
1793
+// TestConntrackFlowsLeak covers the failure scenario of ticket: https://github.com/docker/docker/issues/8795
1794
+// Validates that conntrack is correctly cleaned once a container is destroyed
1795
+func (s *DockerNetworkSuite) TestConntrackFlowsLeak(c *check.C) {
1796
+	testRequires(c, IsAmd64, DaemonIsLinux, Network)
1797
+
1798
+	// Create a new network
1799
+	dockerCmd(c, "network", "create", "--subnet=192.168.10.0/24", "--gateway=192.168.10.1", "-o", "com.docker.network.bridge.host_binding_ipv4=192.168.10.1", "testbind")
1800
+	assertNwIsAvailable(c, "testbind")
1801
+
1802
+	// Launch the server, this will remain listening on an exposed port and reply to any request in a ping/pong fashion
1803
+	cmd := "while true; do echo hello | nc -w 1 -lu 8080; done"
1804
+	_, _, err := dockerCmdWithError("run", "-d", "--name", "server", "--net", "testbind", "-p", "8080:8080/udp", "appropriate/nc", "sh", "-c", cmd)
1805
+	c.Assert(err, check.IsNil)
1806
+
1807
+	// Launch a container client, here the objective is to create a flow that is natted in order to expose the bug
1808
+	cmd = "echo world | nc -q 1 -u 192.168.10.1 8080"
1809
+	_, _, err = dockerCmdWithError("run", "-d", "--name", "client", "--net=host", "appropriate/nc", "sh", "-c", cmd)
1810
+	c.Assert(err, check.IsNil)
1811
+
1812
+	// Get all the flows using netlink
1813
+	flows, err := netlink.ConntrackTableList(netlink.ConntrackTable, syscall.AF_INET)
1814
+	c.Assert(err, check.IsNil)
1815
+	var flowMatch int
1816
+	for _, flow := range flows {
1817
+		// count only the flows that we are interested in, skipping others that can be laying around the host
1818
+		if flow.Forward.Protocol == syscall.IPPROTO_UDP &&
1819
+			flow.Forward.DstIP.Equal(net.ParseIP("192.168.10.1")) &&
1820
+			flow.Forward.DstPort == 8080 {
1821
+			flowMatch++
1822
+		}
1823
+	}
1824
+	// The client should have created only 1 flow
1825
+	c.Assert(flowMatch, checker.Equals, 1)
1826
+
1827
+	// Now delete the server, this will trigger the conntrack cleanup
1828
+	err = deleteContainer("server")
1829
+	c.Assert(err, checker.IsNil)
1830
+
1831
+	// Fetch again all the flows and validate that there is no server flow in the conntrack laying around
1832
+	flows, err = netlink.ConntrackTableList(netlink.ConntrackTable, syscall.AF_INET)
1833
+	c.Assert(err, check.IsNil)
1834
+	flowMatch = 0
1835
+	for _, flow := range flows {
1836
+		if flow.Forward.Protocol == syscall.IPPROTO_UDP &&
1837
+			flow.Forward.DstIP.Equal(net.ParseIP("192.168.10.1")) &&
1838
+			flow.Forward.DstPort == 8080 {
1839
+			flowMatch++
1840
+		}
1841
+	}
1842
+	// All the flows have to be gone
1843
+	c.Assert(flowMatch, checker.Equals, 0)
1844
+}
... ...
@@ -24,7 +24,7 @@ github.com/RackSec/srslog 456df3a81436d29ba874f3590eeeee25d666f8a5
24 24
 github.com/imdario/mergo 0.2.1
25 25
 
26 26
 #get libnetwork packages
27
-github.com/docker/libnetwork ab8f7e61743aa7e54c5d0dad0551543adadc33cf
27
+github.com/docker/libnetwork b13e0604016a4944025aaff521d9c125850b0d04
28 28
 github.com/docker/go-events 18b43f1bc85d9cdd42c05a6cd2d444c7a200a894
29 29
 github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
30 30
 github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
... ...
@@ -34,7 +34,7 @@ github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e
34 34
 github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
35 35
 github.com/docker/libkv 1d8431073ae03cdaedb198a89722f3aab6d418ef
36 36
 github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
37
-github.com/vishvananda/netlink c682914b0b231f6cad204a86e565551e51d387c0
37
+github.com/vishvananda/netlink 1e86b2bee5b6a7d377e4c02bb7f98209d6a7297c
38 38
 github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
39 39
 github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
40 40
 github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d
... ...
@@ -47,6 +47,7 @@ import (
47 47
 	"container/heap"
48 48
 	"fmt"
49 49
 	"net"
50
+	"path/filepath"
50 51
 	"strings"
51 52
 	"sync"
52 53
 	"time"
... ...
@@ -979,6 +980,8 @@ func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (s
979 979
 
980 980
 	if sb.ingress {
981 981
 		c.ingressSandbox = sb
982
+		sb.config.hostsPath = filepath.Join(c.cfg.Daemon.DataDir, "/network/files/hosts")
983
+		sb.config.resolvConfPath = filepath.Join(c.cfg.Daemon.DataDir, "/network/files/resolv.conf")
982 984
 		sb.id = "ingress_sbox"
983 985
 	}
984 986
 	c.Unlock()
... ...
@@ -1346,6 +1346,13 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
1346 1346
 
1347 1347
 	endpoint.portMapping = nil
1348 1348
 
1349
+	// Clean the connection tracker state of the host for the specific endpoint
1350
+	// The host kernel keeps track of the connections (TCP and UDP), so if a new endpoint gets the same IP of
1351
+	// this one (that is going down), is possible that some of the packets would not be routed correctly inside
1352
+	// the new endpoint
1353
+	// Deeper details: https://github.com/docker/docker/issues/8795
1354
+	clearEndpointConnections(d.nlh, endpoint)
1355
+
1349 1356
 	if err = d.storeUpdate(endpoint); err != nil {
1350 1357
 		return fmt.Errorf("failed to update bridge endpoint %s to store: %v", endpoint.id[0:7], err)
1351 1358
 	}
... ...
@@ -7,6 +7,7 @@ import (
7 7
 
8 8
 	"github.com/Sirupsen/logrus"
9 9
 	"github.com/docker/libnetwork/iptables"
10
+	"github.com/vishvananda/netlink"
10 11
 )
11 12
 
12 13
 // DockerChain: DOCKER iptable chain name
... ...
@@ -348,3 +349,15 @@ func setupInternalNetworkRules(bridgeIface string, addr net.Addr, icc, insert bo
348 348
 	}
349 349
 	return nil
350 350
 }
351
+
352
+func clearEndpointConnections(nlh *netlink.Handle, ep *bridgeEndpoint) {
353
+	var ipv4List []net.IP
354
+	var ipv6List []net.IP
355
+	if ep.addr != nil {
356
+		ipv4List = append(ipv4List, ep.addr.IP)
357
+	}
358
+	if ep.addrv6 != nil {
359
+		ipv6List = append(ipv6List, ep.addrv6.IP)
360
+	}
361
+	iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
362
+}
... ...
@@ -665,7 +665,7 @@ func (ep *endpoint) hasInterface(iName string) bool {
665 665
 
666 666
 func (ep *endpoint) Leave(sbox Sandbox, options ...EndpointOption) error {
667 667
 	if sbox == nil || sbox.ID() == "" || sbox.Key() == "" {
668
-		return types.BadRequestErrorf("invalid Sandbox passed to enpoint leave: %v", sbox)
668
+		return types.BadRequestErrorf("invalid Sandbox passed to endpoint leave: %v", sbox)
669 669
 	}
670 670
 
671 671
 	sb, ok := sbox.(*sandbox)
... ...
@@ -129,7 +129,7 @@ type ActiveEndpointsError struct {
129 129
 }
130 130
 
131 131
 func (aee *ActiveEndpointsError) Error() string {
132
-	return fmt.Sprintf("network %s has active endpoints", aee.name)
132
+	return fmt.Sprintf("network %s id %s has active endpoints", aee.name, aee.id)
133 133
 }
134 134
 
135 135
 // Forbidden denotes the type of this error
136 136
new file mode 100644
... ...
@@ -0,0 +1,59 @@
0
+package iptables
1
+
2
+import (
3
+	"errors"
4
+	"net"
5
+	"syscall"
6
+
7
+	"github.com/Sirupsen/logrus"
8
+	"github.com/vishvananda/netlink"
9
+)
10
+
11
+var (
12
+	// ErrConntrackNotConfigurable means that conntrack module is not loaded or does not have the netlink module loaded
13
+	ErrConntrackNotConfigurable = errors.New("conntrack is not available")
14
+)
15
+
16
+// IsConntrackProgrammable returns true if the handle supports the NETLINK_NETFILTER and the base modules are loaded
17
+func IsConntrackProgrammable(nlh *netlink.Handle) bool {
18
+	return nlh.SupportsNetlinkFamily(syscall.NETLINK_NETFILTER)
19
+}
20
+
21
+// DeleteConntrackEntries deletes all the conntrack connections on the host for the specified IP
22
+// Returns the number of flows deleted for IPv4, IPv6 else error
23
+func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []net.IP) (uint, uint, error) {
24
+	if !IsConntrackProgrammable(nlh) {
25
+		return 0, 0, ErrConntrackNotConfigurable
26
+	}
27
+
28
+	var totalIPv4FlowPurged uint
29
+	for _, ipAddress := range ipv4List {
30
+		flowPurged, err := purgeConntrackState(nlh, syscall.AF_INET, ipAddress)
31
+		if err != nil {
32
+			logrus.Warnf("Failed to delete conntrack state for %s: %v", ipAddress, err)
33
+			continue
34
+		}
35
+		totalIPv4FlowPurged += flowPurged
36
+	}
37
+
38
+	var totalIPv6FlowPurged uint
39
+	for _, ipAddress := range ipv6List {
40
+		flowPurged, err := purgeConntrackState(nlh, syscall.AF_INET6, ipAddress)
41
+		if err != nil {
42
+			logrus.Warnf("Failed to delete conntrack state for %s: %v", ipAddress, err)
43
+			continue
44
+		}
45
+		totalIPv6FlowPurged += flowPurged
46
+	}
47
+
48
+	logrus.Debugf("DeleteConntrackEntries purged ipv4:%d, ipv6:%d", totalIPv4FlowPurged, totalIPv6FlowPurged)
49
+	return totalIPv4FlowPurged, totalIPv6FlowPurged, nil
50
+}
51
+
52
+func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (uint, error) {
53
+	filter := &netlink.ConntrackFilter{}
54
+	// NOTE: doing the flush using the ipAddress is safe because today there cannot be multiple networks with the same subnet
55
+	// so it will not be possible to flush flows that are of other containers
56
+	filter.AddIP(netlink.ConntrackNatAnyIP, ipAddress)
57
+	return nlh.ConntrackDeleteFilter(netlink.ConntrackTable, family, filter)
58
+}
... ...
@@ -100,14 +100,14 @@ func detectIptables() {
100 100
 	supportsCOpt = supportsCOption(mj, mn, mc)
101 101
 }
102 102
 
103
-func initIptables() {
103
+func initDependencies() {
104 104
 	probe()
105 105
 	initFirewalld()
106 106
 	detectIptables()
107 107
 }
108 108
 
109 109
 func initCheck() error {
110
-	initOnce.Do(initIptables)
110
+	initOnce.Do(initDependencies)
111 111
 
112 112
 	if iptablesPath == "" {
113 113
 		return ErrIptablesNotFound
... ...
@@ -88,12 +88,25 @@ func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool {
88 88
 }
89 89
 
90 90
 func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
91
+	var flushEntries bool
91 92
 	// Update our local clock if the received messages has newer
92 93
 	// time.
93 94
 	nDB.networkClock.Witness(nEvent.LTime)
94 95
 
95 96
 	nDB.Lock()
96
-	defer nDB.Unlock()
97
+	defer func() {
98
+		nDB.Unlock()
99
+		// When a node leaves a network on the last task removal cleanup the
100
+		// local entries for this network & node combination. When the tasks
101
+		// on a network are removed we could have missed the gossip updates.
102
+		// Not doing this cleanup can leave stale entries because bulksyncs
103
+		// from the node will no longer include this network state.
104
+		//
105
+		// deleteNodeNetworkEntries takes nDB lock.
106
+		if flushEntries {
107
+			nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName)
108
+		}
109
+	}()
97 110
 
98 111
 	if nEvent.NodeName == nDB.config.NodeName {
99 112
 		return false
... ...
@@ -121,6 +134,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
121 121
 		n.leaving = nEvent.Type == NetworkEventTypeLeave
122 122
 		if n.leaving {
123 123
 			n.reapTime = reapInterval
124
+			flushEntries = true
124 125
 		}
125 126
 
126 127
 		nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
... ...
@@ -372,6 +372,37 @@ func (nDB *NetworkDB) deleteNetworkEntriesForNode(deletedNode string) {
372 372
 	nDB.Unlock()
373 373
 }
374 374
 
375
+func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
376
+	nDB.Lock()
377
+	nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid),
378
+		func(path string, v interface{}) bool {
379
+			oldEntry := v.(*entry)
380
+			params := strings.Split(path[1:], "/")
381
+			nid := params[0]
382
+			tname := params[1]
383
+			key := params[2]
384
+
385
+			if oldEntry.node != node {
386
+				return false
387
+			}
388
+
389
+			entry := &entry{
390
+				ltime:    oldEntry.ltime,
391
+				node:     node,
392
+				value:    oldEntry.value,
393
+				deleting: true,
394
+				reapTime: reapInterval,
395
+			}
396
+
397
+			nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
398
+			nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
399
+
400
+			nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value))
401
+			return false
402
+		})
403
+	nDB.Unlock()
404
+}
405
+
375 406
 func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
376 407
 	nDB.Lock()
377 408
 	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
... ...
@@ -75,13 +75,28 @@ func NlHandle() *netlink.Handle {
75 75
 
76 76
 func getSupportedNlFamilies() []int {
77 77
 	fams := []int{syscall.NETLINK_ROUTE}
78
+	// NETLINK_XFRM test
78 79
 	if err := loadXfrmModules(); err != nil {
79 80
 		if checkXfrmSocket() != nil {
80 81
 			logrus.Warnf("Could not load necessary modules for IPSEC rules: %v", err)
81
-			return fams
82
+		} else {
83
+			fams = append(fams, syscall.NETLINK_XFRM)
82 84
 		}
85
+	} else {
86
+		fams = append(fams, syscall.NETLINK_XFRM)
83 87
 	}
84
-	return append(fams, syscall.NETLINK_XFRM)
88
+	// NETLINK_NETFILTER test
89
+	if err := loadNfConntrackModules(); err != nil {
90
+		if checkNfSocket() != nil {
91
+			logrus.Warnf("Could not load necessary modules for Conntrack: %v", err)
92
+		} else {
93
+			fams = append(fams, syscall.NETLINK_NETFILTER)
94
+		}
95
+	} else {
96
+		fams = append(fams, syscall.NETLINK_NETFILTER)
97
+	}
98
+
99
+	return fams
85 100
 }
86 101
 
87 102
 func loadXfrmModules() error {
... ...
@@ -103,3 +118,23 @@ func checkXfrmSocket() error {
103 103
 	syscall.Close(fd)
104 104
 	return nil
105 105
 }
106
+
107
+func loadNfConntrackModules() error {
108
+	if out, err := exec.Command("modprobe", "-va", "nf_conntrack").CombinedOutput(); err != nil {
109
+		return fmt.Errorf("Running modprobe nf_conntrack failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
110
+	}
111
+	if out, err := exec.Command("modprobe", "-va", "nf_conntrack_netlink").CombinedOutput(); err != nil {
112
+		return fmt.Errorf("Running modprobe nf_conntrack_netlink failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
113
+	}
114
+	return nil
115
+}
116
+
117
+// API check on required nf_conntrack* modules (nf_conntrack, nf_conntrack_netlink)
118
+func checkNfSocket() error {
119
+	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_NETFILTER)
120
+	if err != nil {
121
+		return err
122
+	}
123
+	syscall.Close(fd)
124
+	return nil
125
+}
... ...
@@ -644,13 +644,6 @@ func (sb *sandbox) SetKey(basePath string) error {
644 644
 	sb.Lock()
645 645
 	sb.osSbox = osSbox
646 646
 	sb.Unlock()
647
-	defer func() {
648
-		if err != nil {
649
-			sb.Lock()
650
-			sb.osSbox = nil
651
-			sb.Unlock()
652
-		}
653
-	}()
654 647
 
655 648
 	// If the resolver was setup before stop it and set it up in the
656 649
 	// new osl sandbox.
... ...
@@ -27,6 +27,19 @@ func (h *Handle) AddrAdd(link Link, addr *Addr) error {
27 27
 	return h.addrHandle(link, addr, req)
28 28
 }
29 29
 
30
+// AddrReplace will replace (or, if not present, add) an IP address on a link device.
31
+// Equivalent to: `ip addr replace $addr dev $link`
32
+func AddrReplace(link Link, addr *Addr) error {
33
+	return pkgHandle.AddrReplace(link, addr)
34
+}
35
+
36
+// AddrReplace will replace (or, if not present, add) an IP address on a link device.
37
+// Equivalent to: `ip addr replace $addr dev $link`
38
+func (h *Handle) AddrReplace(link Link, addr *Addr) error {
39
+	req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE|syscall.NLM_F_ACK)
40
+	return h.addrHandle(link, addr, req)
41
+}
42
+
30 43
 // AddrDel will delete an IP address from a link device.
31 44
 // Equivalent to: `ip addr del $addr dev $link`
32 45
 func AddrDel(link Link, addr *Addr) error {
33 46
new file mode 100644
... ...
@@ -0,0 +1,344 @@
0
+package netlink
1
+
2
+import (
3
+	"bytes"
4
+	"encoding/binary"
5
+	"errors"
6
+	"fmt"
7
+	"net"
8
+	"syscall"
9
+
10
+	"github.com/vishvananda/netlink/nl"
11
+)
12
+
13
+// ConntrackTableType Conntrack table for the netlink operation
14
+type ConntrackTableType uint8
15
+
16
+const (
17
+	// ConntrackTable Conntrack table
18
+	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK		 1
19
+	ConntrackTable = 1
20
+	// ConntrackExpectTable Conntrack expect table
21
+	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
22
+	ConntrackExpectTable = 2
23
+)
24
+
25
+const (
26
+	// backward compatibility with golang 1.6 which does not have io.SeekCurrent
27
+	seekCurrent = 1
28
+)
29
+
30
+// InetFamily Family type
31
+type InetFamily uint8
32
+
33
+//  -L [table] [options]          List conntrack or expectation table
34
+//  -G [table] parameters         Get conntrack or expectation
35
+
36
+//  -I [table] parameters         Create a conntrack or expectation
37
+//  -U [table] parameters         Update a conntrack
38
+//  -E [table] [options]          Show events
39
+
40
+//  -C [table]                    Show counter
41
+//  -S                            Show statistics
42
+
43
+// ConntrackTableList returns the flow list of a table of a specific family
44
+// conntrack -L [table] [options]          List conntrack or expectation table
45
+func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
46
+	return pkgHandle.ConntrackTableList(table, family)
47
+}
48
+
49
+// ConntrackTableFlush flushes all the flows of a specified table
50
+// conntrack -F [table]            Flush table
51
+// The flush operation applies to all the family types
52
+func ConntrackTableFlush(table ConntrackTableType) error {
53
+	return pkgHandle.ConntrackTableFlush(table)
54
+}
55
+
56
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
57
+// conntrack -D [table] parameters         Delete conntrack or expectation
58
+func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
59
+	return pkgHandle.ConntrackDeleteFilter(table, family, filter)
60
+}
61
+
62
+// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
63
+// conntrack -L [table] [options]          List conntrack or expectation table
64
+func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
65
+	res, err := h.dumpConntrackTable(table, family)
66
+	if err != nil {
67
+		return nil, err
68
+	}
69
+
70
+	// Deserialize all the flows
71
+	var result []*ConntrackFlow
72
+	for _, dataRaw := range res {
73
+		result = append(result, parseRawData(dataRaw))
74
+	}
75
+
76
+	return result, nil
77
+}
78
+
79
+// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
80
+// conntrack -F [table]            Flush table
81
+// The flush operation applies to all the family types
82
+func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
83
+	req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
84
+	_, err := req.Execute(syscall.NETLINK_NETFILTER, 0)
85
+	return err
86
+}
87
+
88
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
89
+// conntrack -D [table] parameters         Delete conntrack or expectation
90
+func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
91
+	res, err := h.dumpConntrackTable(table, family)
92
+	if err != nil {
93
+		return 0, err
94
+	}
95
+
96
+	var matched uint
97
+	for _, dataRaw := range res {
98
+		flow := parseRawData(dataRaw)
99
+		if match := filter.MatchConntrackFlow(flow); match {
100
+			req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
101
+			// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
102
+			req2.AddRawData(dataRaw[4:])
103
+			req2.Execute(syscall.NETLINK_NETFILTER, 0)
104
+			matched++
105
+		}
106
+	}
107
+
108
+	return matched, nil
109
+}
110
+
111
+func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
112
+	// Create the Netlink request object
113
+	req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
114
+	// Add the netfilter header
115
+	msg := &nl.Nfgenmsg{
116
+		NfgenFamily: uint8(family),
117
+		Version:     nl.NFNETLINK_V0,
118
+		ResId:       0,
119
+	}
120
+	req.AddData(msg)
121
+	return req
122
+}
123
+
124
+func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
125
+	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP)
126
+	return req.Execute(syscall.NETLINK_NETFILTER, 0)
127
+}
128
+
129
+// The full conntrack flow structure is very complicated and can be found in the file:
130
+// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
131
+// For the time being, the structure below allows to parse and extract the base information of a flow
132
+type ipTuple struct {
133
+	SrcIP    net.IP
134
+	DstIP    net.IP
135
+	Protocol uint8
136
+	SrcPort  uint16
137
+	DstPort  uint16
138
+}
139
+
140
+type ConntrackFlow struct {
141
+	FamilyType uint8
142
+	Forward    ipTuple
143
+	Reverse    ipTuple
144
+}
145
+
146
+func (s *ConntrackFlow) String() string {
147
+	// conntrack cmd output:
148
+	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001
149
+	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d",
150
+		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
151
+		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort,
152
+		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort)
153
+}
154
+
155
+// This method parse the ip tuple structure
156
+// The message structure is the following:
157
+// <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
158
+// <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
159
+// <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
160
+// <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
161
+// <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
162
+func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) {
163
+	for i := 0; i < 2; i++ {
164
+		_, t, _, v := parseNfAttrTLV(reader)
165
+		switch t {
166
+		case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
167
+			tpl.SrcIP = v
168
+		case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
169
+			tpl.DstIP = v
170
+		}
171
+	}
172
+	// Skip the next 4 bytes  nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
173
+	reader.Seek(4, seekCurrent)
174
+	_, t, _, v := parseNfAttrTLV(reader)
175
+	if t == nl.CTA_PROTO_NUM {
176
+		tpl.Protocol = uint8(v[0])
177
+	}
178
+	// Skip some padding 3 bytes
179
+	reader.Seek(3, seekCurrent)
180
+	for i := 0; i < 2; i++ {
181
+		_, t, _ := parseNfAttrTL(reader)
182
+		switch t {
183
+		case nl.CTA_PROTO_SRC_PORT:
184
+			parseBERaw16(reader, &tpl.SrcPort)
185
+		case nl.CTA_PROTO_DST_PORT:
186
+			parseBERaw16(reader, &tpl.DstPort)
187
+		}
188
+		// Skip some padding 2 byte
189
+		reader.Seek(2, seekCurrent)
190
+	}
191
+}
192
+
193
+func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
194
+	isNested, attrType, len = parseNfAttrTL(r)
195
+
196
+	value = make([]byte, len)
197
+	binary.Read(r, binary.BigEndian, &value)
198
+	return isNested, attrType, len, value
199
+}
200
+
201
+func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
202
+	binary.Read(r, nl.NativeEndian(), &len)
203
+	len -= nl.SizeofNfattr
204
+
205
+	binary.Read(r, nl.NativeEndian(), &attrType)
206
+	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
207
+	attrType = attrType & (nl.NLA_F_NESTED - 1)
208
+
209
+	return isNested, attrType, len
210
+}
211
+
212
+func parseBERaw16(r *bytes.Reader, v *uint16) {
213
+	binary.Read(r, binary.BigEndian, v)
214
+}
215
+
216
+func parseRawData(data []byte) *ConntrackFlow {
217
+	s := &ConntrackFlow{}
218
+	// First there is the Nfgenmsg header
219
+	// consume only the family field
220
+	reader := bytes.NewReader(data)
221
+	binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
222
+
223
+	// skip rest of the Netfilter header
224
+	reader.Seek(3, seekCurrent)
225
+	// The message structure is the following:
226
+	// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
227
+	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
228
+	// flow information of the forward flow
229
+	// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
230
+	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
231
+	// flow information of the reverse flow
232
+	for reader.Len() > 0 {
233
+		nested, t, l := parseNfAttrTL(reader)
234
+		if nested && t == nl.CTA_TUPLE_ORIG {
235
+			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
236
+				parseIpTuple(reader, &s.Forward)
237
+			}
238
+		} else if nested && t == nl.CTA_TUPLE_REPLY {
239
+			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
240
+				parseIpTuple(reader, &s.Reverse)
241
+
242
+				// Got all the useful information stop parsing
243
+				break
244
+			} else {
245
+				// Header not recognized skip it
246
+				reader.Seek(int64(l), seekCurrent)
247
+			}
248
+		}
249
+	}
250
+
251
+	return s
252
+}
253
+
254
+// Conntrack parameters and options:
255
+//   -n, --src-nat ip                      source NAT ip
256
+//   -g, --dst-nat ip                      destination NAT ip
257
+//   -j, --any-nat ip                      source or destination NAT ip
258
+//   -m, --mark mark                       Set mark
259
+//   -c, --secmark secmark                 Set selinux secmark
260
+//   -e, --event-mask eventmask            Event mask, eg. NEW,DESTROY
261
+//   -z, --zero                            Zero counters while listing
262
+//   -o, --output type[,...]               Output format, eg. xml
263
+//   -l, --label label[,...]               conntrack labels
264
+
265
+// Common parameters and options:
266
+//   -s, --src, --orig-src ip              Source address from original direction
267
+//   -d, --dst, --orig-dst ip              Destination address from original direction
268
+//   -r, --reply-src ip            Source addres from reply direction
269
+//   -q, --reply-dst ip            Destination address from reply direction
270
+//   -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
271
+//   -f, --family proto            Layer 3 Protocol, eg. 'ipv6'
272
+//   -t, --timeout timeout         Set timeout
273
+//   -u, --status status           Set status, eg. ASSURED
274
+//   -w, --zone value              Set conntrack zone
275
+//   --orig-zone value             Set zone for original direction
276
+//   --reply-zone value            Set zone for reply direction
277
+//   -b, --buffer-size             Netlink socket buffer size
278
+//   --mask-src ip                 Source mask address
279
+//   --mask-dst ip                 Destination mask address
280
+
281
+// Filter types
282
+type ConntrackFilterType uint8
283
+
284
+const (
285
+	ConntrackOrigSrcIP = iota // -orig-src ip   Source address from original direction
286
+	ConntrackOrigDstIP        // -orig-dst ip   Destination address from original direction
287
+	ConntrackNatSrcIP         // -src-nat ip    Source NAT ip
288
+	ConntrackNatDstIP         // -dst-nat ip    Destination NAT ip
289
+	ConntrackNatAnyIP         // -any-nat ip    Source or destination NAT ip
290
+)
291
+
292
+type ConntrackFilter struct {
293
+	ipFilter map[ConntrackFilterType]net.IP
294
+}
295
+
296
+// AddIP adds an IP to the conntrack filter
297
+func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
298
+	if f.ipFilter == nil {
299
+		f.ipFilter = make(map[ConntrackFilterType]net.IP)
300
+	}
301
+	if _, ok := f.ipFilter[tp]; ok {
302
+		return errors.New("Filter attribute already present")
303
+	}
304
+	f.ipFilter[tp] = ip
305
+	return nil
306
+}
307
+
308
+// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
309
+// false otherwise
310
+func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
311
+	if len(f.ipFilter) == 0 {
312
+		// empty filter always not match
313
+		return false
314
+	}
315
+
316
+	match := true
317
+	// -orig-src ip   Source address from original direction
318
+	if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
319
+		match = match && elem.Equal(flow.Forward.SrcIP)
320
+	}
321
+
322
+	// -orig-dst ip   Destination address from original direction
323
+	if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
324
+		match = match && elem.Equal(flow.Forward.DstIP)
325
+	}
326
+
327
+	// -src-nat ip    Source NAT ip
328
+	if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found {
329
+		match = match && elem.Equal(flow.Reverse.SrcIP)
330
+	}
331
+
332
+	// -dst-nat ip    Destination NAT ip
333
+	if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found {
334
+		match = match && elem.Equal(flow.Reverse.DstIP)
335
+	}
336
+
337
+	// -any-nat ip    Source or destination NAT ip
338
+	if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found {
339
+		match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
340
+	}
341
+
342
+	return match
343
+}
0 344
new file mode 100644
... ...
@@ -0,0 +1,53 @@
0
+// +build !linux
1
+
2
+package netlink
3
+
4
+// ConntrackTableType Conntrack table for the netlink operation
5
+type ConntrackTableType uint8
6
+
7
+// InetFamily Family type
8
+type InetFamily uint8
9
+
10
+// ConntrackFlow placeholder
11
+type ConntrackFlow struct{}
12
+
13
+// ConntrackFilter placeholder
14
+type ConntrackFilter struct{}
15
+
16
+// ConntrackTableList returns the flow list of a table of a specific family
17
+// conntrack -L [table] [options]          List conntrack or expectation table
18
+func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
19
+	return nil, ErrNotImplemented
20
+}
21
+
22
+// ConntrackTableFlush flushes all the flows of a specified table
23
+// conntrack -F [table]            Flush table
24
+// The flush operation applies to all the family types
25
+func ConntrackTableFlush(table ConntrackTableType) error {
26
+	return ErrNotImplemented
27
+}
28
+
29
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
30
+// conntrack -D [table] parameters         Delete conntrack or expectation
31
+func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
32
+	return 0, ErrNotImplemented
33
+}
34
+
35
+// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
36
+// conntrack -L [table] [options]          List conntrack or expectation table
37
+func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
38
+	return nil, ErrNotImplemented
39
+}
40
+
41
+// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
42
+// conntrack -F [table]            Flush table
43
+// The flush operation applies to all the family types
44
+func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
45
+	return ErrNotImplemented
46
+}
47
+
48
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
49
+// conntrack -D [table] parameters         Delete conntrack or expectation
50
+func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
51
+	return 0, ErrNotImplemented
52
+}
... ...
@@ -1,6 +1,10 @@
1 1
 package netlink
2 2
 
3
-import "fmt"
3
+import (
4
+	"fmt"
5
+
6
+	"github.com/vishvananda/netlink/nl"
7
+)
4 8
 
5 9
 type Filter interface {
6 10
 	Attrs() *FilterAttrs
... ...
@@ -180,11 +184,46 @@ func NewMirredAction(redirIndex int) *MirredAction {
180 180
 	}
181 181
 }
182 182
 
183
+// Constants used in TcU32Sel.Flags.
184
+const (
185
+	TC_U32_TERMINAL  = nl.TC_U32_TERMINAL
186
+	TC_U32_OFFSET    = nl.TC_U32_OFFSET
187
+	TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET
188
+	TC_U32_EAT       = nl.TC_U32_EAT
189
+)
190
+
191
+// Sel of the U32 filters that contains multiple TcU32Key. This is the copy
192
+// and the frontend representation of nl.TcU32Sel. It is serialized into canonical
193
+// nl.TcU32Sel with the appropriate endianness.
194
+type TcU32Sel struct {
195
+	Flags    uint8
196
+	Offshift uint8
197
+	Nkeys    uint8
198
+	Pad      uint8
199
+	Offmask  uint16
200
+	Off      uint16
201
+	Offoff   int16
202
+	Hoff     int16
203
+	Hmask    uint32
204
+	Keys     []TcU32Key
205
+}
206
+
207
+// TcU32Key contained of Sel in the U32 filters. This is the copy and the frontend
208
+// representation of nl.TcU32Key. It is serialized into chanonical nl.TcU32Sel
209
+// with the appropriate endianness.
210
+type TcU32Key struct {
211
+	Mask    uint32
212
+	Val     uint32
213
+	Off     int32
214
+	OffMask int32
215
+}
216
+
183 217
 // U32 filters on many packet related properties
184 218
 type U32 struct {
185 219
 	FilterAttrs
186 220
 	ClassId    uint32
187 221
 	RedirIndex int
222
+	Sel        *TcU32Sel
188 223
 	Actions    []Action
189 224
 }
190 225
 
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"errors"
7 7
 	"fmt"
8 8
 	"syscall"
9
+	"unsafe"
9 10
 
10 11
 	"github.com/vishvananda/netlink/nl"
11 12
 )
... ...
@@ -128,12 +129,34 @@ func (h *Handle) FilterAdd(filter Filter) error {
128 128
 
129 129
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
130 130
 	if u32, ok := filter.(*U32); ok {
131
-		// match all
132
-		sel := nl.TcU32Sel{
133
-			Nkeys: 1,
134
-			Flags: nl.TC_U32_TERMINAL,
131
+		// Convert TcU32Sel into nl.TcU32Sel as it is without copy.
132
+		sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel))
133
+		if sel == nil {
134
+			// match all
135
+			sel = &nl.TcU32Sel{
136
+				Nkeys: 1,
137
+				Flags: nl.TC_U32_TERMINAL,
138
+			}
139
+			sel.Keys = append(sel.Keys, nl.TcU32Key{})
140
+		}
141
+
142
+		if native != networkOrder {
143
+			// Copy Tcu32Sel.
144
+			cSel := sel
145
+			keys := make([]nl.TcU32Key, cap(sel.Keys))
146
+			copy(keys, sel.Keys)
147
+			cSel.Keys = keys
148
+			sel = cSel
149
+
150
+			// Handle the endianness of attributes
151
+			sel.Offmask = native.Uint16(htons(sel.Offmask))
152
+			sel.Hmask = native.Uint32(htonl(sel.Hmask))
153
+			for _, key := range sel.Keys {
154
+				key.Mask = native.Uint32(htonl(key.Mask))
155
+				key.Val = native.Uint32(htonl(key.Val))
156
+			}
135 157
 		}
136
-		sel.Keys = append(sel.Keys, nl.TcU32Key{})
158
+		sel.Nkeys = uint8(len(sel.Keys))
137 159
 		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
138 160
 		if u32.ClassId != 0 {
139 161
 			nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId))
... ...
@@ -425,6 +448,16 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
425 425
 		case nl.TCA_U32_SEL:
426 426
 			detailed = true
427 427
 			sel := nl.DeserializeTcU32Sel(datum.Value)
428
+			u32.Sel = (*TcU32Sel)(unsafe.Pointer(sel))
429
+			if native != networkOrder {
430
+				// Handle the endianness of attributes
431
+				u32.Sel.Offmask = native.Uint16(htons(sel.Offmask))
432
+				u32.Sel.Hmask = native.Uint32(htonl(sel.Hmask))
433
+				for _, key := range u32.Sel.Keys {
434
+					key.Mask = native.Uint32(htonl(key.Mask))
435
+					key.Val = native.Uint32(htonl(key.Val))
436
+				}
437
+			}
428 438
 			// only parse if we have a very basic redirect
429 439
 			if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
430 440
 				return detailed, nil
... ...
@@ -443,6 +476,8 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
443 443
 					u32.RedirIndex = int(action.Ifindex)
444 444
 				}
445 445
 			}
446
+		case nl.TCA_U32_CLASSID:
447
+			u32.ClassId = native.Uint32(datum.Value)
446 448
 		}
447 449
 	}
448 450
 	return detailed, nil
449 451
new file mode 100644
... ...
@@ -0,0 +1,189 @@
0
+package nl
1
+
2
+import "unsafe"
3
+
4
+// Track the message sizes for the correct serialization/deserialization
5
+const (
6
+	SizeofNfgenmsg      = 4
7
+	SizeofNfattr        = 4
8
+	SizeofNfConntrack   = 376
9
+	SizeofNfctTupleHead = 52
10
+)
11
+
12
+var L4ProtoMap = map[uint8]string{
13
+	6:  "tcp",
14
+	17: "udp",
15
+}
16
+
17
+// All the following constants are coming from:
18
+// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h
19
+
20
+// enum cntl_msg_types {
21
+// 	IPCTNL_MSG_CT_NEW,
22
+// 	IPCTNL_MSG_CT_GET,
23
+// 	IPCTNL_MSG_CT_DELETE,
24
+// 	IPCTNL_MSG_CT_GET_CTRZERO,
25
+// 	IPCTNL_MSG_CT_GET_STATS_CPU,
26
+// 	IPCTNL_MSG_CT_GET_STATS,
27
+// 	IPCTNL_MSG_CT_GET_DYING,
28
+// 	IPCTNL_MSG_CT_GET_UNCONFIRMED,
29
+//
30
+// 	IPCTNL_MSG_MAX
31
+// };
32
+const (
33
+	IPCTNL_MSG_CT_GET    = 1
34
+	IPCTNL_MSG_CT_DELETE = 2
35
+)
36
+
37
+// #define NFNETLINK_V0	0
38
+const (
39
+	NFNETLINK_V0 = 0
40
+)
41
+
42
+// #define NLA_F_NESTED (1 << 15)
43
+const (
44
+	NLA_F_NESTED = (1 << 15)
45
+)
46
+
47
+// enum ctattr_type {
48
+// 	CTA_UNSPEC,
49
+// 	CTA_TUPLE_ORIG,
50
+// 	CTA_TUPLE_REPLY,
51
+// 	CTA_STATUS,
52
+// 	CTA_PROTOINFO,
53
+// 	CTA_HELP,
54
+// 	CTA_NAT_SRC,
55
+// #define CTA_NAT	CTA_NAT_SRC	/* backwards compatibility */
56
+// 	CTA_TIMEOUT,
57
+// 	CTA_MARK,
58
+// 	CTA_COUNTERS_ORIG,
59
+// 	CTA_COUNTERS_REPLY,
60
+// 	CTA_USE,
61
+// 	CTA_ID,
62
+// 	CTA_NAT_DST,
63
+// 	CTA_TUPLE_MASTER,
64
+// 	CTA_SEQ_ADJ_ORIG,
65
+// 	CTA_NAT_SEQ_ADJ_ORIG	= CTA_SEQ_ADJ_ORIG,
66
+// 	CTA_SEQ_ADJ_REPLY,
67
+// 	CTA_NAT_SEQ_ADJ_REPLY	= CTA_SEQ_ADJ_REPLY,
68
+// 	CTA_SECMARK,		/* obsolete */
69
+// 	CTA_ZONE,
70
+// 	CTA_SECCTX,
71
+// 	CTA_TIMESTAMP,
72
+// 	CTA_MARK_MASK,
73
+// 	CTA_LABELS,
74
+// 	CTA_LABELS_MASK,
75
+// 	__CTA_MAX
76
+// };
77
+const (
78
+	CTA_TUPLE_ORIG  = 1
79
+	CTA_TUPLE_REPLY = 2
80
+	CTA_STATUS      = 3
81
+	CTA_TIMEOUT     = 8
82
+	CTA_MARK        = 9
83
+	CTA_PROTOINFO   = 4
84
+)
85
+
86
+// enum ctattr_tuple {
87
+// 	CTA_TUPLE_UNSPEC,
88
+// 	CTA_TUPLE_IP,
89
+// 	CTA_TUPLE_PROTO,
90
+// 	CTA_TUPLE_ZONE,
91
+// 	__CTA_TUPLE_MAX
92
+// };
93
+// #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
94
+const (
95
+	CTA_TUPLE_IP    = 1
96
+	CTA_TUPLE_PROTO = 2
97
+)
98
+
99
+// enum ctattr_ip {
100
+// 	CTA_IP_UNSPEC,
101
+// 	CTA_IP_V4_SRC,
102
+// 	CTA_IP_V4_DST,
103
+// 	CTA_IP_V6_SRC,
104
+// 	CTA_IP_V6_DST,
105
+// 	__CTA_IP_MAX
106
+// };
107
+// #define CTA_IP_MAX (__CTA_IP_MAX - 1)
108
+const (
109
+	CTA_IP_V4_SRC = 1
110
+	CTA_IP_V4_DST = 2
111
+	CTA_IP_V6_SRC = 3
112
+	CTA_IP_V6_DST = 4
113
+)
114
+
115
+// enum ctattr_l4proto {
116
+// 	CTA_PROTO_UNSPEC,
117
+// 	CTA_PROTO_NUM,
118
+// 	CTA_PROTO_SRC_PORT,
119
+// 	CTA_PROTO_DST_PORT,
120
+// 	CTA_PROTO_ICMP_ID,
121
+// 	CTA_PROTO_ICMP_TYPE,
122
+// 	CTA_PROTO_ICMP_CODE,
123
+// 	CTA_PROTO_ICMPV6_ID,
124
+// 	CTA_PROTO_ICMPV6_TYPE,
125
+// 	CTA_PROTO_ICMPV6_CODE,
126
+// 	__CTA_PROTO_MAX
127
+// };
128
+// #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
129
+const (
130
+	CTA_PROTO_NUM      = 1
131
+	CTA_PROTO_SRC_PORT = 2
132
+	CTA_PROTO_DST_PORT = 3
133
+)
134
+
135
+// enum ctattr_protoinfo {
136
+// 	CTA_PROTOINFO_UNSPEC,
137
+// 	CTA_PROTOINFO_TCP,
138
+// 	CTA_PROTOINFO_DCCP,
139
+// 	CTA_PROTOINFO_SCTP,
140
+// 	__CTA_PROTOINFO_MAX
141
+// };
142
+// #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
143
+const (
144
+	CTA_PROTOINFO_TCP = 1
145
+)
146
+
147
+// enum ctattr_protoinfo_tcp {
148
+// 	CTA_PROTOINFO_TCP_UNSPEC,
149
+// 	CTA_PROTOINFO_TCP_STATE,
150
+// 	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
151
+// 	CTA_PROTOINFO_TCP_WSCALE_REPLY,
152
+// 	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
153
+// 	CTA_PROTOINFO_TCP_FLAGS_REPLY,
154
+// 	__CTA_PROTOINFO_TCP_MAX
155
+// };
156
+// #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
157
+const (
158
+	CTA_PROTOINFO_TCP_STATE           = 1
159
+	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL = 2
160
+	CTA_PROTOINFO_TCP_WSCALE_REPLY    = 3
161
+	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL  = 4
162
+	CTA_PROTOINFO_TCP_FLAGS_REPLY     = 5
163
+)
164
+
165
+// /* General form of address family dependent message.
166
+//  */
167
+// struct nfgenmsg {
168
+// 	__u8  nfgen_family;		/* AF_xxx */
169
+// 	__u8  version;		/* nfnetlink version */
170
+// 	__be16    res_id;		/* resource id */
171
+// };
172
+type Nfgenmsg struct {
173
+	NfgenFamily uint8
174
+	Version     uint8
175
+	ResId       uint16 // big endian
176
+}
177
+
178
+func (msg *Nfgenmsg) Len() int {
179
+	return SizeofNfgenmsg
180
+}
181
+
182
+func DeserializeNfgenmsg(b []byte) *Nfgenmsg {
183
+	return (*Nfgenmsg)(unsafe.Pointer(&b[0:SizeofNfgenmsg][0]))
184
+}
185
+
186
+func (msg *Nfgenmsg) Serialize() []byte {
187
+	return (*(*[SizeofNfgenmsg]byte)(unsafe.Pointer(msg)))[:]
188
+}
... ...
@@ -24,7 +24,7 @@ const (
24 24
 )
25 25
 
26 26
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
27
-var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
27
+var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM, syscall.NETLINK_NETFILTER}
28 28
 
29 29
 var nextSeqNr uint32
30 30
 
... ...
@@ -321,6 +321,7 @@ func (a *RtAttr) Serialize() []byte {
321 321
 type NetlinkRequest struct {
322 322
 	syscall.NlMsghdr
323 323
 	Data    []NetlinkRequestData
324
+	RawData []byte
324 325
 	Sockets map[int]*SocketHandle
325 326
 }
326 327
 
... ...
@@ -332,6 +333,8 @@ func (req *NetlinkRequest) Serialize() []byte {
332 332
 		dataBytes[i] = data.Serialize()
333 333
 		length = length + len(dataBytes[i])
334 334
 	}
335
+	length += len(req.RawData)
336
+
335 337
 	req.Len = uint32(length)
336 338
 	b := make([]byte, length)
337 339
 	hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
... ...
@@ -343,6 +346,10 @@ func (req *NetlinkRequest) Serialize() []byte {
343 343
 			next = next + 1
344 344
 		}
345 345
 	}
346
+	// Add the raw data if any
347
+	if len(req.RawData) > 0 {
348
+		copy(b[next:length], req.RawData)
349
+	}
346 350
 	return b
347 351
 }
348 352
 
... ...
@@ -352,6 +359,13 @@ func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
352 352
 	}
353 353
 }
354 354
 
355
+// AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization
356
+func (req *NetlinkRequest) AddRawData(data []byte) {
357
+	if data != nil {
358
+		req.RawData = append(req.RawData, data...)
359
+	}
360
+}
361
+
355 362
 // Execute the request against a the given sockType.
356 363
 // Returns a list of netlink messages in serialized format, optionally filtered
357 364
 // by resType.