Browse code

Vendoring Netlink library

- Added conntrack support

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>

Flavio Crisciani authored on 2017/04/11 09:09:24
Showing 8 changed files
... ...
@@ -34,7 +34,7 @@ github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e
34 34
 github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
35 35
 github.com/docker/libkv 1d8431073ae03cdaedb198a89722f3aab6d418ef
36 36
 github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
37
-github.com/vishvananda/netlink c682914b0b231f6cad204a86e565551e51d387c0
37
+github.com/vishvananda/netlink 1e86b2bee5b6a7d377e4c02bb7f98209d6a7297c
38 38
 github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
39 39
 github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
40 40
 github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d
... ...
@@ -27,6 +27,19 @@ func (h *Handle) AddrAdd(link Link, addr *Addr) error {
27 27
 	return h.addrHandle(link, addr, req)
28 28
 }
29 29
 
30
+// AddrReplace will replace (or, if not present, add) an IP address on a link device.
31
+// Equivalent to: `ip addr replace $addr dev $link`
32
+func AddrReplace(link Link, addr *Addr) error {
33
+	return pkgHandle.AddrReplace(link, addr)
34
+}
35
+
36
+// AddrReplace will replace (or, if not present, add) an IP address on a link device.
37
+// Equivalent to: `ip addr replace $addr dev $link`
38
+func (h *Handle) AddrReplace(link Link, addr *Addr) error {
39
+	req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE|syscall.NLM_F_ACK)
40
+	return h.addrHandle(link, addr, req)
41
+}
42
+
30 43
 // AddrDel will delete an IP address from a link device.
31 44
 // Equivalent to: `ip addr del $addr dev $link`
32 45
 func AddrDel(link Link, addr *Addr) error {
33 46
new file mode 100644
... ...
@@ -0,0 +1,344 @@
0
+package netlink
1
+
2
+import (
3
+	"bytes"
4
+	"encoding/binary"
5
+	"errors"
6
+	"fmt"
7
+	"net"
8
+	"syscall"
9
+
10
+	"github.com/vishvananda/netlink/nl"
11
+)
12
+
13
+// ConntrackTableType Conntrack table for the netlink operation
14
+type ConntrackTableType uint8
15
+
16
+const (
17
+	// ConntrackTable Conntrack table
18
+	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK		 1
19
+	ConntrackTable = 1
20
+	// ConntrackExpectTable Conntrack expect table
21
+	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
22
+	ConntrackExpectTable = 2
23
+)
24
+
25
+const (
26
+	// backward compatibility with golang 1.6 which does not have io.SeekCurrent
27
+	seekCurrent = 1
28
+)
29
+
30
+// InetFamily Family type
31
+type InetFamily uint8
32
+
33
+//  -L [table] [options]          List conntrack or expectation table
34
+//  -G [table] parameters         Get conntrack or expectation
35
+
36
+//  -I [table] parameters         Create a conntrack or expectation
37
+//  -U [table] parameters         Update a conntrack
38
+//  -E [table] [options]          Show events
39
+
40
+//  -C [table]                    Show counter
41
+//  -S                            Show statistics
42
+
43
+// ConntrackTableList returns the flow list of a table of a specific family
44
+// conntrack -L [table] [options]          List conntrack or expectation table
45
+func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
46
+	return pkgHandle.ConntrackTableList(table, family)
47
+}
48
+
49
+// ConntrackTableFlush flushes all the flows of a specified table
50
+// conntrack -F [table]            Flush table
51
+// The flush operation applies to all the family types
52
+func ConntrackTableFlush(table ConntrackTableType) error {
53
+	return pkgHandle.ConntrackTableFlush(table)
54
+}
55
+
56
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
57
+// conntrack -D [table] parameters         Delete conntrack or expectation
58
+func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
59
+	return pkgHandle.ConntrackDeleteFilter(table, family, filter)
60
+}
61
+
62
+// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
63
+// conntrack -L [table] [options]          List conntrack or expectation table
64
+func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
65
+	res, err := h.dumpConntrackTable(table, family)
66
+	if err != nil {
67
+		return nil, err
68
+	}
69
+
70
+	// Deserialize all the flows
71
+	var result []*ConntrackFlow
72
+	for _, dataRaw := range res {
73
+		result = append(result, parseRawData(dataRaw))
74
+	}
75
+
76
+	return result, nil
77
+}
78
+
79
+// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
80
+// conntrack -F [table]            Flush table
81
+// The flush operation applies to all the family types
82
+func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
83
+	req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
84
+	_, err := req.Execute(syscall.NETLINK_NETFILTER, 0)
85
+	return err
86
+}
87
+
88
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
89
+// conntrack -D [table] parameters         Delete conntrack or expectation
90
+func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
91
+	res, err := h.dumpConntrackTable(table, family)
92
+	if err != nil {
93
+		return 0, err
94
+	}
95
+
96
+	var matched uint
97
+	for _, dataRaw := range res {
98
+		flow := parseRawData(dataRaw)
99
+		if match := filter.MatchConntrackFlow(flow); match {
100
+			req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
101
+			// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
102
+			req2.AddRawData(dataRaw[4:])
103
+			req2.Execute(syscall.NETLINK_NETFILTER, 0)
104
+			matched++
105
+		}
106
+	}
107
+
108
+	return matched, nil
109
+}
110
+
111
+func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily, operation, flags int) *nl.NetlinkRequest {
112
+	// Create the Netlink request object
113
+	req := h.newNetlinkRequest((int(table)<<8)|operation, flags)
114
+	// Add the netfilter header
115
+	msg := &nl.Nfgenmsg{
116
+		NfgenFamily: uint8(family),
117
+		Version:     nl.NFNETLINK_V0,
118
+		ResId:       0,
119
+	}
120
+	req.AddData(msg)
121
+	return req
122
+}
123
+
124
+func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
125
+	req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP)
126
+	return req.Execute(syscall.NETLINK_NETFILTER, 0)
127
+}
128
+
129
+// The full conntrack flow structure is very complicated and can be found in the file:
130
+// http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
131
+// For the time being, the structure below allows to parse and extract the base information of a flow
132
+type ipTuple struct {
133
+	SrcIP    net.IP
134
+	DstIP    net.IP
135
+	Protocol uint8
136
+	SrcPort  uint16
137
+	DstPort  uint16
138
+}
139
+
140
+type ConntrackFlow struct {
141
+	FamilyType uint8
142
+	Forward    ipTuple
143
+	Reverse    ipTuple
144
+}
145
+
146
+func (s *ConntrackFlow) String() string {
147
+	// conntrack cmd output:
148
+	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001
149
+	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d",
150
+		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
151
+		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort,
152
+		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort)
153
+}
154
+
155
+// This method parse the ip tuple structure
156
+// The message structure is the following:
157
+// <len, [CTA_IP_V4_SRC|CTA_IP_V6_SRC], 16 bytes for the IP>
158
+// <len, [CTA_IP_V4_DST|CTA_IP_V6_DST], 16 bytes for the IP>
159
+// <len, NLA_F_NESTED|nl.CTA_TUPLE_PROTO, 1 byte for the protocol, 3 bytes of padding>
160
+// <len, CTA_PROTO_SRC_PORT, 2 bytes for the source port, 2 bytes of padding>
161
+// <len, CTA_PROTO_DST_PORT, 2 bytes for the source port, 2 bytes of padding>
162
+func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) {
163
+	for i := 0; i < 2; i++ {
164
+		_, t, _, v := parseNfAttrTLV(reader)
165
+		switch t {
166
+		case nl.CTA_IP_V4_SRC, nl.CTA_IP_V6_SRC:
167
+			tpl.SrcIP = v
168
+		case nl.CTA_IP_V4_DST, nl.CTA_IP_V6_DST:
169
+			tpl.DstIP = v
170
+		}
171
+	}
172
+	// Skip the next 4 bytes  nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
173
+	reader.Seek(4, seekCurrent)
174
+	_, t, _, v := parseNfAttrTLV(reader)
175
+	if t == nl.CTA_PROTO_NUM {
176
+		tpl.Protocol = uint8(v[0])
177
+	}
178
+	// Skip some padding 3 bytes
179
+	reader.Seek(3, seekCurrent)
180
+	for i := 0; i < 2; i++ {
181
+		_, t, _ := parseNfAttrTL(reader)
182
+		switch t {
183
+		case nl.CTA_PROTO_SRC_PORT:
184
+			parseBERaw16(reader, &tpl.SrcPort)
185
+		case nl.CTA_PROTO_DST_PORT:
186
+			parseBERaw16(reader, &tpl.DstPort)
187
+		}
188
+		// Skip some padding 2 byte
189
+		reader.Seek(2, seekCurrent)
190
+	}
191
+}
192
+
193
+func parseNfAttrTLV(r *bytes.Reader) (isNested bool, attrType, len uint16, value []byte) {
194
+	isNested, attrType, len = parseNfAttrTL(r)
195
+
196
+	value = make([]byte, len)
197
+	binary.Read(r, binary.BigEndian, &value)
198
+	return isNested, attrType, len, value
199
+}
200
+
201
+func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
202
+	binary.Read(r, nl.NativeEndian(), &len)
203
+	len -= nl.SizeofNfattr
204
+
205
+	binary.Read(r, nl.NativeEndian(), &attrType)
206
+	isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
207
+	attrType = attrType & (nl.NLA_F_NESTED - 1)
208
+
209
+	return isNested, attrType, len
210
+}
211
+
212
+func parseBERaw16(r *bytes.Reader, v *uint16) {
213
+	binary.Read(r, binary.BigEndian, v)
214
+}
215
+
216
+func parseRawData(data []byte) *ConntrackFlow {
217
+	s := &ConntrackFlow{}
218
+	// First there is the Nfgenmsg header
219
+	// consume only the family field
220
+	reader := bytes.NewReader(data)
221
+	binary.Read(reader, nl.NativeEndian(), &s.FamilyType)
222
+
223
+	// skip rest of the Netfilter header
224
+	reader.Seek(3, seekCurrent)
225
+	// The message structure is the following:
226
+	// <len, NLA_F_NESTED|CTA_TUPLE_ORIG> 4 bytes
227
+	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
228
+	// flow information of the forward flow
229
+	// <len, NLA_F_NESTED|CTA_TUPLE_REPLY> 4 bytes
230
+	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
231
+	// flow information of the reverse flow
232
+	for reader.Len() > 0 {
233
+		nested, t, l := parseNfAttrTL(reader)
234
+		if nested && t == nl.CTA_TUPLE_ORIG {
235
+			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
236
+				parseIpTuple(reader, &s.Forward)
237
+			}
238
+		} else if nested && t == nl.CTA_TUPLE_REPLY {
239
+			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
240
+				parseIpTuple(reader, &s.Reverse)
241
+
242
+				// Got all the useful information stop parsing
243
+				break
244
+			} else {
245
+				// Header not recognized skip it
246
+				reader.Seek(int64(l), seekCurrent)
247
+			}
248
+		}
249
+	}
250
+
251
+	return s
252
+}
253
+
254
+// Conntrack parameters and options:
255
+//   -n, --src-nat ip                      source NAT ip
256
+//   -g, --dst-nat ip                      destination NAT ip
257
+//   -j, --any-nat ip                      source or destination NAT ip
258
+//   -m, --mark mark                       Set mark
259
+//   -c, --secmark secmark                 Set selinux secmark
260
+//   -e, --event-mask eventmask            Event mask, eg. NEW,DESTROY
261
+//   -z, --zero                            Zero counters while listing
262
+//   -o, --output type[,...]               Output format, eg. xml
263
+//   -l, --label label[,...]               conntrack labels
264
+
265
+// Common parameters and options:
266
+//   -s, --src, --orig-src ip              Source address from original direction
267
+//   -d, --dst, --orig-dst ip              Destination address from original direction
268
+//   -r, --reply-src ip            Source addres from reply direction
269
+//   -q, --reply-dst ip            Destination address from reply direction
270
+//   -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
271
+//   -f, --family proto            Layer 3 Protocol, eg. 'ipv6'
272
+//   -t, --timeout timeout         Set timeout
273
+//   -u, --status status           Set status, eg. ASSURED
274
+//   -w, --zone value              Set conntrack zone
275
+//   --orig-zone value             Set zone for original direction
276
+//   --reply-zone value            Set zone for reply direction
277
+//   -b, --buffer-size             Netlink socket buffer size
278
+//   --mask-src ip                 Source mask address
279
+//   --mask-dst ip                 Destination mask address
280
+
281
+// Filter types
282
+type ConntrackFilterType uint8
283
+
284
+const (
285
+	ConntrackOrigSrcIP = iota // -orig-src ip   Source address from original direction
286
+	ConntrackOrigDstIP        // -orig-dst ip   Destination address from original direction
287
+	ConntrackNatSrcIP         // -src-nat ip    Source NAT ip
288
+	ConntrackNatDstIP         // -dst-nat ip    Destination NAT ip
289
+	ConntrackNatAnyIP         // -any-nat ip    Source or destination NAT ip
290
+)
291
+
292
+type ConntrackFilter struct {
293
+	ipFilter map[ConntrackFilterType]net.IP
294
+}
295
+
296
+// AddIP adds an IP to the conntrack filter
297
+func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
298
+	if f.ipFilter == nil {
299
+		f.ipFilter = make(map[ConntrackFilterType]net.IP)
300
+	}
301
+	if _, ok := f.ipFilter[tp]; ok {
302
+		return errors.New("Filter attribute already present")
303
+	}
304
+	f.ipFilter[tp] = ip
305
+	return nil
306
+}
307
+
308
+// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
309
+// false otherwise
310
+func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
311
+	if len(f.ipFilter) == 0 {
312
+		// empty filter always not match
313
+		return false
314
+	}
315
+
316
+	match := true
317
+	// -orig-src ip   Source address from original direction
318
+	if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
319
+		match = match && elem.Equal(flow.Forward.SrcIP)
320
+	}
321
+
322
+	// -orig-dst ip   Destination address from original direction
323
+	if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
324
+		match = match && elem.Equal(flow.Forward.DstIP)
325
+	}
326
+
327
+	// -src-nat ip    Source NAT ip
328
+	if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found {
329
+		match = match && elem.Equal(flow.Reverse.SrcIP)
330
+	}
331
+
332
+	// -dst-nat ip    Destination NAT ip
333
+	if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found {
334
+		match = match && elem.Equal(flow.Reverse.DstIP)
335
+	}
336
+
337
+	// -any-nat ip    Source or destination NAT ip
338
+	if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found {
339
+		match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
340
+	}
341
+
342
+	return match
343
+}
0 344
new file mode 100644
... ...
@@ -0,0 +1,53 @@
0
+// +build !linux
1
+
2
+package netlink
3
+
4
+// ConntrackTableType Conntrack table for the netlink operation
5
+type ConntrackTableType uint8
6
+
7
+// InetFamily Family type
8
+type InetFamily uint8
9
+
10
+// ConntrackFlow placeholder
11
+type ConntrackFlow struct{}
12
+
13
+// ConntrackFilter placeholder
14
+type ConntrackFilter struct{}
15
+
16
+// ConntrackTableList returns the flow list of a table of a specific family
17
+// conntrack -L [table] [options]          List conntrack or expectation table
18
+func ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
19
+	return nil, ErrNotImplemented
20
+}
21
+
22
+// ConntrackTableFlush flushes all the flows of a specified table
23
+// conntrack -F [table]            Flush table
24
+// The flush operation applies to all the family types
25
+func ConntrackTableFlush(table ConntrackTableType) error {
26
+	return ErrNotImplemented
27
+}
28
+
29
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter
30
+// conntrack -D [table] parameters         Delete conntrack or expectation
31
+func ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
32
+	return 0, ErrNotImplemented
33
+}
34
+
35
+// ConntrackTableList returns the flow list of a table of a specific family using the netlink handle passed
36
+// conntrack -L [table] [options]          List conntrack or expectation table
37
+func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) ([]*ConntrackFlow, error) {
38
+	return nil, ErrNotImplemented
39
+}
40
+
41
+// ConntrackTableFlush flushes all the flows of a specified table using the netlink handle passed
42
+// conntrack -F [table]            Flush table
43
+// The flush operation applies to all the family types
44
+func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
45
+	return ErrNotImplemented
46
+}
47
+
48
+// ConntrackDeleteFilter deletes entries on the specified table on the base of the filter using the netlink handle passed
49
+// conntrack -D [table] parameters         Delete conntrack or expectation
50
+func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFamily, filter *ConntrackFilter) (uint, error) {
51
+	return 0, ErrNotImplemented
52
+}
... ...
@@ -1,6 +1,10 @@
1 1
 package netlink
2 2
 
3
-import "fmt"
3
+import (
4
+	"fmt"
5
+
6
+	"github.com/vishvananda/netlink/nl"
7
+)
4 8
 
5 9
 type Filter interface {
6 10
 	Attrs() *FilterAttrs
... ...
@@ -180,11 +184,46 @@ func NewMirredAction(redirIndex int) *MirredAction {
180 180
 	}
181 181
 }
182 182
 
183
+// Constants used in TcU32Sel.Flags.
184
+const (
185
+	TC_U32_TERMINAL  = nl.TC_U32_TERMINAL
186
+	TC_U32_OFFSET    = nl.TC_U32_OFFSET
187
+	TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET
188
+	TC_U32_EAT       = nl.TC_U32_EAT
189
+)
190
+
191
+// Sel of the U32 filters that contains multiple TcU32Key. This is the copy
192
+// and the frontend representation of nl.TcU32Sel. It is serialized into canonical
193
+// nl.TcU32Sel with the appropriate endianness.
194
+type TcU32Sel struct {
195
+	Flags    uint8
196
+	Offshift uint8
197
+	Nkeys    uint8
198
+	Pad      uint8
199
+	Offmask  uint16
200
+	Off      uint16
201
+	Offoff   int16
202
+	Hoff     int16
203
+	Hmask    uint32
204
+	Keys     []TcU32Key
205
+}
206
+
207
+// TcU32Key contained of Sel in the U32 filters. This is the copy and the frontend
208
+// representation of nl.TcU32Key. It is serialized into chanonical nl.TcU32Sel
209
+// with the appropriate endianness.
210
+type TcU32Key struct {
211
+	Mask    uint32
212
+	Val     uint32
213
+	Off     int32
214
+	OffMask int32
215
+}
216
+
183 217
 // U32 filters on many packet related properties
184 218
 type U32 struct {
185 219
 	FilterAttrs
186 220
 	ClassId    uint32
187 221
 	RedirIndex int
222
+	Sel        *TcU32Sel
188 223
 	Actions    []Action
189 224
 }
190 225
 
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"errors"
7 7
 	"fmt"
8 8
 	"syscall"
9
+	"unsafe"
9 10
 
10 11
 	"github.com/vishvananda/netlink/nl"
11 12
 )
... ...
@@ -128,12 +129,34 @@ func (h *Handle) FilterAdd(filter Filter) error {
128 128
 
129 129
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
130 130
 	if u32, ok := filter.(*U32); ok {
131
-		// match all
132
-		sel := nl.TcU32Sel{
133
-			Nkeys: 1,
134
-			Flags: nl.TC_U32_TERMINAL,
131
+		// Convert TcU32Sel into nl.TcU32Sel as it is without copy.
132
+		sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel))
133
+		if sel == nil {
134
+			// match all
135
+			sel = &nl.TcU32Sel{
136
+				Nkeys: 1,
137
+				Flags: nl.TC_U32_TERMINAL,
138
+			}
139
+			sel.Keys = append(sel.Keys, nl.TcU32Key{})
140
+		}
141
+
142
+		if native != networkOrder {
143
+			// Copy Tcu32Sel.
144
+			cSel := sel
145
+			keys := make([]nl.TcU32Key, cap(sel.Keys))
146
+			copy(keys, sel.Keys)
147
+			cSel.Keys = keys
148
+			sel = cSel
149
+
150
+			// Handle the endianness of attributes
151
+			sel.Offmask = native.Uint16(htons(sel.Offmask))
152
+			sel.Hmask = native.Uint32(htonl(sel.Hmask))
153
+			for _, key := range sel.Keys {
154
+				key.Mask = native.Uint32(htonl(key.Mask))
155
+				key.Val = native.Uint32(htonl(key.Val))
156
+			}
135 157
 		}
136
-		sel.Keys = append(sel.Keys, nl.TcU32Key{})
158
+		sel.Nkeys = uint8(len(sel.Keys))
137 159
 		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
138 160
 		if u32.ClassId != 0 {
139 161
 			nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId))
... ...
@@ -425,6 +448,16 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
425 425
 		case nl.TCA_U32_SEL:
426 426
 			detailed = true
427 427
 			sel := nl.DeserializeTcU32Sel(datum.Value)
428
+			u32.Sel = (*TcU32Sel)(unsafe.Pointer(sel))
429
+			if native != networkOrder {
430
+				// Handle the endianness of attributes
431
+				u32.Sel.Offmask = native.Uint16(htons(sel.Offmask))
432
+				u32.Sel.Hmask = native.Uint32(htonl(sel.Hmask))
433
+				for _, key := range u32.Sel.Keys {
434
+					key.Mask = native.Uint32(htonl(key.Mask))
435
+					key.Val = native.Uint32(htonl(key.Val))
436
+				}
437
+			}
428 438
 			// only parse if we have a very basic redirect
429 439
 			if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
430 440
 				return detailed, nil
... ...
@@ -443,6 +476,8 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
443 443
 					u32.RedirIndex = int(action.Ifindex)
444 444
 				}
445 445
 			}
446
+		case nl.TCA_U32_CLASSID:
447
+			u32.ClassId = native.Uint32(datum.Value)
446 448
 		}
447 449
 	}
448 450
 	return detailed, nil
449 451
new file mode 100644
... ...
@@ -0,0 +1,189 @@
0
+package nl
1
+
2
+import "unsafe"
3
+
4
+// Track the message sizes for the correct serialization/deserialization
5
+const (
6
+	SizeofNfgenmsg      = 4
7
+	SizeofNfattr        = 4
8
+	SizeofNfConntrack   = 376
9
+	SizeofNfctTupleHead = 52
10
+)
11
+
12
+var L4ProtoMap = map[uint8]string{
13
+	6:  "tcp",
14
+	17: "udp",
15
+}
16
+
17
+// All the following constants are coming from:
18
+// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink_conntrack.h
19
+
20
+// enum cntl_msg_types {
21
+// 	IPCTNL_MSG_CT_NEW,
22
+// 	IPCTNL_MSG_CT_GET,
23
+// 	IPCTNL_MSG_CT_DELETE,
24
+// 	IPCTNL_MSG_CT_GET_CTRZERO,
25
+// 	IPCTNL_MSG_CT_GET_STATS_CPU,
26
+// 	IPCTNL_MSG_CT_GET_STATS,
27
+// 	IPCTNL_MSG_CT_GET_DYING,
28
+// 	IPCTNL_MSG_CT_GET_UNCONFIRMED,
29
+//
30
+// 	IPCTNL_MSG_MAX
31
+// };
32
+const (
33
+	IPCTNL_MSG_CT_GET    = 1
34
+	IPCTNL_MSG_CT_DELETE = 2
35
+)
36
+
37
+// #define NFNETLINK_V0	0
38
+const (
39
+	NFNETLINK_V0 = 0
40
+)
41
+
42
+// #define NLA_F_NESTED (1 << 15)
43
+const (
44
+	NLA_F_NESTED = (1 << 15)
45
+)
46
+
47
+// enum ctattr_type {
48
+// 	CTA_UNSPEC,
49
+// 	CTA_TUPLE_ORIG,
50
+// 	CTA_TUPLE_REPLY,
51
+// 	CTA_STATUS,
52
+// 	CTA_PROTOINFO,
53
+// 	CTA_HELP,
54
+// 	CTA_NAT_SRC,
55
+// #define CTA_NAT	CTA_NAT_SRC	/* backwards compatibility */
56
+// 	CTA_TIMEOUT,
57
+// 	CTA_MARK,
58
+// 	CTA_COUNTERS_ORIG,
59
+// 	CTA_COUNTERS_REPLY,
60
+// 	CTA_USE,
61
+// 	CTA_ID,
62
+// 	CTA_NAT_DST,
63
+// 	CTA_TUPLE_MASTER,
64
+// 	CTA_SEQ_ADJ_ORIG,
65
+// 	CTA_NAT_SEQ_ADJ_ORIG	= CTA_SEQ_ADJ_ORIG,
66
+// 	CTA_SEQ_ADJ_REPLY,
67
+// 	CTA_NAT_SEQ_ADJ_REPLY	= CTA_SEQ_ADJ_REPLY,
68
+// 	CTA_SECMARK,		/* obsolete */
69
+// 	CTA_ZONE,
70
+// 	CTA_SECCTX,
71
+// 	CTA_TIMESTAMP,
72
+// 	CTA_MARK_MASK,
73
+// 	CTA_LABELS,
74
+// 	CTA_LABELS_MASK,
75
+// 	__CTA_MAX
76
+// };
77
+const (
78
+	CTA_TUPLE_ORIG  = 1
79
+	CTA_TUPLE_REPLY = 2
80
+	CTA_STATUS      = 3
81
+	CTA_TIMEOUT     = 8
82
+	CTA_MARK        = 9
83
+	CTA_PROTOINFO   = 4
84
+)
85
+
86
+// enum ctattr_tuple {
87
+// 	CTA_TUPLE_UNSPEC,
88
+// 	CTA_TUPLE_IP,
89
+// 	CTA_TUPLE_PROTO,
90
+// 	CTA_TUPLE_ZONE,
91
+// 	__CTA_TUPLE_MAX
92
+// };
93
+// #define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
94
+const (
95
+	CTA_TUPLE_IP    = 1
96
+	CTA_TUPLE_PROTO = 2
97
+)
98
+
99
+// enum ctattr_ip {
100
+// 	CTA_IP_UNSPEC,
101
+// 	CTA_IP_V4_SRC,
102
+// 	CTA_IP_V4_DST,
103
+// 	CTA_IP_V6_SRC,
104
+// 	CTA_IP_V6_DST,
105
+// 	__CTA_IP_MAX
106
+// };
107
+// #define CTA_IP_MAX (__CTA_IP_MAX - 1)
108
+const (
109
+	CTA_IP_V4_SRC = 1
110
+	CTA_IP_V4_DST = 2
111
+	CTA_IP_V6_SRC = 3
112
+	CTA_IP_V6_DST = 4
113
+)
114
+
115
+// enum ctattr_l4proto {
116
+// 	CTA_PROTO_UNSPEC,
117
+// 	CTA_PROTO_NUM,
118
+// 	CTA_PROTO_SRC_PORT,
119
+// 	CTA_PROTO_DST_PORT,
120
+// 	CTA_PROTO_ICMP_ID,
121
+// 	CTA_PROTO_ICMP_TYPE,
122
+// 	CTA_PROTO_ICMP_CODE,
123
+// 	CTA_PROTO_ICMPV6_ID,
124
+// 	CTA_PROTO_ICMPV6_TYPE,
125
+// 	CTA_PROTO_ICMPV6_CODE,
126
+// 	__CTA_PROTO_MAX
127
+// };
128
+// #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
129
+const (
130
+	CTA_PROTO_NUM      = 1
131
+	CTA_PROTO_SRC_PORT = 2
132
+	CTA_PROTO_DST_PORT = 3
133
+)
134
+
135
+// enum ctattr_protoinfo {
136
+// 	CTA_PROTOINFO_UNSPEC,
137
+// 	CTA_PROTOINFO_TCP,
138
+// 	CTA_PROTOINFO_DCCP,
139
+// 	CTA_PROTOINFO_SCTP,
140
+// 	__CTA_PROTOINFO_MAX
141
+// };
142
+// #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
143
+const (
144
+	CTA_PROTOINFO_TCP = 1
145
+)
146
+
147
+// enum ctattr_protoinfo_tcp {
148
+// 	CTA_PROTOINFO_TCP_UNSPEC,
149
+// 	CTA_PROTOINFO_TCP_STATE,
150
+// 	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
151
+// 	CTA_PROTOINFO_TCP_WSCALE_REPLY,
152
+// 	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
153
+// 	CTA_PROTOINFO_TCP_FLAGS_REPLY,
154
+// 	__CTA_PROTOINFO_TCP_MAX
155
+// };
156
+// #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
157
+const (
158
+	CTA_PROTOINFO_TCP_STATE           = 1
159
+	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL = 2
160
+	CTA_PROTOINFO_TCP_WSCALE_REPLY    = 3
161
+	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL  = 4
162
+	CTA_PROTOINFO_TCP_FLAGS_REPLY     = 5
163
+)
164
+
165
+// /* General form of address family dependent message.
166
+//  */
167
+// struct nfgenmsg {
168
+// 	__u8  nfgen_family;		/* AF_xxx */
169
+// 	__u8  version;		/* nfnetlink version */
170
+// 	__be16    res_id;		/* resource id */
171
+// };
172
+type Nfgenmsg struct {
173
+	NfgenFamily uint8
174
+	Version     uint8
175
+	ResId       uint16 // big endian
176
+}
177
+
178
+func (msg *Nfgenmsg) Len() int {
179
+	return SizeofNfgenmsg
180
+}
181
+
182
+func DeserializeNfgenmsg(b []byte) *Nfgenmsg {
183
+	return (*Nfgenmsg)(unsafe.Pointer(&b[0:SizeofNfgenmsg][0]))
184
+}
185
+
186
+func (msg *Nfgenmsg) Serialize() []byte {
187
+	return (*(*[SizeofNfgenmsg]byte)(unsafe.Pointer(msg)))[:]
188
+}
... ...
@@ -24,7 +24,7 @@ const (
24 24
 )
25 25
 
26 26
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
27
-var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
27
+var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM, syscall.NETLINK_NETFILTER}
28 28
 
29 29
 var nextSeqNr uint32
30 30
 
... ...
@@ -321,6 +321,7 @@ func (a *RtAttr) Serialize() []byte {
321 321
 type NetlinkRequest struct {
322 322
 	syscall.NlMsghdr
323 323
 	Data    []NetlinkRequestData
324
+	RawData []byte
324 325
 	Sockets map[int]*SocketHandle
325 326
 }
326 327
 
... ...
@@ -332,6 +333,8 @@ func (req *NetlinkRequest) Serialize() []byte {
332 332
 		dataBytes[i] = data.Serialize()
333 333
 		length = length + len(dataBytes[i])
334 334
 	}
335
+	length += len(req.RawData)
336
+
335 337
 	req.Len = uint32(length)
336 338
 	b := make([]byte, length)
337 339
 	hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
... ...
@@ -343,6 +346,10 @@ func (req *NetlinkRequest) Serialize() []byte {
343 343
 			next = next + 1
344 344
 		}
345 345
 	}
346
+	// Add the raw data if any
347
+	if len(req.RawData) > 0 {
348
+		copy(b[next:length], req.RawData)
349
+	}
346 350
 	return b
347 351
 }
348 352
 
... ...
@@ -352,6 +359,13 @@ func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
352 352
 	}
353 353
 }
354 354
 
355
+// AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization
356
+func (req *NetlinkRequest) AddRawData(data []byte) {
357
+	if data != nil {
358
+		req.RawData = append(req.RawData, data...)
359
+	}
360
+}
361
+
355 362
 // Execute the request against a the given sockType.
356 363
 // Returns a list of netlink messages in serialized format, optionally filtered
357 364
 // by resType.