Browse code

Vendoring vishvananda/netlink 734d02c

Signed-off-by: Alessandro Boch <aboch@docker.com>
(cherry picked from commit 58b8b8fa1555223c1144f08fc4536ffaa4d12fd0)

Alessandro Boch authored on 2016/06/23 01:31:55
Showing 24 changed files
... ...
@@ -75,7 +75,7 @@ clone git github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa07
75 75
 clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
76 76
 clone git github.com/docker/libkv 7283ef27ed32fe267388510a91709b307bb9942c
77 77
 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
78
-clone git github.com/vishvananda/netlink 7995ff5647a22cbf0dc41bf5c0e977bdb0d5c6b7
78
+clone git github.com/vishvananda/netlink 734d02c3e202f682c74b71314b2c61eec0170fd4
79 79
 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
80 80
 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
81 81
 clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d
... ...
@@ -8,7 +8,7 @@ the kernel. It can be used to add and remove interfaces, set ip addresses
8 8
 and routes, and configure ipsec. Netlink communication requires elevated
9 9
 privileges, so in most cases this code needs to be run as root. Since
10 10
 low-level netlink messages are inscrutable at best, the library attempts
11
-to provide an api that is loosely modeled on the CLI provied by iproute2.
11
+to provide an api that is loosely modeled on the CLI provided by iproute2.
12 12
 Actions like `ip link add` will be accomplished via a similarly named
13 13
 function like AddLink(). This library began its life as a fork of the
14 14
 netlink functionality in
... ...
@@ -50,39 +50,6 @@ type HtbClass struct {
50 50
 	Prio    uint32
51 51
 }
52 52
 
53
-func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
54
-	mtu := 1600
55
-	rate := cattrs.Rate / 8
56
-	ceil := cattrs.Ceil / 8
57
-	buffer := cattrs.Buffer
58
-	cbuffer := cattrs.Cbuffer
59
-
60
-	if ceil == 0 {
61
-		ceil = rate
62
-	}
63
-
64
-	if buffer == 0 {
65
-		buffer = uint32(float64(rate)/Hz() + float64(mtu))
66
-	}
67
-	buffer = uint32(Xmittime(rate, buffer))
68
-
69
-	if cbuffer == 0 {
70
-		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
71
-	}
72
-	cbuffer = uint32(Xmittime(ceil, cbuffer))
73
-
74
-	return &HtbClass{
75
-		ClassAttrs: attrs,
76
-		Rate:       rate,
77
-		Ceil:       ceil,
78
-		Buffer:     buffer,
79
-		Cbuffer:    cbuffer,
80
-		Quantum:    10,
81
-		Level:      0,
82
-		Prio:       0,
83
-	}
84
-}
85
-
86 53
 func (q HtbClass) String() string {
87 54
 	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
88 55
 }
... ...
@@ -7,6 +7,40 @@ import (
7 7
 	"github.com/vishvananda/netlink/nl"
8 8
 )
9 9
 
10
+// NOTE: function is in here because it uses other linux functions
11
+func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
12
+	mtu := 1600
13
+	rate := cattrs.Rate / 8
14
+	ceil := cattrs.Ceil / 8
15
+	buffer := cattrs.Buffer
16
+	cbuffer := cattrs.Cbuffer
17
+
18
+	if ceil == 0 {
19
+		ceil = rate
20
+	}
21
+
22
+	if buffer == 0 {
23
+		buffer = uint32(float64(rate)/Hz() + float64(mtu))
24
+	}
25
+	buffer = uint32(Xmittime(rate, buffer))
26
+
27
+	if cbuffer == 0 {
28
+		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
29
+	}
30
+	cbuffer = uint32(Xmittime(ceil, cbuffer))
31
+
32
+	return &HtbClass{
33
+		ClassAttrs: attrs,
34
+		Rate:       rate,
35
+		Ceil:       ceil,
36
+		Buffer:     buffer,
37
+		Cbuffer:    cbuffer,
38
+		Quantum:    10,
39
+		Level:      0,
40
+		Prio:       0,
41
+	}
42
+}
43
+
10 44
 // ClassDel will delete a class from the system.
11 45
 // Equivalent to: `tc class del $class`
12 46
 func ClassDel(class Class) error {
... ...
@@ -1,11 +1,6 @@
1 1
 package netlink
2 2
 
3
-import (
4
-	"errors"
5
-	"fmt"
6
-
7
-	"github.com/vishvananda/netlink/nl"
8
-)
3
+import "fmt"
9 4
 
10 5
 type Filter interface {
11 6
 	Attrs() *FilterAttrs
... ...
@@ -217,74 +212,6 @@ type FilterFwAttrs struct {
217 217
 	LinkLayer int
218 218
 }
219 219
 
220
-// Fw filter filters on firewall marks
221
-type Fw struct {
222
-	FilterAttrs
223
-	ClassId uint32
224
-	// TODO remove nl type from interface
225
-	Police nl.TcPolice
226
-	InDev  string
227
-	// TODO Action
228
-	Mask   uint32
229
-	AvRate uint32
230
-	Rtab   [256]uint32
231
-	Ptab   [256]uint32
232
-}
233
-
234
-func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
235
-	var rtab [256]uint32
236
-	var ptab [256]uint32
237
-	rcellLog := -1
238
-	pcellLog := -1
239
-	avrate := fattrs.AvRate / 8
240
-	police := nl.TcPolice{}
241
-	police.Rate.Rate = fattrs.Rate / 8
242
-	police.PeakRate.Rate = fattrs.PeakRate / 8
243
-	buffer := fattrs.Buffer
244
-	linklayer := nl.LINKLAYER_ETHERNET
245
-
246
-	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
247
-		linklayer = fattrs.LinkLayer
248
-	}
249
-
250
-	police.Action = int32(fattrs.Action)
251
-	if police.Rate.Rate != 0 {
252
-		police.Rate.Mpu = fattrs.Mpu
253
-		police.Rate.Overhead = fattrs.Overhead
254
-		if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
255
-			return nil, errors.New("TBF: failed to calculate rate table")
256
-		}
257
-		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
258
-	}
259
-	police.Mtu = fattrs.Mtu
260
-	if police.PeakRate.Rate != 0 {
261
-		police.PeakRate.Mpu = fattrs.Mpu
262
-		police.PeakRate.Overhead = fattrs.Overhead
263
-		if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
264
-			return nil, errors.New("POLICE: failed to calculate peak rate table")
265
-		}
266
-	}
267
-
268
-	return &Fw{
269
-		FilterAttrs: attrs,
270
-		ClassId:     fattrs.ClassId,
271
-		InDev:       fattrs.InDev,
272
-		Mask:        fattrs.Mask,
273
-		Police:      police,
274
-		AvRate:      avrate,
275
-		Rtab:        rtab,
276
-		Ptab:        ptab,
277
-	}, nil
278
-}
279
-
280
-func (filter *Fw) Attrs() *FilterAttrs {
281
-	return &filter.FilterAttrs
282
-}
283
-
284
-func (filter *Fw) Type() string {
285
-	return "fw"
286
-}
287
-
288 220
 type BpfFilter struct {
289 221
 	FilterAttrs
290 222
 	ClassId      uint32
... ...
@@ -3,12 +3,83 @@ package netlink
3 3
 import (
4 4
 	"bytes"
5 5
 	"encoding/binary"
6
+	"errors"
6 7
 	"fmt"
7 8
 	"syscall"
8 9
 
9 10
 	"github.com/vishvananda/netlink/nl"
10 11
 )
11 12
 
13
+// Fw filter filters on firewall marks
14
+// NOTE: this is in filter_linux because it refers to nl.TcPolice which
15
+//       is defined in nl/tc_linux.go
16
+type Fw struct {
17
+	FilterAttrs
18
+	ClassId uint32
19
+	// TODO remove nl type from interface
20
+	Police nl.TcPolice
21
+	InDev  string
22
+	// TODO Action
23
+	Mask   uint32
24
+	AvRate uint32
25
+	Rtab   [256]uint32
26
+	Ptab   [256]uint32
27
+}
28
+
29
+func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
30
+	var rtab [256]uint32
31
+	var ptab [256]uint32
32
+	rcellLog := -1
33
+	pcellLog := -1
34
+	avrate := fattrs.AvRate / 8
35
+	police := nl.TcPolice{}
36
+	police.Rate.Rate = fattrs.Rate / 8
37
+	police.PeakRate.Rate = fattrs.PeakRate / 8
38
+	buffer := fattrs.Buffer
39
+	linklayer := nl.LINKLAYER_ETHERNET
40
+
41
+	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
42
+		linklayer = fattrs.LinkLayer
43
+	}
44
+
45
+	police.Action = int32(fattrs.Action)
46
+	if police.Rate.Rate != 0 {
47
+		police.Rate.Mpu = fattrs.Mpu
48
+		police.Rate.Overhead = fattrs.Overhead
49
+		if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
50
+			return nil, errors.New("TBF: failed to calculate rate table")
51
+		}
52
+		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
53
+	}
54
+	police.Mtu = fattrs.Mtu
55
+	if police.PeakRate.Rate != 0 {
56
+		police.PeakRate.Mpu = fattrs.Mpu
57
+		police.PeakRate.Overhead = fattrs.Overhead
58
+		if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
59
+			return nil, errors.New("POLICE: failed to calculate peak rate table")
60
+		}
61
+	}
62
+
63
+	return &Fw{
64
+		FilterAttrs: attrs,
65
+		ClassId:     fattrs.ClassId,
66
+		InDev:       fattrs.InDev,
67
+		Mask:        fattrs.Mask,
68
+		Police:      police,
69
+		AvRate:      avrate,
70
+		Rtab:        rtab,
71
+		Ptab:        ptab,
72
+	}, nil
73
+}
74
+
75
+func (filter *Fw) Attrs() *FilterAttrs {
76
+	return &filter.FilterAttrs
77
+}
78
+
79
+func (filter *Fw) Type() string {
80
+	return "fw"
81
+}
82
+
12 83
 // FilterDel will delete a filter from the system.
13 84
 // Equivalent to: `tc filter del $filter`
14 85
 func FilterDel(filter Filter) error {
... ...
@@ -126,14 +197,14 @@ func (h *Handle) FilterAdd(filter Filter) error {
126 126
 
127 127
 // FilterList gets a list of filters in the system.
128 128
 // Equivalent to: `tc filter show`.
129
-// Generally retunrs nothing if link and parent are not specified.
129
+// Generally returns nothing if link and parent are not specified.
130 130
 func FilterList(link Link, parent uint32) ([]Filter, error) {
131 131
 	return pkgHandle.FilterList(link, parent)
132 132
 }
133 133
 
134 134
 // FilterList gets a list of filters in the system.
135 135
 // Equivalent to: `tc filter show`.
136
-// Generally retunrs nothing if link and parent are not specified.
136
+// Generally returns nothing if link and parent are not specified.
137 137
 func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
138 138
 	req := h.newNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
139 139
 	msg := &nl.TcMsg{
140 140
deleted file mode 100644
... ...
@@ -1,86 +0,0 @@
1
-package netlink
2
-
3
-import (
4
-	"sync/atomic"
5
-	"syscall"
6
-
7
-	"github.com/vishvananda/netlink/nl"
8
-	"github.com/vishvananda/netns"
9
-)
10
-
11
-// Empty handle used by the netlink package methods
12
-var pkgHandle = &Handle{}
13
-
14
-// Handle is an handle for the netlink requests
15
-// on a specific network namespace. All the requests
16
-// share the same netlink socket, which gets released
17
-// when the handle is deleted.
18
-type Handle struct {
19
-	seq          uint32
20
-	routeSocket  *nl.NetlinkSocket
21
-	xfrmSocket   *nl.NetlinkSocket
22
-	lookupByDump bool
23
-}
24
-
25
-// NewHandle returns a netlink handle on the current network namespace.
26
-func NewHandle() (*Handle, error) {
27
-	return newHandle(netns.None(), netns.None())
28
-}
29
-
30
-// NewHandle returns a netlink handle on the network namespace
31
-// specified by ns. If ns=netns.None(), current network namespace
32
-// will be assumed
33
-func NewHandleAt(ns netns.NsHandle) (*Handle, error) {
34
-	return newHandle(ns, netns.None())
35
-}
36
-
37
-// NewHandleAtFrom works as NewHandle but allows client to specify the
38
-// new and the origin netns Handle.
39
-func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
40
-	return newHandle(newNs, curNs)
41
-}
42
-
43
-func newHandle(newNs, curNs netns.NsHandle) (*Handle, error) {
44
-	var (
45
-		err     error
46
-		rSocket *nl.NetlinkSocket
47
-		xSocket *nl.NetlinkSocket
48
-	)
49
-	rSocket, err = nl.GetNetlinkSocketAt(newNs, curNs, syscall.NETLINK_ROUTE)
50
-	if err != nil {
51
-		return nil, err
52
-	}
53
-	xSocket, err = nl.GetNetlinkSocketAt(newNs, curNs, syscall.NETLINK_XFRM)
54
-	if err != nil {
55
-		return nil, err
56
-	}
57
-	return &Handle{routeSocket: rSocket, xfrmSocket: xSocket}, nil
58
-}
59
-
60
-// Delete releases the resources allocated to this handle
61
-func (h *Handle) Delete() {
62
-	if h.routeSocket != nil {
63
-		h.routeSocket.Close()
64
-	}
65
-	if h.xfrmSocket != nil {
66
-		h.xfrmSocket.Close()
67
-	}
68
-	h.routeSocket, h.xfrmSocket = nil, nil
69
-}
70
-
71
-func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
72
-	// Do this so that package API still use nl package variable nextSeqNr
73
-	if h.routeSocket == nil {
74
-		return nl.NewNetlinkRequest(proto, flags)
75
-	}
76
-	return &nl.NetlinkRequest{
77
-		NlMsghdr: syscall.NlMsghdr{
78
-			Len:   uint32(syscall.SizeofNlMsghdr),
79
-			Type:  uint16(proto),
80
-			Flags: syscall.NLM_F_REQUEST | uint16(flags),
81
-			Seq:   atomic.AddUint32(&h.seq, 1),
82
-		},
83
-		RouteSocket: h.routeSocket,
84
-		XfmrSocket:  h.xfrmSocket,
85
-	}
86
-}
87 1
new file mode 100644
... ...
@@ -0,0 +1,86 @@
0
+package netlink
1
+
2
+import (
3
+	"syscall"
4
+
5
+	"github.com/vishvananda/netlink/nl"
6
+	"github.com/vishvananda/netns"
7
+)
8
+
9
+// Empty handle used by the netlink package methods
10
+var pkgHandle = &Handle{}
11
+
12
+// Handle is a handle for the netlink requests on a
13
+// specific network namespace. All the requests on the
14
+// same netlink family share the same netlink socket,
15
+// which gets released when the handle is deleted.
16
+type Handle struct {
17
+	sockets      map[int]*nl.SocketHandle
18
+	lookupByDump bool
19
+}
20
+
21
+// SupportsNetlinkFamily reports whether the passed netlink family is supported by this Handle
22
+func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
23
+	_, ok := h.sockets[nlFamily]
24
+	return ok
25
+}
26
+
27
+// NewHandle returns a netlink handle on the current network namespace.
28
+// Caller may specify the netlink families the handle should support.
29
+// If no families are specified, all the families the netlink package
30
+// supports will be automatically added.
31
+func NewHandle(nlFamilies ...int) (*Handle, error) {
32
+	return newHandle(netns.None(), netns.None(), nlFamilies...)
33
+}
34
+
35
+// NewHandleAt returns a netlink handle on the network namespace
36
+// specified by ns. If ns=netns.None(), current network namespace
37
+// will be assumed
38
+func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
39
+	return newHandle(ns, netns.None(), nlFamilies...)
40
+}
41
+
42
+// NewHandleAtFrom works as NewHandle but allows client to specify the
43
+// new and the origin netns Handle.
44
+func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
45
+	return newHandle(newNs, curNs)
46
+}
47
+
48
+func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error) {
49
+	h := &Handle{sockets: map[int]*nl.SocketHandle{}}
50
+	fams := nl.SupportedNlFamilies
51
+	if len(nlFamilies) != 0 {
52
+		fams = nlFamilies
53
+	}
54
+	for _, f := range fams {
55
+		s, err := nl.GetNetlinkSocketAt(newNs, curNs, f)
56
+		if err != nil {
57
+			return nil, err
58
+		}
59
+		h.sockets[f] = &nl.SocketHandle{Socket: s}
60
+	}
61
+	return h, nil
62
+}
63
+
64
+// Delete releases the resources allocated to this handle
65
+func (h *Handle) Delete() {
66
+	for _, sh := range h.sockets {
67
+		sh.Close()
68
+	}
69
+	h.sockets = nil
70
+}
71
+
72
+func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
73
+	// Do this so that package API still use nl package variable nextSeqNr
74
+	if h.sockets == nil {
75
+		return nl.NewNetlinkRequest(proto, flags)
76
+	}
77
+	return &nl.NetlinkRequest{
78
+		NlMsghdr: syscall.NlMsghdr{
79
+			Len:   uint32(syscall.SizeofNlMsghdr),
80
+			Type:  uint16(proto),
81
+			Flags: syscall.NLM_F_REQUEST | uint16(flags),
82
+		},
83
+		Sockets: h.sockets,
84
+	}
85
+}
... ...
@@ -3,7 +3,6 @@ package netlink
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
-	"syscall"
7 6
 )
8 7
 
9 8
 // Link represents a link device from netlink. Shared link attributes
... ...
@@ -173,11 +172,6 @@ func (macvtap Macvtap) Type() string {
173 173
 
174 174
 type TuntapMode uint16
175 175
 
176
-const (
177
-	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
178
-	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
179
-)
180
-
181 176
 // Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
182 177
 type Tuntap struct {
183 178
 	LinkAttrs
... ...
@@ -14,6 +14,11 @@ import (
14 14
 
15 15
 const SizeofLinkStats = 0x5c
16 16
 
17
+const (
18
+	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
19
+	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
20
+)
21
+
17 22
 var native = nl.NativeEndian()
18 23
 var lookupByDump = false
19 24
 
... ...
@@ -675,6 +680,11 @@ func (h *Handle) LinkAdd(link Link) error {
675 675
 			data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
676 676
 			nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode]))
677 677
 		}
678
+	} else if macv, ok := link.(*Macvtap); ok {
679
+		if macv.Mode != MACVLAN_MODE_DEFAULT {
680
+			data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
681
+			nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode]))
682
+		}
678 683
 	} else if gretap, ok := link.(*Gretap); ok {
679 684
 		addGretapAttrs(gretap, linkInfo)
680 685
 	}
... ...
@@ -8,18 +8,7 @@
8 8
 // interface that is loosely modeled on the iproute2 cli.
9 9
 package netlink
10 10
 
11
-import (
12
-	"net"
13
-
14
-	"github.com/vishvananda/netlink/nl"
15
-)
16
-
17
-// Family type definitions
18
-const (
19
-	FAMILY_ALL = nl.FAMILY_ALL
20
-	FAMILY_V4  = nl.FAMILY_V4
21
-	FAMILY_V6  = nl.FAMILY_V6
22
-)
11
+import "net"
23 12
 
24 13
 // ParseIPNet parses a string in ip/net format and returns a net.IPNet.
25 14
 // This is valuable because addresses in netlink are often IPNets and
26 15
new file mode 100644
... ...
@@ -0,0 +1,10 @@
0
+package netlink
1
+
2
+import "github.com/vishvananda/netlink/nl"
3
+
4
+// Family type definitions
5
+const (
6
+	FAMILY_ALL = nl.FAMILY_ALL
7
+	FAMILY_V4  = nl.FAMILY_V4
8
+	FAMILY_V6  = nl.FAMILY_V6
9
+)
... ...
@@ -138,6 +138,6 @@ func NeighList(linkIndex, family int) ([]Neigh, error) {
138 138
 	return nil, ErrNotImplemented
139 139
 }
140 140
 
141
-func NeighDeserialize(m []byte) (*Ndmsg, *Neigh, error) {
142
-	return nil, nil, ErrNotImplemented
141
+func NeighDeserialize(m []byte) (*Neigh, error) {
142
+	return nil, ErrNotImplemented
143 143
 }
... ...
@@ -22,6 +22,9 @@ const (
22 22
 	FAMILY_V6  = syscall.AF_INET6
23 23
 )
24 24
 
25
+// SupportedNlFamilies contains the list of netlink families this netlink package supports
26
+var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM}
27
+
25 28
 var nextSeqNr uint32
26 29
 
27 30
 // GetIPFamily returns the family type of a net.IP.
... ...
@@ -175,9 +178,8 @@ func (a *RtAttr) Serialize() []byte {
175 175
 
176 176
 type NetlinkRequest struct {
177 177
 	syscall.NlMsghdr
178
-	Data        []NetlinkRequestData
179
-	RouteSocket *NetlinkSocket
180
-	XfmrSocket  *NetlinkSocket
178
+	Data    []NetlinkRequestData
179
+	Sockets map[int]*SocketHandle
181 180
 }
182 181
 
183 182
 // Serialize the Netlink Request into a byte array
... ...
@@ -209,7 +211,7 @@ func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
209 209
 }
210 210
 
211 211
 // Execute the request against the given sockType.
212
-// Returns a list of netlink messages in seriaized format, optionally filtered
212
+// Returns a list of netlink messages in serialized format, optionally filtered
213 213
 // by resType.
214 214
 func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
215 215
 	var (
... ...
@@ -217,15 +219,12 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
217 217
 		err error
218 218
 	)
219 219
 
220
-	switch sockType {
221
-	case syscall.NETLINK_XFRM:
222
-		s = req.XfmrSocket
223
-	case syscall.NETLINK_ROUTE:
224
-		s = req.RouteSocket
225
-	default:
226
-		return nil, fmt.Errorf("Socket type %d is not handled", sockType)
220
+	if req.Sockets != nil {
221
+		if sh, ok := req.Sockets[sockType]; ok {
222
+			s = sh.Socket
223
+			req.Seq = atomic.AddUint32(&sh.Seq, 1)
224
+		}
227 225
 	}
228
-
229 226
 	sharedSocket := s != nil
230 227
 
231 228
 	if s == nil {
... ...
@@ -486,3 +485,17 @@ func netlinkRouteAttrAndValue(b []byte) (*syscall.RtAttr, []byte, int, error) {
486 486
 	}
487 487
 	return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
488 488
 }
489
+
490
+// SocketHandle contains the netlink socket and the associated
491
+// sequence counter for a specific netlink family
492
+type SocketHandle struct {
493
+	Seq    uint32
494
+	Socket *NetlinkSocket
495
+}
496
+
497
+// Close closes the netlink socket
498
+func (sh *SocketHandle) Close() {
499
+	if sh.Socket != nil {
500
+		sh.Socket.Close()
501
+	}
502
+}
... ...
@@ -40,3 +40,15 @@ func DeserializeRtMsg(b []byte) *RtMsg {
40 40
 func (msg *RtMsg) Serialize() []byte {
41 41
 	return (*(*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:]
42 42
 }
43
+
44
+type RtNexthop struct {
45
+	syscall.RtNexthop
46
+}
47
+
48
+func DeserializeRtNexthop(b []byte) *RtNexthop {
49
+	return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0]))
50
+}
51
+
52
+func (msg *RtNexthop) Serialize() []byte {
53
+	return (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]
54
+}
... ...
@@ -176,70 +176,6 @@ type Netem struct {
176 176
 	CorruptCorr   uint32
177 177
 }
178 178
 
179
-func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
180
-	var limit uint32 = 1000
181
-	var lossCorr, delayCorr, duplicateCorr uint32
182
-	var reorderProb, reorderCorr uint32
183
-	var corruptProb, corruptCorr uint32
184
-
185
-	latency := nattrs.Latency
186
-	loss := Percentage2u32(nattrs.Loss)
187
-	gap := nattrs.Gap
188
-	duplicate := Percentage2u32(nattrs.Duplicate)
189
-	jitter := nattrs.Jitter
190
-
191
-	// Correlation
192
-	if latency > 0 && jitter > 0 {
193
-		delayCorr = Percentage2u32(nattrs.DelayCorr)
194
-	}
195
-	if loss > 0 {
196
-		lossCorr = Percentage2u32(nattrs.LossCorr)
197
-	}
198
-	if duplicate > 0 {
199
-		duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
200
-	}
201
-	// FIXME should validate values(like loss/duplicate are percentages...)
202
-	latency = time2Tick(latency)
203
-
204
-	if nattrs.Limit != 0 {
205
-		limit = nattrs.Limit
206
-	}
207
-	// Jitter is only value if latency is > 0
208
-	if latency > 0 {
209
-		jitter = time2Tick(jitter)
210
-	}
211
-
212
-	reorderProb = Percentage2u32(nattrs.ReorderProb)
213
-	reorderCorr = Percentage2u32(nattrs.ReorderCorr)
214
-
215
-	if reorderProb > 0 {
216
-		// ERROR if lantency == 0
217
-		if gap == 0 {
218
-			gap = 1
219
-		}
220
-	}
221
-
222
-	corruptProb = Percentage2u32(nattrs.CorruptProb)
223
-	corruptCorr = Percentage2u32(nattrs.CorruptCorr)
224
-
225
-	return &Netem{
226
-		QdiscAttrs:    attrs,
227
-		Latency:       latency,
228
-		DelayCorr:     delayCorr,
229
-		Limit:         limit,
230
-		Loss:          loss,
231
-		LossCorr:      lossCorr,
232
-		Gap:           gap,
233
-		Duplicate:     duplicate,
234
-		DuplicateCorr: duplicateCorr,
235
-		Jitter:        jitter,
236
-		ReorderProb:   reorderProb,
237
-		ReorderCorr:   reorderCorr,
238
-		CorruptProb:   corruptProb,
239
-		CorruptCorr:   corruptCorr,
240
-	}
241
-}
242
-
243 179
 func (qdisc *Netem) Attrs() *QdiscAttrs {
244 180
 	return &qdisc.QdiscAttrs
245 181
 }
... ...
@@ -10,6 +10,71 @@ import (
10 10
 	"github.com/vishvananda/netlink/nl"
11 11
 )
12 12
 
13
+// NOTE function is here because it uses other linux functions
14
+func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
15
+	var limit uint32 = 1000
16
+	var lossCorr, delayCorr, duplicateCorr uint32
17
+	var reorderProb, reorderCorr uint32
18
+	var corruptProb, corruptCorr uint32
19
+
20
+	latency := nattrs.Latency
21
+	loss := Percentage2u32(nattrs.Loss)
22
+	gap := nattrs.Gap
23
+	duplicate := Percentage2u32(nattrs.Duplicate)
24
+	jitter := nattrs.Jitter
25
+
26
+	// Correlation
27
+	if latency > 0 && jitter > 0 {
28
+		delayCorr = Percentage2u32(nattrs.DelayCorr)
29
+	}
30
+	if loss > 0 {
31
+		lossCorr = Percentage2u32(nattrs.LossCorr)
32
+	}
33
+	if duplicate > 0 {
34
+		duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
35
+	}
36
+	// FIXME should validate values(like loss/duplicate are percentages...)
37
+	latency = time2Tick(latency)
38
+
39
+	if nattrs.Limit != 0 {
40
+		limit = nattrs.Limit
41
+	}
42
+	// Jitter is only valid if latency is > 0
43
+	if latency > 0 {
44
+		jitter = time2Tick(jitter)
45
+	}
46
+
47
+	reorderProb = Percentage2u32(nattrs.ReorderProb)
48
+	reorderCorr = Percentage2u32(nattrs.ReorderCorr)
49
+
50
+	if reorderProb > 0 {
51
+		// ERROR if latency == 0
52
+		if gap == 0 {
53
+			gap = 1
54
+		}
55
+	}
56
+
57
+	corruptProb = Percentage2u32(nattrs.CorruptProb)
58
+	corruptCorr = Percentage2u32(nattrs.CorruptCorr)
59
+
60
+	return &Netem{
61
+		QdiscAttrs:    attrs,
62
+		Latency:       latency,
63
+		DelayCorr:     delayCorr,
64
+		Limit:         limit,
65
+		Loss:          loss,
66
+		LossCorr:      lossCorr,
67
+		Gap:           gap,
68
+		Duplicate:     duplicate,
69
+		DuplicateCorr: duplicateCorr,
70
+		Jitter:        jitter,
71
+		ReorderProb:   reorderProb,
72
+		ReorderCorr:   reorderCorr,
73
+		CorruptProb:   corruptProb,
74
+		CorruptCorr:   corruptCorr,
75
+	}
76
+}
77
+
13 78
 // QdiscDel will delete a qdisc from the system.
14 79
 // Equivalent to: `tc qdisc del $qdisc`
15 80
 func QdiscDel(qdisc Qdisc) error {
... ...
@@ -3,27 +3,13 @@ package netlink
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
-	"syscall"
7 6
 )
8 7
 
9 8
 // Scope is an enum representing a route scope.
10 9
 type Scope uint8
11 10
 
12
-const (
13
-	SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
14
-	SCOPE_SITE     Scope = syscall.RT_SCOPE_SITE
15
-	SCOPE_LINK     Scope = syscall.RT_SCOPE_LINK
16
-	SCOPE_HOST     Scope = syscall.RT_SCOPE_HOST
17
-	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
18
-)
19
-
20 11
 type NextHopFlag int
21 12
 
22
-const (
23
-	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
24
-	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
25
-)
26
-
27 13
 // Route represents a netlink route.
28 14
 type Route struct {
29 15
 	LinkIndex  int
... ...
@@ -32,6 +18,7 @@ type Route struct {
32 32
 	Dst        *net.IPNet
33 33
 	Src        net.IP
34 34
 	Gw         net.IP
35
+	MultiPath  []*NexthopInfo
35 36
 	Protocol   int
36 37
 	Priority   int
37 38
 	Table      int
... ...
@@ -41,6 +28,10 @@ type Route struct {
41 41
 }
42 42
 
43 43
 func (r Route) String() string {
44
+	if len(r.MultiPath) > 0 {
45
+		return fmt.Sprintf("{Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.Dst,
46
+			r.Src, r.MultiPath, r.ListFlags(), r.Table)
47
+	}
44 48
 	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s Table: %d}", r.LinkIndex, r.Dst,
45 49
 		r.Src, r.Gw, r.ListFlags(), r.Table)
46 50
 }
... ...
@@ -58,23 +49,18 @@ type flagString struct {
58 58
 	s string
59 59
 }
60 60
 
61
-var testFlags = []flagString{
62
-	{f: FLAG_ONLINK, s: "onlink"},
63
-	{f: FLAG_PERVASIVE, s: "pervasive"},
64
-}
65
-
66
-func (r *Route) ListFlags() []string {
67
-	var flags []string
68
-	for _, tf := range testFlags {
69
-		if r.Flags&int(tf.f) != 0 {
70
-			flags = append(flags, tf.s)
71
-		}
72
-	}
73
-	return flags
74
-}
75
-
76 61
 // RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
77 62
 type RouteUpdate struct {
78 63
 	Type uint16
79 64
 	Route
80 65
 }
66
+
67
+type NexthopInfo struct {
68
+	LinkIndex int
69
+	Hops      int
70
+	Gw        net.IP
71
+}
72
+
73
+func (n *NexthopInfo) String() string {
74
+	return fmt.Sprintf("{Ifindex: %d Weight: %d, Gw: %s}", n.LinkIndex, n.Hops+1, n.Gw)
75
+}
... ...
@@ -11,6 +11,14 @@ import (
11 11
 // RtAttr is shared so it is in netlink_linux.go
12 12
 
13 13
 const (
14
+	SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
15
+	SCOPE_SITE     Scope = syscall.RT_SCOPE_SITE
16
+	SCOPE_LINK     Scope = syscall.RT_SCOPE_LINK
17
+	SCOPE_HOST     Scope = syscall.RT_SCOPE_HOST
18
+	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
19
+)
20
+
21
+const (
14 22
 	RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
15 23
 	RT_FILTER_SCOPE
16 24
 	RT_FILTER_TYPE
... ...
@@ -23,6 +31,26 @@ const (
23 23
 	RT_FILTER_TABLE
24 24
 )
25 25
 
26
+const (
27
+	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
28
+	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
29
+)
30
+
31
+var testFlags = []flagString{
32
+	{f: FLAG_ONLINK, s: "onlink"},
33
+	{f: FLAG_PERVASIVE, s: "pervasive"},
34
+}
35
+
36
+func (r *Route) ListFlags() []string {
37
+	var flags []string
38
+	for _, tf := range testFlags {
39
+		if r.Flags&int(tf.f) != 0 {
40
+			flags = append(flags, tf.s)
41
+		}
42
+	}
43
+	return flags
44
+}
45
+
26 46
 // RouteAdd will add a route to the system.
27 47
 // Equivalent to: `ip route add $route`
28 48
 func RouteAdd(route *Route) error {
... ...
@@ -102,6 +130,37 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
102 102
 		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_GATEWAY, gwData))
103 103
 	}
104 104
 
105
+	if len(route.MultiPath) > 0 {
106
+		buf := []byte{}
107
+		for _, nh := range route.MultiPath {
108
+			rtnh := &nl.RtNexthop{
109
+				RtNexthop: syscall.RtNexthop{
110
+					Hops:    uint8(nh.Hops),
111
+					Ifindex: int32(nh.LinkIndex),
112
+					Len:     uint16(syscall.SizeofRtNexthop),
113
+				},
114
+			}
115
+			var gwData []byte
116
+			if nh.Gw != nil {
117
+				gwFamily := nl.GetIPFamily(nh.Gw)
118
+				if family != -1 && family != gwFamily {
119
+					return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
120
+				}
121
+				var gw *nl.RtAttr
122
+				if gwFamily == FAMILY_V4 {
123
+					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4()))
124
+				} else {
125
+					gw = nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16()))
126
+				}
127
+				gwData := gw.Serialize()
128
+				rtnh.Len += uint16(len(gwData))
129
+			}
130
+			buf = append(buf, rtnh.Serialize()...)
131
+			buf = append(buf, gwData...)
132
+		}
133
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf))
134
+	}
135
+
105 136
 	if route.Table > 0 {
106 137
 		if route.Table >= 256 {
107 138
 			msg.Table = syscall.RT_TABLE_UNSPEC
... ...
@@ -275,6 +334,40 @@ func deserializeRoute(m []byte) (Route, error) {
275 275
 			route.Priority = int(native.Uint32(attr.Value[0:4]))
276 276
 		case syscall.RTA_TABLE:
277 277
 			route.Table = int(native.Uint32(attr.Value[0:4]))
278
+		case syscall.RTA_MULTIPATH:
279
+			parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) {
280
+				if len(value) < syscall.SizeofRtNexthop {
281
+					return nil, nil, fmt.Errorf("Lack of bytes")
282
+				}
283
+				nh := nl.DeserializeRtNexthop(value)
284
+				if len(value) < int(nh.RtNexthop.Len) {
285
+					return nil, nil, fmt.Errorf("Lack of bytes")
286
+				}
287
+				info := &NexthopInfo{
288
+					LinkIndex: int(nh.RtNexthop.Ifindex),
289
+					Hops:      int(nh.RtNexthop.Hops),
290
+				}
291
+				attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:int(nh.RtNexthop.Len)])
292
+				if err != nil {
293
+					return nil, nil, err
294
+				}
295
+				for _, attr := range attrs {
296
+					switch attr.Attr.Type {
297
+					case syscall.RTA_GATEWAY:
298
+						info.Gw = net.IP(attr.Value)
299
+					}
300
+				}
301
+				return info, value[int(nh.RtNexthop.Len):], nil
302
+			}
303
+			rest := attr.Value
304
+			for len(rest) > 0 {
305
+				info, buf, err := parseRtNexthop(rest)
306
+				if err != nil {
307
+					return route, err
308
+				}
309
+				route.MultiPath = append(route.MultiPath, info)
310
+				rest = buf
311
+			}
278 312
 		}
279 313
 	}
280 314
 	return route, nil
281 315
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+// +build !linux
1
+
2
+package netlink
3
+
4
+func (r *Route) ListFlags() []string {
5
+	return []string{}
6
+}
... ...
@@ -3,13 +3,10 @@ package netlink
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
-
7
-	"github.com/vishvananda/netlink/nl"
8 6
 )
9 7
 
10 8
 // Rule represents a netlink rule.
11 9
 type Rule struct {
12
-	*nl.RtMsg
13 10
 	Priority          int
14 11
 	Table             int
15 12
 	Mark              int
... ...
@@ -165,7 +165,6 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
165 165
 		}
166 166
 
167 167
 		rule := NewRule()
168
-		rule.RtMsg = msg
169 168
 
170 169
 		for j := range attrs {
171 170
 			switch attrs[j].Attr.Type {
... ...
@@ -13,7 +13,7 @@ const (
13 13
 	XFRM_PROTO_ESP       Proto = syscall.IPPROTO_ESP
14 14
 	XFRM_PROTO_AH        Proto = syscall.IPPROTO_AH
15 15
 	XFRM_PROTO_HAO       Proto = syscall.IPPROTO_DSTOPTS
16
-	XFRM_PROTO_COMP      Proto = syscall.IPPROTO_COMP
16
+	XFRM_PROTO_COMP      Proto = 0x6c // NOTE not defined on darwin
17 17
 	XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW
18 18
 )
19 19
 
... ...
@@ -3,8 +3,6 @@ package netlink
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
-
7
-	"github.com/vishvananda/netlink/nl"
8 6
 )
9 7
 
10 8
 // XfrmStateAlgo represents the algorithm to use for the ipsec encryption.
... ...
@@ -93,7 +91,7 @@ func (sa XfrmState) Print(stats bool) string {
93 93
 }
94 94
 
95 95
 func printLimit(lmt uint64) string {
96
-	if lmt == nl.XFRM_INF {
96
+	if lmt == ^uint64(0) {
97 97
 		return "(INF)"
98 98
 	}
99 99
 	return fmt.Sprintf("%d", lmt)