Browse code

vendor: vishvananda/netlink v1.1.0

- vishvananda/netlink: https://github.com/vishvananda/netlink/compare/v1.0.0...v1.1.0
- vishvananda/netns: https://github.com/vishvananda/netns/compare/7109fa855b0ff1ebef7fbd2f6aa613e8db7cfbc0...0a2b9b5464df8343199164a0321edf3313202f7e
- libnetwork: https://github.com/moby/libnetwork/compare/bf2bd42abc0a3734f12b5ec724e571434e42c669...beab24292cb13d13670985fc860215f9e296bd47
- github.com/moby/ipvs: new dependency (was previously part of libnetwork)

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>

Sebastiaan van Stijn authored on 2020/03/13 06:51:18
Showing 64 changed files
... ...
@@ -3,7 +3,7 @@
3 3
 # LIBNETWORK_COMMIT is used to build the docker-userland-proxy binary. When
4 4
 # updating the binary version, consider updating github.com/docker/libnetwork
5 5
 # in vendor.conf accordingly
6
-: "${LIBNETWORK_COMMIT:=bf2bd42abc0a3734f12b5ec724e571434e42c669}"
6
+: "${LIBNETWORK_COMMIT:=beab24292cb13d13670985fc860215f9e296bd47}"
7 7
 
8 8
 install_proxy() {
9 9
 	case "$1" in
... ...
@@ -38,7 +38,7 @@ github.com/gofrs/flock                              392e7fae8f1b0bdbd67dad7237d2
38 38
 # libnetwork
39 39
 
40 40
 # When updating, also update LIBNETWORK_COMMIT in hack/dockerfile/install/proxy.installer accordingly
41
-github.com/docker/libnetwork                        bf2bd42abc0a3734f12b5ec724e571434e42c669
41
+github.com/docker/libnetwork                        beab24292cb13d13670985fc860215f9e296bd47
42 42
 github.com/docker/go-events                         9461782956ad83b30282bf90e31fa6a70c255ba9
43 43
 github.com/armon/go-radix                           e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
44 44
 github.com/armon/go-metrics                         eb0af217e5e9747e41dd5303755356b62d28e3ec
... ...
@@ -50,8 +50,9 @@ github.com/hashicorp/go-sockaddr                    c7188e74f6acae5a989bdc959aa7
50 50
 github.com/hashicorp/go-multierror                  886a7fbe3eb1c874d46f623bfa70af45f425b3d1 # v1.0.0
51 51
 github.com/hashicorp/serf                           598c54895cc5a7b1a24a398d635e8c0ea0959870
52 52
 github.com/docker/libkv                             458977154600b9f23984d9f4b82e79570b5ae12b
53
-github.com/vishvananda/netns                        7109fa855b0ff1ebef7fbd2f6aa613e8db7cfbc0
54
-github.com/vishvananda/netlink                      a2ad57a690f3caf3015351d2d6e1c0b95c349752
53
+github.com/vishvananda/netns                        0a2b9b5464df8343199164a0321edf3313202f7e
54
+github.com/vishvananda/netlink                      f049be6f391489d3f374498fe0c8df8449258372 # v1.1.0
55
+github.com/moby/ipvs                                8f137da6850a975020f4f739c589d293dd3a9d7b # v1.0.0
55 56
 
56 57
 # When updating, consider updating TOMLV_COMMIT in hack/dockerfile/install/tomlv.installer accordingly
57 58
 github.com/BurntSushi/toml                          3012a1dbe2e4bd1391d42b32f0577cb7bbc7f005 # v0.3.1
... ...
@@ -793,7 +793,7 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
793 793
 		return
794 794
 	}
795 795
 	for {
796
-		msgs, err := nlSock.Receive()
796
+		msgs, _, err := nlSock.Receive()
797 797
 		if err != nil {
798 798
 			n.Lock()
799 799
 			nlFd := nlSock.GetFd()
800 800
deleted file mode 100644
... ...
@@ -1,178 +0,0 @@
1
-// +build linux
2
-
3
-package ipvs
4
-
5
-const (
6
-	genlCtrlID = 0x10
7
-)
8
-
9
-// GENL control commands
10
-const (
11
-	genlCtrlCmdUnspec uint8 = iota
12
-	genlCtrlCmdNewFamily
13
-	genlCtrlCmdDelFamily
14
-	genlCtrlCmdGetFamily
15
-)
16
-
17
-// GENL family attributes
18
-const (
19
-	genlCtrlAttrUnspec int = iota
20
-	genlCtrlAttrFamilyID
21
-	genlCtrlAttrFamilyName
22
-)
23
-
24
-// IPVS genl commands
25
-const (
26
-	ipvsCmdUnspec uint8 = iota
27
-	ipvsCmdNewService
28
-	ipvsCmdSetService
29
-	ipvsCmdDelService
30
-	ipvsCmdGetService
31
-	ipvsCmdNewDest
32
-	ipvsCmdSetDest
33
-	ipvsCmdDelDest
34
-	ipvsCmdGetDest
35
-	ipvsCmdNewDaemon
36
-	ipvsCmdDelDaemon
37
-	ipvsCmdGetDaemon
38
-	ipvsCmdSetConfig
39
-	ipvsCmdGetConfig
40
-	ipvsCmdSetInfo
41
-	ipvsCmdGetInfo
42
-	ipvsCmdZero
43
-	ipvsCmdFlush
44
-)
45
-
46
-// Attributes used in the first level of commands
47
-const (
48
-	ipvsCmdAttrUnspec int = iota
49
-	ipvsCmdAttrService
50
-	ipvsCmdAttrDest
51
-	ipvsCmdAttrDaemon
52
-	ipvsCmdAttrTimeoutTCP
53
-	ipvsCmdAttrTimeoutTCPFin
54
-	ipvsCmdAttrTimeoutUDP
55
-)
56
-
57
-// Attributes used to describe a service. Used inside nested attribute
58
-// ipvsCmdAttrService
59
-const (
60
-	ipvsSvcAttrUnspec int = iota
61
-	ipvsSvcAttrAddressFamily
62
-	ipvsSvcAttrProtocol
63
-	ipvsSvcAttrAddress
64
-	ipvsSvcAttrPort
65
-	ipvsSvcAttrFWMark
66
-	ipvsSvcAttrSchedName
67
-	ipvsSvcAttrFlags
68
-	ipvsSvcAttrTimeout
69
-	ipvsSvcAttrNetmask
70
-	ipvsSvcAttrStats
71
-	ipvsSvcAttrPEName
72
-)
73
-
74
-// Attributes used to describe a destination (real server). Used
75
-// inside nested attribute ipvsCmdAttrDest.
76
-const (
77
-	ipvsDestAttrUnspec int = iota
78
-	ipvsDestAttrAddress
79
-	ipvsDestAttrPort
80
-	ipvsDestAttrForwardingMethod
81
-	ipvsDestAttrWeight
82
-	ipvsDestAttrUpperThreshold
83
-	ipvsDestAttrLowerThreshold
84
-	ipvsDestAttrActiveConnections
85
-	ipvsDestAttrInactiveConnections
86
-	ipvsDestAttrPersistentConnections
87
-	ipvsDestAttrStats
88
-	ipvsDestAttrAddressFamily
89
-)
90
-
91
-// IPVS Svc Statistics constants
92
-
93
-const (
94
-	ipvsSvcStatsUnspec int = iota
95
-	ipvsSvcStatsConns
96
-	ipvsSvcStatsPktsIn
97
-	ipvsSvcStatsPktsOut
98
-	ipvsSvcStatsBytesIn
99
-	ipvsSvcStatsBytesOut
100
-	ipvsSvcStatsCPS
101
-	ipvsSvcStatsPPSIn
102
-	ipvsSvcStatsPPSOut
103
-	ipvsSvcStatsBPSIn
104
-	ipvsSvcStatsBPSOut
105
-)
106
-
107
-// Destination forwarding methods
108
-const (
109
-	// ConnectionFlagFwdMask indicates the mask in the connection
110
-	// flags which is used by forwarding method bits.
111
-	ConnectionFlagFwdMask = 0x0007
112
-
113
-	// ConnectionFlagMasq is used for masquerade forwarding method.
114
-	ConnectionFlagMasq = 0x0000
115
-
116
-	// ConnectionFlagLocalNode is used for local node forwarding
117
-	// method.
118
-	ConnectionFlagLocalNode = 0x0001
119
-
120
-	// ConnectionFlagTunnel is used for tunnel mode forwarding
121
-	// method.
122
-	ConnectionFlagTunnel = 0x0002
123
-
124
-	// ConnectionFlagDirectRoute is used for direct routing
125
-	// forwarding method.
126
-	ConnectionFlagDirectRoute = 0x0003
127
-)
128
-
129
-const (
130
-	// RoundRobin distributes jobs equally amongst the available
131
-	// real servers.
132
-	RoundRobin = "rr"
133
-
134
-	// LeastConnection assigns more jobs to real servers with
135
-	// fewer active jobs.
136
-	LeastConnection = "lc"
137
-
138
-	// DestinationHashing assigns jobs to servers through looking
139
-	// up a statically assigned hash table by their destination IP
140
-	// addresses.
141
-	DestinationHashing = "dh"
142
-
143
-	// SourceHashing assigns jobs to servers through looking up
144
-	// a statically assigned hash table by their source IP
145
-	// addresses.
146
-	SourceHashing = "sh"
147
-
148
-	// WeightedRoundRobin assigns jobs to real servers proportionally
149
-	// to the real servers' weight. Servers with higher weights
150
-	// receive new jobs first and get more jobs than servers
151
-	// with lower weights. Servers with equal weights get
152
-	// an equal distribution of new jobs
153
-	WeightedRoundRobin = "wrr"
154
-
155
-	// WeightedLeastConnection assigns more jobs to servers
156
-	// with fewer jobs and relative to the real servers' weight
157
-	WeightedLeastConnection = "wlc"
158
-)
159
-
160
-const (
161
-	// ConnFwdMask is a mask for the fwd methods
162
-	ConnFwdMask = 0x0007
163
-
164
-	// ConnFwdMasq denotes forwarding via masquerading/NAT
165
-	ConnFwdMasq = 0x0000
166
-
167
-	// ConnFwdLocalNode denotes forwarding to a local node
168
-	ConnFwdLocalNode = 0x0001
169
-
170
-	// ConnFwdTunnel denotes forwarding via a tunnel
171
-	ConnFwdTunnel = 0x0002
172
-
173
-	// ConnFwdDirectRoute denotes forwarding via direct routing
174
-	ConnFwdDirectRoute = 0x0003
175
-
176
-	// ConnFwdBypass denotes forwarding while bypassing the cache
177
-	ConnFwdBypass = 0x0004
178
-)
179 1
deleted file mode 100644
... ...
@@ -1,206 +0,0 @@
1
-// +build linux
2
-
3
-package ipvs
4
-
5
-import (
6
-	"fmt"
7
-	"net"
8
-	"time"
9
-
10
-	"github.com/vishvananda/netlink/nl"
11
-	"github.com/vishvananda/netns"
12
-	"golang.org/x/sys/unix"
13
-)
14
-
15
-const (
16
-	netlinkRecvSocketsTimeout = 3 * time.Second
17
-	netlinkSendSocketTimeout  = 30 * time.Second
18
-)
19
-
20
-// Service defines an IPVS service in its entirety.
21
-type Service struct {
22
-	// Virtual service address.
23
-	Address  net.IP
24
-	Protocol uint16
25
-	Port     uint16
26
-	FWMark   uint32 // Firewall mark of the service.
27
-
28
-	// Virtual service options.
29
-	SchedName     string
30
-	Flags         uint32
31
-	Timeout       uint32
32
-	Netmask       uint32
33
-	AddressFamily uint16
34
-	PEName        string
35
-	Stats         SvcStats
36
-}
37
-
38
-// SvcStats defines an IPVS service statistics
39
-type SvcStats struct {
40
-	Connections uint32
41
-	PacketsIn   uint32
42
-	PacketsOut  uint32
43
-	BytesIn     uint64
44
-	BytesOut    uint64
45
-	CPS         uint32
46
-	BPSOut      uint32
47
-	PPSIn       uint32
48
-	PPSOut      uint32
49
-	BPSIn       uint32
50
-}
51
-
52
-// Destination defines an IPVS destination (real server) in its
53
-// entirety.
54
-type Destination struct {
55
-	Address             net.IP
56
-	Port                uint16
57
-	Weight              int
58
-	ConnectionFlags     uint32
59
-	AddressFamily       uint16
60
-	UpperThreshold      uint32
61
-	LowerThreshold      uint32
62
-	ActiveConnections   int
63
-	InactiveConnections int
64
-	Stats               DstStats
65
-}
66
-
67
-// DstStats defines IPVS destination (real server) statistics
68
-type DstStats SvcStats
69
-
70
-// Config defines IPVS timeout configuration
71
-type Config struct {
72
-	TimeoutTCP    time.Duration
73
-	TimeoutTCPFin time.Duration
74
-	TimeoutUDP    time.Duration
75
-}
76
-
77
-// Handle provides a namespace specific ipvs handle to program ipvs
78
-// rules.
79
-type Handle struct {
80
-	seq  uint32
81
-	sock *nl.NetlinkSocket
82
-}
83
-
84
-// New provides a new ipvs handle in the namespace pointed to by the
85
-// passed path. It will return a valid handle or an error in case an
86
-// error occurred while creating the handle.
87
-func New(path string) (*Handle, error) {
88
-	setup()
89
-
90
-	n := netns.None()
91
-	if path != "" {
92
-		var err error
93
-		n, err = netns.GetFromPath(path)
94
-		if err != nil {
95
-			return nil, err
96
-		}
97
-	}
98
-	defer n.Close()
99
-
100
-	sock, err := nl.GetNetlinkSocketAt(n, netns.None(), unix.NETLINK_GENERIC)
101
-	if err != nil {
102
-		return nil, err
103
-	}
104
-	// Add operation timeout to avoid deadlocks
105
-	tv := unix.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds())
106
-	if err := sock.SetSendTimeout(&tv); err != nil {
107
-		return nil, err
108
-	}
109
-	tv = unix.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds())
110
-	if err := sock.SetReceiveTimeout(&tv); err != nil {
111
-		return nil, err
112
-	}
113
-
114
-	return &Handle{sock: sock}, nil
115
-}
116
-
117
-// Close closes the ipvs handle. The handle is invalid after Close
118
-// returns.
119
-func (i *Handle) Close() {
120
-	if i.sock != nil {
121
-		i.sock.Close()
122
-	}
123
-}
124
-
125
-// NewService creates a new ipvs service in the passed handle.
126
-func (i *Handle) NewService(s *Service) error {
127
-	return i.doCmd(s, nil, ipvsCmdNewService)
128
-}
129
-
130
-// IsServicePresent queries for the ipvs service in the passed handle.
131
-func (i *Handle) IsServicePresent(s *Service) bool {
132
-	return nil == i.doCmd(s, nil, ipvsCmdGetService)
133
-}
134
-
135
-// UpdateService updates an already existing service in the passed
136
-// handle.
137
-func (i *Handle) UpdateService(s *Service) error {
138
-	return i.doCmd(s, nil, ipvsCmdSetService)
139
-}
140
-
141
-// DelService deletes an already existing service in the passed
142
-// handle.
143
-func (i *Handle) DelService(s *Service) error {
144
-	return i.doCmd(s, nil, ipvsCmdDelService)
145
-}
146
-
147
-// Flush deletes all existing services in the passed
148
-// handle.
149
-func (i *Handle) Flush() error {
150
-	_, err := i.doCmdWithoutAttr(ipvsCmdFlush)
151
-	return err
152
-}
153
-
154
-// NewDestination creates a new real server in the passed ipvs
155
-// service which should already be existing in the passed handle.
156
-func (i *Handle) NewDestination(s *Service, d *Destination) error {
157
-	return i.doCmd(s, d, ipvsCmdNewDest)
158
-}
159
-
160
-// UpdateDestination updates an already existing real server in the
161
-// passed ipvs service in the passed handle.
162
-func (i *Handle) UpdateDestination(s *Service, d *Destination) error {
163
-	return i.doCmd(s, d, ipvsCmdSetDest)
164
-}
165
-
166
-// DelDestination deletes an already existing real server in the
167
-// passed ipvs service in the passed handle.
168
-func (i *Handle) DelDestination(s *Service, d *Destination) error {
169
-	return i.doCmd(s, d, ipvsCmdDelDest)
170
-}
171
-
172
-// GetServices returns an array of services configured on the Node
173
-func (i *Handle) GetServices() ([]*Service, error) {
174
-	return i.doGetServicesCmd(nil)
175
-}
176
-
177
-// GetDestinations returns an array of Destinations configured for this Service
178
-func (i *Handle) GetDestinations(s *Service) ([]*Destination, error) {
179
-	return i.doGetDestinationsCmd(s, nil)
180
-}
181
-
182
-// GetService gets details of a specific IPVS service, useful in updating statistics etc.
183
-func (i *Handle) GetService(s *Service) (*Service, error) {
184
-
185
-	res, err := i.doGetServicesCmd(s)
186
-	if err != nil {
187
-		return nil, err
188
-	}
189
-
190
-	// We are looking for exactly one service otherwise error out
191
-	if len(res) != 1 {
192
-		return nil, fmt.Errorf("Expected only one service obtained=%d", len(res))
193
-	}
194
-
195
-	return res[0], nil
196
-}
197
-
198
-// GetConfig returns the current timeout configuration
199
-func (i *Handle) GetConfig() (*Config, error) {
200
-	return i.doGetConfigCmd()
201
-}
202
-
203
-// SetConfig sets the current timeout configuration. 0: no change
204
-func (i *Handle) SetConfig(c *Config) error {
205
-	return i.doSetConfigCmd(c)
206
-}
207 1
deleted file mode 100644
... ...
@@ -1,636 +0,0 @@
1
-// +build linux
2
-
3
-package ipvs
4
-
5
-import (
6
-	"bytes"
7
-	"encoding/binary"
8
-	"fmt"
9
-	"net"
10
-	"os/exec"
11
-	"strings"
12
-	"sync"
13
-	"sync/atomic"
14
-	"syscall"
15
-	"time"
16
-	"unsafe"
17
-
18
-	"github.com/sirupsen/logrus"
19
-	"github.com/vishvananda/netlink/nl"
20
-	"github.com/vishvananda/netns"
21
-)
22
-
23
-// For Quick Reference IPVS related netlink message is described at the end of this file.
24
-var (
25
-	native     = nl.NativeEndian()
26
-	ipvsFamily int
27
-	ipvsOnce   sync.Once
28
-)
29
-
30
-type genlMsgHdr struct {
31
-	cmd      uint8
32
-	version  uint8
33
-	reserved uint16
34
-}
35
-
36
-type ipvsFlags struct {
37
-	flags uint32
38
-	mask  uint32
39
-}
40
-
41
-func deserializeGenlMsg(b []byte) (hdr *genlMsgHdr) {
42
-	return (*genlMsgHdr)(unsafe.Pointer(&b[0:unsafe.Sizeof(*hdr)][0]))
43
-}
44
-
45
-func (hdr *genlMsgHdr) Serialize() []byte {
46
-	return (*(*[unsafe.Sizeof(*hdr)]byte)(unsafe.Pointer(hdr)))[:]
47
-}
48
-
49
-func (hdr *genlMsgHdr) Len() int {
50
-	return int(unsafe.Sizeof(*hdr))
51
-}
52
-
53
-func (f *ipvsFlags) Serialize() []byte {
54
-	return (*(*[unsafe.Sizeof(*f)]byte)(unsafe.Pointer(f)))[:]
55
-}
56
-
57
-func (f *ipvsFlags) Len() int {
58
-	return int(unsafe.Sizeof(*f))
59
-}
60
-
61
-func setup() {
62
-	ipvsOnce.Do(func() {
63
-		var err error
64
-		if out, err := exec.Command("modprobe", "-va", "ip_vs").CombinedOutput(); err != nil {
65
-			logrus.Warnf("Running modprobe ip_vs failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
66
-		}
67
-
68
-		ipvsFamily, err = getIPVSFamily()
69
-		if err != nil {
70
-			logrus.Error("Could not get ipvs family information from the kernel. It is possible that ipvs is not enabled in your kernel. Native loadbalancing will not work until this is fixed.")
71
-		}
72
-	})
73
-}
74
-
75
-func fillService(s *Service) nl.NetlinkRequestData {
76
-	cmdAttr := nl.NewRtAttr(ipvsCmdAttrService, nil)
77
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddressFamily, nl.Uint16Attr(s.AddressFamily))
78
-	if s.FWMark != 0 {
79
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFWMark, nl.Uint32Attr(s.FWMark))
80
-	} else {
81
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrProtocol, nl.Uint16Attr(s.Protocol))
82
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddress, rawIPData(s.Address))
83
-
84
-		// Port needs to be in network byte order.
85
-		portBuf := new(bytes.Buffer)
86
-		binary.Write(portBuf, binary.BigEndian, s.Port)
87
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPort, portBuf.Bytes())
88
-	}
89
-
90
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrSchedName, nl.ZeroTerminated(s.SchedName))
91
-	if s.PEName != "" {
92
-		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPEName, nl.ZeroTerminated(s.PEName))
93
-	}
94
-	f := &ipvsFlags{
95
-		flags: s.Flags,
96
-		mask:  0xFFFFFFFF,
97
-	}
98
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFlags, f.Serialize())
99
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrTimeout, nl.Uint32Attr(s.Timeout))
100
-	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrNetmask, nl.Uint32Attr(s.Netmask))
101
-	return cmdAttr
102
-}
103
-
104
-func fillDestination(d *Destination) nl.NetlinkRequestData {
105
-	cmdAttr := nl.NewRtAttr(ipvsCmdAttrDest, nil)
106
-
107
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrAddress, rawIPData(d.Address))
108
-	// Port needs to be in network byte order.
109
-	portBuf := new(bytes.Buffer)
110
-	binary.Write(portBuf, binary.BigEndian, d.Port)
111
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrPort, portBuf.Bytes())
112
-
113
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrForwardingMethod, nl.Uint32Attr(d.ConnectionFlags&ConnectionFlagFwdMask))
114
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrWeight, nl.Uint32Attr(uint32(d.Weight)))
115
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrUpperThreshold, nl.Uint32Attr(d.UpperThreshold))
116
-	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrLowerThreshold, nl.Uint32Attr(d.LowerThreshold))
117
-
118
-	return cmdAttr
119
-}
120
-
121
-func (i *Handle) doCmdwithResponse(s *Service, d *Destination, cmd uint8) ([][]byte, error) {
122
-	req := newIPVSRequest(cmd)
123
-	req.Seq = atomic.AddUint32(&i.seq, 1)
124
-
125
-	if s == nil {
126
-		req.Flags |= syscall.NLM_F_DUMP                    //Flag to dump all messages
127
-		req.AddData(nl.NewRtAttr(ipvsCmdAttrService, nil)) //Add a dummy attribute
128
-	} else {
129
-		req.AddData(fillService(s))
130
-	}
131
-
132
-	if d == nil {
133
-		if cmd == ipvsCmdGetDest {
134
-			req.Flags |= syscall.NLM_F_DUMP
135
-		}
136
-
137
-	} else {
138
-		req.AddData(fillDestination(d))
139
-	}
140
-
141
-	res, err := execute(i.sock, req, 0)
142
-	if err != nil {
143
-		return [][]byte{}, err
144
-	}
145
-
146
-	return res, nil
147
-}
148
-
149
-func (i *Handle) doCmd(s *Service, d *Destination, cmd uint8) error {
150
-	_, err := i.doCmdwithResponse(s, d, cmd)
151
-
152
-	return err
153
-}
154
-
155
-func getIPVSFamily() (int, error) {
156
-	sock, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), syscall.NETLINK_GENERIC)
157
-	if err != nil {
158
-		return 0, err
159
-	}
160
-	defer sock.Close()
161
-
162
-	req := newGenlRequest(genlCtrlID, genlCtrlCmdGetFamily)
163
-	req.AddData(nl.NewRtAttr(genlCtrlAttrFamilyName, nl.ZeroTerminated("IPVS")))
164
-
165
-	msgs, err := execute(sock, req, 0)
166
-	if err != nil {
167
-		return 0, err
168
-	}
169
-
170
-	for _, m := range msgs {
171
-		hdr := deserializeGenlMsg(m)
172
-		attrs, err := nl.ParseRouteAttr(m[hdr.Len():])
173
-		if err != nil {
174
-			return 0, err
175
-		}
176
-
177
-		for _, attr := range attrs {
178
-			switch int(attr.Attr.Type) {
179
-			case genlCtrlAttrFamilyID:
180
-				return int(native.Uint16(attr.Value[0:2])), nil
181
-			}
182
-		}
183
-	}
184
-
185
-	return 0, fmt.Errorf("no family id in the netlink response")
186
-}
187
-
188
-func rawIPData(ip net.IP) []byte {
189
-	family := nl.GetIPFamily(ip)
190
-	if family == nl.FAMILY_V4 {
191
-		return ip.To4()
192
-	}
193
-	return ip
194
-}
195
-
196
-func newIPVSRequest(cmd uint8) *nl.NetlinkRequest {
197
-	return newGenlRequest(ipvsFamily, cmd)
198
-}
199
-
200
-func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest {
201
-	req := nl.NewNetlinkRequest(familyID, syscall.NLM_F_ACK)
202
-	req.AddData(&genlMsgHdr{cmd: cmd, version: 1})
203
-	return req
204
-}
205
-
206
-func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) {
207
-	if err := s.Send(req); err != nil {
208
-		return nil, err
209
-	}
210
-
211
-	pid, err := s.GetPid()
212
-	if err != nil {
213
-		return nil, err
214
-	}
215
-
216
-	var res [][]byte
217
-
218
-done:
219
-	for {
220
-		msgs, err := s.Receive()
221
-		if err != nil {
222
-			if s.GetFd() == -1 {
223
-				return nil, fmt.Errorf("Socket got closed on receive")
224
-			}
225
-			if err == syscall.EAGAIN {
226
-				// timeout fired
227
-				continue
228
-			}
229
-			return nil, err
230
-		}
231
-		for _, m := range msgs {
232
-			if m.Header.Seq != req.Seq {
233
-				continue
234
-			}
235
-			if m.Header.Pid != pid {
236
-				return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
237
-			}
238
-			if m.Header.Type == syscall.NLMSG_DONE {
239
-				break done
240
-			}
241
-			if m.Header.Type == syscall.NLMSG_ERROR {
242
-				error := int32(native.Uint32(m.Data[0:4]))
243
-				if error == 0 {
244
-					break done
245
-				}
246
-				return nil, syscall.Errno(-error)
247
-			}
248
-			if resType != 0 && m.Header.Type != resType {
249
-				continue
250
-			}
251
-			res = append(res, m.Data)
252
-			if m.Header.Flags&syscall.NLM_F_MULTI == 0 {
253
-				break done
254
-			}
255
-		}
256
-	}
257
-	return res, nil
258
-}
259
-
260
-func parseIP(ip []byte, family uint16) (net.IP, error) {
261
-
262
-	var resIP net.IP
263
-
264
-	switch family {
265
-	case syscall.AF_INET:
266
-		resIP = (net.IP)(ip[:4])
267
-	case syscall.AF_INET6:
268
-		resIP = (net.IP)(ip[:16])
269
-	default:
270
-		return nil, fmt.Errorf("parseIP Error ip=%v", ip)
271
-
272
-	}
273
-	return resIP, nil
274
-}
275
-
276
-// assembleStats parses a chain of netlink statistics attributes into a SvcStats
277
-func assembleStats(msg []byte) (SvcStats, error) {
278
-
279
-	var s SvcStats
280
-
281
-	attrs, err := nl.ParseRouteAttr(msg)
282
-	if err != nil {
283
-		return s, err
284
-	}
285
-
286
-	for _, attr := range attrs {
287
-		attrType := int(attr.Attr.Type)
288
-		switch attrType {
289
-		case ipvsSvcStatsConns:
290
-			s.Connections = native.Uint32(attr.Value)
291
-		case ipvsSvcStatsPktsIn:
292
-			s.PacketsIn = native.Uint32(attr.Value)
293
-		case ipvsSvcStatsPktsOut:
294
-			s.PacketsOut = native.Uint32(attr.Value)
295
-		case ipvsSvcStatsBytesIn:
296
-			s.BytesIn = native.Uint64(attr.Value)
297
-		case ipvsSvcStatsBytesOut:
298
-			s.BytesOut = native.Uint64(attr.Value)
299
-		case ipvsSvcStatsCPS:
300
-			s.CPS = native.Uint32(attr.Value)
301
-		case ipvsSvcStatsPPSIn:
302
-			s.PPSIn = native.Uint32(attr.Value)
303
-		case ipvsSvcStatsPPSOut:
304
-			s.PPSOut = native.Uint32(attr.Value)
305
-		case ipvsSvcStatsBPSIn:
306
-			s.BPSIn = native.Uint32(attr.Value)
307
-		case ipvsSvcStatsBPSOut:
308
-			s.BPSOut = native.Uint32(attr.Value)
309
-		}
310
-	}
311
-	return s, nil
312
-}
313
-
314
-// assembleService assembles a service back from a chain of netlink attributes
315
-func assembleService(attrs []syscall.NetlinkRouteAttr) (*Service, error) {
316
-
317
-	var s Service
318
-	var addressBytes []byte
319
-
320
-	for _, attr := range attrs {
321
-
322
-		attrType := int(attr.Attr.Type)
323
-
324
-		switch attrType {
325
-
326
-		case ipvsSvcAttrAddressFamily:
327
-			s.AddressFamily = native.Uint16(attr.Value)
328
-		case ipvsSvcAttrProtocol:
329
-			s.Protocol = native.Uint16(attr.Value)
330
-		case ipvsSvcAttrAddress:
331
-			addressBytes = attr.Value
332
-		case ipvsSvcAttrPort:
333
-			s.Port = binary.BigEndian.Uint16(attr.Value)
334
-		case ipvsSvcAttrFWMark:
335
-			s.FWMark = native.Uint32(attr.Value)
336
-		case ipvsSvcAttrSchedName:
337
-			s.SchedName = nl.BytesToString(attr.Value)
338
-		case ipvsSvcAttrFlags:
339
-			s.Flags = native.Uint32(attr.Value)
340
-		case ipvsSvcAttrTimeout:
341
-			s.Timeout = native.Uint32(attr.Value)
342
-		case ipvsSvcAttrNetmask:
343
-			s.Netmask = native.Uint32(attr.Value)
344
-		case ipvsSvcAttrStats:
345
-			stats, err := assembleStats(attr.Value)
346
-			if err != nil {
347
-				return nil, err
348
-			}
349
-			s.Stats = stats
350
-		}
351
-
352
-	}
353
-
354
-	// parse Address after parsing AddressFamily, in case of parseIP error
355
-	if addressBytes != nil {
356
-		ip, err := parseIP(addressBytes, s.AddressFamily)
357
-		if err != nil {
358
-			return nil, err
359
-		}
360
-		s.Address = ip
361
-	}
362
-
363
-	return &s, nil
364
-}
365
-
366
-// parseService given a ipvs netlink response this function will respond with a valid service entry, an error otherwise
367
-func (i *Handle) parseService(msg []byte) (*Service, error) {
368
-
369
-	var s *Service
370
-
371
-	//Remove General header for this message and parse the NetLink message
372
-	hdr := deserializeGenlMsg(msg)
373
-	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
374
-	if err != nil {
375
-		return nil, err
376
-	}
377
-	if len(NetLinkAttrs) == 0 {
378
-		return nil, fmt.Errorf("error no valid netlink message found while parsing service record")
379
-	}
380
-
381
-	//Now Parse and get IPVS related attributes messages packed in this message.
382
-	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
383
-	if err != nil {
384
-		return nil, err
385
-	}
386
-
387
-	//Assemble all the IPVS related attribute messages and create a service record
388
-	s, err = assembleService(ipvsAttrs)
389
-	if err != nil {
390
-		return nil, err
391
-	}
392
-
393
-	return s, nil
394
-}
395
-
396
-// doGetServicesCmd a wrapper which could be used commonly for both GetServices() and GetService(*Service)
397
-func (i *Handle) doGetServicesCmd(svc *Service) ([]*Service, error) {
398
-	var res []*Service
399
-
400
-	msgs, err := i.doCmdwithResponse(svc, nil, ipvsCmdGetService)
401
-	if err != nil {
402
-		return nil, err
403
-	}
404
-
405
-	for _, msg := range msgs {
406
-		srv, err := i.parseService(msg)
407
-		if err != nil {
408
-			return nil, err
409
-		}
410
-		res = append(res, srv)
411
-	}
412
-
413
-	return res, nil
414
-}
415
-
416
-// doCmdWithoutAttr a simple wrapper of netlink socket execute command
417
-func (i *Handle) doCmdWithoutAttr(cmd uint8) ([][]byte, error) {
418
-	req := newIPVSRequest(cmd)
419
-	req.Seq = atomic.AddUint32(&i.seq, 1)
420
-	return execute(i.sock, req, 0)
421
-}
422
-
423
-func assembleDestination(attrs []syscall.NetlinkRouteAttr) (*Destination, error) {
424
-
425
-	var d Destination
426
-	var addressBytes []byte
427
-
428
-	for _, attr := range attrs {
429
-
430
-		attrType := int(attr.Attr.Type)
431
-
432
-		switch attrType {
433
-
434
-		case ipvsDestAttrAddressFamily:
435
-			d.AddressFamily = native.Uint16(attr.Value)
436
-		case ipvsDestAttrAddress:
437
-			addressBytes = attr.Value
438
-		case ipvsDestAttrPort:
439
-			d.Port = binary.BigEndian.Uint16(attr.Value)
440
-		case ipvsDestAttrForwardingMethod:
441
-			d.ConnectionFlags = native.Uint32(attr.Value)
442
-		case ipvsDestAttrWeight:
443
-			d.Weight = int(native.Uint16(attr.Value))
444
-		case ipvsDestAttrUpperThreshold:
445
-			d.UpperThreshold = native.Uint32(attr.Value)
446
-		case ipvsDestAttrLowerThreshold:
447
-			d.LowerThreshold = native.Uint32(attr.Value)
448
-		case ipvsDestAttrActiveConnections:
449
-			d.ActiveConnections = int(native.Uint16(attr.Value))
450
-		case ipvsDestAttrInactiveConnections:
451
-			d.InactiveConnections = int(native.Uint16(attr.Value))
452
-		case ipvsSvcAttrStats:
453
-			stats, err := assembleStats(attr.Value)
454
-			if err != nil {
455
-				return nil, err
456
-			}
457
-			d.Stats = DstStats(stats)
458
-		}
459
-	}
460
-
461
-	// parse Address after parsing AddressFamily, in case of parseIP error
462
-	if addressBytes != nil {
463
-		ip, err := parseIP(addressBytes, d.AddressFamily)
464
-		if err != nil {
465
-			return nil, err
466
-		}
467
-		d.Address = ip
468
-	}
469
-
470
-	return &d, nil
471
-}
472
-
473
-// parseDestination given a ipvs netlink response this function will respond with a valid destination entry, an error otherwise
474
-func (i *Handle) parseDestination(msg []byte) (*Destination, error) {
475
-	var dst *Destination
476
-
477
-	//Remove General header for this message
478
-	hdr := deserializeGenlMsg(msg)
479
-	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
480
-	if err != nil {
481
-		return nil, err
482
-	}
483
-	if len(NetLinkAttrs) == 0 {
484
-		return nil, fmt.Errorf("error no valid netlink message found while parsing destination record")
485
-	}
486
-
487
-	//Now Parse and get IPVS related attributes messages packed in this message.
488
-	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
489
-	if err != nil {
490
-		return nil, err
491
-	}
492
-
493
-	//Assemble netlink attributes and create a Destination record
494
-	dst, err = assembleDestination(ipvsAttrs)
495
-	if err != nil {
496
-		return nil, err
497
-	}
498
-
499
-	return dst, nil
500
-}
501
-
502
-// doGetDestinationsCmd a wrapper function to be used by GetDestinations and GetDestination(d) apis
503
-func (i *Handle) doGetDestinationsCmd(s *Service, d *Destination) ([]*Destination, error) {
504
-
505
-	var res []*Destination
506
-
507
-	msgs, err := i.doCmdwithResponse(s, d, ipvsCmdGetDest)
508
-	if err != nil {
509
-		return nil, err
510
-	}
511
-
512
-	for _, msg := range msgs {
513
-		dest, err := i.parseDestination(msg)
514
-		if err != nil {
515
-			return res, err
516
-		}
517
-		res = append(res, dest)
518
-	}
519
-	return res, nil
520
-}
521
-
522
-// parseConfig given a ipvs netlink response this function will respond with a valid config entry, an error otherwise
523
-func (i *Handle) parseConfig(msg []byte) (*Config, error) {
524
-	var c Config
525
-
526
-	//Remove General header for this message
527
-	hdr := deserializeGenlMsg(msg)
528
-	attrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
529
-	if err != nil {
530
-		return nil, err
531
-	}
532
-
533
-	for _, attr := range attrs {
534
-		attrType := int(attr.Attr.Type)
535
-		switch attrType {
536
-		case ipvsCmdAttrTimeoutTCP:
537
-			c.TimeoutTCP = time.Duration(native.Uint32(attr.Value)) * time.Second
538
-		case ipvsCmdAttrTimeoutTCPFin:
539
-			c.TimeoutTCPFin = time.Duration(native.Uint32(attr.Value)) * time.Second
540
-		case ipvsCmdAttrTimeoutUDP:
541
-			c.TimeoutUDP = time.Duration(native.Uint32(attr.Value)) * time.Second
542
-		}
543
-	}
544
-
545
-	return &c, nil
546
-}
547
-
548
-// doGetConfigCmd a wrapper function to be used by GetConfig
549
-func (i *Handle) doGetConfigCmd() (*Config, error) {
550
-	msg, err := i.doCmdWithoutAttr(ipvsCmdGetConfig)
551
-	if err != nil {
552
-		return nil, err
553
-	}
554
-
555
-	res, err := i.parseConfig(msg[0])
556
-	if err != nil {
557
-		return res, err
558
-	}
559
-	return res, nil
560
-}
561
-
562
-// doSetConfigCmd a wrapper function to be used by SetConfig
563
-func (i *Handle) doSetConfigCmd(c *Config) error {
564
-	req := newIPVSRequest(ipvsCmdSetConfig)
565
-	req.Seq = atomic.AddUint32(&i.seq, 1)
566
-
567
-	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCP, nl.Uint32Attr(uint32(c.TimeoutTCP.Seconds()))))
568
-	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCPFin, nl.Uint32Attr(uint32(c.TimeoutTCPFin.Seconds()))))
569
-	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutUDP, nl.Uint32Attr(uint32(c.TimeoutUDP.Seconds()))))
570
-
571
-	_, err := execute(i.sock, req, 0)
572
-
573
-	return err
574
-}
575
-
576
-// IPVS related netlink message format explained
577
-
578
-/* EACH NETLINK MSG is of the below format, this is what we will receive from execute() api.
579
-   If we have multiple netlink objects to process like GetServices() etc., execute() will
580
-   supply an array of this below object
581
-
582
-            NETLINK MSG
583
-|-----------------------------------|
584
-    0        1        2        3
585
-|--------|--------|--------|--------| -
586
-| CMD ID |  VER   |    RESERVED     | |==> General Message Header represented by genlMsgHdr
587
-|-----------------------------------| -
588
-|    ATTR LEN     |   ATTR TYPE     | |
589
-|-----------------------------------| |
590
-|                                   | |
591
-|              VALUE                | |
592
-|     []byte Array of IPVS MSG      | |==> Attribute Message represented by syscall.NetlinkRouteAttr
593
-|        PADDED BY 4 BYTES          | |
594
-|                                   | |
595
-|-----------------------------------| -
596
-
597
-
598
- Once We strip genlMsgHdr from above NETLINK MSG, we should parse the VALUE.
599
- VALUE will have an array of netlink attributes (syscall.NetlinkRouteAttr) such that each attribute will
600
- represent a "Service" or "Destination" object's field.  If we assemble these attributes we can construct
601
- Service or Destination.
602
-
603
-            IPVS MSG
604
-|-----------------------------------|
605
-     0        1        2        3
606
-|--------|--------|--------|--------|
607
-|    ATTR LEN     |    ATTR TYPE    |
608
-|-----------------------------------|
609
-|                                   |
610
-|                                   |
611
-| []byte IPVS ATTRIBUTE  BY 4 BYTES |
612
-|                                   |
613
-|                                   |
614
-|-----------------------------------|
615
-           NEXT ATTRIBUTE
616
-|-----------------------------------|
617
-|    ATTR LEN     |    ATTR TYPE    |
618
-|-----------------------------------|
619
-|                                   |
620
-|                                   |
621
-| []byte IPVS ATTRIBUTE  BY 4 BYTES |
622
-|                                   |
623
-|                                   |
624
-|-----------------------------------|
625
-           NEXT ATTRIBUTE
626
-|-----------------------------------|
627
-|    ATTR LEN     |    ATTR TYPE    |
628
-|-----------------------------------|
629
-|                                   |
630
-|                                   |
631
-| []byte IPVS ATTRIBUTE  BY 4 BYTES |
632
-|                                   |
633
-|                                   |
634
-|-----------------------------------|
635
-
636
-*/
... ...
@@ -16,10 +16,10 @@ import (
16 16
 
17 17
 	"github.com/docker/docker/pkg/reexec"
18 18
 	"github.com/docker/libnetwork/iptables"
19
-	"github.com/docker/libnetwork/ipvs"
20 19
 	"github.com/docker/libnetwork/ns"
21 20
 	"github.com/gogo/protobuf/proto"
22 21
 	"github.com/ishidawataru/sctp"
22
+	"github.com/moby/ipvs"
23 23
 	"github.com/sirupsen/logrus"
24 24
 	"github.com/vishvananda/netlink/nl"
25 25
 	"github.com/vishvananda/netns"
... ...
@@ -45,8 +45,8 @@ github.com/sirupsen/logrus              8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f
45 45
 github.com/konsorten/go-windows-terminal-sequences   5c8c8bd35d3832f5d134ae1e1e375b69a4d25242 # v1.0.1
46 46
 github.com/ugorji/go                    b4c50a2b199d93b13dc15e78929cfb23bfdf21ab # v1.1.1
47 47
 github.com/urfave/cli                   a65b733b303f0055f8d324d805f393cd3e7a7904
48
-github.com/vishvananda/netlink          a2ad57a690f3caf3015351d2d6e1c0b95c349752 # v1.0.0
49
-github.com/vishvananda/netns            7109fa855b0ff1ebef7fbd2f6aa613e8db7cfbc0
48
+github.com/vishvananda/netlink          f049be6f391489d3f374498fe0c8df8449258372 # v1.1.0
49
+github.com/vishvananda/netns            0a2b9b5464df8343199164a0321edf3313202f7e
50 50
 golang.org/x/crypto                     b7391e95e576cacdcdd422573063bc057239113d
51 51
 golang.org/x/net                        a680a1efc54dd51c040b3b5ce4939ea3cf2ea0d1
52 52
 golang.org/x/sys                        d455e41777fca6e8a5a79e34a14b8368bc11d9ba
... ...
@@ -57,3 +57,5 @@ go.opencensus.io                        9c377598961b706d1542bd2d84d538b5094d596e
57 57
 
58 58
 gotest.tools                            1083505acf35a0bd8a696b26837e1fb3187a7a83 # v2.3.0
59 59
 github.com/google/go-cmp                3af367b6b30c263d47e8895973edcca9a49cf029 # v0.2.0
60
+
61
+github.com/moby/ipvs                    8f137da6850a975020f4f739c589d293dd3a9d7b # v1.0.0
60 62
new file mode 100644
... ...
@@ -0,0 +1,202 @@
0
+Apache License
1
+                           Version 2.0, January 2004
2
+                        http://www.apache.org/licenses/
3
+
4
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
5
+
6
+   1. Definitions.
7
+
8
+      "License" shall mean the terms and conditions for use, reproduction,
9
+      and distribution as defined by Sections 1 through 9 of this document.
10
+
11
+      "Licensor" shall mean the copyright owner or entity authorized by
12
+      the copyright owner that is granting the License.
13
+
14
+      "Legal Entity" shall mean the union of the acting entity and all
15
+      other entities that control, are controlled by, or are under common
16
+      control with that entity. For the purposes of this definition,
17
+      "control" means (i) the power, direct or indirect, to cause the
18
+      direction or management of such entity, whether by contract or
19
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
20
+      outstanding shares, or (iii) beneficial ownership of such entity.
21
+
22
+      "You" (or "Your") shall mean an individual or Legal Entity
23
+      exercising permissions granted by this License.
24
+
25
+      "Source" form shall mean the preferred form for making modifications,
26
+      including but not limited to software source code, documentation
27
+      source, and configuration files.
28
+
29
+      "Object" form shall mean any form resulting from mechanical
30
+      transformation or translation of a Source form, including but
31
+      not limited to compiled object code, generated documentation,
32
+      and conversions to other media types.
33
+
34
+      "Work" shall mean the work of authorship, whether in Source or
35
+      Object form, made available under the License, as indicated by a
36
+      copyright notice that is included in or attached to the work
37
+      (an example is provided in the Appendix below).
38
+
39
+      "Derivative Works" shall mean any work, whether in Source or Object
40
+      form, that is based on (or derived from) the Work and for which the
41
+      editorial revisions, annotations, elaborations, or other modifications
42
+      represent, as a whole, an original work of authorship. For the purposes
43
+      of this License, Derivative Works shall not include works that remain
44
+      separable from, or merely link (or bind by name) to the interfaces of,
45
+      the Work and Derivative Works thereof.
46
+
47
+      "Contribution" shall mean any work of authorship, including
48
+      the original version of the Work and any modifications or additions
49
+      to that Work or Derivative Works thereof, that is intentionally
50
+      submitted to Licensor for inclusion in the Work by the copyright owner
51
+      or by an individual or Legal Entity authorized to submit on behalf of
52
+      the copyright owner. For the purposes of this definition, "submitted"
53
+      means any form of electronic, verbal, or written communication sent
54
+      to the Licensor or its representatives, including but not limited to
55
+      communication on electronic mailing lists, source code control systems,
56
+      and issue tracking systems that are managed by, or on behalf of, the
57
+      Licensor for the purpose of discussing and improving the Work, but
58
+      excluding communication that is conspicuously marked or otherwise
59
+      designated in writing by the copyright owner as "Not a Contribution."
60
+
61
+      "Contributor" shall mean Licensor and any individual or Legal Entity
62
+      on behalf of whom a Contribution has been received by Licensor and
63
+      subsequently incorporated within the Work.
64
+
65
+   2. Grant of Copyright License. Subject to the terms and conditions of
66
+      this License, each Contributor hereby grants to You a perpetual,
67
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
68
+      copyright license to reproduce, prepare Derivative Works of,
69
+      publicly display, publicly perform, sublicense, and distribute the
70
+      Work and such Derivative Works in Source or Object form.
71
+
72
+   3. Grant of Patent License. Subject to the terms and conditions of
73
+      this License, each Contributor hereby grants to You a perpetual,
74
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
75
+      (except as stated in this section) patent license to make, have made,
76
+      use, offer to sell, sell, import, and otherwise transfer the Work,
77
+      where such license applies only to those patent claims licensable
78
+      by such Contributor that are necessarily infringed by their
79
+      Contribution(s) alone or by combination of their Contribution(s)
80
+      with the Work to which such Contribution(s) was submitted. If You
81
+      institute patent litigation against any entity (including a
82
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
83
+      or a Contribution incorporated within the Work constitutes direct
84
+      or contributory patent infringement, then any patent licenses
85
+      granted to You under this License for that Work shall terminate
86
+      as of the date such litigation is filed.
87
+
88
+   4. Redistribution. You may reproduce and distribute copies of the
89
+      Work or Derivative Works thereof in any medium, with or without
90
+      modifications, and in Source or Object form, provided that You
91
+      meet the following conditions:
92
+
93
+      (a) You must give any other recipients of the Work or
94
+          Derivative Works a copy of this License; and
95
+
96
+      (b) You must cause any modified files to carry prominent notices
97
+          stating that You changed the files; and
98
+
99
+      (c) You must retain, in the Source form of any Derivative Works
100
+          that You distribute, all copyright, patent, trademark, and
101
+          attribution notices from the Source form of the Work,
102
+          excluding those notices that do not pertain to any part of
103
+          the Derivative Works; and
104
+
105
+      (d) If the Work includes a "NOTICE" text file as part of its
106
+          distribution, then any Derivative Works that You distribute must
107
+          include a readable copy of the attribution notices contained
108
+          within such NOTICE file, excluding those notices that do not
109
+          pertain to any part of the Derivative Works, in at least one
110
+          of the following places: within a NOTICE text file distributed
111
+          as part of the Derivative Works; within the Source form or
112
+          documentation, if provided along with the Derivative Works; or,
113
+          within a display generated by the Derivative Works, if and
114
+          wherever such third-party notices normally appear. The contents
115
+          of the NOTICE file are for informational purposes only and
116
+          do not modify the License. You may add Your own attribution
117
+          notices within Derivative Works that You distribute, alongside
118
+          or as an addendum to the NOTICE text from the Work, provided
119
+          that such additional attribution notices cannot be construed
120
+          as modifying the License.
121
+
122
+      You may add Your own copyright statement to Your modifications and
123
+      may provide additional or different license terms and conditions
124
+      for use, reproduction, or distribution of Your modifications, or
125
+      for any such Derivative Works as a whole, provided Your use,
126
+      reproduction, and distribution of the Work otherwise complies with
127
+      the conditions stated in this License.
128
+
129
+   5. Submission of Contributions. Unless You explicitly state otherwise,
130
+      any Contribution intentionally submitted for inclusion in the Work
131
+      by You to the Licensor shall be under the terms and conditions of
132
+      this License, without any additional terms or conditions.
133
+      Notwithstanding the above, nothing herein shall supersede or modify
134
+      the terms of any separate license agreement you may have executed
135
+      with Licensor regarding such Contributions.
136
+
137
+   6. Trademarks. This License does not grant permission to use the trade
138
+      names, trademarks, service marks, or product names of the Licensor,
139
+      except as required for reasonable and customary use in describing the
140
+      origin of the Work and reproducing the content of the NOTICE file.
141
+
142
+   7. Disclaimer of Warranty. Unless required by applicable law or
143
+      agreed to in writing, Licensor provides the Work (and each
144
+      Contributor provides its Contributions) on an "AS IS" BASIS,
145
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
146
+      implied, including, without limitation, any warranties or conditions
147
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
148
+      PARTICULAR PURPOSE. You are solely responsible for determining the
149
+      appropriateness of using or redistributing the Work and assume any
150
+      risks associated with Your exercise of permissions under this License.
151
+
152
+   8. Limitation of Liability. In no event and under no legal theory,
153
+      whether in tort (including negligence), contract, or otherwise,
154
+      unless required by applicable law (such as deliberate and grossly
155
+      negligent acts) or agreed to in writing, shall any Contributor be
156
+      liable to You for damages, including any direct, indirect, special,
157
+      incidental, or consequential damages of any character arising as a
158
+      result of this License or out of the use or inability to use the
159
+      Work (including but not limited to damages for loss of goodwill,
160
+      work stoppage, computer failure or malfunction, or any and all
161
+      other commercial damages or losses), even if such Contributor
162
+      has been advised of the possibility of such damages.
163
+
164
+   9. Accepting Warranty or Additional Liability. While redistributing
165
+      the Work or Derivative Works thereof, You may choose to offer,
166
+      and charge a fee for, acceptance of support, warranty, indemnity,
167
+      or other liability obligations and/or rights consistent with this
168
+      License. However, in accepting such obligations, You may act only
169
+      on Your own behalf and on Your sole responsibility, not on behalf
170
+      of any other Contributor, and only if You agree to indemnify,
171
+      defend, and hold each Contributor harmless for any liability
172
+      incurred by, or claims asserted against, such Contributor by reason
173
+      of your accepting any such warranty or additional liability.
174
+
175
+   END OF TERMS AND CONDITIONS
176
+
177
+   APPENDIX: How to apply the Apache License to your work.
178
+
179
+      To apply the Apache License to your work, attach the following
180
+      boilerplate notice, with the fields enclosed by brackets "{}"
181
+      replaced with your own identifying information. (Don't include
182
+      the brackets!)  The text should be enclosed in the appropriate
183
+      comment syntax for the file format. We also recommend that a
184
+      file or class name and description of purpose be included on the
185
+      same "printed page" as the copyright notice for easier
186
+      identification within third-party archives.
187
+
188
+   Copyright {yyyy} {name of copyright owner}
189
+
190
+   Licensed under the Apache License, Version 2.0 (the "License");
191
+   you may not use this file except in compliance with the License.
192
+   You may obtain a copy of the License at
193
+
194
+       http://www.apache.org/licenses/LICENSE-2.0
195
+
196
+   Unless required by applicable law or agreed to in writing, software
197
+   distributed under the License is distributed on an "AS IS" BASIS,
198
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
199
+   See the License for the specific language governing permissions and
200
+   limitations under the License.
201
+
0 202
new file mode 100644
... ...
@@ -0,0 +1,34 @@
0
+# ipvs - networking for containers
1
+
2
+![Test](https://github.com/moby/ipvs/workflows/Test/badge.svg) [![GoDoc](https://godoc.org/github.com/moby/ipvs?status.svg)](https://godoc.org/github.com/moby/ipvs) [![Go Report Card](https://goreportcard.com/badge/github.com/moby/ipvs)](https://goreportcard.com/report/github.com/moby/ipvs)
3
+
4
+ipvs provides a native Go implementation for communicating with IPVS kernel module using a netlink socket.
5
+
6
+
7
+#### Using ipvs
8
+
9
+```go
10
+import (
11
+	"log"
12
+
13
+	"github.com/moby/ipvs"
14
+)
15
+
16
+func main() {
17
+	handle, err := ipvs.New("")
18
+	if err != nil {
19
+		log.Fatalf("ipvs.New: %s", err)
20
+	}
21
+	svcs, err := handle.GetServices()
22
+	if err != nil {
23
+		log.Fatalf("handle.GetServices: %s", err)
24
+	}
25
+}
26
+```
27
+
28
+## Contributing
29
+
30
+Want to hack on ipvs? [Docker's contributions guidelines](https://github.com/docker/docker/blob/master/CONTRIBUTING.md) apply.
31
+
32
+## Copyright and license
33
+Code and documentation copyright 2015 Docker, Inc. Code released under the Apache 2.0 license. Docs released under Creative Commons.
0 34
new file mode 100644
... ...
@@ -0,0 +1,178 @@
0
+// +build linux
1
+
2
+package ipvs
3
+
4
+const (
5
+	genlCtrlID = 0x10
6
+)
7
+
8
+// GENL control commands
9
+const (
10
+	genlCtrlCmdUnspec uint8 = iota
11
+	genlCtrlCmdNewFamily
12
+	genlCtrlCmdDelFamily
13
+	genlCtrlCmdGetFamily
14
+)
15
+
16
+// GENL family attributes
17
+const (
18
+	genlCtrlAttrUnspec int = iota
19
+	genlCtrlAttrFamilyID
20
+	genlCtrlAttrFamilyName
21
+)
22
+
23
+// IPVS genl commands
24
+const (
25
+	ipvsCmdUnspec uint8 = iota
26
+	ipvsCmdNewService
27
+	ipvsCmdSetService
28
+	ipvsCmdDelService
29
+	ipvsCmdGetService
30
+	ipvsCmdNewDest
31
+	ipvsCmdSetDest
32
+	ipvsCmdDelDest
33
+	ipvsCmdGetDest
34
+	ipvsCmdNewDaemon
35
+	ipvsCmdDelDaemon
36
+	ipvsCmdGetDaemon
37
+	ipvsCmdSetConfig
38
+	ipvsCmdGetConfig
39
+	ipvsCmdSetInfo
40
+	ipvsCmdGetInfo
41
+	ipvsCmdZero
42
+	ipvsCmdFlush
43
+)
44
+
45
+// Attributes used in the first level of commands
46
+const (
47
+	ipvsCmdAttrUnspec int = iota
48
+	ipvsCmdAttrService
49
+	ipvsCmdAttrDest
50
+	ipvsCmdAttrDaemon
51
+	ipvsCmdAttrTimeoutTCP
52
+	ipvsCmdAttrTimeoutTCPFin
53
+	ipvsCmdAttrTimeoutUDP
54
+)
55
+
56
+// Attributes used to describe a service. Used inside nested attribute
57
+// ipvsCmdAttrService
58
+const (
59
+	ipvsSvcAttrUnspec int = iota
60
+	ipvsSvcAttrAddressFamily
61
+	ipvsSvcAttrProtocol
62
+	ipvsSvcAttrAddress
63
+	ipvsSvcAttrPort
64
+	ipvsSvcAttrFWMark
65
+	ipvsSvcAttrSchedName
66
+	ipvsSvcAttrFlags
67
+	ipvsSvcAttrTimeout
68
+	ipvsSvcAttrNetmask
69
+	ipvsSvcAttrStats
70
+	ipvsSvcAttrPEName
71
+)
72
+
73
+// Attributes used to describe a destination (real server). Used
74
+// inside nested attribute ipvsCmdAttrDest.
75
+const (
76
+	ipvsDestAttrUnspec int = iota
77
+	ipvsDestAttrAddress
78
+	ipvsDestAttrPort
79
+	ipvsDestAttrForwardingMethod
80
+	ipvsDestAttrWeight
81
+	ipvsDestAttrUpperThreshold
82
+	ipvsDestAttrLowerThreshold
83
+	ipvsDestAttrActiveConnections
84
+	ipvsDestAttrInactiveConnections
85
+	ipvsDestAttrPersistentConnections
86
+	ipvsDestAttrStats
87
+	ipvsDestAttrAddressFamily
88
+)
89
+
90
+// IPVS Svc Statistics constants
91
+
92
+const (
93
+	ipvsSvcStatsUnspec int = iota
94
+	ipvsSvcStatsConns
95
+	ipvsSvcStatsPktsIn
96
+	ipvsSvcStatsPktsOut
97
+	ipvsSvcStatsBytesIn
98
+	ipvsSvcStatsBytesOut
99
+	ipvsSvcStatsCPS
100
+	ipvsSvcStatsPPSIn
101
+	ipvsSvcStatsPPSOut
102
+	ipvsSvcStatsBPSIn
103
+	ipvsSvcStatsBPSOut
104
+)
105
+
106
+// Destination forwarding methods
107
+const (
108
+	// ConnectionFlagFwdMask indicates the mask in the connection
109
+	// flags which is used by forwarding method bits.
110
+	ConnectionFlagFwdMask = 0x0007
111
+
112
+	// ConnectionFlagMasq is used for masquerade forwarding method.
113
+	ConnectionFlagMasq = 0x0000
114
+
115
+	// ConnectionFlagLocalNode is used for local node forwarding
116
+	// method.
117
+	ConnectionFlagLocalNode = 0x0001
118
+
119
+	// ConnectionFlagTunnel is used for tunnel mode forwarding
120
+	// method.
121
+	ConnectionFlagTunnel = 0x0002
122
+
123
+	// ConnectionFlagDirectRoute is used for direct routing
124
+	// forwarding method.
125
+	ConnectionFlagDirectRoute = 0x0003
126
+)
127
+
128
+const (
129
+	// RoundRobin distributes jobs equally amongst the available
130
+	// real servers.
131
+	RoundRobin = "rr"
132
+
133
+	// LeastConnection assigns more jobs to real servers with
134
+	// fewer active jobs.
135
+	LeastConnection = "lc"
136
+
137
+	// DestinationHashing assigns jobs to servers through looking
138
+	// up a statically assigned hash table by their destination IP
139
+	// addresses.
140
+	DestinationHashing = "dh"
141
+
142
+	// SourceHashing assigns jobs to servers through looking up
143
+	// a statically assigned hash table by their source IP
144
+	// addresses.
145
+	SourceHashing = "sh"
146
+
147
+	// WeightedRoundRobin assigns jobs to real servers proportionally
148
+	// to the real servers' weight. Servers with higher weights
149
+	// receive new jobs first and get more jobs than servers
150
+	// with lower weights. Servers with equal weights get
151
+	// an equal distribution of new jobs
152
+	WeightedRoundRobin = "wrr"
153
+
154
+	// WeightedLeastConnection assigns more jobs to servers
155
+	// with fewer jobs and relative to the real servers' weight
156
+	WeightedLeastConnection = "wlc"
157
+)
158
+
159
+const (
160
+	// ConnFwdMask is a mask for the fwd methods
161
+	ConnFwdMask = 0x0007
162
+
163
+	// ConnFwdMasq denotes forwarding via masquerading/NAT
164
+	ConnFwdMasq = 0x0000
165
+
166
+	// ConnFwdLocalNode denotes forwarding to a local node
167
+	ConnFwdLocalNode = 0x0001
168
+
169
+	// ConnFwdTunnel denotes forwarding via a tunnel
170
+	ConnFwdTunnel = 0x0002
171
+
172
+	// ConnFwdDirectRoute denotes forwarding via direct routing
173
+	ConnFwdDirectRoute = 0x0003
174
+
175
+	// ConnFwdBypass denotes forwarding while bypassing the cache
176
+	ConnFwdBypass = 0x0004
177
+)
0 178
new file mode 100644
... ...
@@ -0,0 +1 @@
0
+package ipvs
0 1
new file mode 100644
... ...
@@ -0,0 +1,12 @@
0
+module github.com/moby/ipvs
1
+
2
+go 1.13
3
+
4
+require (
5
+	github.com/pkg/errors v0.9.1 // indirect
6
+	github.com/sirupsen/logrus v1.4.2
7
+	github.com/vishvananda/netlink v1.1.0
8
+	github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df
9
+	golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527
10
+	gotest.tools/v3 v3.0.2
11
+)
0 12
new file mode 100644
... ...
@@ -0,0 +1,206 @@
0
+// +build linux
1
+
2
+package ipvs
3
+
4
+import (
5
+	"fmt"
6
+	"net"
7
+	"time"
8
+
9
+	"github.com/vishvananda/netlink/nl"
10
+	"github.com/vishvananda/netns"
11
+	"golang.org/x/sys/unix"
12
+)
13
+
14
+const (
15
+	netlinkRecvSocketsTimeout = 3 * time.Second
16
+	netlinkSendSocketTimeout  = 30 * time.Second
17
+)
18
+
19
+// Service defines an IPVS service in its entirety.
20
+type Service struct {
21
+	// Virtual service address.
22
+	Address  net.IP
23
+	Protocol uint16
24
+	Port     uint16
25
+	FWMark   uint32 // Firewall mark of the service.
26
+
27
+	// Virtual service options.
28
+	SchedName     string
29
+	Flags         uint32
30
+	Timeout       uint32
31
+	Netmask       uint32
32
+	AddressFamily uint16
33
+	PEName        string
34
+	Stats         SvcStats
35
+}
36
+
37
+// SvcStats defines an IPVS service statistics
38
+type SvcStats struct {
39
+	Connections uint32
40
+	PacketsIn   uint32
41
+	PacketsOut  uint32
42
+	BytesIn     uint64
43
+	BytesOut    uint64
44
+	CPS         uint32
45
+	BPSOut      uint32
46
+	PPSIn       uint32
47
+	PPSOut      uint32
48
+	BPSIn       uint32
49
+}
50
+
51
+// Destination defines an IPVS destination (real server) in its
52
+// entirety.
53
+type Destination struct {
54
+	Address             net.IP
55
+	Port                uint16
56
+	Weight              int
57
+	ConnectionFlags     uint32
58
+	AddressFamily       uint16
59
+	UpperThreshold      uint32
60
+	LowerThreshold      uint32
61
+	ActiveConnections   int
62
+	InactiveConnections int
63
+	Stats               DstStats
64
+}
65
+
66
+// DstStats defines IPVS destination (real server) statistics
67
+type DstStats SvcStats
68
+
69
+// Config defines IPVS timeout configuration
70
+type Config struct {
71
+	TimeoutTCP    time.Duration
72
+	TimeoutTCPFin time.Duration
73
+	TimeoutUDP    time.Duration
74
+}
75
+
76
+// Handle provides a namespace specific ipvs handle to program ipvs
77
+// rules.
78
+type Handle struct {
79
+	seq  uint32
80
+	sock *nl.NetlinkSocket
81
+}
82
+
83
+// New provides a new ipvs handle in the namespace pointed to by the
84
+// passed path. It will return a valid handle or an error in case an
85
+// error occurred while creating the handle.
86
+func New(path string) (*Handle, error) {
87
+	setup()
88
+
89
+	n := netns.None()
90
+	if path != "" {
91
+		var err error
92
+		n, err = netns.GetFromPath(path)
93
+		if err != nil {
94
+			return nil, err
95
+		}
96
+	}
97
+	defer n.Close()
98
+
99
+	sock, err := nl.GetNetlinkSocketAt(n, netns.None(), unix.NETLINK_GENERIC)
100
+	if err != nil {
101
+		return nil, err
102
+	}
103
+	// Add operation timeout to avoid deadlocks
104
+	tv := unix.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds())
105
+	if err := sock.SetSendTimeout(&tv); err != nil {
106
+		return nil, err
107
+	}
108
+	tv = unix.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds())
109
+	if err := sock.SetReceiveTimeout(&tv); err != nil {
110
+		return nil, err
111
+	}
112
+
113
+	return &Handle{sock: sock}, nil
114
+}
115
+
116
+// Close closes the ipvs handle. The handle is invalid after Close
117
+// returns.
118
+func (i *Handle) Close() {
119
+	if i.sock != nil {
120
+		i.sock.Close()
121
+	}
122
+}
123
+
124
+// NewService creates a new ipvs service in the passed handle.
125
+func (i *Handle) NewService(s *Service) error {
126
+	return i.doCmd(s, nil, ipvsCmdNewService)
127
+}
128
+
129
+// IsServicePresent queries for the ipvs service in the passed handle.
130
+func (i *Handle) IsServicePresent(s *Service) bool {
131
+	return nil == i.doCmd(s, nil, ipvsCmdGetService)
132
+}
133
+
134
+// UpdateService updates an already existing service in the passed
135
+// handle.
136
+func (i *Handle) UpdateService(s *Service) error {
137
+	return i.doCmd(s, nil, ipvsCmdSetService)
138
+}
139
+
140
+// DelService deletes an already existing service in the passed
141
+// handle.
142
+func (i *Handle) DelService(s *Service) error {
143
+	return i.doCmd(s, nil, ipvsCmdDelService)
144
+}
145
+
146
+// Flush deletes all existing services in the passed
147
+// handle.
148
+func (i *Handle) Flush() error {
149
+	_, err := i.doCmdWithoutAttr(ipvsCmdFlush)
150
+	return err
151
+}
152
+
153
+// NewDestination creates a new real server in the passed ipvs
154
+// service which should already be existing in the passed handle.
155
+func (i *Handle) NewDestination(s *Service, d *Destination) error {
156
+	return i.doCmd(s, d, ipvsCmdNewDest)
157
+}
158
+
159
+// UpdateDestination updates an already existing real server in the
160
+// passed ipvs service in the passed handle.
161
+func (i *Handle) UpdateDestination(s *Service, d *Destination) error {
162
+	return i.doCmd(s, d, ipvsCmdSetDest)
163
+}
164
+
165
+// DelDestination deletes an already existing real server in the
166
+// passed ipvs service in the passed handle.
167
+func (i *Handle) DelDestination(s *Service, d *Destination) error {
168
+	return i.doCmd(s, d, ipvsCmdDelDest)
169
+}
170
+
171
+// GetServices returns an array of services configured on the Node
172
+func (i *Handle) GetServices() ([]*Service, error) {
173
+	return i.doGetServicesCmd(nil)
174
+}
175
+
176
+// GetDestinations returns an array of Destinations configured for this Service
177
+func (i *Handle) GetDestinations(s *Service) ([]*Destination, error) {
178
+	return i.doGetDestinationsCmd(s, nil)
179
+}
180
+
181
+// GetService gets details of a specific IPVS service, useful in updating statistics etc.
182
+func (i *Handle) GetService(s *Service) (*Service, error) {
183
+
184
+	res, err := i.doGetServicesCmd(s)
185
+	if err != nil {
186
+		return nil, err
187
+	}
188
+
189
+	// We are looking for exactly one service otherwise error out
190
+	if len(res) != 1 {
191
+		return nil, fmt.Errorf("Expected only one service obtained=%d", len(res))
192
+	}
193
+
194
+	return res[0], nil
195
+}
196
+
197
+// GetConfig returns the current timeout configuration
198
+func (i *Handle) GetConfig() (*Config, error) {
199
+	return i.doGetConfigCmd()
200
+}
201
+
202
+// SetConfig sets the current timeout configuration. 0: no change
203
+func (i *Handle) SetConfig(c *Config) error {
204
+	return i.doSetConfigCmd(c)
205
+}
0 206
new file mode 100644
... ...
@@ -0,0 +1,636 @@
0
+// +build linux
1
+
2
+package ipvs
3
+
4
+import (
5
+	"bytes"
6
+	"encoding/binary"
7
+	"fmt"
8
+	"net"
9
+	"os/exec"
10
+	"strings"
11
+	"sync"
12
+	"sync/atomic"
13
+	"syscall"
14
+	"time"
15
+	"unsafe"
16
+
17
+	"github.com/sirupsen/logrus"
18
+	"github.com/vishvananda/netlink/nl"
19
+	"github.com/vishvananda/netns"
20
+)
21
+
22
+// For Quick Reference IPVS related netlink message is described at the end of this file.
23
+var (
24
+	native     = nl.NativeEndian()
25
+	ipvsFamily int
26
+	ipvsOnce   sync.Once
27
+)
28
+
29
+type genlMsgHdr struct {
30
+	cmd      uint8
31
+	version  uint8
32
+	reserved uint16
33
+}
34
+
35
+type ipvsFlags struct {
36
+	flags uint32
37
+	mask  uint32
38
+}
39
+
40
+func deserializeGenlMsg(b []byte) (hdr *genlMsgHdr) {
41
+	return (*genlMsgHdr)(unsafe.Pointer(&b[0:unsafe.Sizeof(*hdr)][0]))
42
+}
43
+
44
+func (hdr *genlMsgHdr) Serialize() []byte {
45
+	return (*(*[unsafe.Sizeof(*hdr)]byte)(unsafe.Pointer(hdr)))[:]
46
+}
47
+
48
+func (hdr *genlMsgHdr) Len() int {
49
+	return int(unsafe.Sizeof(*hdr))
50
+}
51
+
52
+func (f *ipvsFlags) Serialize() []byte {
53
+	return (*(*[unsafe.Sizeof(*f)]byte)(unsafe.Pointer(f)))[:]
54
+}
55
+
56
+func (f *ipvsFlags) Len() int {
57
+	return int(unsafe.Sizeof(*f))
58
+}
59
+
60
+func setup() {
61
+	ipvsOnce.Do(func() {
62
+		var err error
63
+		if out, err := exec.Command("modprobe", "-va", "ip_vs").CombinedOutput(); err != nil {
64
+			logrus.Warnf("Running modprobe ip_vs failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err)
65
+		}
66
+
67
+		ipvsFamily, err = getIPVSFamily()
68
+		if err != nil {
69
+			logrus.Error("Could not get ipvs family information from the kernel. It is possible that ipvs is not enabled in your kernel. Native loadbalancing will not work until this is fixed.")
70
+		}
71
+	})
72
+}
73
+
74
+func fillService(s *Service) nl.NetlinkRequestData {
75
+	cmdAttr := nl.NewRtAttr(ipvsCmdAttrService, nil)
76
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddressFamily, nl.Uint16Attr(s.AddressFamily))
77
+	if s.FWMark != 0 {
78
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFWMark, nl.Uint32Attr(s.FWMark))
79
+	} else {
80
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrProtocol, nl.Uint16Attr(s.Protocol))
81
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddress, rawIPData(s.Address))
82
+
83
+		// Port needs to be in network byte order.
84
+		portBuf := new(bytes.Buffer)
85
+		binary.Write(portBuf, binary.BigEndian, s.Port)
86
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPort, portBuf.Bytes())
87
+	}
88
+
89
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrSchedName, nl.ZeroTerminated(s.SchedName))
90
+	if s.PEName != "" {
91
+		nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPEName, nl.ZeroTerminated(s.PEName))
92
+	}
93
+	f := &ipvsFlags{
94
+		flags: s.Flags,
95
+		mask:  0xFFFFFFFF,
96
+	}
97
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFlags, f.Serialize())
98
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrTimeout, nl.Uint32Attr(s.Timeout))
99
+	nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrNetmask, nl.Uint32Attr(s.Netmask))
100
+	return cmdAttr
101
+}
102
+
103
+func fillDestination(d *Destination) nl.NetlinkRequestData {
104
+	cmdAttr := nl.NewRtAttr(ipvsCmdAttrDest, nil)
105
+
106
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrAddress, rawIPData(d.Address))
107
+	// Port needs to be in network byte order.
108
+	portBuf := new(bytes.Buffer)
109
+	binary.Write(portBuf, binary.BigEndian, d.Port)
110
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrPort, portBuf.Bytes())
111
+
112
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrForwardingMethod, nl.Uint32Attr(d.ConnectionFlags&ConnectionFlagFwdMask))
113
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrWeight, nl.Uint32Attr(uint32(d.Weight)))
114
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrUpperThreshold, nl.Uint32Attr(d.UpperThreshold))
115
+	nl.NewRtAttrChild(cmdAttr, ipvsDestAttrLowerThreshold, nl.Uint32Attr(d.LowerThreshold))
116
+
117
+	return cmdAttr
118
+}
119
+
120
+func (i *Handle) doCmdwithResponse(s *Service, d *Destination, cmd uint8) ([][]byte, error) {
121
+	req := newIPVSRequest(cmd)
122
+	req.Seq = atomic.AddUint32(&i.seq, 1)
123
+
124
+	if s == nil {
125
+		req.Flags |= syscall.NLM_F_DUMP                    //Flag to dump all messages
126
+		req.AddData(nl.NewRtAttr(ipvsCmdAttrService, nil)) //Add a dummy attribute
127
+	} else {
128
+		req.AddData(fillService(s))
129
+	}
130
+
131
+	if d == nil {
132
+		if cmd == ipvsCmdGetDest {
133
+			req.Flags |= syscall.NLM_F_DUMP
134
+		}
135
+
136
+	} else {
137
+		req.AddData(fillDestination(d))
138
+	}
139
+
140
+	res, err := execute(i.sock, req, 0)
141
+	if err != nil {
142
+		return [][]byte{}, err
143
+	}
144
+
145
+	return res, nil
146
+}
147
+
148
+func (i *Handle) doCmd(s *Service, d *Destination, cmd uint8) error {
149
+	_, err := i.doCmdwithResponse(s, d, cmd)
150
+
151
+	return err
152
+}
153
+
154
+func getIPVSFamily() (int, error) {
155
+	sock, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), syscall.NETLINK_GENERIC)
156
+	if err != nil {
157
+		return 0, err
158
+	}
159
+	defer sock.Close()
160
+
161
+	req := newGenlRequest(genlCtrlID, genlCtrlCmdGetFamily)
162
+	req.AddData(nl.NewRtAttr(genlCtrlAttrFamilyName, nl.ZeroTerminated("IPVS")))
163
+
164
+	msgs, err := execute(sock, req, 0)
165
+	if err != nil {
166
+		return 0, err
167
+	}
168
+
169
+	for _, m := range msgs {
170
+		hdr := deserializeGenlMsg(m)
171
+		attrs, err := nl.ParseRouteAttr(m[hdr.Len():])
172
+		if err != nil {
173
+			return 0, err
174
+		}
175
+
176
+		for _, attr := range attrs {
177
+			switch int(attr.Attr.Type) {
178
+			case genlCtrlAttrFamilyID:
179
+				return int(native.Uint16(attr.Value[0:2])), nil
180
+			}
181
+		}
182
+	}
183
+
184
+	return 0, fmt.Errorf("no family id in the netlink response")
185
+}
186
+
187
+func rawIPData(ip net.IP) []byte {
188
+	family := nl.GetIPFamily(ip)
189
+	if family == nl.FAMILY_V4 {
190
+		return ip.To4()
191
+	}
192
+	return ip
193
+}
194
+
195
+func newIPVSRequest(cmd uint8) *nl.NetlinkRequest {
196
+	return newGenlRequest(ipvsFamily, cmd)
197
+}
198
+
199
+func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest {
200
+	req := nl.NewNetlinkRequest(familyID, syscall.NLM_F_ACK)
201
+	req.AddData(&genlMsgHdr{cmd: cmd, version: 1})
202
+	return req
203
+}
204
+
205
+func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) {
206
+	if err := s.Send(req); err != nil {
207
+		return nil, err
208
+	}
209
+
210
+	pid, err := s.GetPid()
211
+	if err != nil {
212
+		return nil, err
213
+	}
214
+
215
+	var res [][]byte
216
+
217
+done:
218
+	for {
219
+		msgs, _, err := s.Receive()
220
+		if err != nil {
221
+			if s.GetFd() == -1 {
222
+				return nil, fmt.Errorf("Socket got closed on receive")
223
+			}
224
+			if err == syscall.EAGAIN {
225
+				// timeout fired
226
+				continue
227
+			}
228
+			return nil, err
229
+		}
230
+		for _, m := range msgs {
231
+			if m.Header.Seq != req.Seq {
232
+				continue
233
+			}
234
+			if m.Header.Pid != pid {
235
+				return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
236
+			}
237
+			if m.Header.Type == syscall.NLMSG_DONE {
238
+				break done
239
+			}
240
+			if m.Header.Type == syscall.NLMSG_ERROR {
241
+				error := int32(native.Uint32(m.Data[0:4]))
242
+				if error == 0 {
243
+					break done
244
+				}
245
+				return nil, syscall.Errno(-error)
246
+			}
247
+			if resType != 0 && m.Header.Type != resType {
248
+				continue
249
+			}
250
+			res = append(res, m.Data)
251
+			if m.Header.Flags&syscall.NLM_F_MULTI == 0 {
252
+				break done
253
+			}
254
+		}
255
+	}
256
+	return res, nil
257
+}
258
+
259
+func parseIP(ip []byte, family uint16) (net.IP, error) {
260
+
261
+	var resIP net.IP
262
+
263
+	switch family {
264
+	case syscall.AF_INET:
265
+		resIP = (net.IP)(ip[:4])
266
+	case syscall.AF_INET6:
267
+		resIP = (net.IP)(ip[:16])
268
+	default:
269
+		return nil, fmt.Errorf("parseIP Error ip=%v", ip)
270
+
271
+	}
272
+	return resIP, nil
273
+}
274
+
275
+// assembleStats parses a chain of netlink attributes into a SvcStats record.
276
+func assembleStats(msg []byte) (SvcStats, error) {
277
+
278
+	var s SvcStats
279
+
280
+	attrs, err := nl.ParseRouteAttr(msg)
281
+	if err != nil {
282
+		return s, err
283
+	}
284
+
285
+	for _, attr := range attrs {
286
+		attrType := int(attr.Attr.Type)
287
+		switch attrType {
288
+		case ipvsSvcStatsConns:
289
+			s.Connections = native.Uint32(attr.Value)
290
+		case ipvsSvcStatsPktsIn:
291
+			s.PacketsIn = native.Uint32(attr.Value)
292
+		case ipvsSvcStatsPktsOut:
293
+			s.PacketsOut = native.Uint32(attr.Value)
294
+		case ipvsSvcStatsBytesIn:
295
+			s.BytesIn = native.Uint64(attr.Value)
296
+		case ipvsSvcStatsBytesOut:
297
+			s.BytesOut = native.Uint64(attr.Value)
298
+		case ipvsSvcStatsCPS:
299
+			s.CPS = native.Uint32(attr.Value)
300
+		case ipvsSvcStatsPPSIn:
301
+			s.PPSIn = native.Uint32(attr.Value)
302
+		case ipvsSvcStatsPPSOut:
303
+			s.PPSOut = native.Uint32(attr.Value)
304
+		case ipvsSvcStatsBPSIn:
305
+			s.BPSIn = native.Uint32(attr.Value)
306
+		case ipvsSvcStatsBPSOut:
307
+			s.BPSOut = native.Uint32(attr.Value)
308
+		}
309
+	}
310
+	return s, nil
311
+}
312
+
313
+// assembleService assembles a service back from a chain of netlink attributes
314
+func assembleService(attrs []syscall.NetlinkRouteAttr) (*Service, error) {
315
+
316
+	var s Service
317
+	var addressBytes []byte
318
+
319
+	for _, attr := range attrs {
320
+
321
+		attrType := int(attr.Attr.Type)
322
+
323
+		switch attrType {
324
+
325
+		case ipvsSvcAttrAddressFamily:
326
+			s.AddressFamily = native.Uint16(attr.Value)
327
+		case ipvsSvcAttrProtocol:
328
+			s.Protocol = native.Uint16(attr.Value)
329
+		case ipvsSvcAttrAddress:
330
+			addressBytes = attr.Value
331
+		case ipvsSvcAttrPort:
332
+			s.Port = binary.BigEndian.Uint16(attr.Value)
333
+		case ipvsSvcAttrFWMark:
334
+			s.FWMark = native.Uint32(attr.Value)
335
+		case ipvsSvcAttrSchedName:
336
+			s.SchedName = nl.BytesToString(attr.Value)
337
+		case ipvsSvcAttrFlags:
338
+			s.Flags = native.Uint32(attr.Value)
339
+		case ipvsSvcAttrTimeout:
340
+			s.Timeout = native.Uint32(attr.Value)
341
+		case ipvsSvcAttrNetmask:
342
+			s.Netmask = native.Uint32(attr.Value)
343
+		case ipvsSvcAttrStats:
344
+			stats, err := assembleStats(attr.Value)
345
+			if err != nil {
346
+				return nil, err
347
+			}
348
+			s.Stats = stats
349
+		}
350
+
351
+	}
352
+
353
+	// Parse Address only after AddressFamily is known: parseIP needs the family and returns an error for an unknown one.
354
+	if addressBytes != nil {
355
+		ip, err := parseIP(addressBytes, s.AddressFamily)
356
+		if err != nil {
357
+			return nil, err
358
+		}
359
+		s.Address = ip
360
+	}
361
+
362
+	return &s, nil
363
+}
364
+
365
+// parseService given a ipvs netlink response this function will respond with a valid service entry, an error otherwise
366
+func (i *Handle) parseService(msg []byte) (*Service, error) {
367
+
368
+	var s *Service
369
+
370
+	//Remove General header for this message and parse the NetLink message
371
+	hdr := deserializeGenlMsg(msg)
372
+	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
373
+	if err != nil {
374
+		return nil, err
375
+	}
376
+	if len(NetLinkAttrs) == 0 {
377
+		return nil, fmt.Errorf("error no valid netlink message found while parsing service record")
378
+	}
379
+
380
+	//Now Parse and get IPVS related attributes messages packed in this message.
381
+	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
382
+	if err != nil {
383
+		return nil, err
384
+	}
385
+
386
+	//Assemble all the IPVS related attribute messages and create a service record
387
+	s, err = assembleService(ipvsAttrs)
388
+	if err != nil {
389
+		return nil, err
390
+	}
391
+
392
+	return s, nil
393
+}
394
+
395
+// doGetServicesCmd a wrapper which could be used commonly for both GetServices() and GetService(*Service)
396
+func (i *Handle) doGetServicesCmd(svc *Service) ([]*Service, error) {
397
+	var res []*Service
398
+
399
+	msgs, err := i.doCmdwithResponse(svc, nil, ipvsCmdGetService)
400
+	if err != nil {
401
+		return nil, err
402
+	}
403
+
404
+	for _, msg := range msgs {
405
+		srv, err := i.parseService(msg)
406
+		if err != nil {
407
+			return nil, err
408
+		}
409
+		res = append(res, srv)
410
+	}
411
+
412
+	return res, nil
413
+}
414
+
415
+// doCmdWithoutAttr a simple wrapper of netlink socket execute command
416
+func (i *Handle) doCmdWithoutAttr(cmd uint8) ([][]byte, error) {
417
+	req := newIPVSRequest(cmd)
418
+	req.Seq = atomic.AddUint32(&i.seq, 1)
419
+	return execute(i.sock, req, 0)
420
+}
421
+
422
+func assembleDestination(attrs []syscall.NetlinkRouteAttr) (*Destination, error) {
423
+
424
+	var d Destination
425
+	var addressBytes []byte
426
+
427
+	for _, attr := range attrs {
428
+
429
+		attrType := int(attr.Attr.Type)
430
+
431
+		switch attrType {
432
+
433
+		case ipvsDestAttrAddressFamily:
434
+			d.AddressFamily = native.Uint16(attr.Value)
435
+		case ipvsDestAttrAddress:
436
+			addressBytes = attr.Value
437
+		case ipvsDestAttrPort:
438
+			d.Port = binary.BigEndian.Uint16(attr.Value)
439
+		case ipvsDestAttrForwardingMethod:
440
+			d.ConnectionFlags = native.Uint32(attr.Value)
441
+		case ipvsDestAttrWeight:
442
+			d.Weight = int(native.Uint16(attr.Value))
443
+		case ipvsDestAttrUpperThreshold:
444
+			d.UpperThreshold = native.Uint32(attr.Value)
445
+		case ipvsDestAttrLowerThreshold:
446
+			d.LowerThreshold = native.Uint32(attr.Value)
447
+		case ipvsDestAttrActiveConnections:
448
+			d.ActiveConnections = int(native.Uint16(attr.Value))
449
+		case ipvsDestAttrInactiveConnections:
450
+			d.InactiveConnections = int(native.Uint16(attr.Value))
451
+		case ipvsSvcAttrStats:
452
+			stats, err := assembleStats(attr.Value)
453
+			if err != nil {
454
+				return nil, err
455
+			}
456
+			d.Stats = DstStats(stats)
457
+		}
458
+	}
459
+
460
+	// Parse Address only after AddressFamily is known: parseIP needs the family and returns an error for an unknown one.
461
+	if addressBytes != nil {
462
+		ip, err := parseIP(addressBytes, d.AddressFamily)
463
+		if err != nil {
464
+			return nil, err
465
+		}
466
+		d.Address = ip
467
+	}
468
+
469
+	return &d, nil
470
+}
471
+
472
+// parseDestination given a ipvs netlink response this function will respond with a valid destination entry, an error otherwise
473
+func (i *Handle) parseDestination(msg []byte) (*Destination, error) {
474
+	var dst *Destination
475
+
476
+	//Remove General header for this message
477
+	hdr := deserializeGenlMsg(msg)
478
+	NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
479
+	if err != nil {
480
+		return nil, err
481
+	}
482
+	if len(NetLinkAttrs) == 0 {
483
+		return nil, fmt.Errorf("error no valid netlink message found while parsing destination record")
484
+	}
485
+
486
+	//Now Parse and get IPVS related attributes messages packed in this message.
487
+	ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value)
488
+	if err != nil {
489
+		return nil, err
490
+	}
491
+
492
+	//Assemble netlink attributes and create a Destination record
493
+	dst, err = assembleDestination(ipvsAttrs)
494
+	if err != nil {
495
+		return nil, err
496
+	}
497
+
498
+	return dst, nil
499
+}
500
+
501
+// doGetDestinationsCmd a wrapper function to be used by GetDestinations and GetDestination(d) apis
502
+func (i *Handle) doGetDestinationsCmd(s *Service, d *Destination) ([]*Destination, error) {
503
+
504
+	var res []*Destination
505
+
506
+	msgs, err := i.doCmdwithResponse(s, d, ipvsCmdGetDest)
507
+	if err != nil {
508
+		return nil, err
509
+	}
510
+
511
+	for _, msg := range msgs {
512
+		dest, err := i.parseDestination(msg)
513
+		if err != nil {
514
+			return res, err
515
+		}
516
+		res = append(res, dest)
517
+	}
518
+	return res, nil
519
+}
520
+
521
+// parseConfig given a ipvs netlink response this function will respond with a valid config entry, an error otherwise
522
+func (i *Handle) parseConfig(msg []byte) (*Config, error) {
523
+	var c Config
524
+
525
+	//Remove General header for this message
526
+	hdr := deserializeGenlMsg(msg)
527
+	attrs, err := nl.ParseRouteAttr(msg[hdr.Len():])
528
+	if err != nil {
529
+		return nil, err
530
+	}
531
+
532
+	for _, attr := range attrs {
533
+		attrType := int(attr.Attr.Type)
534
+		switch attrType {
535
+		case ipvsCmdAttrTimeoutTCP:
536
+			c.TimeoutTCP = time.Duration(native.Uint32(attr.Value)) * time.Second
537
+		case ipvsCmdAttrTimeoutTCPFin:
538
+			c.TimeoutTCPFin = time.Duration(native.Uint32(attr.Value)) * time.Second
539
+		case ipvsCmdAttrTimeoutUDP:
540
+			c.TimeoutUDP = time.Duration(native.Uint32(attr.Value)) * time.Second
541
+		}
542
+	}
543
+
544
+	return &c, nil
545
+}
546
+
547
+// doGetConfigCmd a wrapper function to be used by GetConfig
548
+func (i *Handle) doGetConfigCmd() (*Config, error) {
549
+	msg, err := i.doCmdWithoutAttr(ipvsCmdGetConfig)
550
+	if err != nil {
551
+		return nil, err
552
+	}
553
+
554
+	res, err := i.parseConfig(msg[0])
555
+	if err != nil {
556
+		return res, err
557
+	}
558
+	return res, nil
559
+}
560
+
561
+// doSetConfigCmd a wrapper function to be used by SetConfig
562
+func (i *Handle) doSetConfigCmd(c *Config) error {
563
+	req := newIPVSRequest(ipvsCmdSetConfig)
564
+	req.Seq = atomic.AddUint32(&i.seq, 1)
565
+
566
+	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCP, nl.Uint32Attr(uint32(c.TimeoutTCP.Seconds()))))
567
+	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCPFin, nl.Uint32Attr(uint32(c.TimeoutTCPFin.Seconds()))))
568
+	req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutUDP, nl.Uint32Attr(uint32(c.TimeoutUDP.Seconds()))))
569
+
570
+	_, err := execute(i.sock, req, 0)
571
+
572
+	return err
573
+}
574
+
575
+// IPVS related netlink message format explained
576
+
577
+/* EACH NETLINK MSG is of the below format, this is what we will receive from execute() api.
578
+   If we have multiple netlink objects to process like GetServices() etc., execute() will
579
+   supply an array of this below object
580
+
581
+            NETLINK MSG
582
+|-----------------------------------|
583
+    0        1        2        3
584
+|--------|--------|--------|--------| -
585
+| CMD ID |  VER   |    RESERVED     | |==> General Message Header represented by genlMsgHdr
586
+|-----------------------------------| -
587
+|    ATTR LEN     |   ATTR TYPE     | |
588
+|-----------------------------------| |
589
+|                                   | |
590
+|              VALUE                | |
591
+|     []byte Array of IPVS MSG      | |==> Attribute Message represented by syscall.NetlinkRouteAttr
592
+|        PADDED BY 4 BYTES          | |
593
+|                                   | |
594
+|-----------------------------------| -
595
+
596
+
597
+ Once We strip genlMsgHdr from above NETLINK MSG, we should parse the VALUE.
598
+ VALUE will have an array of netlink attributes (syscall.NetlinkRouteAttr) such that each attribute will
599
+ represent a "Service" or "Destination" object's field.  If we assemble these attributes we can construct
600
+ Service or Destination.
601
+
602
+            IPVS MSG
603
+|-----------------------------------|
604
+     0        1        2        3
605
+|--------|--------|--------|--------|
606
+|    ATTR LEN     |    ATTR TYPE    |
607
+|-----------------------------------|
608
+|                                   |
609
+|                                   |
610
+| []byte IPVS ATTRIBUTE  BY 4 BYTES |
611
+|                                   |
612
+|                                   |
613
+|-----------------------------------|
614
+           NEXT ATTRIBUTE
615
+|-----------------------------------|
616
+|    ATTR LEN     |    ATTR TYPE    |
617
+|-----------------------------------|
618
+|                                   |
619
+|                                   |
620
+| []byte IPVS ATTRIBUTE  BY 4 BYTES |
621
+|                                   |
622
+|                                   |
623
+|-----------------------------------|
624
+           NEXT ATTRIBUTE
625
+|-----------------------------------|
626
+|    ATTR LEN     |    ATTR TYPE    |
627
+|-----------------------------------|
628
+|                                   |
629
+|                                   |
630
+| []byte IPVS ATTRIBUTE  BY 4 BYTES |
631
+|                                   |
632
+|                                   |
633
+|-----------------------------------|
634
+
635
+*/
... ...
@@ -15,39 +15,62 @@ import (
15 15
 const IFA_FLAGS = 0x8
16 16
 
17 17
 // AddrAdd will add an IP address to a link device.
18
+//
18 19
 // Equivalent to: `ip addr add $addr dev $link`
20
+//
21
+// If `addr` is an IPv4 address and the broadcast address is not given, it
22
+// will be automatically computed based on the IP mask if /30 or larger.
19 23
 func AddrAdd(link Link, addr *Addr) error {
20 24
 	return pkgHandle.AddrAdd(link, addr)
21 25
 }
22 26
 
23 27
 // AddrAdd will add an IP address to a link device.
28
+//
24 29
 // Equivalent to: `ip addr add $addr dev $link`
30
+//
31
+// If `addr` is an IPv4 address and the broadcast address is not given, it
32
+// will be automatically computed based on the IP mask if /30 or larger.
25 33
 func (h *Handle) AddrAdd(link Link, addr *Addr) error {
26 34
 	req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
27 35
 	return h.addrHandle(link, addr, req)
28 36
 }
29 37
 
30 38
 // AddrReplace will replace (or, if not present, add) an IP address on a link device.
39
+//
31 40
 // Equivalent to: `ip addr replace $addr dev $link`
41
+//
42
+// If `addr` is an IPv4 address and the broadcast address is not given, it
43
+// will be automatically computed based on the IP mask if /30 or larger.
32 44
 func AddrReplace(link Link, addr *Addr) error {
33 45
 	return pkgHandle.AddrReplace(link, addr)
34 46
 }
35 47
 
36 48
 // AddrReplace will replace (or, if not present, add) an IP address on a link device.
49
+//
37 50
 // Equivalent to: `ip addr replace $addr dev $link`
51
+//
52
+// If `addr` is an IPv4 address and the broadcast address is not given, it
53
+// will be automatically computed based on the IP mask if /30 or larger.
38 54
 func (h *Handle) AddrReplace(link Link, addr *Addr) error {
39 55
 	req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_REPLACE|unix.NLM_F_ACK)
40 56
 	return h.addrHandle(link, addr, req)
41 57
 }
42 58
 
43 59
 // AddrDel will delete an IP address from a link device.
60
+//
44 61
 // Equivalent to: `ip addr del $addr dev $link`
62
+//
63
+// If `addr` is an IPv4 address and the broadcast address is not given, it
64
+// will be automatically computed based on the IP mask if /30 or larger.
45 65
 func AddrDel(link Link, addr *Addr) error {
46 66
 	return pkgHandle.AddrDel(link, addr)
47 67
 }
48 68
 
49 69
 // AddrDel will delete an IP address from a link device.
50 70
 // Equivalent to: `ip addr del $addr dev $link`
71
+//
72
+// If `addr` is an IPv4 address and the broadcast address is not given, it
73
+// will be automatically computed based on the IP mask if /30 or larger.
51 74
 func (h *Handle) AddrDel(link Link, addr *Addr) error {
52 75
 	req := h.newNetlinkRequest(unix.RTM_DELADDR, unix.NLM_F_ACK)
53 76
 	return h.addrHandle(link, addr, req)
... ...
@@ -65,7 +88,11 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
65 65
 	msg := nl.NewIfAddrmsg(family)
66 66
 	msg.Index = uint32(base.Index)
67 67
 	msg.Scope = uint8(addr.Scope)
68
-	prefixlen, masklen := addr.Mask.Size()
68
+	mask := addr.Mask
69
+	if addr.Peer != nil {
70
+		mask = addr.Peer.Mask
71
+	}
72
+	prefixlen, masklen := mask.Size()
69 73
 	msg.Prefixlen = uint8(prefixlen)
70 74
 	req.AddData(msg)
71 75
 
... ...
@@ -104,14 +131,20 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
104 104
 	}
105 105
 
106 106
 	if family == FAMILY_V4 {
107
-		if addr.Broadcast == nil {
107
+		// Automatically set the broadcast address if it is unset and the
108
+		// subnet is large enough to sensibly have one (/30 or larger).
109
+		// See: RFC 3021
110
+		if addr.Broadcast == nil && prefixlen < 31 {
108 111
 			calcBroadcast := make(net.IP, masklen/8)
109 112
 			for i := range localAddrData {
110
-				calcBroadcast[i] = localAddrData[i] | ^addr.Mask[i]
113
+				calcBroadcast[i] = localAddrData[i] | ^mask[i]
111 114
 			}
112 115
 			addr.Broadcast = calcBroadcast
113 116
 		}
114
-		req.AddData(nl.NewRtAttr(unix.IFA_BROADCAST, addr.Broadcast))
117
+
118
+		if addr.Broadcast != nil {
119
+			req.AddData(nl.NewRtAttr(unix.IFA_BROADCAST, addr.Broadcast))
120
+		}
115 121
 
116 122
 		if addr.Label != "" {
117 123
 			labelData := nl.NewRtAttr(unix.IFA_LABEL, nl.ZeroTerminated(addr.Label))
... ...
@@ -206,13 +239,17 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
206 206
 				IP:   attr.Value,
207 207
 				Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
208 208
 			}
209
-			addr.Peer = dst
210 209
 		case unix.IFA_LOCAL:
210
+			// iproute2 manual:
211
+			// If a peer address is specified, the local address
212
+			// cannot have a prefix length. The network prefix is
213
+			// associated with the peer rather than with the local
214
+			// address.
215
+			n := 8 * len(attr.Value)
211 216
 			local = &net.IPNet{
212 217
 				IP:   attr.Value,
213
-				Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
218
+				Mask: net.CIDRMask(n, n),
214 219
 			}
215
-			addr.IPNet = local
216 220
 		case unix.IFA_BROADCAST:
217 221
 			addr.Broadcast = attr.Value
218 222
 		case unix.IFA_LABEL:
... ...
@@ -226,12 +263,24 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
226 226
 		}
227 227
 	}
228 228
 
229
-	// IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS
229
+	// libnl addr.c comment:
230
+	// IPv6 sends the local address as IFA_ADDRESS with no
231
+	// IFA_LOCAL, IPv4 sends both IFA_LOCAL and IFA_ADDRESS
232
+	// with IFA_ADDRESS being the peer address if they differ
233
+	//
234
+	// But obviously, as there are IPv6 PtP addresses, too,
235
+	// IFA_LOCAL should also be handled for IPv6.
230 236
 	if local != nil {
231
-		addr.IPNet = local
237
+		if family == FAMILY_V4 && local.IP.Equal(dst.IP) {
238
+			addr.IPNet = dst
239
+		} else {
240
+			addr.IPNet = local
241
+			addr.Peer = dst
242
+		}
232 243
 	} else {
233 244
 		addr.IPNet = dst
234 245
 	}
246
+
235 247
 	addr.Scope = int(msg.Scope)
236 248
 
237 249
 	return
... ...
@@ -250,21 +299,22 @@ type AddrUpdate struct {
250 250
 // AddrSubscribe takes a chan down which notifications will be sent
251 251
 // when addresses change.  Close the 'done' chan to stop subscription.
252 252
 func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
253
-	return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false)
253
+	return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0)
254 254
 }
255 255
 
256 256
 // AddrSubscribeAt works like AddrSubscribe plus it allows the caller
257 257
 // to choose the network namespace in which to subscribe (ns).
258 258
 func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
259
-	return addrSubscribeAt(ns, netns.None(), ch, done, nil, false)
259
+	return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0)
260 260
 }
261 261
 
262 262
 // AddrSubscribeOptions contains a set of options to use with
263 263
 // AddrSubscribeWithOptions.
264 264
 type AddrSubscribeOptions struct {
265
-	Namespace     *netns.NsHandle
266
-	ErrorCallback func(error)
267
-	ListExisting  bool
265
+	Namespace         *netns.NsHandle
266
+	ErrorCallback     func(error)
267
+	ListExisting      bool
268
+	ReceiveBufferSize int
268 269
 }
269 270
 
270 271
 // AddrSubscribeWithOptions work like AddrSubscribe but enable to
... ...
@@ -275,10 +325,10 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option
275 275
 		none := netns.None()
276 276
 		options.Namespace = &none
277 277
 	}
278
-	return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting)
278
+	return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize)
279 279
 }
280 280
 
281
-func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error {
281
+func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int) error {
282 282
 	s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR)
283 283
 	if err != nil {
284 284
 		return err
... ...
@@ -289,6 +339,12 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
289 289
 			s.Close()
290 290
 		}()
291 291
 	}
292
+	if rcvbuf != 0 {
293
+		err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false)
294
+		if err != nil {
295
+			return err
296
+		}
297
+	}
292 298
 	if listExisting {
293 299
 		req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR,
294 300
 			unix.NLM_F_DUMP)
... ...
@@ -301,13 +357,19 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
301 301
 	go func() {
302 302
 		defer close(ch)
303 303
 		for {
304
-			msgs, err := s.Receive()
304
+			msgs, from, err := s.Receive()
305 305
 			if err != nil {
306 306
 				if cberr != nil {
307 307
 					cberr(err)
308 308
 				}
309 309
 				return
310 310
 			}
311
+			if from.Pid != nl.PidKernel {
312
+				if cberr != nil {
313
+					cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel))
314
+				}
315
+				continue
316
+			}
311 317
 			for _, m := range msgs {
312 318
 				if m.Header.Type == unix.NLMSG_DONE {
313 319
 					continue
... ...
@@ -319,16 +381,17 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
319 319
 						continue
320 320
 					}
321 321
 					if cberr != nil {
322
-						cberr(syscall.Errno(-error))
322
+						cberr(fmt.Errorf("error message: %v",
323
+							syscall.Errno(-error)))
323 324
 					}
324
-					return
325
+					continue
325 326
 				}
326 327
 				msgType := m.Header.Type
327 328
 				if msgType != unix.RTM_NEWADDR && msgType != unix.RTM_DELADDR {
328 329
 					if cberr != nil {
329 330
 						cberr(fmt.Errorf("bad message type: %d", msgType))
330 331
 					}
331
-					return
332
+					continue
332 333
 				}
333 334
 
334 335
 				addr, _, ifindex, err := parseAddr(m.Data)
... ...
@@ -336,7 +399,7 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
336 336
 					if cberr != nil {
337 337
 						cberr(fmt.Errorf("could not parse address: %v", err))
338 338
 					}
339
-					return
339
+					continue
340 340
 				}
341 341
 
342 342
 				ch <- AddrUpdate{LinkAddress: *addr.IPNet,
... ...
@@ -96,7 +96,7 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged
96 96
 		flags |= nl.BRIDGE_FLAGS_MASTER
97 97
 	}
98 98
 	if flags > 0 {
99
-		nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags))
99
+		br.AddRtAttr(nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags))
100 100
 	}
101 101
 	vlanInfo := &nl.BridgeVlanInfo{Vid: vid}
102 102
 	if pvid {
... ...
@@ -105,11 +105,8 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged
105 105
 	if untagged {
106 106
 		vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED
107 107
 	}
108
-	nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
108
+	br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
109 109
 	req.AddData(br)
110 110
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
111
-	if err != nil {
112
-		return err
113
-	}
114
-	return nil
111
+	return err
115 112
 }
... ...
@@ -4,25 +4,76 @@ import (
4 4
 	"fmt"
5 5
 )
6 6
 
7
+// Class interfaces for all classes
7 8
 type Class interface {
8 9
 	Attrs() *ClassAttrs
9 10
 	Type() string
10 11
 }
11 12
 
13
+// Generic networking statistics for netlink users.
14
+// This file contains "gnet_" prefixed structs and relevant functions.
15
+// See Documentation/networking/gen_stats.txt in Linux source code for more details.
16
+
17
+// GnetStatsBasic Ref: struct gnet_stats_basic { ... }
18
+type GnetStatsBasic struct {
19
+	Bytes   uint64 // number of seen bytes
20
+	Packets uint32 // number of seen packets
21
+}
22
+
23
+// GnetStatsRateEst Ref: struct gnet_stats_rate_est { ... }
24
+type GnetStatsRateEst struct {
25
+	Bps uint32 // current byte rate
26
+	Pps uint32 // current packet rate
27
+}
28
+
29
+// GnetStatsRateEst64 Ref: struct gnet_stats_rate_est64 { ... }
30
+type GnetStatsRateEst64 struct {
31
+	Bps uint64 // current byte rate
32
+	Pps uint64 // current packet rate
33
+}
34
+
35
+// GnetStatsQueue Ref: struct gnet_stats_queue { ... }
36
+type GnetStatsQueue struct {
37
+	Qlen       uint32 // queue length
38
+	Backlog    uint32 // backlog size of queue
39
+	Drops      uint32 // number of dropped packets
40
+	Requeues   uint32 // number of requeues
41
+	Overlimits uint32 // number of enqueues over the limit
42
+}
43
+
44
+// ClassStatistics representation based on generic networking statistics for netlink.
45
+// See Documentation/networking/gen_stats.txt in Linux source code for more details.
46
+type ClassStatistics struct {
47
+	Basic   *GnetStatsBasic
48
+	Queue   *GnetStatsQueue
49
+	RateEst *GnetStatsRateEst
50
+}
51
+
52
+// NewClassStatistics constructs a ClassStatistics struct whose fields are all initialized to 0.
53
+func NewClassStatistics() *ClassStatistics {
54
+	return &ClassStatistics{
55
+		Basic:   &GnetStatsBasic{},
56
+		Queue:   &GnetStatsQueue{},
57
+		RateEst: &GnetStatsRateEst{},
58
+	}
59
+}
60
+
12 61
 // ClassAttrs represents a netlink class. A class is associated with a link,
13 62
 // has a handle and a parent. The root class of a device should have a
14 63
 // parent == HANDLE_ROOT.
15 64
 type ClassAttrs struct {
16
-	LinkIndex int
17
-	Handle    uint32
18
-	Parent    uint32
19
-	Leaf      uint32
65
+	LinkIndex  int
66
+	Handle     uint32
67
+	Parent     uint32
68
+	Leaf       uint32
69
+	Statistics *ClassStatistics
20 70
 }
21 71
 
22 72
 func (q ClassAttrs) String() string {
23 73
 	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
24 74
 }
25 75
 
76
+// HtbClassAttrs stores the attributes of HTB class
26 77
 type HtbClassAttrs struct {
27 78
 	// TODO handle all attributes
28 79
 	Rate    uint64
... ...
@@ -54,10 +105,12 @@ func (q HtbClass) String() string {
54 54
 	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
55 55
 }
56 56
 
57
+// Attrs returns the class attributes
57 58
 func (q *HtbClass) Attrs() *ClassAttrs {
58 59
 	return &q.ClassAttrs
59 60
 }
60 61
 
62
+// Type return the class type
61 63
 func (q *HtbClass) Type() string {
62 64
 	return "htb"
63 65
 }
... ...
@@ -69,10 +122,90 @@ type GenericClass struct {
69 69
 	ClassType string
70 70
 }
71 71
 
72
+// Attrs return the class attributes
72 73
 func (class *GenericClass) Attrs() *ClassAttrs {
73 74
 	return &class.ClassAttrs
74 75
 }
75 76
 
77
+// Type return the class type
76 78
 func (class *GenericClass) Type() string {
77 79
 	return class.ClassType
78 80
 }
81
+
82
+// ServiceCurve is the way the HFSC curves are represented
83
+type ServiceCurve struct {
84
+	m1 uint32
85
+	d  uint32
86
+	m2 uint32
87
+}
88
+
89
+// Attrs return the parameters of the service curve
90
+func (c *ServiceCurve) Attrs() (uint32, uint32, uint32) {
91
+	return c.m1, c.d, c.m2
92
+}
93
+
94
+// HfscClass is a representation of the HFSC class
95
+type HfscClass struct {
96
+	ClassAttrs
97
+	Rsc ServiceCurve
98
+	Fsc ServiceCurve
99
+	Usc ServiceCurve
100
+}
101
+
102
+// SetUsc sets the Usc curve
103
+func (hfsc *HfscClass) SetUsc(m1 uint32, d uint32, m2 uint32) {
104
+	hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
105
+}
106
+
107
+// SetFsc sets the Fsc curve
108
+func (hfsc *HfscClass) SetFsc(m1 uint32, d uint32, m2 uint32) {
109
+	hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
110
+}
111
+
112
+// SetRsc sets the Rsc curve
113
+func (hfsc *HfscClass) SetRsc(m1 uint32, d uint32, m2 uint32) {
114
+	hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
115
+}
116
+
117
+// SetSC implements the SC from the tc CLI
118
+func (hfsc *HfscClass) SetSC(m1 uint32, d uint32, m2 uint32) {
119
+	hfsc.Rsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
120
+	hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
121
+}
122
+
123
+// SetUL implements the UL from the tc CLI
124
+func (hfsc *HfscClass) SetUL(m1 uint32, d uint32, m2 uint32) {
125
+	hfsc.Usc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
126
+}
127
+
128
+// SetLS implements the LS from the tc CLI
129
+func (hfsc *HfscClass) SetLS(m1 uint32, d uint32, m2 uint32) {
130
+	hfsc.Fsc = ServiceCurve{m1: m1 / 8, d: d, m2: m2 / 8}
131
+}
132
+
133
+// NewHfscClass returns a new HFSC struct with the set parameters
134
+func NewHfscClass(attrs ClassAttrs) *HfscClass {
135
+	return &HfscClass{
136
+		ClassAttrs: attrs,
137
+		Rsc:        ServiceCurve{},
138
+		Fsc:        ServiceCurve{},
139
+		Usc:        ServiceCurve{},
140
+	}
141
+}
142
+
143
+func (hfsc *HfscClass) String() string {
144
+	return fmt.Sprintf(
145
+		"{%s -- {RSC: {m1=%d d=%d m2=%d}} {FSC: {m1=%d d=%d m2=%d}} {USC: {m1=%d d=%d m2=%d}}}",
146
+		hfsc.Attrs(), hfsc.Rsc.m1*8, hfsc.Rsc.d, hfsc.Rsc.m2*8, hfsc.Fsc.m1*8, hfsc.Fsc.d, hfsc.Fsc.m2*8, hfsc.Usc.m1*8, hfsc.Usc.d, hfsc.Usc.m2*8,
147
+	)
148
+}
149
+
150
+// Attrs return the Hfsc parameters
151
+func (hfsc *HfscClass) Attrs() *ClassAttrs {
152
+	return &hfsc.ClassAttrs
153
+}
154
+
155
+// Type return the type of the class
156
+func (hfsc *HfscClass) Type() string {
157
+	return "hfsc"
158
+}
... ...
@@ -1,14 +1,34 @@
1 1
 package netlink
2 2
 
3 3
 import (
4
+	"bytes"
5
+	"encoding/binary"
6
+	"encoding/hex"
4 7
 	"errors"
8
+	"fmt"
5 9
 	"syscall"
6 10
 
7 11
 	"github.com/vishvananda/netlink/nl"
8 12
 	"golang.org/x/sys/unix"
9 13
 )
10 14
 
11
-// NOTE: function is in here because it uses other linux functions
15
+// Internal tc_stats representation in Go struct.
16
+// This is for internal uses only to deserialize the payload of rtattr.
17
+// After the deserialization, this should be converted into the canonical stats
18
+// struct, ClassStatistics, in case of statistics of a class.
19
+// Ref: struct tc_stats { ... }
20
+type tcStats struct {
21
+	Bytes      uint64 // Number of enqueued bytes
22
+	Packets    uint32 // Number of enqueued packets
23
+	Drops      uint32 // Packets dropped because of lack of resources
24
+	Overlimits uint32 // Number of throttle events when this flow goes out of allocated bandwidth
25
+	Bps        uint32 // Current flow byte rate
26
+	Pps        uint32 // Current flow packet rate
27
+	Qlen       uint32
28
+	Backlog    uint32
29
+}
30
+
31
+// NewHtbClass NOTE: function is in here because it uses other linux functions
12 32
 func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
13 33
 	mtu := 1600
14 34
 	rate := cattrs.Rate / 8
... ...
@@ -126,7 +146,9 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
126 126
 	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
127 127
 
128 128
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
129
-	if htb, ok := class.(*HtbClass); ok {
129
+	switch class.Type() {
130
+	case "htb":
131
+		htb := class.(*HtbClass)
130 132
 		opt := nl.TcHtbCopt{}
131 133
 		opt.Buffer = htb.Buffer
132 134
 		opt.Cbuffer = htb.Cbuffer
... ...
@@ -151,9 +173,18 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
151 151
 			return errors.New("HTB: failed to calculate ceil rate table")
152 152
 		}
153 153
 		opt.Ceil = tcceil
154
-		nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
155
-		nl.NewRtAttrChild(options, nl.TCA_HTB_RTAB, SerializeRtab(rtab))
156
-		nl.NewRtAttrChild(options, nl.TCA_HTB_CTAB, SerializeRtab(ctab))
154
+		options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize())
155
+		options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab))
156
+		options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab))
157
+	case "hfsc":
158
+		hfsc := class.(*HfscClass)
159
+		opt := nl.HfscCopt{}
160
+		opt.Rsc.Set(hfsc.Rsc.Attrs())
161
+		opt.Fsc.Set(hfsc.Fsc.Attrs())
162
+		opt.Usc.Set(hfsc.Usc.Attrs())
163
+		options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc))
164
+		options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc))
165
+		options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc))
157 166
 	}
158 167
 	req.AddData(options)
159 168
 	return nil
... ...
@@ -197,9 +228,10 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
197 197
 		}
198 198
 
199 199
 		base := ClassAttrs{
200
-			LinkIndex: int(msg.Ifindex),
201
-			Handle:    msg.Handle,
202
-			Parent:    msg.Parent,
200
+			LinkIndex:  int(msg.Ifindex),
201
+			Handle:     msg.Handle,
202
+			Parent:     msg.Parent,
203
+			Statistics: nil,
203 204
 		}
204 205
 
205 206
 		var class Class
... ...
@@ -211,6 +243,8 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
211 211
 				switch classType {
212 212
 				case "htb":
213 213
 					class = &HtbClass{}
214
+				case "hfsc":
215
+					class = &HfscClass{}
214 216
 				default:
215 217
 					class = &GenericClass{ClassType: classType}
216 218
 				}
... ...
@@ -225,6 +259,26 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
225 225
 					if err != nil {
226 226
 						return nil, err
227 227
 					}
228
+				case "hfsc":
229
+					data, err := nl.ParseRouteAttr(attr.Value)
230
+					if err != nil {
231
+						return nil, err
232
+					}
233
+					_, err = parseHfscClassData(class, data)
234
+					if err != nil {
235
+						return nil, err
236
+					}
237
+				}
238
+			// For backward compatibility.
239
+			case nl.TCA_STATS:
240
+				base.Statistics, err = parseTcStats(attr.Value)
241
+				if err != nil {
242
+					return nil, err
243
+				}
244
+			case nl.TCA_STATS2:
245
+				base.Statistics, err = parseTcStats2(attr.Value)
246
+				if err != nil {
247
+					return nil, err
228 248
 				}
229 249
 			}
230 250
 		}
... ...
@@ -253,3 +307,78 @@ func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, erro
253 253
 	}
254 254
 	return detailed, nil
255 255
 }
256
+
257
+func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
258
+	hfsc := class.(*HfscClass)
259
+	detailed := false
260
+	for _, datum := range data {
261
+		m1, d, m2 := nl.DeserializeHfscCurve(datum.Value).Attrs()
262
+		switch datum.Attr.Type {
263
+		case nl.TCA_HFSC_RSC:
264
+			hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2}
265
+		case nl.TCA_HFSC_FSC:
266
+			hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2}
267
+		case nl.TCA_HFSC_USC:
268
+			hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2}
269
+		}
270
+	}
271
+	return detailed, nil
272
+}
273
+
274
+func parseTcStats(data []byte) (*ClassStatistics, error) {
275
+	buf := &bytes.Buffer{}
276
+	buf.Write(data)
277
+	native := nl.NativeEndian()
278
+	tcStats := &tcStats{}
279
+	if err := binary.Read(buf, native, tcStats); err != nil {
280
+		return nil, err
281
+	}
282
+
283
+	stats := NewClassStatistics()
284
+	stats.Basic.Bytes = tcStats.Bytes
285
+	stats.Basic.Packets = tcStats.Packets
286
+	stats.Queue.Qlen = tcStats.Qlen
287
+	stats.Queue.Backlog = tcStats.Backlog
288
+	stats.Queue.Drops = tcStats.Drops
289
+	stats.Queue.Overlimits = tcStats.Overlimits
290
+	stats.RateEst.Bps = tcStats.Bps
291
+	stats.RateEst.Pps = tcStats.Pps
292
+
293
+	return stats, nil
294
+}
295
+
296
+func parseGnetStats(data []byte, gnetStats interface{}) error {
297
+	buf := &bytes.Buffer{}
298
+	buf.Write(data)
299
+	native := nl.NativeEndian()
300
+	return binary.Read(buf, native, gnetStats)
301
+}
302
+
303
+func parseTcStats2(data []byte) (*ClassStatistics, error) {
304
+	rtAttrs, err := nl.ParseRouteAttr(data)
305
+	if err != nil {
306
+		return nil, err
307
+	}
308
+	stats := NewClassStatistics()
309
+	for _, datum := range rtAttrs {
310
+		switch datum.Attr.Type {
311
+		case nl.TCA_STATS_BASIC:
312
+			if err := parseGnetStats(datum.Value, stats.Basic); err != nil {
313
+				return nil, fmt.Errorf("Failed to parse ClassStatistics.Basic with: %v\n%s",
314
+					err, hex.Dump(datum.Value))
315
+			}
316
+		case nl.TCA_STATS_QUEUE:
317
+			if err := parseGnetStats(datum.Value, stats.Queue); err != nil {
318
+				return nil, fmt.Errorf("Failed to parse ClassStatistics.Queue with: %v\n%s",
319
+					err, hex.Dump(datum.Value))
320
+			}
321
+		case nl.TCA_STATS_RATE_EST:
322
+			if err := parseGnetStats(datum.Value, stats.RateEst); err != nil {
323
+				return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s",
324
+					err, hex.Dump(datum.Value))
325
+			}
326
+		}
327
+	}
328
+
329
+	return stats, nil
330
+}
... ...
@@ -22,11 +22,7 @@ const (
22 22
 	// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2
23 23
 	ConntrackExpectTable = 2
24 24
 )
25
-const (
26
-	// For Parsing Mark
27
-	TCP_PROTO = 6
28
-	UDP_PROTO = 17
29
-)
25
+
30 26
 const (
31 27
 	// backward compatibility with golang 1.6 which does not have io.SeekCurrent
32 28
 	seekCurrent = 1
... ...
@@ -135,11 +131,13 @@ func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily)
135 135
 // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h
136 136
 // For the time being, the structure below allows to parse and extract the base information of a flow
137 137
 type ipTuple struct {
138
-	SrcIP    net.IP
138
+	Bytes    uint64
139 139
 	DstIP    net.IP
140
+	DstPort  uint16
141
+	Packets  uint64
140 142
 	Protocol uint8
143
+	SrcIP    net.IP
141 144
 	SrcPort  uint16
142
-	DstPort  uint16
143 145
 }
144 146
 
145 147
 type ConntrackFlow struct {
... ...
@@ -151,11 +149,12 @@ type ConntrackFlow struct {
151 151
 
152 152
 func (s *ConntrackFlow) String() string {
153 153
 	// conntrack cmd output:
154
-	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 mark=0
155
-	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d mark=%d",
154
+	// udp      17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0
155
+	return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d",
156 156
 		nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
157
-		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort,
158
-		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Mark)
157
+		s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
158
+		s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
159
+		s.Mark)
159 160
 }
160 161
 
161 162
 // This method parse the ip tuple structure
... ...
@@ -220,9 +219,35 @@ func parseBERaw16(r *bytes.Reader, v *uint16) {
220 220
 	binary.Read(r, binary.BigEndian, v)
221 221
 }
222 222
 
223
+func parseBERaw32(r *bytes.Reader, v *uint32) {
224
+	binary.Read(r, binary.BigEndian, v)
225
+}
226
+
227
+func parseBERaw64(r *bytes.Reader, v *uint64) {
228
+	binary.Read(r, binary.BigEndian, v)
229
+}
230
+
231
+func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
232
+	for i := 0; i < 2; i++ {
233
+		switch _, t, _ := parseNfAttrTL(r); t {
234
+		case nl.CTA_COUNTERS_BYTES:
235
+			parseBERaw64(r, &bytes)
236
+		case nl.CTA_COUNTERS_PACKETS:
237
+			parseBERaw64(r, &packets)
238
+		default:
239
+			return
240
+		}
241
+	}
242
+	return
243
+}
244
+
245
+func parseConnectionMark(r *bytes.Reader) (mark uint32) {
246
+	parseBERaw32(r, &mark)
247
+	return
248
+}
249
+
223 250
 func parseRawData(data []byte) *ConntrackFlow {
224 251
 	s := &ConntrackFlow{}
225
-	var proto uint8
226 252
 	// First there is the Nfgenmsg header
227 253
 	// consume only the family field
228 254
 	reader := bytes.NewReader(data)
... ...
@@ -238,36 +263,31 @@ func parseRawData(data []byte) *ConntrackFlow {
238 238
 	// <len, NLA_F_NESTED|CTA_TUPLE_IP> 4 bytes
239 239
 	// flow information of the reverse flow
240 240
 	for reader.Len() > 0 {
241
-		nested, t, l := parseNfAttrTL(reader)
242
-		if nested && t == nl.CTA_TUPLE_ORIG {
243
-			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
244
-				proto = parseIpTuple(reader, &s.Forward)
241
+		if nested, t, l := parseNfAttrTL(reader); nested {
242
+			switch t {
243
+			case nl.CTA_TUPLE_ORIG:
244
+				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
245
+					parseIpTuple(reader, &s.Forward)
246
+				}
247
+			case nl.CTA_TUPLE_REPLY:
248
+				if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
249
+					parseIpTuple(reader, &s.Reverse)
250
+				} else {
251
+					// Header not recognized skip it
252
+					reader.Seek(int64(l), seekCurrent)
253
+				}
254
+			case nl.CTA_COUNTERS_ORIG:
255
+				s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
256
+			case nl.CTA_COUNTERS_REPLY:
257
+				s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
245 258
 			}
246
-		} else if nested && t == nl.CTA_TUPLE_REPLY {
247
-			if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
248
-				parseIpTuple(reader, &s.Reverse)
249
-
250
-				// Got all the useful information stop parsing
251
-				break
252
-			} else {
253
-				// Header not recognized skip it
254
-				reader.Seek(int64(l), seekCurrent)
259
+		} else {
260
+			switch t {
261
+			case nl.CTA_MARK:
262
+				s.Mark = parseConnectionMark(reader)
255 263
 			}
256 264
 		}
257 265
 	}
258
-	if proto == TCP_PROTO {
259
-		reader.Seek(64, seekCurrent)
260
-		_, t, _, v := parseNfAttrTLV(reader)
261
-		if t == nl.CTA_MARK {
262
-			s.Mark = uint32(v[3])
263
-		}
264
-	} else if proto == UDP_PROTO {
265
-		reader.Seek(16, seekCurrent)
266
-		_, t, _, v := parseNfAttrTLV(reader)
267
-		if t == nl.CTA_MARK {
268
-			s.Mark = uint32(v[3])
269
-		}
270
-	}
271 266
 	return s
272 267
 }
273 268
 
... ...
@@ -285,7 +305,7 @@ func parseRawData(data []byte) *ConntrackFlow {
285 285
 // Common parameters and options:
286 286
 //   -s, --src, --orig-src ip              Source address from original direction
287 287
 //   -d, --dst, --orig-dst ip              Destination address from original direction
288
-//   -r, --reply-src ip            Source addres from reply direction
288
+//   -r, --reply-src ip            Source address from reply direction
289 289
 //   -q, --reply-dst ip            Destination address from reply direction
290 290
 //   -p, --protonum proto          Layer 4 Protocol, eg. 'tcp'
291 291
 //   -f, --family proto            Layer 3 Protocol, eg. 'ipv6'
... ...
@@ -302,11 +322,14 @@ func parseRawData(data []byte) *ConntrackFlow {
302 302
 type ConntrackFilterType uint8
303 303
 
304 304
 const (
305
-	ConntrackOrigSrcIP = iota // -orig-src ip   Source address from original direction
306
-	ConntrackOrigDstIP        // -orig-dst ip   Destination address from original direction
307
-	ConntrackNatSrcIP         // -src-nat ip    Source NAT ip
308
-	ConntrackNatDstIP         // -dst-nat ip    Destination NAT ip
309
-	ConntrackNatAnyIP         // -any-nat ip    Source or destination NAT ip
305
+	ConntrackOrigSrcIP  = iota                // -orig-src ip    Source address from original direction
306
+	ConntrackOrigDstIP                        // -orig-dst ip    Destination address from original direction
307
+	ConntrackReplySrcIP                       // --reply-src ip  Reply Source IP
308
+	ConntrackReplyDstIP                       // --reply-dst ip  Reply Destination IP
309
+	ConntrackReplyAnyIP                       // Match source or destination reply IP
310
+	ConntrackNatSrcIP   = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP
311
+	ConntrackNatDstIP   = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP
312
+	ConntrackNatAnyIP   = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP
310 313
 )
311 314
 
312 315
 type CustomConntrackFilter interface {
... ...
@@ -351,17 +374,17 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
351 351
 	}
352 352
 
353 353
 	// -src-nat ip    Source NAT ip
354
-	if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found {
354
+	if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found {
355 355
 		match = match && elem.Equal(flow.Reverse.SrcIP)
356 356
 	}
357 357
 
358 358
 	// -dst-nat ip    Destination NAT ip
359
-	if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found {
359
+	if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found {
360 360
 		match = match && elem.Equal(flow.Reverse.DstIP)
361 361
 	}
362 362
 
363
-	// -any-nat ip    Source or destination NAT ip
364
-	if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found {
363
+	// Match source or destination reply IP
364
+	if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found {
365 365
 		match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
366 366
 	}
367 367
 
368 368
new file mode 100644
... ...
@@ -0,0 +1,272 @@
0
+package netlink
1
+
2
+import (
3
+	"syscall"
4
+
5
+	"fmt"
6
+	"github.com/vishvananda/netlink/nl"
7
+	"golang.org/x/sys/unix"
8
+)
9
+
10
+// DevlinkDevEswitchAttr represents device's eswitch attributes
11
+type DevlinkDevEswitchAttr struct {
12
+	Mode       string
13
+	InlineMode string
14
+	EncapMode  string
15
+}
16
+
17
+// DevlinkDevAttrs represents device attributes
18
+type DevlinkDevAttrs struct {
19
+	Eswitch DevlinkDevEswitchAttr
20
+}
21
+
22
+// DevlinkDevice represents device and its attributes
23
+type DevlinkDevice struct {
24
+	BusName    string
25
+	DeviceName string
26
+	Attrs      DevlinkDevAttrs
27
+}
28
+
29
+func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) {
30
+	devices := make([]*DevlinkDevice, 0, len(msgs))
31
+	for _, m := range msgs {
32
+		attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
33
+		if err != nil {
34
+			return nil, err
35
+		}
36
+		dev := &DevlinkDevice{}
37
+		if err = dev.parseAttributes(attrs); err != nil {
38
+			return nil, err
39
+		}
40
+		devices = append(devices, dev)
41
+	}
42
+	return devices, nil
43
+}
44
+
45
+func eswitchStringToMode(modeName string) (uint16, error) {
46
+	if modeName == "legacy" {
47
+		return nl.DEVLINK_ESWITCH_MODE_LEGACY, nil
48
+	} else if modeName == "switchdev" {
49
+		return nl.DEVLINK_ESWITCH_MODE_SWITCHDEV, nil
50
+	} else {
51
+		return 0xffff, fmt.Errorf("invalid switchdev mode")
52
+	}
53
+}
54
+
55
+func parseEswitchMode(mode uint16) string {
56
+	var eswitchMode = map[uint16]string{
57
+		nl.DEVLINK_ESWITCH_MODE_LEGACY:    "legacy",
58
+		nl.DEVLINK_ESWITCH_MODE_SWITCHDEV: "switchdev",
59
+	}
60
+	if eswitchMode[mode] == "" {
61
+		return "unknown"
62
+	} else {
63
+		return eswitchMode[mode]
64
+	}
65
+}
66
+
67
+func parseEswitchInlineMode(inlinemode uint8) string {
68
+	var eswitchInlineMode = map[uint8]string{
69
+		nl.DEVLINK_ESWITCH_INLINE_MODE_NONE:      "none",
70
+		nl.DEVLINK_ESWITCH_INLINE_MODE_LINK:      "link",
71
+		nl.DEVLINK_ESWITCH_INLINE_MODE_NETWORK:   "network",
72
+		nl.DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT: "transport",
73
+	}
74
+	if eswitchInlineMode[inlinemode] == "" {
75
+		return "unknown"
76
+	} else {
77
+		return eswitchInlineMode[inlinemode]
78
+	}
79
+}
80
+
81
+func parseEswitchEncapMode(encapmode uint8) string {
82
+	var eswitchEncapMode = map[uint8]string{
83
+		nl.DEVLINK_ESWITCH_ENCAP_MODE_NONE:  "disable",
84
+		nl.DEVLINK_ESWITCH_ENCAP_MODE_BASIC: "enable",
85
+	}
86
+	if eswitchEncapMode[encapmode] == "" {
87
+		return "unknown"
88
+	} else {
89
+		return eswitchEncapMode[encapmode]
90
+	}
91
+}
92
+
93
+func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error {
94
+	for _, a := range attrs {
95
+		switch a.Attr.Type {
96
+		case nl.DEVLINK_ATTR_BUS_NAME:
97
+			d.BusName = string(a.Value)
98
+		case nl.DEVLINK_ATTR_DEV_NAME:
99
+			d.DeviceName = string(a.Value)
100
+		case nl.DEVLINK_ATTR_ESWITCH_MODE:
101
+			d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value))
102
+		case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE:
103
+			d.Attrs.Eswitch.InlineMode = parseEswitchInlineMode(uint8(a.Value[0]))
104
+		case nl.DEVLINK_ATTR_ESWITCH_ENCAP_MODE:
105
+			d.Attrs.Eswitch.EncapMode = parseEswitchEncapMode(uint8(a.Value[0]))
106
+		}
107
+	}
108
+	return nil
109
+}
110
+
111
+func (dev *DevlinkDevice) parseEswitchAttrs(msgs [][]byte) {
112
+	m := msgs[0]
113
+	attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
114
+	if err != nil {
115
+		return
116
+	}
117
+	dev.parseAttributes(attrs)
118
+}
119
+
120
+func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) {
121
+	msg := &nl.Genlmsg{
122
+		Command: nl.DEVLINK_CMD_ESWITCH_GET,
123
+		Version: nl.GENL_DEVLINK_VERSION,
124
+	}
125
+	req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK)
126
+	req.AddData(msg)
127
+
128
+	b := make([]byte, len(dev.BusName))
129
+	copy(b, dev.BusName)
130
+	data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b)
131
+	req.AddData(data)
132
+
133
+	b = make([]byte, len(dev.DeviceName))
134
+	copy(b, dev.DeviceName)
135
+	data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b)
136
+	req.AddData(data)
137
+
138
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
139
+	if err != nil {
140
+		return
141
+	}
142
+	dev.parseEswitchAttrs(msgs)
143
+}
144
+
145
+// DevLinkGetDeviceList provides a pointer to devlink devices and nil error,
146
+// otherwise returns an error code.
147
+func (h *Handle) DevLinkGetDeviceList() ([]*DevlinkDevice, error) {
148
+	f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME)
149
+	if err != nil {
150
+		return nil, err
151
+	}
152
+	msg := &nl.Genlmsg{
153
+		Command: nl.DEVLINK_CMD_GET,
154
+		Version: nl.GENL_DEVLINK_VERSION,
155
+	}
156
+	req := h.newNetlinkRequest(int(f.ID),
157
+		unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP)
158
+	req.AddData(msg)
159
+	msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
160
+	if err != nil {
161
+		return nil, err
162
+	}
163
+	devices, err := parseDevLinkDeviceList(msgs)
164
+	if err != nil {
165
+		return nil, err
166
+	}
167
+	for _, d := range devices {
168
+		h.getEswitchAttrs(f, d)
169
+	}
170
+	return devices, nil
171
+}
172
+
173
+// DevLinkGetDeviceList provides a pointer to devlink devices and nil error,
174
+// otherwise returns an error code.
175
+func DevLinkGetDeviceList() ([]*DevlinkDevice, error) {
176
+	return pkgHandle.DevLinkGetDeviceList()
177
+}
178
+
179
+func parseDevlinkDevice(msgs [][]byte) (*DevlinkDevice, error) {
180
+	m := msgs[0]
181
+	attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
182
+	if err != nil {
183
+		return nil, err
184
+	}
185
+	dev := &DevlinkDevice{}
186
+	if err = dev.parseAttributes(attrs); err != nil {
187
+		return nil, err
188
+	}
189
+	return dev, nil
190
+}
191
+
192
+func (h *Handle) createCmdReq(cmd uint8, bus string, device string) (*GenlFamily, *nl.NetlinkRequest, error) {
193
+	f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME)
194
+	if err != nil {
195
+		return nil, nil, err
196
+	}
197
+
198
+	msg := &nl.Genlmsg{
199
+		Command: cmd,
200
+		Version: nl.GENL_DEVLINK_VERSION,
201
+	}
202
+	req := h.newNetlinkRequest(int(f.ID),
203
+		unix.NLM_F_REQUEST|unix.NLM_F_ACK)
204
+	req.AddData(msg)
205
+
206
+	b := make([]byte, len(bus)+1)
207
+	copy(b, bus)
208
+	data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b)
209
+	req.AddData(data)
210
+
211
+	b = make([]byte, len(device)+1)
212
+	copy(b, device)
213
+	data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b)
214
+	req.AddData(data)
215
+
216
+	return f, req, nil
217
+}
218
+
219
+// DevLinkGetDeviceByName provides a pointer to devlink device and nil error,
220
+// otherwise returns an error code.
221
+func (h *Handle) DevLinkGetDeviceByName(Bus string, Device string) (*DevlinkDevice, error) {
222
+	f, req, err := h.createCmdReq(nl.DEVLINK_CMD_GET, Bus, Device)
223
+	if err != nil {
224
+		return nil, err
225
+	}
226
+
227
+	respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0)
228
+	if err != nil {
229
+		return nil, err
230
+	}
231
+	dev, err := parseDevlinkDevice(respmsg)
232
+	if err == nil {
233
+		h.getEswitchAttrs(f, dev)
234
+	}
235
+	return dev, err
236
+}
237
+
238
+// DevLinkGetDeviceByName provides a pointer to devlink device and nil error,
239
+// otherwise returns an error code.
240
+func DevLinkGetDeviceByName(Bus string, Device string) (*DevlinkDevice, error) {
241
+	return pkgHandle.DevLinkGetDeviceByName(Bus, Device)
242
+}
243
+
244
+// DevLinkSetEswitchMode sets eswitch mode if able to set successfully or
245
+// returns an error code.
246
+// Equivalent to: `devlink dev eswitch set $dev mode switchdev`
247
+// Equivalent to: `devlink dev eswitch set $dev mode legacy`
248
+func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error {
249
+	mode, err := eswitchStringToMode(NewMode)
250
+	if err != nil {
251
+		return err
252
+	}
253
+
254
+	_, req, err := h.createCmdReq(nl.DEVLINK_CMD_ESWITCH_SET, Dev.BusName, Dev.DeviceName)
255
+	if err != nil {
256
+		return err
257
+	}
258
+
259
+	req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_ESWITCH_MODE, nl.Uint16Attr(mode)))
260
+
261
+	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
262
+	return err
263
+}
264
+
265
+// DevLinkSetEswitchMode sets eswitch mode if able to set successfully or
266
+// returns an error code.
267
+// Equivalent to: `devlink dev eswitch set $dev mode switchdev`
268
+// Equivalent to: `devlink dev eswitch set $dev mode legacy`
269
+func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error {
270
+	return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode)
271
+}
... ...
@@ -2,6 +2,7 @@ package netlink
2 2
 
3 3
 import (
4 4
 	"fmt"
5
+	"net"
5 6
 )
6 7
 
7 8
 type Filter interface {
... ...
@@ -135,6 +136,27 @@ func (action *BpfAction) Attrs() *ActionAttrs {
135 135
 	return &action.ActionAttrs
136 136
 }
137 137
 
138
+type ConnmarkAction struct {
139
+	ActionAttrs
140
+	Zone uint16
141
+}
142
+
143
+func (action *ConnmarkAction) Type() string {
144
+	return "connmark"
145
+}
146
+
147
+func (action *ConnmarkAction) Attrs() *ActionAttrs {
148
+	return &action.ActionAttrs
149
+}
150
+
151
+func NewConnmarkAction() *ConnmarkAction {
152
+	return &ConnmarkAction{
153
+		ActionAttrs: ActionAttrs{
154
+			Action: TC_ACT_PIPE,
155
+		},
156
+	}
157
+}
158
+
138 159
 type MirredAct uint8
139 160
 
140 161
 func (a MirredAct) String() string {
... ...
@@ -182,47 +204,59 @@ func NewMirredAction(redirIndex int) *MirredAction {
182 182
 	}
183 183
 }
184 184
 
185
-// Sel of the U32 filters that contains multiple TcU32Key. This is the copy
186
-// and the frontend representation of nl.TcU32Sel. It is serialized into canonical
187
-// nl.TcU32Sel with the appropriate endianness.
188
-type TcU32Sel struct {
189
-	Flags    uint8
190
-	Offshift uint8
191
-	Nkeys    uint8
192
-	Pad      uint8
193
-	Offmask  uint16
194
-	Off      uint16
195
-	Offoff   int16
196
-	Hoff     int16
197
-	Hmask    uint32
198
-	Keys     []TcU32Key
199
-}
200
-
201
-// TcU32Key contained of Sel in the U32 filters. This is the copy and the frontend
202
-// representation of nl.TcU32Key. It is serialized into chanonical nl.TcU32Sel
203
-// with the appropriate endianness.
204
-type TcU32Key struct {
205
-	Mask    uint32
206
-	Val     uint32
207
-	Off     int32
208
-	OffMask int32
209
-}
210
-
211
-// U32 filters on many packet related properties
212
-type U32 struct {
213
-	FilterAttrs
214
-	ClassId    uint32
215
-	RedirIndex int
216
-	Sel        *TcU32Sel
217
-	Actions    []Action
185
+type TunnelKeyAct int8
186
+
187
+const (
188
+	TCA_TUNNEL_KEY_SET   TunnelKeyAct = 1 // set tunnel key
189
+	TCA_TUNNEL_KEY_UNSET TunnelKeyAct = 2 // unset tunnel key
190
+)
191
+
192
+type TunnelKeyAction struct {
193
+	ActionAttrs
194
+	Action  TunnelKeyAct
195
+	SrcAddr net.IP
196
+	DstAddr net.IP
197
+	KeyID   uint32
218 198
 }
219 199
 
220
-func (filter *U32) Attrs() *FilterAttrs {
221
-	return &filter.FilterAttrs
200
+func (action *TunnelKeyAction) Type() string {
201
+	return "tunnel_key"
222 202
 }
223 203
 
224
-func (filter *U32) Type() string {
225
-	return "u32"
204
+func (action *TunnelKeyAction) Attrs() *ActionAttrs {
205
+	return &action.ActionAttrs
206
+}
207
+
208
+func NewTunnelKeyAction() *TunnelKeyAction {
209
+	return &TunnelKeyAction{
210
+		ActionAttrs: ActionAttrs{
211
+			Action: TC_ACT_PIPE,
212
+		},
213
+	}
214
+}
215
+
216
+type SkbEditAction struct {
217
+	ActionAttrs
218
+	QueueMapping *uint16
219
+	PType        *uint16
220
+	Priority     *uint32
221
+	Mark         *uint32
222
+}
223
+
224
+func (action *SkbEditAction) Type() string {
225
+	return "skbedit"
226
+}
227
+
228
+func (action *SkbEditAction) Attrs() *ActionAttrs {
229
+	return &action.ActionAttrs
230
+}
231
+
232
+func NewSkbEditAction() *SkbEditAction {
233
+	return &SkbEditAction{
234
+		ActionAttrs: ActionAttrs{
235
+			Action: TC_ACT_PIPE,
236
+		},
237
+	}
226 238
 }
227 239
 
228 240
 // MatchAll filters match all packets
... ...
@@ -262,6 +296,8 @@ type BpfFilter struct {
262 262
 	Fd           int
263 263
 	Name         string
264 264
 	DirectAction bool
265
+	Id           int
266
+	Tag          string
265 267
 }
266 268
 
267 269
 func (filter *BpfFilter) Type() string {
... ...
@@ -3,10 +3,11 @@ package netlink
3 3
 import (
4 4
 	"bytes"
5 5
 	"encoding/binary"
6
+	"encoding/hex"
6 7
 	"errors"
7 8
 	"fmt"
9
+	"net"
8 10
 	"syscall"
9
-	"unsafe"
10 11
 
11 12
 	"github.com/vishvananda/netlink/nl"
12 13
 	"golang.org/x/sys/unix"
... ...
@@ -20,6 +21,35 @@ const (
20 20
 	TC_U32_EAT       = nl.TC_U32_EAT
21 21
 )
22 22
 
23
+// Sel of the U32 filters that contains multiple TcU32Key. This is the type
24
+// alias and the frontend representation of nl.TcU32Sel. It is serialized into
25
+// canonical nl.TcU32Sel with the appropriate endianness.
26
+type TcU32Sel = nl.TcU32Sel
27
+
28
+// TcU32Key contained of Sel in the U32 filters. This is the type alias and the
29
+// frontend representation of nl.TcU32Key. It is serialized into canonical
30
+// nl.TcU32Sel with the appropriate endianness.
31
+type TcU32Key = nl.TcU32Key
32
+
33
+// U32 filters on many packet related properties
34
+type U32 struct {
35
+	FilterAttrs
36
+	ClassId    uint32
37
+	Divisor    uint32 // Divisor MUST be power of 2.
38
+	Hash       uint32
39
+	RedirIndex int
40
+	Sel        *TcU32Sel
41
+	Actions    []Action
42
+}
43
+
44
+func (filter *U32) Attrs() *FilterAttrs {
45
+	return &filter.FilterAttrs
46
+}
47
+
48
+func (filter *U32) Type() string {
49
+	return "u32"
50
+}
51
+
23 52
 // Fw filter filters on firewall marks
24 53
 // NOTE: this is in filter_linux because it refers to nl.TcPolice which
25 54
 //       is defined in nl/tc_linux.go
... ...
@@ -123,8 +153,24 @@ func FilterAdd(filter Filter) error {
123 123
 // FilterAdd will add a filter to the system.
124 124
 // Equivalent to: `tc filter add $filter`
125 125
 func (h *Handle) FilterAdd(filter Filter) error {
126
+	return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL)
127
+}
128
+
129
+// FilterReplace will replace a filter.
130
+// Equivalent to: `tc filter replace $filter`
131
+func FilterReplace(filter Filter) error {
132
+	return pkgHandle.FilterReplace(filter)
133
+}
134
+
135
+// FilterReplace will replace a filter.
136
+// Equivalent to: `tc filter replace $filter`
137
+func (h *Handle) FilterReplace(filter Filter) error {
138
+	return h.filterModify(filter, unix.NLM_F_CREATE)
139
+}
140
+
141
+func (h *Handle) filterModify(filter Filter, flags int) error {
126 142
 	native = nl.NativeEndian()
127
-	req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
143
+	req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK)
128 144
 	base := filter.Attrs()
129 145
 	msg := &nl.TcMsg{
130 146
 		Family:  nl.FAMILY_ALL,
... ...
@@ -140,8 +186,7 @@ func (h *Handle) FilterAdd(filter Filter) error {
140 140
 
141 141
 	switch filter := filter.(type) {
142 142
 	case *U32:
143
-		// Convert TcU32Sel into nl.TcU32Sel as it is without copy.
144
-		sel := (*nl.TcU32Sel)(unsafe.Pointer(filter.Sel))
143
+		sel := filter.Sel
145 144
 		if sel == nil {
146 145
 			// match all
147 146
 			sel = &nl.TcU32Sel{
... ...
@@ -168,11 +213,20 @@ func (h *Handle) FilterAdd(filter Filter) error {
168 168
 			}
169 169
 		}
170 170
 		sel.Nkeys = uint8(len(sel.Keys))
171
-		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
171
+		options.AddRtAttr(nl.TCA_U32_SEL, sel.Serialize())
172 172
 		if filter.ClassId != 0 {
173
-			nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(filter.ClassId))
173
+			options.AddRtAttr(nl.TCA_U32_CLASSID, nl.Uint32Attr(filter.ClassId))
174
+		}
175
+		if filter.Divisor != 0 {
176
+			if (filter.Divisor-1)&filter.Divisor != 0 {
177
+				return fmt.Errorf("illegal divisor %d. Must be a power of 2", filter.Divisor)
178
+			}
179
+			options.AddRtAttr(nl.TCA_U32_DIVISOR, nl.Uint32Attr(filter.Divisor))
174 180
 		}
175
-		actionsAttr := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
181
+		if filter.Hash != 0 {
182
+			options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash))
183
+		}
184
+		actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil)
176 185
 		// backwards compatibility
177 186
 		if filter.RedirIndex != 0 {
178 187
 			filter.Actions = append([]Action{NewMirredAction(filter.RedirIndex)}, filter.Actions...)
... ...
@@ -184,51 +238,51 @@ func (h *Handle) FilterAdd(filter Filter) error {
184 184
 		if filter.Mask != 0 {
185 185
 			b := make([]byte, 4)
186 186
 			native.PutUint32(b, filter.Mask)
187
-			nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
187
+			options.AddRtAttr(nl.TCA_FW_MASK, b)
188 188
 		}
189 189
 		if filter.InDev != "" {
190
-			nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev))
190
+			options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev))
191 191
 		}
192 192
 		if (filter.Police != nl.TcPolice{}) {
193 193
 
194
-			police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
195
-			nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, filter.Police.Serialize())
194
+			police := options.AddRtAttr(nl.TCA_FW_POLICE, nil)
195
+			police.AddRtAttr(nl.TCA_POLICE_TBF, filter.Police.Serialize())
196 196
 			if (filter.Police.Rate != nl.TcRateSpec{}) {
197 197
 				payload := SerializeRtab(filter.Rtab)
198
-				nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
198
+				police.AddRtAttr(nl.TCA_POLICE_RATE, payload)
199 199
 			}
200 200
 			if (filter.Police.PeakRate != nl.TcRateSpec{}) {
201 201
 				payload := SerializeRtab(filter.Ptab)
202
-				nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
202
+				police.AddRtAttr(nl.TCA_POLICE_PEAKRATE, payload)
203 203
 			}
204 204
 		}
205 205
 		if filter.ClassId != 0 {
206 206
 			b := make([]byte, 4)
207 207
 			native.PutUint32(b, filter.ClassId)
208
-			nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
208
+			options.AddRtAttr(nl.TCA_FW_CLASSID, b)
209 209
 		}
210 210
 	case *BpfFilter:
211 211
 		var bpfFlags uint32
212 212
 		if filter.ClassId != 0 {
213
-			nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(filter.ClassId))
213
+			options.AddRtAttr(nl.TCA_BPF_CLASSID, nl.Uint32Attr(filter.ClassId))
214 214
 		}
215 215
 		if filter.Fd >= 0 {
216
-			nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(filter.Fd))))
216
+			options.AddRtAttr(nl.TCA_BPF_FD, nl.Uint32Attr((uint32(filter.Fd))))
217 217
 		}
218 218
 		if filter.Name != "" {
219
-			nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(filter.Name))
219
+			options.AddRtAttr(nl.TCA_BPF_NAME, nl.ZeroTerminated(filter.Name))
220 220
 		}
221 221
 		if filter.DirectAction {
222 222
 			bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT
223 223
 		}
224
-		nl.NewRtAttrChild(options, nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags))
224
+		options.AddRtAttr(nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags))
225 225
 	case *MatchAll:
226
-		actionsAttr := nl.NewRtAttrChild(options, nl.TCA_MATCHALL_ACT, nil)
226
+		actionsAttr := options.AddRtAttr(nl.TCA_MATCHALL_ACT, nil)
227 227
 		if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
228 228
 			return err
229 229
 		}
230 230
 		if filter.ClassId != 0 {
231
-			nl.NewRtAttrChild(options, nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId))
231
+			options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId))
232 232
 		}
233 233
 	}
234 234
 
... ...
@@ -366,34 +420,91 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error {
366 366
 		default:
367 367
 			return fmt.Errorf("unknown action type %s", action.Type())
368 368
 		case *MirredAction:
369
-			table := nl.NewRtAttrChild(attr, tabIndex, nil)
369
+			table := attr.AddRtAttr(tabIndex, nil)
370 370
 			tabIndex++
371
-			nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred"))
372
-			aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
371
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred"))
372
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
373 373
 			mirred := nl.TcMirred{
374 374
 				Eaction: int32(action.MirredAction),
375 375
 				Ifindex: uint32(action.Ifindex),
376 376
 			}
377 377
 			toTcGen(action.Attrs(), &mirred.TcGen)
378
-			nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mirred.Serialize())
378
+			aopts.AddRtAttr(nl.TCA_MIRRED_PARMS, mirred.Serialize())
379
+		case *TunnelKeyAction:
380
+			table := attr.AddRtAttr(tabIndex, nil)
381
+			tabIndex++
382
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("tunnel_key"))
383
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
384
+			tun := nl.TcTunnelKey{
385
+				Action: int32(action.Action),
386
+			}
387
+			toTcGen(action.Attrs(), &tun.TcGen)
388
+			aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_PARMS, tun.Serialize())
389
+			if action.Action == TCA_TUNNEL_KEY_SET {
390
+				aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_KEY_ID, htonl(action.KeyID))
391
+				if v4 := action.SrcAddr.To4(); v4 != nil {
392
+					aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC, v4[:])
393
+				} else if v6 := action.SrcAddr.To16(); v6 != nil {
394
+					aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, v6[:])
395
+				} else {
396
+					return fmt.Errorf("invalid src addr %s for tunnel_key action", action.SrcAddr)
397
+				}
398
+				if v4 := action.DstAddr.To4(); v4 != nil {
399
+					aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV4_DST, v4[:])
400
+				} else if v6 := action.DstAddr.To16(); v6 != nil {
401
+					aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, v6[:])
402
+				} else {
403
+					return fmt.Errorf("invalid dst addr %s for tunnel_key action", action.DstAddr)
404
+				}
405
+			}
406
+		case *SkbEditAction:
407
+			table := attr.AddRtAttr(tabIndex, nil)
408
+			tabIndex++
409
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("skbedit"))
410
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
411
+			skbedit := nl.TcSkbEdit{}
412
+			toTcGen(action.Attrs(), &skbedit.TcGen)
413
+			aopts.AddRtAttr(nl.TCA_SKBEDIT_PARMS, skbedit.Serialize())
414
+			if action.QueueMapping != nil {
415
+				aopts.AddRtAttr(nl.TCA_SKBEDIT_QUEUE_MAPPING, nl.Uint16Attr(*action.QueueMapping))
416
+			}
417
+			if action.Priority != nil {
418
+				aopts.AddRtAttr(nl.TCA_SKBEDIT_PRIORITY, nl.Uint32Attr(*action.Priority))
419
+			}
420
+			if action.PType != nil {
421
+				aopts.AddRtAttr(nl.TCA_SKBEDIT_PTYPE, nl.Uint16Attr(*action.PType))
422
+			}
423
+			if action.Mark != nil {
424
+				aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark))
425
+			}
426
+		case *ConnmarkAction:
427
+			table := attr.AddRtAttr(tabIndex, nil)
428
+			tabIndex++
429
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("connmark"))
430
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
431
+			connmark := nl.TcConnmark{
432
+				Zone: action.Zone,
433
+			}
434
+			toTcGen(action.Attrs(), &connmark.TcGen)
435
+			aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize())
379 436
 		case *BpfAction:
380
-			table := nl.NewRtAttrChild(attr, tabIndex, nil)
437
+			table := attr.AddRtAttr(tabIndex, nil)
381 438
 			tabIndex++
382
-			nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf"))
383
-			aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
439
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf"))
440
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
384 441
 			gen := nl.TcGen{}
385 442
 			toTcGen(action.Attrs(), &gen)
386
-			nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_PARMS, gen.Serialize())
387
-			nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd)))
388
-			nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name))
443
+			aopts.AddRtAttr(nl.TCA_ACT_BPF_PARMS, gen.Serialize())
444
+			aopts.AddRtAttr(nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd)))
445
+			aopts.AddRtAttr(nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name))
389 446
 		case *GenericAction:
390
-			table := nl.NewRtAttrChild(attr, tabIndex, nil)
447
+			table := attr.AddRtAttr(tabIndex, nil)
391 448
 			tabIndex++
392
-			nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("gact"))
393
-			aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil)
449
+			table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("gact"))
450
+			aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil)
394 451
 			gen := nl.TcGen{}
395 452
 			toTcGen(action.Attrs(), &gen)
396
-			nl.NewRtAttrChild(aopts, nl.TCA_GACT_PARMS, gen.Serialize())
453
+			aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize())
397 454
 		}
398 455
 	}
399 456
 	return nil
... ...
@@ -419,8 +530,14 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
419 419
 					action = &MirredAction{}
420 420
 				case "bpf":
421 421
 					action = &BpfAction{}
422
+				case "connmark":
423
+					action = &ConnmarkAction{}
422 424
 				case "gact":
423 425
 					action = &GenericAction{}
426
+				case "tunnel_key":
427
+					action = &TunnelKeyAction{}
428
+				case "skbedit":
429
+					action = &SkbEditAction{}
424 430
 				default:
425 431
 					break nextattr
426 432
 				}
... ...
@@ -435,11 +552,46 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
435 435
 						switch adatum.Attr.Type {
436 436
 						case nl.TCA_MIRRED_PARMS:
437 437
 							mirred := *nl.DeserializeTcMirred(adatum.Value)
438
-							toAttrs(&mirred.TcGen, action.Attrs())
439 438
 							action.(*MirredAction).ActionAttrs = ActionAttrs{}
439
+							toAttrs(&mirred.TcGen, action.Attrs())
440 440
 							action.(*MirredAction).Ifindex = int(mirred.Ifindex)
441 441
 							action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction)
442 442
 						}
443
+					case "tunnel_key":
444
+						switch adatum.Attr.Type {
445
+						case nl.TCA_TUNNEL_KEY_PARMS:
446
+							tun := *nl.DeserializeTunnelKey(adatum.Value)
447
+							action.(*TunnelKeyAction).ActionAttrs = ActionAttrs{}
448
+							toAttrs(&tun.TcGen, action.Attrs())
449
+							action.(*TunnelKeyAction).Action = TunnelKeyAct(tun.Action)
450
+						case nl.TCA_TUNNEL_KEY_ENC_KEY_ID:
451
+							action.(*TunnelKeyAction).KeyID = networkOrder.Uint32(adatum.Value[0:4])
452
+						case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC:
453
+						case nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC:
454
+							action.(*TunnelKeyAction).SrcAddr = net.IP(adatum.Value[:])
455
+						case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST:
456
+						case nl.TCA_TUNNEL_KEY_ENC_IPV4_DST:
457
+							action.(*TunnelKeyAction).DstAddr = net.IP(adatum.Value[:])
458
+						}
459
+					case "skbedit":
460
+						switch adatum.Attr.Type {
461
+						case nl.TCA_SKBEDIT_PARMS:
462
+							skbedit := *nl.DeserializeSkbEdit(adatum.Value)
463
+							action.(*SkbEditAction).ActionAttrs = ActionAttrs{}
464
+							toAttrs(&skbedit.TcGen, action.Attrs())
465
+						case nl.TCA_SKBEDIT_MARK:
466
+							mark := native.Uint32(adatum.Value[0:4])
467
+							action.(*SkbEditAction).Mark = &mark
468
+						case nl.TCA_SKBEDIT_PRIORITY:
469
+							priority := native.Uint32(adatum.Value[0:4])
470
+							action.(*SkbEditAction).Priority = &priority
471
+						case nl.TCA_SKBEDIT_PTYPE:
472
+							ptype := native.Uint16(adatum.Value[0:2])
473
+							action.(*SkbEditAction).PType = &ptype
474
+						case nl.TCA_SKBEDIT_QUEUE_MAPPING:
475
+							mapping := native.Uint16(adatum.Value[0:2])
476
+							action.(*SkbEditAction).QueueMapping = &mapping
477
+						}
443 478
 					case "bpf":
444 479
 						switch adatum.Attr.Type {
445 480
 						case nl.TCA_ACT_BPF_PARMS:
... ...
@@ -450,6 +602,14 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
450 450
 						case nl.TCA_ACT_BPF_NAME:
451 451
 							action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1])
452 452
 						}
453
+					case "connmark":
454
+						switch adatum.Attr.Type {
455
+						case nl.TCA_CONNMARK_PARMS:
456
+							connmark := *nl.DeserializeTcConnmark(adatum.Value)
457
+							action.(*ConnmarkAction).ActionAttrs = ActionAttrs{}
458
+							toAttrs(&connmark.TcGen, action.Attrs())
459
+							action.(*ConnmarkAction).Zone = connmark.Zone
460
+						}
453 461
 					case "gact":
454 462
 						switch adatum.Attr.Type {
455 463
 						case nl.TCA_GACT_PARMS:
... ...
@@ -474,7 +634,7 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
474 474
 		case nl.TCA_U32_SEL:
475 475
 			detailed = true
476 476
 			sel := nl.DeserializeTcU32Sel(datum.Value)
477
-			u32.Sel = (*TcU32Sel)(unsafe.Pointer(sel))
477
+			u32.Sel = sel
478 478
 			if native != networkOrder {
479 479
 				// Handle the endianness of attributes
480 480
 				u32.Sel.Offmask = native.Uint16(htons(sel.Offmask))
... ...
@@ -500,6 +660,10 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
500 500
 			}
501 501
 		case nl.TCA_U32_CLASSID:
502 502
 			u32.ClassId = native.Uint32(datum.Value)
503
+		case nl.TCA_U32_DIVISOR:
504
+			u32.Divisor = native.Uint32(datum.Value)
505
+		case nl.TCA_U32_HASH:
506
+			u32.Hash = native.Uint32(datum.Value)
503 507
 		}
504 508
 	}
505 509
 	return detailed, nil
... ...
@@ -551,6 +715,10 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
551 551
 			if (flags & nl.TCA_BPF_FLAG_ACT_DIRECT) != 0 {
552 552
 				bpf.DirectAction = true
553 553
 			}
554
+		case nl.TCA_BPF_ID:
555
+			bpf.Id = int(native.Uint32(datum.Value[0:4]))
556
+		case nl.TCA_BPF_TAG:
557
+			bpf.Tag = hex.EncodeToString(datum.Value[:len(datum.Value)-1])
554 558
 		}
555 559
 	}
556 560
 	return detailed, nil
... ...
@@ -90,11 +90,7 @@ func (h *Handle) FouAdd(f Fou) error {
90 90
 	req.AddRawData(raw)
91 91
 
92 92
 	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
93
-	if err != nil {
94
-		return err
95
-	}
96
-
97
-	return nil
93
+	return err
98 94
 }
99 95
 
100 96
 func FouDel(f Fou) error {
... ...
@@ -157,6 +157,9 @@ func (h *Handle) GenlFamilyGet(name string) (*GenlFamily, error) {
157 157
 		return nil, err
158 158
 	}
159 159
 	families, err := parseFamilies(msgs)
160
+	if err != nil {
161
+		return nil, err
162
+	}
160 163
 	if len(families) != 1 {
161 164
 		return nil, fmt.Errorf("invalid response for GENL_CTRL_CMD_GETFAMILY")
162 165
 	}
163 166
new file mode 100644
... ...
@@ -0,0 +1,8 @@
0
+module github.com/vishvananda/netlink
1
+
2
+go 1.12
3
+
4
+require (
5
+	github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df
6
+	golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444
7
+)
... ...
@@ -91,7 +91,7 @@ func (h *Handle) GetSocketReceiveBufferSize() ([]int, error) {
91 91
 	return results, nil
92 92
 }
93 93
 
94
-// NewHandle returns a netlink handle on the network namespace
94
+// NewHandleAt returns a netlink handle on the network namespace
95 95
 // specified by ns. If ns=netns.None(), current network namespace
96 96
 // will be assumed
97 97
 func NewHandleAt(ns netns.NsHandle, nlFamilies ...int) (*Handle, error) {
... ...
@@ -73,10 +73,18 @@ func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error {
73 73
 	return ErrNotImplemented
74 74
 }
75 75
 
76
+func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
77
+	return ErrNotImplemented
78
+}
79
+
76 80
 func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error {
77 81
 	return ErrNotImplemented
78 82
 }
79 83
 
84
+func (h *Handle) LinkSetVfRate(link Link, vf, minRate, maxRate int) error {
85
+	return ErrNotImplemented
86
+}
87
+
80 88
 func (h *Handle) LinkSetMaster(link Link, master *Bridge) error {
81 89
 	return ErrNotImplemented
82 90
 }
... ...
@@ -149,6 +157,10 @@ func (h *Handle) LinkSetTxQLen(link Link, qlen int) error {
149 149
 	return ErrNotImplemented
150 150
 }
151 151
 
152
+func (h *Handle) LinkSetGroup(link Link, group int) error {
153
+	return ErrNotImplemented
154
+}
155
+
152 156
 func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
153 157
 	return ErrNotImplemented
154 158
 }
... ...
@@ -56,18 +56,10 @@ type ethtoolSset struct {
56 56
 	data     [1]uint32
57 57
 }
58 58
 
59
-// ethtoolGstrings is string set for data tagging
60
-type ethtoolGstrings struct {
61
-	cmd       uint32
62
-	stringSet uint32
63
-	length    uint32
64
-	data      [32]byte
65
-}
66
-
67 59
 type ethtoolStats struct {
68 60
 	cmd    uint32
69 61
 	nStats uint32
70
-	data   [1]uint64
62
+	// Followed by nStats * []uint64.
71 63
 }
72 64
 
73 65
 // newIocltSlaveReq returns filled IfreqSlave with proper interface names
... ...
@@ -4,6 +4,7 @@ import (
4 4
 	"fmt"
5 5
 	"net"
6 6
 	"os"
7
+	"strconv"
7 8
 )
8 9
 
9 10
 // Link represents a link device from netlink. Shared link attributes
... ...
@@ -41,6 +42,29 @@ type LinkAttrs struct {
41 41
 	NetNsID      int
42 42
 	NumTxQueues  int
43 43
 	NumRxQueues  int
44
+	GSOMaxSize   uint32
45
+	GSOMaxSegs   uint32
46
+	Vfs          []VfInfo // virtual functions available on link
47
+	Group        uint32
48
+	Slave        LinkSlave
49
+}
50
+
51
+// LinkSlave represents a slave device.
52
+type LinkSlave interface {
53
+	SlaveType() string
54
+}
55
+
56
+// VfInfo represents configuration of virtual function
57
+type VfInfo struct {
58
+	ID        int
59
+	Mac       net.HardwareAddr
60
+	Vlan      int
61
+	Qos       int
62
+	TxRate    int // IFLA_VF_TX_RATE  Max TxRate
63
+	Spoofchk  bool
64
+	LinkState uint32
65
+	MaxTxRate uint32 // IFLA_VF_RATE Max TxRate
66
+	MinTxRate uint32 // IFLA_VF_RATE Min TxRate
44 67
 }
45 68
 
46 69
 // LinkOperState represents the values of the IFLA_OPERSTATE link
... ...
@@ -223,6 +247,7 @@ type Bridge struct {
223 223
 	LinkAttrs
224 224
 	MulticastSnooping *bool
225 225
 	HelloTime         *uint32
226
+	VlanFiltering     *bool
226 227
 }
227 228
 
228 229
 func (bridge *Bridge) Attrs() *LinkAttrs {
... ...
@@ -236,7 +261,8 @@ func (bridge *Bridge) Type() string {
236 236
 // Vlan links have ParentIndex set in their Attrs()
237 237
 type Vlan struct {
238 238
 	LinkAttrs
239
-	VlanId int
239
+	VlanId       int
240
+	VlanProtocol VlanProtocol
240 241
 }
241 242
 
242 243
 func (vlan *Vlan) Attrs() *LinkAttrs {
... ...
@@ -290,10 +316,13 @@ type TuntapFlag uint16
290 290
 // Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
291 291
 type Tuntap struct {
292 292
 	LinkAttrs
293
-	Mode   TuntapMode
294
-	Flags  TuntapFlag
295
-	Queues int
296
-	Fds    []*os.File
293
+	Mode       TuntapMode
294
+	Flags      TuntapFlag
295
+	NonPersist bool
296
+	Queues     int
297
+	Fds        []*os.File
298
+	Owner      uint32
299
+	Group      uint32
297 300
 }
298 301
 
299 302
 func (tuntap *Tuntap) Attrs() *LinkAttrs {
... ...
@@ -307,7 +336,8 @@ func (tuntap *Tuntap) Type() string {
307 307
 // Veth devices must specify PeerName on create
308 308
 type Veth struct {
309 309
 	LinkAttrs
310
-	PeerName string // veth on create only
310
+	PeerName         string // veth on create only
311
+	PeerHardwareAddr net.HardwareAddr
311 312
 }
312 313
 
313 314
 func (veth *Veth) Attrs() *LinkAttrs {
... ...
@@ -376,9 +406,18 @@ const (
376 376
 	IPVLAN_MODE_MAX
377 377
 )
378 378
 
379
+type IPVlanFlag uint16
380
+
381
+const (
382
+	IPVLAN_FLAG_BRIDGE IPVlanFlag = iota
383
+	IPVLAN_FLAG_PRIVATE
384
+	IPVLAN_FLAG_VEPA
385
+)
386
+
379 387
 type IPVlan struct {
380 388
 	LinkAttrs
381 389
 	Mode IPVlanMode
390
+	Flag IPVlanFlag
382 391
 }
383 392
 
384 393
 func (ipvlan *IPVlan) Attrs() *LinkAttrs {
... ...
@@ -389,6 +428,43 @@ func (ipvlan *IPVlan) Type() string {
389 389
 	return "ipvlan"
390 390
 }
391 391
 
392
+// VlanProtocol type
393
+type VlanProtocol int
394
+
395
+func (p VlanProtocol) String() string {
396
+	s, ok := VlanProtocolToString[p]
397
+	if !ok {
398
+		return fmt.Sprintf("VlanProtocol(%d)", p)
399
+	}
400
+	return s
401
+}
402
+
403
+// StringToVlanProtocol returns the vlan protocol, or unknown if s is invalid.
404
+func StringToVlanProtocol(s string) VlanProtocol {
405
+	mode, ok := StringToVlanProtocolMap[s]
406
+	if !ok {
407
+		return VLAN_PROTOCOL_UNKNOWN
408
+	}
409
+	return mode
410
+}
411
+
412
+// VlanProtocol possible values
413
+const (
414
+	VLAN_PROTOCOL_UNKNOWN VlanProtocol = 0
415
+	VLAN_PROTOCOL_8021Q   VlanProtocol = 0x8100
416
+	VLAN_PROTOCOL_8021AD  VlanProtocol = 0x88A8
417
+)
418
+
419
+var VlanProtocolToString = map[VlanProtocol]string{
420
+	VLAN_PROTOCOL_8021Q:  "802.1q",
421
+	VLAN_PROTOCOL_8021AD: "802.1ad",
422
+}
423
+
424
+var StringToVlanProtocolMap = map[string]VlanProtocol{
425
+	"802.1q":  VLAN_PROTOCOL_8021Q,
426
+	"802.1ad": VLAN_PROTOCOL_8021AD,
427
+}
428
+
392 429
 // BondMode type
393 430
 type BondMode int
394 431
 
... ...
@@ -400,7 +476,7 @@ func (b BondMode) String() string {
400 400
 	return s
401 401
 }
402 402
 
403
-// StringToBondMode returns bond mode, or uknonw is the s is invalid.
403
+// StringToBondMode returns the bond mode, or unknown if s is invalid.
404 404
 func StringToBondMode(s string) BondMode {
405 405
 	mode, ok := StringToBondModeMap[s]
406 406
 	if !ok {
... ...
@@ -491,7 +567,7 @@ func (b BondXmitHashPolicy) String() string {
491 491
 	return s
492 492
 }
493 493
 
494
-// StringToBondXmitHashPolicy returns bond lacp arte, or uknonw is the s is invalid.
494
+// StringToBondXmitHashPolicy returns the bond xmit hash policy, or unknown if s is invalid.
495 495
 func StringToBondXmitHashPolicy(s string) BondXmitHashPolicy {
496 496
 	lacp, ok := StringToBondXmitHashPolicyMap[s]
497 497
 	if !ok {
... ...
@@ -536,7 +612,7 @@ func (b BondLacpRate) String() string {
536 536
 	return s
537 537
 }
538 538
 
539
-// StringToBondLacpRate returns bond lacp arte, or uknonw is the s is invalid.
539
+// StringToBondLacpRate returns the bond lacp rate, or unknown if s is invalid.
540 540
 func StringToBondLacpRate(s string) BondLacpRate {
541 541
 	lacp, ok := StringToBondLacpRateMap[s]
542 542
 	if !ok {
... ...
@@ -680,6 +756,67 @@ func (bond *Bond) Type() string {
680 680
 	return "bond"
681 681
 }
682 682
 
683
+// BondSlaveState represents the values of the IFLA_BOND_SLAVE_STATE bond slave
684
+// attribute, which contains the state of the bond slave.
685
+type BondSlaveState uint8
686
+
687
+const (
688
+	BondStateActive = iota // Link is active.
689
+	BondStateBackup        // Link is backup.
690
+)
691
+
692
+func (s BondSlaveState) String() string {
693
+	switch s {
694
+	case BondStateActive:
695
+		return "ACTIVE"
696
+	case BondStateBackup:
697
+		return "BACKUP"
698
+	default:
699
+		return strconv.Itoa(int(s))
700
+	}
701
+}
702
+
703
+// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave
704
+// attribute, which contains the status of MII link monitoring
705
+type BondSlaveMiiStatus uint8
706
+
707
+const (
708
+	BondLinkUp   = iota // link is up and running.
709
+	BondLinkFail        // link has just gone down.
710
+	BondLinkDown        // link has been down for too long time.
711
+	BondLinkBack        // link is going back.
712
+)
713
+
714
+func (s BondSlaveMiiStatus) String() string {
715
+	switch s {
716
+	case BondLinkUp:
717
+		return "UP"
718
+	case BondLinkFail:
719
+		return "GOING_DOWN"
720
+	case BondLinkDown:
721
+		return "DOWN"
722
+	case BondLinkBack:
723
+		return "GOING_BACK"
724
+	default:
725
+		return strconv.Itoa(int(s))
726
+	}
727
+}
728
+
729
+type BondSlave struct {
730
+	State                  BondSlaveState
731
+	MiiStatus              BondSlaveMiiStatus
732
+	LinkFailureCount       uint32
733
+	PermHardwareAddr       net.HardwareAddr
734
+	QueueId                uint16
735
+	AggregatorId           uint16
736
+	AdActorOperPortState   uint8
737
+	AdPartnerOperPortState uint16
738
+}
739
+
740
+func (b *BondSlave) SlaveType() string {
741
+	return "bond"
742
+}
743
+
683 744
 // Gretap devices must specify LocalIP and RemoteIP on create
684 745
 type Gretap struct {
685 746
 	LinkAttrs
... ...
@@ -734,6 +871,27 @@ func (iptun *Iptun) Type() string {
734 734
 	return "ipip"
735 735
 }
736 736
 
737
+type Ip6tnl struct {
738
+	LinkAttrs
739
+	Link       uint32
740
+	Local      net.IP
741
+	Remote     net.IP
742
+	Ttl        uint8
743
+	Tos        uint8
744
+	EncapLimit uint8
745
+	Flags      uint32
746
+	Proto      uint8
747
+	FlowInfo   uint32
748
+}
749
+
750
+func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs {
751
+	return &ip6tnl.LinkAttrs
752
+}
753
+
754
+func (ip6tnl *Ip6tnl) Type() string {
755
+	return "ip6tnl"
756
+}
757
+
737 758
 type Sittun struct {
738 759
 	LinkAttrs
739 760
 	Link       uint32
... ...
@@ -769,7 +927,10 @@ func (vti *Vti) Attrs() *LinkAttrs {
769 769
 	return &vti.LinkAttrs
770 770
 }
771 771
 
772
-func (iptun *Vti) Type() string {
772
+func (vti *Vti) Type() string {
773
+	if vti.Local.To4() == nil {
774
+		return "vti6"
775
+	}
773 776
 	return "vti"
774 777
 }
775 778
 
... ...
@@ -831,11 +992,68 @@ func (gtp *GTP) Type() string {
831 831
 	return "gtp"
832 832
 }
833 833
 
834
+// Virtual XFRM Interfaces
835
+//	Named "xfrmi" to prevent confusion with XFRM objects
836
+type Xfrmi struct {
837
+	LinkAttrs
838
+	Ifid uint32
839
+}
840
+
841
+func (xfrm *Xfrmi) Attrs() *LinkAttrs {
842
+	return &xfrm.LinkAttrs
843
+}
844
+
845
+func (xfrm *Xfrmi) Type() string {
846
+	return "xfrm"
847
+}
848
+
849
+// IPoIB interface
850
+
851
+type IPoIBMode uint16
852
+
853
+func (m *IPoIBMode) String() string {
854
+	str, ok := iPoIBModeToString[*m]
855
+	if !ok {
856
+		return fmt.Sprintf("mode(%d)", *m)
857
+	}
858
+	return str
859
+}
860
+
861
+const (
862
+	IPOIB_MODE_DATAGRAM = iota
863
+	IPOIB_MODE_CONNECTED
864
+)
865
+
866
+var iPoIBModeToString = map[IPoIBMode]string{
867
+	IPOIB_MODE_DATAGRAM:  "datagram",
868
+	IPOIB_MODE_CONNECTED: "connected",
869
+}
870
+
871
+var StringToIPoIBMode = map[string]IPoIBMode{
872
+	"datagram":  IPOIB_MODE_DATAGRAM,
873
+	"connected": IPOIB_MODE_CONNECTED,
874
+}
875
+
876
+type IPoIB struct {
877
+	LinkAttrs
878
+	Pkey   uint16
879
+	Mode   IPoIBMode
880
+	Umcast uint16
881
+}
882
+
883
+func (ipoib *IPoIB) Attrs() *LinkAttrs {
884
+	return &ipoib.LinkAttrs
885
+}
886
+
887
+func (ipoib *IPoIB) Type() string {
888
+	return "ipoib"
889
+}
890
+
834 891
 // iproute2 supported devices;
835 892
 // vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
836 893
 // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
837
-// gre | gretap | ip6gre | ip6gretap | vti | nlmon |
838
-// bond_slave | ipvlan
894
+// gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon |
895
+// bond_slave | ipvlan | xfrm
839 896
 
840 897
 // LinkNotFoundError wraps the various not found errors when
841 898
 // getting/reading links. This is intended for better error
... ...
@@ -4,8 +4,11 @@ import (
4 4
 	"bytes"
5 5
 	"encoding/binary"
6 6
 	"fmt"
7
+	"io/ioutil"
7 8
 	"net"
8 9
 	"os"
10
+	"strconv"
11
+	"strings"
9 12
 	"syscall"
10 13
 	"unsafe"
11 14
 
... ...
@@ -16,7 +19,7 @@ import (
16 16
 
17 17
 const (
18 18
 	SizeofLinkStats32 = 0x5c
19
-	SizeofLinkStats64 = 0xd8
19
+	SizeofLinkStats64 = 0xb8
20 20
 )
21 21
 
22 22
 const (
... ...
@@ -31,6 +34,12 @@ const (
31 31
 	TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI
32 32
 )
33 33
 
34
+const (
35
+	VF_LINK_STATE_AUTO    uint32 = 0
36
+	VF_LINK_STATE_ENABLE  uint32 = 1
37
+	VF_LINK_STATE_DISABLE uint32 = 2
38
+)
39
+
34 40
 var lookupByDump = false
35 41
 
36 42
 var macvlanModes = [...]uint32{
... ...
@@ -113,6 +122,52 @@ func (h *Handle) SetPromiscOn(link Link) error {
113 113
 	return err
114 114
 }
115 115
 
116
+// LinkSetAllmulticastOn enables the reception of all hardware multicast packets for the link device.
117
+// Equivalent to: `ip link set $link allmulticast on`
118
+func LinkSetAllmulticastOn(link Link) error {
119
+	return pkgHandle.LinkSetAllmulticastOn(link)
120
+}
121
+
122
+// LinkSetAllmulticastOn enables the reception of all hardware multicast packets for the link device.
123
+// Equivalent to: `ip link set $link allmulticast on`
124
+func (h *Handle) LinkSetAllmulticastOn(link Link) error {
125
+	base := link.Attrs()
126
+	h.ensureIndex(base)
127
+	req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
128
+
129
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
130
+	msg.Change = unix.IFF_ALLMULTI
131
+	msg.Flags = unix.IFF_ALLMULTI
132
+
133
+	msg.Index = int32(base.Index)
134
+	req.AddData(msg)
135
+
136
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
137
+	return err
138
+}
139
+
140
+// LinkSetAllmulticastOff disables the reception of all hardware multicast packets for the link device.
141
+// Equivalent to: `ip link set $link allmulticast off`
142
+func LinkSetAllmulticastOff(link Link) error {
143
+	return pkgHandle.LinkSetAllmulticastOff(link)
144
+}
145
+
146
+// LinkSetAllmulticastOff disables the reception of all hardware multicast packets for the link device.
147
+// Equivalent to: `ip link set $link allmulticast off`
148
+func (h *Handle) LinkSetAllmulticastOff(link Link) error {
149
+	base := link.Attrs()
150
+	h.ensureIndex(base)
151
+	req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
152
+
153
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
154
+	msg.Change = unix.IFF_ALLMULTI
155
+	msg.Index = int32(base.Index)
156
+	req.AddData(msg)
157
+
158
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
159
+	return err
160
+}
161
+
116 162
 func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error {
117 163
 	return pkgHandle.MacvlanMACAddrAdd(link, addr)
118 164
 }
... ...
@@ -155,24 +210,24 @@ func (h *Handle) macvlanMACAddrChange(link Link, addrs []net.HardwareAddr, mode
155 155
 	req.AddData(msg)
156 156
 
157 157
 	linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil)
158
-	nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
159
-	inner := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
158
+	linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
159
+	inner := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
160 160
 
161 161
 	// IFLA_MACVLAN_MACADDR_MODE = mode
162 162
 	b := make([]byte, 4)
163 163
 	native.PutUint32(b, mode)
164
-	nl.NewRtAttrChild(inner, nl.IFLA_MACVLAN_MACADDR_MODE, b)
164
+	inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR_MODE, b)
165 165
 
166 166
 	// populate message with MAC addrs, if necessary
167 167
 	switch mode {
168 168
 	case nl.MACVLAN_MACADDR_ADD, nl.MACVLAN_MACADDR_DEL:
169 169
 		if len(addrs) == 1 {
170
-			nl.NewRtAttrChild(inner, nl.IFLA_MACVLAN_MACADDR, []byte(addrs[0]))
170
+			inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR, []byte(addrs[0]))
171 171
 		}
172 172
 	case nl.MACVLAN_MACADDR_SET:
173
-		mad := nl.NewRtAttrChild(inner, nl.IFLA_MACVLAN_MACADDR_DATA, nil)
173
+		mad := inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR_DATA, nil)
174 174
 		for _, addr := range addrs {
175
-			nl.NewRtAttrChild(mad, nl.IFLA_MACVLAN_MACADDR, []byte(addr))
175
+			mad.AddRtAttr(nl.IFLA_MACVLAN_MACADDR, []byte(addr))
176 176
 		}
177 177
 	}
178 178
 
... ...
@@ -203,7 +258,6 @@ func (h *Handle) SetPromiscOff(link Link) error {
203 203
 
204 204
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
205 205
 	msg.Change = unix.IFF_PROMISC
206
-	msg.Flags = 0 & ^unix.IFF_PROMISC
207 206
 	msg.Index = int32(base.Index)
208 207
 	req.AddData(msg)
209 208
 
... ...
@@ -253,7 +307,6 @@ func (h *Handle) LinkSetDown(link Link) error {
253 253
 
254 254
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
255 255
 	msg.Change = unix.IFF_UP
256
-	msg.Flags = 0 & ^unix.IFF_UP
257 256
 	msg.Index = int32(base.Index)
258 257
 	req.AddData(msg)
259 258
 
... ...
@@ -378,12 +431,12 @@ func (h *Handle) LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAdd
378 378
 	req.AddData(msg)
379 379
 
380 380
 	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
381
-	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
381
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
382 382
 	vfmsg := nl.VfMac{
383 383
 		Vf: uint32(vf),
384 384
 	}
385 385
 	copy(vfmsg.Mac[:], []byte(hwaddr))
386
-	nl.NewRtAttrChild(info, nl.IFLA_VF_MAC, vfmsg.Serialize())
386
+	info.AddRtAttr(nl.IFLA_VF_MAC, vfmsg.Serialize())
387 387
 	req.AddData(data)
388 388
 
389 389
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
... ...
@@ -408,10 +461,41 @@ func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error {
408 408
 	req.AddData(msg)
409 409
 
410 410
 	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
411
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
412
+	vfmsg := nl.VfVlan{
413
+		Vf:   uint32(vf),
414
+		Vlan: uint32(vlan),
415
+	}
416
+	info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize())
417
+	req.AddData(data)
418
+
419
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
420
+	return err
421
+}
422
+
423
+// LinkSetVfVlanQos sets the vlan and qos priority of a vf for the link.
424
+// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos`
425
+func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
426
+	return pkgHandle.LinkSetVfVlanQos(link, vf, vlan, qos)
427
+}
428
+
429
+// LinkSetVfVlanQos sets the vlan and qos priority of a vf for the link.
430
+// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos`
431
+func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
432
+	base := link.Attrs()
433
+	h.ensureIndex(base)
434
+	req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK)
435
+
436
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
437
+	msg.Index = int32(base.Index)
438
+	req.AddData(msg)
439
+
440
+	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
411 441
 	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
412 442
 	vfmsg := nl.VfVlan{
413 443
 		Vf:   uint32(vf),
414 444
 		Vlan: uint32(vlan),
445
+		Qos:  uint32(qos),
415 446
 	}
416 447
 	nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize())
417 448
 	req.AddData(data)
... ...
@@ -438,12 +522,73 @@ func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error {
438 438
 	req.AddData(msg)
439 439
 
440 440
 	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
441
-	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
441
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
442 442
 	vfmsg := nl.VfTxRate{
443 443
 		Vf:   uint32(vf),
444 444
 		Rate: uint32(rate),
445 445
 	}
446
-	nl.NewRtAttrChild(info, nl.IFLA_VF_TX_RATE, vfmsg.Serialize())
446
+	info.AddRtAttr(nl.IFLA_VF_TX_RATE, vfmsg.Serialize())
447
+	req.AddData(data)
448
+
449
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
450
+	return err
451
+}
452
+
453
+// LinkSetVfRate sets the min and max tx rate of a vf for the link.
454
+// Equivalent to: `ip link set $link vf $vf min_tx_rate $min_rate max_tx_rate $max_rate`
455
+func LinkSetVfRate(link Link, vf, minRate, maxRate int) error {
456
+	return pkgHandle.LinkSetVfRate(link, vf, minRate, maxRate)
457
+}
458
+
459
+// LinkSetVfRate sets the min and max tx rate of a vf for the link.
460
+// Equivalent to: `ip link set $link vf $vf min_tx_rate $min_rate max_tx_rate $max_rate`
461
+func (h *Handle) LinkSetVfRate(link Link, vf, minRate, maxRate int) error {
462
+	base := link.Attrs()
463
+	h.ensureIndex(base)
464
+	req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK)
465
+
466
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
467
+	msg.Index = int32(base.Index)
468
+	req.AddData(msg)
469
+
470
+	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
471
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
472
+	vfmsg := nl.VfRate{
473
+		Vf:        uint32(vf),
474
+		MinTxRate: uint32(minRate),
475
+		MaxTxRate: uint32(maxRate),
476
+	}
477
+	info.AddRtAttr(nl.IFLA_VF_RATE, vfmsg.Serialize())
478
+	req.AddData(data)
479
+
480
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
481
+	return err
482
+}
483
+
484
+// LinkSetVfState enables/disables virtual link state on a vf.
485
+// Equivalent to: `ip link set $link vf $vf state $state`
486
+func LinkSetVfState(link Link, vf int, state uint32) error {
487
+	return pkgHandle.LinkSetVfState(link, vf, state)
488
+}
489
+
490
+// LinkSetVfState enables/disables virtual link state on a vf.
491
+// Equivalent to: `ip link set $link vf $vf state $state`
492
+func (h *Handle) LinkSetVfState(link Link, vf int, state uint32) error {
493
+	base := link.Attrs()
494
+	h.ensureIndex(base)
495
+	req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK)
496
+
497
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
498
+	msg.Index = int32(base.Index)
499
+	req.AddData(msg)
500
+
501
+	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
502
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
503
+	vfmsg := nl.VfLinkState{
504
+		Vf:        uint32(vf),
505
+		LinkState: state,
506
+	}
507
+	info.AddRtAttr(nl.IFLA_VF_LINK_STATE, vfmsg.Serialize())
447 508
 	req.AddData(data)
448 509
 
449 510
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
... ...
@@ -456,7 +601,7 @@ func LinkSetVfSpoofchk(link Link, vf int, check bool) error {
456 456
 	return pkgHandle.LinkSetVfSpoofchk(link, vf, check)
457 457
 }
458 458
 
459
-// LinkSetVfSpookfchk enables/disables spoof check on a vf for the link.
459
+// LinkSetVfSpoofchk enables/disables spoof check on a vf for the link.
460 460
 // Equivalent to: `ip link set $link vf $vf spoofchk $check`
461 461
 func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error {
462 462
 	var setting uint32
... ...
@@ -469,7 +614,7 @@ func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error {
469 469
 	req.AddData(msg)
470 470
 
471 471
 	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
472
-	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
472
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
473 473
 	if check {
474 474
 		setting = 1
475 475
 	}
... ...
@@ -477,7 +622,7 @@ func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error {
477 477
 		Vf:      uint32(vf),
478 478
 		Setting: setting,
479 479
 	}
480
-	nl.NewRtAttrChild(info, nl.IFLA_VF_SPOOFCHK, vfmsg.Serialize())
480
+	info.AddRtAttr(nl.IFLA_VF_SPOOFCHK, vfmsg.Serialize())
481 481
 	req.AddData(data)
482 482
 
483 483
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
... ...
@@ -503,7 +648,7 @@ func (h *Handle) LinkSetVfTrust(link Link, vf int, state bool) error {
503 503
 	req.AddData(msg)
504 504
 
505 505
 	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
506
-	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
506
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
507 507
 	if state {
508 508
 		setting = 1
509 509
 	}
... ...
@@ -511,22 +656,66 @@ func (h *Handle) LinkSetVfTrust(link Link, vf int, state bool) error {
511 511
 		Vf:      uint32(vf),
512 512
 		Setting: setting,
513 513
 	}
514
-	nl.NewRtAttrChild(info, nl.IFLA_VF_TRUST, vfmsg.Serialize())
514
+	info.AddRtAttr(nl.IFLA_VF_TRUST, vfmsg.Serialize())
515 515
 	req.AddData(data)
516 516
 
517 517
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
518 518
 	return err
519 519
 }
520 520
 
521
+// LinkSetVfNodeGUID sets the node GUID of a vf for the link.
522
+// Equivalent to: `ip link set dev $link vf $vf node_guid $nodeguid`
523
+func LinkSetVfNodeGUID(link Link, vf int, nodeguid net.HardwareAddr) error {
524
+	return pkgHandle.LinkSetVfGUID(link, vf, nodeguid, nl.IFLA_VF_IB_NODE_GUID)
525
+}
526
+
527
+// LinkSetVfPortGUID sets the port GUID of a vf for the link.
528
+// Equivalent to: `ip link set dev $link vf $vf port_guid $portguid`
529
+func LinkSetVfPortGUID(link Link, vf int, portguid net.HardwareAddr) error {
530
+	return pkgHandle.LinkSetVfGUID(link, vf, portguid, nl.IFLA_VF_IB_PORT_GUID)
531
+}
532
+
533
+// LinkSetVfGUID sets the node or port GUID of a vf for the link.
534
+func (h *Handle) LinkSetVfGUID(link Link, vf int, vfGuid net.HardwareAddr, guidType int) error {
535
+	var err error
536
+	var guid uint64
537
+
538
+	buf := bytes.NewBuffer(vfGuid)
539
+	err = binary.Read(buf, binary.BigEndian, &guid)
540
+	if err != nil {
541
+		return err
542
+	}
543
+
544
+	base := link.Attrs()
545
+	h.ensureIndex(base)
546
+	req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK)
547
+
548
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
549
+	msg.Index = int32(base.Index)
550
+	req.AddData(msg)
551
+
552
+	data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
553
+	info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
554
+	vfmsg := nl.VfGUID{
555
+		Vf:   uint32(vf),
556
+		GUID: guid,
557
+	}
558
+	info.AddRtAttr(guidType, vfmsg.Serialize())
559
+	req.AddData(data)
560
+
561
+	_, err = req.Execute(unix.NETLINK_ROUTE, 0)
562
+	return err
563
+}
564
+
521 565
 // LinkSetMaster sets the master of the link device.
522 566
 // Equivalent to: `ip link set $link master $master`
523
-func LinkSetMaster(link Link, master *Bridge) error {
567
+func LinkSetMaster(link Link, master Link) error {
524 568
 	return pkgHandle.LinkSetMaster(link, master)
525 569
 }
526 570
 
527 571
 // LinkSetMaster sets the master of the link device.
528 572
 // Equivalent to: `ip link set $link master $master`
529
-func (h *Handle) LinkSetMaster(link Link, master *Bridge) error {
573
+func (h *Handle) LinkSetMaster(link Link, master Link) error {
530 574
 	index := 0
531 575
 	if master != nil {
532 576
 		masterBase := master.Attrs()
... ...
@@ -672,69 +861,69 @@ type vxlanPortRange struct {
672 672
 }
673 673
 
674 674
 func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
675
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
675
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
676 676
 
677 677
 	if vxlan.FlowBased {
678 678
 		vxlan.VxlanId = 0
679 679
 	}
680 680
 
681
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId)))
681
+	data.AddRtAttr(nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId)))
682 682
 
683 683
 	if vxlan.VtepDevIndex != 0 {
684
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex)))
684
+		data.AddRtAttr(nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex)))
685 685
 	}
686 686
 	if vxlan.SrcAddr != nil {
687 687
 		ip := vxlan.SrcAddr.To4()
688 688
 		if ip != nil {
689
-			nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LOCAL, []byte(ip))
689
+			data.AddRtAttr(nl.IFLA_VXLAN_LOCAL, []byte(ip))
690 690
 		} else {
691 691
 			ip = vxlan.SrcAddr.To16()
692 692
 			if ip != nil {
693
-				nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LOCAL6, []byte(ip))
693
+				data.AddRtAttr(nl.IFLA_VXLAN_LOCAL6, []byte(ip))
694 694
 			}
695 695
 		}
696 696
 	}
697 697
 	if vxlan.Group != nil {
698 698
 		group := vxlan.Group.To4()
699 699
 		if group != nil {
700
-			nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GROUP, []byte(group))
700
+			data.AddRtAttr(nl.IFLA_VXLAN_GROUP, []byte(group))
701 701
 		} else {
702 702
 			group = vxlan.Group.To16()
703 703
 			if group != nil {
704
-				nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GROUP6, []byte(group))
704
+				data.AddRtAttr(nl.IFLA_VXLAN_GROUP6, []byte(group))
705 705
 			}
706 706
 		}
707 707
 	}
708 708
 
709
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL)))
710
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS)))
711
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning))
712
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy))
713
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC))
714
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss))
715
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss))
716
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_ZERO_CSUM6_TX, boolAttr(vxlan.UDP6ZeroCSumTx))
717
-	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_ZERO_CSUM6_RX, boolAttr(vxlan.UDP6ZeroCSumRx))
709
+	data.AddRtAttr(nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL)))
710
+	data.AddRtAttr(nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS)))
711
+	data.AddRtAttr(nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning))
712
+	data.AddRtAttr(nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy))
713
+	data.AddRtAttr(nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC))
714
+	data.AddRtAttr(nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss))
715
+	data.AddRtAttr(nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss))
716
+	data.AddRtAttr(nl.IFLA_VXLAN_UDP_ZERO_CSUM6_TX, boolAttr(vxlan.UDP6ZeroCSumTx))
717
+	data.AddRtAttr(nl.IFLA_VXLAN_UDP_ZERO_CSUM6_RX, boolAttr(vxlan.UDP6ZeroCSumRx))
718 718
 
719 719
 	if vxlan.UDPCSum {
720
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum))
720
+		data.AddRtAttr(nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum))
721 721
 	}
722 722
 	if vxlan.GBP {
723
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, []byte{})
723
+		data.AddRtAttr(nl.IFLA_VXLAN_GBP, []byte{})
724 724
 	}
725 725
 	if vxlan.FlowBased {
726
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_FLOWBASED, boolAttr(vxlan.FlowBased))
726
+		data.AddRtAttr(nl.IFLA_VXLAN_FLOWBASED, boolAttr(vxlan.FlowBased))
727 727
 	}
728 728
 	if vxlan.NoAge {
729
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
729
+		data.AddRtAttr(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
730 730
 	} else if vxlan.Age > 0 {
731
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age)))
731
+		data.AddRtAttr(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age)))
732 732
 	}
733 733
 	if vxlan.Limit > 0 {
734
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit)))
734
+		data.AddRtAttr(nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit)))
735 735
 	}
736 736
 	if vxlan.Port > 0 {
737
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT, htons(uint16(vxlan.Port)))
737
+		data.AddRtAttr(nl.IFLA_VXLAN_PORT, htons(uint16(vxlan.Port)))
738 738
 	}
739 739
 	if vxlan.PortLow > 0 || vxlan.PortHigh > 0 {
740 740
 		pr := vxlanPortRange{uint16(vxlan.PortLow), uint16(vxlan.PortHigh)}
... ...
@@ -742,100 +931,100 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
742 742
 		buf := new(bytes.Buffer)
743 743
 		binary.Write(buf, binary.BigEndian, &pr)
744 744
 
745
-		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes())
745
+		data.AddRtAttr(nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes())
746 746
 	}
747 747
 }
748 748
 
749 749
 func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) {
750
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
750
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
751 751
 	if bond.Mode >= 0 {
752
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_MODE, nl.Uint8Attr(uint8(bond.Mode)))
752
+		data.AddRtAttr(nl.IFLA_BOND_MODE, nl.Uint8Attr(uint8(bond.Mode)))
753 753
 	}
754 754
 	if bond.ActiveSlave >= 0 {
755
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_ACTIVE_SLAVE, nl.Uint32Attr(uint32(bond.ActiveSlave)))
755
+		data.AddRtAttr(nl.IFLA_BOND_ACTIVE_SLAVE, nl.Uint32Attr(uint32(bond.ActiveSlave)))
756 756
 	}
757 757
 	if bond.Miimon >= 0 {
758
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_MIIMON, nl.Uint32Attr(uint32(bond.Miimon)))
758
+		data.AddRtAttr(nl.IFLA_BOND_MIIMON, nl.Uint32Attr(uint32(bond.Miimon)))
759 759
 	}
760 760
 	if bond.UpDelay >= 0 {
761
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_UPDELAY, nl.Uint32Attr(uint32(bond.UpDelay)))
761
+		data.AddRtAttr(nl.IFLA_BOND_UPDELAY, nl.Uint32Attr(uint32(bond.UpDelay)))
762 762
 	}
763 763
 	if bond.DownDelay >= 0 {
764
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_DOWNDELAY, nl.Uint32Attr(uint32(bond.DownDelay)))
764
+		data.AddRtAttr(nl.IFLA_BOND_DOWNDELAY, nl.Uint32Attr(uint32(bond.DownDelay)))
765 765
 	}
766 766
 	if bond.UseCarrier >= 0 {
767
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_USE_CARRIER, nl.Uint8Attr(uint8(bond.UseCarrier)))
767
+		data.AddRtAttr(nl.IFLA_BOND_USE_CARRIER, nl.Uint8Attr(uint8(bond.UseCarrier)))
768 768
 	}
769 769
 	if bond.ArpInterval >= 0 {
770
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_INTERVAL, nl.Uint32Attr(uint32(bond.ArpInterval)))
770
+		data.AddRtAttr(nl.IFLA_BOND_ARP_INTERVAL, nl.Uint32Attr(uint32(bond.ArpInterval)))
771 771
 	}
772 772
 	if bond.ArpIpTargets != nil {
773
-		msg := nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_IP_TARGET, nil)
773
+		msg := data.AddRtAttr(nl.IFLA_BOND_ARP_IP_TARGET, nil)
774 774
 		for i := range bond.ArpIpTargets {
775 775
 			ip := bond.ArpIpTargets[i].To4()
776 776
 			if ip != nil {
777
-				nl.NewRtAttrChild(msg, i, []byte(ip))
777
+				msg.AddRtAttr(i, []byte(ip))
778 778
 				continue
779 779
 			}
780 780
 			ip = bond.ArpIpTargets[i].To16()
781 781
 			if ip != nil {
782
-				nl.NewRtAttrChild(msg, i, []byte(ip))
782
+				msg.AddRtAttr(i, []byte(ip))
783 783
 			}
784 784
 		}
785 785
 	}
786 786
 	if bond.ArpValidate >= 0 {
787
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_VALIDATE, nl.Uint32Attr(uint32(bond.ArpValidate)))
787
+		data.AddRtAttr(nl.IFLA_BOND_ARP_VALIDATE, nl.Uint32Attr(uint32(bond.ArpValidate)))
788 788
 	}
789 789
 	if bond.ArpAllTargets >= 0 {
790
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_ALL_TARGETS, nl.Uint32Attr(uint32(bond.ArpAllTargets)))
790
+		data.AddRtAttr(nl.IFLA_BOND_ARP_ALL_TARGETS, nl.Uint32Attr(uint32(bond.ArpAllTargets)))
791 791
 	}
792 792
 	if bond.Primary >= 0 {
793
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_PRIMARY, nl.Uint32Attr(uint32(bond.Primary)))
793
+		data.AddRtAttr(nl.IFLA_BOND_PRIMARY, nl.Uint32Attr(uint32(bond.Primary)))
794 794
 	}
795 795
 	if bond.PrimaryReselect >= 0 {
796
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_PRIMARY_RESELECT, nl.Uint8Attr(uint8(bond.PrimaryReselect)))
796
+		data.AddRtAttr(nl.IFLA_BOND_PRIMARY_RESELECT, nl.Uint8Attr(uint8(bond.PrimaryReselect)))
797 797
 	}
798 798
 	if bond.FailOverMac >= 0 {
799
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_FAIL_OVER_MAC, nl.Uint8Attr(uint8(bond.FailOverMac)))
799
+		data.AddRtAttr(nl.IFLA_BOND_FAIL_OVER_MAC, nl.Uint8Attr(uint8(bond.FailOverMac)))
800 800
 	}
801 801
 	if bond.XmitHashPolicy >= 0 {
802
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_XMIT_HASH_POLICY, nl.Uint8Attr(uint8(bond.XmitHashPolicy)))
802
+		data.AddRtAttr(nl.IFLA_BOND_XMIT_HASH_POLICY, nl.Uint8Attr(uint8(bond.XmitHashPolicy)))
803 803
 	}
804 804
 	if bond.ResendIgmp >= 0 {
805
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_RESEND_IGMP, nl.Uint32Attr(uint32(bond.ResendIgmp)))
805
+		data.AddRtAttr(nl.IFLA_BOND_RESEND_IGMP, nl.Uint32Attr(uint32(bond.ResendIgmp)))
806 806
 	}
807 807
 	if bond.NumPeerNotif >= 0 {
808
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_NUM_PEER_NOTIF, nl.Uint8Attr(uint8(bond.NumPeerNotif)))
808
+		data.AddRtAttr(nl.IFLA_BOND_NUM_PEER_NOTIF, nl.Uint8Attr(uint8(bond.NumPeerNotif)))
809 809
 	}
810 810
 	if bond.AllSlavesActive >= 0 {
811
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_ALL_SLAVES_ACTIVE, nl.Uint8Attr(uint8(bond.AllSlavesActive)))
811
+		data.AddRtAttr(nl.IFLA_BOND_ALL_SLAVES_ACTIVE, nl.Uint8Attr(uint8(bond.AllSlavesActive)))
812 812
 	}
813 813
 	if bond.MinLinks >= 0 {
814
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_MIN_LINKS, nl.Uint32Attr(uint32(bond.MinLinks)))
814
+		data.AddRtAttr(nl.IFLA_BOND_MIN_LINKS, nl.Uint32Attr(uint32(bond.MinLinks)))
815 815
 	}
816 816
 	if bond.LpInterval >= 0 {
817
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval)))
817
+		data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval)))
818 818
 	}
819 819
 	if bond.PackersPerSlave >= 0 {
820
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave)))
820
+		data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave)))
821 821
 	}
822 822
 	if bond.LacpRate >= 0 {
823
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate)))
823
+		data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate)))
824 824
 	}
825 825
 	if bond.AdSelect >= 0 {
826
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_SELECT, nl.Uint8Attr(uint8(bond.AdSelect)))
826
+		data.AddRtAttr(nl.IFLA_BOND_AD_SELECT, nl.Uint8Attr(uint8(bond.AdSelect)))
827 827
 	}
828 828
 	if bond.AdActorSysPrio >= 0 {
829
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_ACTOR_SYS_PRIO, nl.Uint16Attr(uint16(bond.AdActorSysPrio)))
829
+		data.AddRtAttr(nl.IFLA_BOND_AD_ACTOR_SYS_PRIO, nl.Uint16Attr(uint16(bond.AdActorSysPrio)))
830 830
 	}
831 831
 	if bond.AdUserPortKey >= 0 {
832
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_USER_PORT_KEY, nl.Uint16Attr(uint16(bond.AdUserPortKey)))
832
+		data.AddRtAttr(nl.IFLA_BOND_AD_USER_PORT_KEY, nl.Uint16Attr(uint16(bond.AdUserPortKey)))
833 833
 	}
834 834
 	if bond.AdActorSystem != nil {
835
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_ACTOR_SYSTEM, []byte(bond.AdActorSystem))
835
+		data.AddRtAttr(nl.IFLA_BOND_AD_ACTOR_SYSTEM, []byte(bond.AdActorSystem))
836 836
 	}
837 837
 	if bond.TlbDynamicLb >= 0 {
838
-		nl.NewRtAttrChild(data, nl.IFLA_BOND_TLB_DYNAMIC_LB, nl.Uint8Attr(uint8(bond.TlbDynamicLb)))
838
+		data.AddRtAttr(nl.IFLA_BOND_TLB_DYNAMIC_LB, nl.Uint8Attr(uint8(bond.TlbDynamicLb)))
839 839
 	}
840 840
 }
841 841
 
... ...
@@ -853,7 +1042,7 @@ func LinkAdd(link Link) error {
853 853
 }
854 854
 
855 855
 // LinkAdd adds a new link device. The type and features of the device
856
-// are taken fromt the parameters in the link object.
856
+// are taken from the parameters in the link object.
857 857
 // Equivalent to: `ip link add $link`
858 858
 func (h *Handle) LinkAdd(link Link) error {
859 859
 	return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
... ...
@@ -863,16 +1052,18 @@ func (h *Handle) linkModify(link Link, flags int) error {
863 863
 	// TODO: support extra data for macvlan
864 864
 	base := link.Attrs()
865 865
 
866
-	if base.Name == "" {
867
-		return fmt.Errorf("LinkAttrs.Name cannot be empty!")
866
+	// if tuntap, then the name can be empty, OS will provide a name
867
+	tuntap, isTuntap := link.(*Tuntap)
868
+
869
+	if base.Name == "" && !isTuntap {
870
+		return fmt.Errorf("LinkAttrs.Name cannot be empty")
868 871
 	}
869 872
 
870
-	if tuntap, ok := link.(*Tuntap); ok {
873
+	if isTuntap {
871 874
 		// TODO: support user
872 875
 		// TODO: support group
873
-		// TODO: support non- persistent
874 876
 		if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP {
875
-			return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
877
+			return fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode)
876 878
 		}
877 879
 
878 880
 		queues := tuntap.Queues
... ...
@@ -913,12 +1104,25 @@ func (h *Handle) linkModify(link Link, flags int) error {
913 913
 				cleanupFds(fds)
914 914
 				return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed [%d], errno %v", i, errno)
915 915
 			}
916
+			// 1) we only care for the name of the first tap in the multi queue set
917
+			// 2) if the original name was empty, the localReq has now the actual name
918
+			//
919
+			// In addition:
920
+			// This ensures that the link name is always identical to what the kernel returns.
921
+			// Not only in case of an empty name, but also when using name templates.
922
+			// e.g. when the provided name is "tap%d", the kernel replaces %d with the next available number.
923
+			if i == 0 {
924
+				link.Attrs().Name = strings.Trim(string(localReq.Name[:]), "\x00")
925
+			}
916 926
 		}
917 927
 
918
-		_, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1)
919
-		if errno != 0 {
920
-			cleanupFds(fds)
921
-			return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
928
+		// only persist interface if NonPersist is NOT set
929
+		if !tuntap.NonPersist {
930
+			_, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1)
931
+			if errno != 0 {
932
+				cleanupFds(fds)
933
+				return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
934
+			}
922 935
 		}
923 936
 
924 937
 		h.ensureIndex(base)
... ...
@@ -928,7 +1132,11 @@ func (h *Handle) linkModify(link Link, flags int) error {
928 928
 			// TODO: verify MasterIndex is actually a bridge?
929 929
 			err := h.LinkSetMasterByIndex(link, base.MasterIndex)
930 930
 			if err != nil {
931
-				_, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0)
931
+				// un-persist (e.g. allow the interface to be removed) the tuntap
932
+				// should not hurt if not set prior, condition might be not needed
933
+				if !tuntap.NonPersist {
934
+					_, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0)
935
+				}
932 936
 				cleanupFds(fds)
933 937
 				return err
934 938
 			}
... ...
@@ -978,8 +1186,8 @@ func (h *Handle) linkModify(link Link, flags int) error {
978 978
 		native.PutUint32(b, uint32(base.ParentIndex))
979 979
 		data := nl.NewRtAttr(unix.IFLA_LINK, b)
980 980
 		req.AddData(data)
981
-	} else if link.Type() == "ipvlan" {
982
-		return fmt.Errorf("Can't create ipvlan link without ParentIndex")
981
+	} else if link.Type() == "ipvlan" || link.Type() == "ipoib" {
982
+		return fmt.Errorf("Can't create %s link without ParentIndex", link.Type())
983 983
 	}
984 984
 
985 985
 	nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(base.Name))
... ...
@@ -1010,14 +1218,29 @@ func (h *Handle) linkModify(link Link, flags int) error {
1010 1010
 		req.AddData(rxqueues)
1011 1011
 	}
1012 1012
 
1013
+	if base.GSOMaxSegs > 0 {
1014
+		gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_MAX_SEGS, nl.Uint32Attr(base.GSOMaxSegs))
1015
+		req.AddData(gsoAttr)
1016
+	}
1017
+
1018
+	if base.GSOMaxSize > 0 {
1019
+		gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, nl.Uint32Attr(base.GSOMaxSize))
1020
+		req.AddData(gsoAttr)
1021
+	}
1022
+
1023
+	if base.Group > 0 {
1024
+		groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group))
1025
+		req.AddData(groupAttr)
1026
+	}
1027
+
1013 1028
 	if base.Namespace != nil {
1014 1029
 		var attr *nl.RtAttr
1015
-		switch base.Namespace.(type) {
1030
+		switch ns := base.Namespace.(type) {
1016 1031
 		case NsPid:
1017
-			val := nl.Uint32Attr(uint32(base.Namespace.(NsPid)))
1032
+			val := nl.Uint32Attr(uint32(ns))
1018 1033
 			attr = nl.NewRtAttr(unix.IFLA_NET_NS_PID, val)
1019 1034
 		case NsFd:
1020
-			val := nl.Uint32Attr(uint32(base.Namespace.(NsFd)))
1035
+			val := nl.Uint32Attr(uint32(ns))
1021 1036
 			attr = nl.NewRtAttr(unix.IFLA_NET_NS_FD, val)
1022 1037
 		}
1023 1038
 
... ...
@@ -1029,47 +1252,56 @@ func (h *Handle) linkModify(link Link, flags int) error {
1029 1029
 	}
1030 1030
 
1031 1031
 	linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil)
1032
-	nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
1032
+	linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type()))
1033 1033
 
1034 1034
 	switch link := link.(type) {
1035 1035
 	case *Vlan:
1036 1036
 		b := make([]byte, 2)
1037 1037
 		native.PutUint16(b, uint16(link.VlanId))
1038
-		data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1039
-		nl.NewRtAttrChild(data, nl.IFLA_VLAN_ID, b)
1038
+		data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1039
+		data.AddRtAttr(nl.IFLA_VLAN_ID, b)
1040
+
1041
+		if link.VlanProtocol != VLAN_PROTOCOL_UNKNOWN {
1042
+			data.AddRtAttr(nl.IFLA_VLAN_PROTOCOL, htons(uint16(link.VlanProtocol)))
1043
+		}
1040 1044
 	case *Veth:
1041
-		data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1042
-		peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil)
1045
+		data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1046
+		peer := data.AddRtAttr(nl.VETH_INFO_PEER, nil)
1043 1047
 		nl.NewIfInfomsgChild(peer, unix.AF_UNSPEC)
1044
-		nl.NewRtAttrChild(peer, unix.IFLA_IFNAME, nl.ZeroTerminated(link.PeerName))
1048
+		peer.AddRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(link.PeerName))
1045 1049
 		if base.TxQLen >= 0 {
1046
-			nl.NewRtAttrChild(peer, unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen)))
1050
+			peer.AddRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen)))
1047 1051
 		}
1048 1052
 		if base.MTU > 0 {
1049
-			nl.NewRtAttrChild(peer, unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
1053
+			peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU)))
1054
+		}
1055
+		if link.PeerHardwareAddr != nil {
1056
+			peer.AddRtAttr(unix.IFLA_ADDRESS, []byte(link.PeerHardwareAddr))
1050 1057
 		}
1051
-
1052 1058
 	case *Vxlan:
1053 1059
 		addVxlanAttrs(link, linkInfo)
1054 1060
 	case *Bond:
1055 1061
 		addBondAttrs(link, linkInfo)
1056 1062
 	case *IPVlan:
1057
-		data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1058
-		nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode)))
1063
+		data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1064
+		data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode)))
1065
+		data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag)))
1059 1066
 	case *Macvlan:
1060 1067
 		if link.Mode != MACVLAN_MODE_DEFAULT {
1061
-			data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1062
-			nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode]))
1068
+			data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1069
+			data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode]))
1063 1070
 		}
1064 1071
 	case *Macvtap:
1065 1072
 		if link.Mode != MACVLAN_MODE_DEFAULT {
1066
-			data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1067
-			nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode]))
1073
+			data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1074
+			data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode]))
1068 1075
 		}
1069 1076
 	case *Gretap:
1070 1077
 		addGretapAttrs(link, linkInfo)
1071 1078
 	case *Iptun:
1072 1079
 		addIptunAttrs(link, linkInfo)
1080
+	case *Ip6tnl:
1081
+		addIp6tnlAttrs(link, linkInfo)
1073 1082
 	case *Sittun:
1074 1083
 		addSittunAttrs(link, linkInfo)
1075 1084
 	case *Gretun:
... ...
@@ -1082,6 +1314,10 @@ func (h *Handle) linkModify(link Link, flags int) error {
1082 1082
 		addBridgeAttrs(link, linkInfo)
1083 1083
 	case *GTP:
1084 1084
 		addGTPAttrs(link, linkInfo)
1085
+	case *Xfrmi:
1086
+		addXfrmiAttrs(link, linkInfo)
1087
+	case *IPoIB:
1088
+		addIPoIBAttrs(link, linkInfo)
1085 1089
 	}
1086 1090
 
1087 1091
 	req.AddData(linkInfo)
... ...
@@ -1170,6 +1406,9 @@ func (h *Handle) LinkByName(name string) (Link, error) {
1170 1170
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
1171 1171
 	req.AddData(msg)
1172 1172
 
1173
+	attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF))
1174
+	req.AddData(attr)
1175
+
1173 1176
 	nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name))
1174 1177
 	req.AddData(nameData)
1175 1178
 
... ...
@@ -1202,6 +1441,9 @@ func (h *Handle) LinkByAlias(alias string) (Link, error) {
1202 1202
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
1203 1203
 	req.AddData(msg)
1204 1204
 
1205
+	attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF))
1206
+	req.AddData(attr)
1207
+
1205 1208
 	nameData := nl.NewRtAttr(unix.IFLA_IFALIAS, nl.ZeroTerminated(alias))
1206 1209
 	req.AddData(nameData)
1207 1210
 
... ...
@@ -1228,6 +1470,8 @@ func (h *Handle) LinkByIndex(index int) (Link, error) {
1228 1228
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
1229 1229
 	msg.Index = int32(index)
1230 1230
 	req.AddData(msg)
1231
+	attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF))
1232
+	req.AddData(attr)
1231 1233
 
1232 1234
 	return execGetLink(req)
1233 1235
 }
... ...
@@ -1270,10 +1514,12 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1270 1270
 		base.Promisc = 1
1271 1271
 	}
1272 1272
 	var (
1273
-		link     Link
1274
-		stats32  []byte
1275
-		stats64  []byte
1276
-		linkType string
1273
+		link      Link
1274
+		stats32   *LinkStatistics32
1275
+		stats64   *LinkStatistics64
1276
+		linkType  string
1277
+		linkSlave LinkSlave
1278
+		slaveType string
1277 1279
 	)
1278 1280
 	for _, attr := range attrs {
1279 1281
 		switch attr.Attr.Type {
... ...
@@ -1313,18 +1559,26 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1313 1313
 						link = &Gretap{}
1314 1314
 					case "ipip":
1315 1315
 						link = &Iptun{}
1316
+					case "ip6tnl":
1317
+						link = &Ip6tnl{}
1316 1318
 					case "sit":
1317 1319
 						link = &Sittun{}
1318 1320
 					case "gre":
1319 1321
 						link = &Gretun{}
1320 1322
 					case "ip6gre":
1321 1323
 						link = &Gretun{}
1322
-					case "vti":
1324
+					case "vti", "vti6":
1323 1325
 						link = &Vti{}
1324 1326
 					case "vrf":
1325 1327
 						link = &Vrf{}
1326 1328
 					case "gtp":
1327 1329
 						link = &GTP{}
1330
+					case "xfrm":
1331
+						link = &Xfrmi{}
1332
+					case "tun":
1333
+						link = &Tuntap{}
1334
+					case "ipoib":
1335
+						link = &IPoIB{}
1328 1336
 					default:
1329 1337
 						link = &GenericLink{LinkType: linkType}
1330 1338
 					}
... ...
@@ -1352,13 +1606,15 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1352 1352
 						parseGretapData(link, data)
1353 1353
 					case "ipip":
1354 1354
 						parseIptunData(link, data)
1355
+					case "ip6tnl":
1356
+						parseIp6tnlData(link, data)
1355 1357
 					case "sit":
1356 1358
 						parseSittunData(link, data)
1357 1359
 					case "gre":
1358 1360
 						parseGretunData(link, data)
1359 1361
 					case "ip6gre":
1360 1362
 						parseGretunData(link, data)
1361
-					case "vti":
1363
+					case "vti", "vti6":
1362 1364
 						parseVtiData(link, data)
1363 1365
 					case "vrf":
1364 1366
 						parseVrfData(link, data)
... ...
@@ -1366,6 +1622,27 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1366 1366
 						parseBridgeData(link, data)
1367 1367
 					case "gtp":
1368 1368
 						parseGTPData(link, data)
1369
+					case "xfrm":
1370
+						parseXfrmiData(link, data)
1371
+					case "tun":
1372
+						parseTuntapData(link, data)
1373
+					case "ipoib":
1374
+						parseIPoIBData(link, data)
1375
+					}
1376
+				case nl.IFLA_INFO_SLAVE_KIND:
1377
+					slaveType = string(info.Value[:len(info.Value)-1])
1378
+					switch slaveType {
1379
+					case "bond":
1380
+						linkSlave = &BondSlave{}
1381
+					}
1382
+				case nl.IFLA_INFO_SLAVE_DATA:
1383
+					switch slaveType {
1384
+					case "bond":
1385
+						data, err := nl.ParseRouteAttr(info.Value)
1386
+						if err != nil {
1387
+							return nil, err
1388
+						}
1389
+						parseBondSlaveData(linkSlave, data)
1369 1390
 					}
1370 1391
 				}
1371 1392
 			}
... ...
@@ -1392,9 +1669,15 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1392 1392
 		case unix.IFLA_IFALIAS:
1393 1393
 			base.Alias = string(attr.Value[:len(attr.Value)-1])
1394 1394
 		case unix.IFLA_STATS:
1395
-			stats32 = attr.Value[:]
1395
+			stats32 = new(LinkStatistics32)
1396
+			if err := binary.Read(bytes.NewBuffer(attr.Value[:]), nl.NativeEndian(), stats32); err != nil {
1397
+				return nil, err
1398
+			}
1396 1399
 		case unix.IFLA_STATS64:
1397
-			stats64 = attr.Value[:]
1400
+			stats64 = new(LinkStatistics64)
1401
+			if err := binary.Read(bytes.NewBuffer(attr.Value[:]), nl.NativeEndian(), stats64); err != nil {
1402
+				return nil, err
1403
+			}
1398 1404
 		case unix.IFLA_XDP:
1399 1405
 			xdp, err := parseLinkXdp(attr.Value[:])
1400 1406
 			if err != nil {
... ...
@@ -1408,19 +1691,40 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1408 1408
 				if err != nil {
1409 1409
 					return nil, err
1410 1410
 				}
1411
-				base.Protinfo = parseProtinfo(attrs)
1411
+				protinfo := parseProtinfo(attrs)
1412
+				base.Protinfo = &protinfo
1412 1413
 			}
1413 1414
 		case unix.IFLA_OPERSTATE:
1414 1415
 			base.OperState = LinkOperState(uint8(attr.Value[0]))
1415 1416
 		case unix.IFLA_LINK_NETNSID:
1416 1417
 			base.NetNsID = int(native.Uint32(attr.Value[0:4]))
1418
+		case unix.IFLA_GSO_MAX_SIZE:
1419
+			base.GSOMaxSize = native.Uint32(attr.Value[0:4])
1420
+		case unix.IFLA_GSO_MAX_SEGS:
1421
+			base.GSOMaxSegs = native.Uint32(attr.Value[0:4])
1422
+		case unix.IFLA_VFINFO_LIST:
1423
+			data, err := nl.ParseRouteAttr(attr.Value)
1424
+			if err != nil {
1425
+				return nil, err
1426
+			}
1427
+			vfs, err := parseVfInfoList(data)
1428
+			if err != nil {
1429
+				return nil, err
1430
+			}
1431
+			base.Vfs = vfs
1432
+		case unix.IFLA_NUM_TX_QUEUES:
1433
+			base.NumTxQueues = int(native.Uint32(attr.Value[0:4]))
1434
+		case unix.IFLA_NUM_RX_QUEUES:
1435
+			base.NumRxQueues = int(native.Uint32(attr.Value[0:4]))
1436
+		case unix.IFLA_GROUP:
1437
+			base.Group = native.Uint32(attr.Value[0:4])
1417 1438
 		}
1418 1439
 	}
1419 1440
 
1420 1441
 	if stats64 != nil {
1421
-		base.Statistics = parseLinkStats64(stats64)
1442
+		base.Statistics = (*LinkStatistics)(stats64)
1422 1443
 	} else if stats32 != nil {
1423
-		base.Statistics = parseLinkStats32(stats32)
1444
+		base.Statistics = (*LinkStatistics)(stats32.to64())
1424 1445
 	}
1425 1446
 
1426 1447
 	// Links that don't have IFLA_INFO_KIND are hardware devices
... ...
@@ -1428,10 +1732,59 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
1428 1428
 		link = &Device{}
1429 1429
 	}
1430 1430
 	*link.Attrs() = base
1431
+	link.Attrs().Slave = linkSlave
1432
+
1433
+	// If the tuntap attributes are not updated by netlink due to
1434
+	// an older driver, use sysfs
1435
+	if link != nil && linkType == "tun" {
1436
+		tuntap := link.(*Tuntap)
1437
+
1438
+		if tuntap.Mode == 0 {
1439
+			ifname := tuntap.Attrs().Name
1440
+			if flags, err := readSysPropAsInt64(ifname, "tun_flags"); err == nil {
1441
+
1442
+				if flags&unix.IFF_TUN != 0 {
1443
+					tuntap.Mode = unix.IFF_TUN
1444
+				} else if flags&unix.IFF_TAP != 0 {
1445
+					tuntap.Mode = unix.IFF_TAP
1446
+				}
1447
+
1448
+				tuntap.NonPersist = false
1449
+				if flags&unix.IFF_PERSIST == 0 {
1450
+					tuntap.NonPersist = true
1451
+				}
1452
+			}
1453
+
1454
+			// The sysfs interface for owner/group returns -1 for root user, instead of returning 0.
1455
+			// So explicitly check for negative value, before assigning the owner uid/gid.
1456
+			if owner, err := readSysPropAsInt64(ifname, "owner"); err == nil && owner > 0 {
1457
+				tuntap.Owner = uint32(owner)
1458
+			}
1459
+
1460
+			if group, err := readSysPropAsInt64(ifname, "group"); err == nil && group > 0 {
1461
+				tuntap.Group = uint32(group)
1462
+			}
1463
+		}
1464
+	}
1431 1465
 
1432 1466
 	return link, nil
1433 1467
 }
1434 1468
 
1469
+func readSysPropAsInt64(ifname, prop string) (int64, error) {
1470
+	fname := fmt.Sprintf("/sys/class/net/%s/%s", ifname, prop)
1471
+	contents, err := ioutil.ReadFile(fname)
1472
+	if err != nil {
1473
+		return 0, err
1474
+	}
1475
+
1476
+	num, err := strconv.ParseInt(strings.TrimSpace(string(contents)), 0, 64)
1477
+	if err == nil {
1478
+		return num, nil
1479
+	}
1480
+
1481
+	return 0, err
1482
+}
1483
+
1435 1484
 // LinkList gets a list of link devices.
1436 1485
 // Equivalent to: `ip link show`
1437 1486
 func LinkList() ([]Link, error) {
... ...
@@ -1447,6 +1800,8 @@ func (h *Handle) LinkList() ([]Link, error) {
1447 1447
 
1448 1448
 	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
1449 1449
 	req.AddData(msg)
1450
+	attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF))
1451
+	req.AddData(attr)
1450 1452
 
1451 1453
 	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK)
1452 1454
 	if err != nil {
... ...
@@ -1526,13 +1881,19 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
1526 1526
 	go func() {
1527 1527
 		defer close(ch)
1528 1528
 		for {
1529
-			msgs, err := s.Receive()
1529
+			msgs, from, err := s.Receive()
1530 1530
 			if err != nil {
1531 1531
 				if cberr != nil {
1532 1532
 					cberr(err)
1533 1533
 				}
1534 1534
 				return
1535 1535
 			}
1536
+			if from.Pid != nl.PidKernel {
1537
+				if cberr != nil {
1538
+					cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel))
1539
+				}
1540
+				continue
1541
+			}
1536 1542
 			for _, m := range msgs {
1537 1543
 				if m.Header.Type == unix.NLMSG_DONE {
1538 1544
 					continue
... ...
@@ -1639,7 +2000,7 @@ func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
1639 1639
 	req.AddData(msg)
1640 1640
 
1641 1641
 	br := nl.NewRtAttr(unix.IFLA_PROTINFO|unix.NLA_F_NESTED, nil)
1642
-	nl.NewRtAttrChild(br, attr, boolToByte(mode))
1642
+	br.AddRtAttr(attr, boolToByte(mode))
1643 1643
 	req.AddData(br)
1644 1644
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
1645 1645
 	if err != nil {
... ...
@@ -1675,12 +2036,43 @@ func (h *Handle) LinkSetTxQLen(link Link, qlen int) error {
1675 1675
 	return err
1676 1676
 }
1677 1677
 
1678
+// LinkSetGroup sets the link group id which can be used to perform mass actions
1679
+// with iproute2 as well use it as a reference in nft filters.
1680
+// Equivalent to: `ip link set $link group $id`
1681
+func LinkSetGroup(link Link, group int) error {
1682
+	return pkgHandle.LinkSetGroup(link, group)
1683
+}
1684
+
1685
+// LinkSetGroup sets the link group id which can be used to perform mass actions
1686
+// with iproute2 as well use it as a reference in nft filters.
1687
+// Equivalent to: `ip link set $link group $id`
1688
+func (h *Handle) LinkSetGroup(link Link, group int) error {
1689
+	base := link.Attrs()
1690
+	h.ensureIndex(base)
1691
+	req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK)
1692
+
1693
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
1694
+	msg.Index = int32(base.Index)
1695
+	req.AddData(msg)
1696
+
1697
+	b := make([]byte, 4)
1698
+	native.PutUint32(b, uint32(group))
1699
+
1700
+	data := nl.NewRtAttr(unix.IFLA_GROUP, b)
1701
+	req.AddData(data)
1702
+
1703
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
1704
+	return err
1705
+}
1706
+
1678 1707
 func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) {
1679 1708
 	vlan := link.(*Vlan)
1680 1709
 	for _, datum := range data {
1681 1710
 		switch datum.Attr.Type {
1682 1711
 		case nl.IFLA_VLAN_ID:
1683 1712
 			vlan.VlanId = int(native.Uint16(datum.Value[0:2]))
1713
+		case nl.IFLA_VLAN_PROTOCOL:
1714
+			vlan.VlanProtocol = VlanProtocol(int(ntohs(datum.Value[0:2])))
1684 1715
 		}
1685 1716
 	}
1686 1717
 }
... ...
@@ -1762,7 +2154,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) {
1762 1762
 		case nl.IFLA_BOND_ARP_INTERVAL:
1763 1763
 			bond.ArpInterval = int(native.Uint32(data[i].Value[0:4]))
1764 1764
 		case nl.IFLA_BOND_ARP_IP_TARGET:
1765
-			// TODO: implement
1765
+			bond.ArpIpTargets = parseBondArpIpTargets(data[i].Value)
1766 1766
 		case nl.IFLA_BOND_ARP_VALIDATE:
1767 1767
 			bond.ArpValidate = BondArpValidate(native.Uint32(data[i].Value[0:4]))
1768 1768
 		case nl.IFLA_BOND_ARP_ALL_TARGETS:
... ...
@@ -1805,12 +2197,75 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) {
1805 1805
 	}
1806 1806
 }
1807 1807
 
1808
+func parseBondArpIpTargets(value []byte) []net.IP {
1809
+	data, err := nl.ParseRouteAttr(value)
1810
+	if err != nil {
1811
+		return nil
1812
+	}
1813
+
1814
+	targets := []net.IP{}
1815
+	for i := range data {
1816
+		target := net.IP(data[i].Value)
1817
+		if ip := target.To4(); ip != nil {
1818
+			targets = append(targets, ip)
1819
+			continue
1820
+		}
1821
+		if ip := target.To16(); ip != nil {
1822
+			targets = append(targets, ip)
1823
+		}
1824
+	}
1825
+
1826
+	return targets
1827
+}
1828
+
1829
+func addBondSlaveAttrs(bondSlave *BondSlave, linkInfo *nl.RtAttr) {
1830
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_SLAVE_DATA, nil)
1831
+
1832
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_STATE, nl.Uint8Attr(uint8(bondSlave.State)))
1833
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_MII_STATUS, nl.Uint8Attr(uint8(bondSlave.MiiStatus)))
1834
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, nl.Uint32Attr(bondSlave.LinkFailureCount))
1835
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_QUEUE_ID, nl.Uint16Attr(bondSlave.QueueId))
1836
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, nl.Uint16Attr(bondSlave.AggregatorId))
1837
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, nl.Uint8Attr(bondSlave.AdActorOperPortState))
1838
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, nl.Uint16Attr(bondSlave.AdPartnerOperPortState))
1839
+
1840
+	if mac := bondSlave.PermHardwareAddr; mac != nil {
1841
+		data.AddRtAttr(nl.IFLA_BOND_SLAVE_PERM_HWADDR, []byte(mac))
1842
+	}
1843
+}
1844
+
1845
+func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) {
1846
+	bondSlave := slave.(*BondSlave)
1847
+	for i := range data {
1848
+		switch data[i].Attr.Type {
1849
+		case nl.IFLA_BOND_SLAVE_STATE:
1850
+			bondSlave.State = BondSlaveState(data[i].Value[0])
1851
+		case nl.IFLA_BOND_SLAVE_MII_STATUS:
1852
+			bondSlave.MiiStatus = BondSlaveMiiStatus(data[i].Value[0])
1853
+		case nl.IFLA_BOND_SLAVE_LINK_FAILURE_COUNT:
1854
+			bondSlave.LinkFailureCount = native.Uint32(data[i].Value[0:4])
1855
+		case nl.IFLA_BOND_SLAVE_PERM_HWADDR:
1856
+			bondSlave.PermHardwareAddr = net.HardwareAddr(data[i].Value[0:6])
1857
+		case nl.IFLA_BOND_SLAVE_QUEUE_ID:
1858
+			bondSlave.QueueId = native.Uint16(data[i].Value[0:2])
1859
+		case nl.IFLA_BOND_SLAVE_AD_AGGREGATOR_ID:
1860
+			bondSlave.AggregatorId = native.Uint16(data[i].Value[0:2])
1861
+		case nl.IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE:
1862
+			bondSlave.AdActorOperPortState = uint8(data[i].Value[0])
1863
+		case nl.IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE:
1864
+			bondSlave.AdPartnerOperPortState = native.Uint16(data[i].Value[0:2])
1865
+		}
1866
+	}
1867
+}
1868
+
1808 1869
 func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
1809 1870
 	ipv := link.(*IPVlan)
1810 1871
 	for _, datum := range data {
1811
-		if datum.Attr.Type == nl.IFLA_IPVLAN_MODE {
1872
+		switch datum.Attr.Type {
1873
+		case nl.IFLA_IPVLAN_MODE:
1812 1874
 			ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4]))
1813
-			return
1875
+		case nl.IFLA_IPVLAN_FLAG:
1876
+			ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4]))
1814 1877
 		}
1815 1878
 	}
1816 1879
 }
... ...
@@ -1873,11 +2328,11 @@ func linkFlags(rawFlags uint32) net.Flags {
1873 1873
 }
1874 1874
 
1875 1875
 func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
1876
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1876
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1877 1877
 
1878 1878
 	if gretap.FlowBased {
1879 1879
 		// In flow based mode, no other attributes need to be configured
1880
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased))
1880
+		data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased))
1881 1881
 		return
1882 1882
 	}
1883 1883
 
... ...
@@ -1885,40 +2340,40 @@ func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
1885 1885
 		if ip.To4() != nil {
1886 1886
 			ip = ip.To4()
1887 1887
 		}
1888
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_LOCAL, []byte(ip))
1888
+		data.AddRtAttr(nl.IFLA_GRE_LOCAL, []byte(ip))
1889 1889
 	}
1890 1890
 
1891 1891
 	if ip := gretap.Remote; ip != nil {
1892 1892
 		if ip.To4() != nil {
1893 1893
 			ip = ip.To4()
1894 1894
 		}
1895
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_REMOTE, []byte(ip))
1895
+		data.AddRtAttr(nl.IFLA_GRE_REMOTE, []byte(ip))
1896 1896
 	}
1897 1897
 
1898 1898
 	if gretap.IKey != 0 {
1899
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_IKEY, htonl(gretap.IKey))
1899
+		data.AddRtAttr(nl.IFLA_GRE_IKEY, htonl(gretap.IKey))
1900 1900
 		gretap.IFlags |= uint16(nl.GRE_KEY)
1901 1901
 	}
1902 1902
 
1903 1903
 	if gretap.OKey != 0 {
1904
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_OKEY, htonl(gretap.OKey))
1904
+		data.AddRtAttr(nl.IFLA_GRE_OKEY, htonl(gretap.OKey))
1905 1905
 		gretap.OFlags |= uint16(nl.GRE_KEY)
1906 1906
 	}
1907 1907
 
1908
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_IFLAGS, htons(gretap.IFlags))
1909
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_OFLAGS, htons(gretap.OFlags))
1908
+	data.AddRtAttr(nl.IFLA_GRE_IFLAGS, htons(gretap.IFlags))
1909
+	data.AddRtAttr(nl.IFLA_GRE_OFLAGS, htons(gretap.OFlags))
1910 1910
 
1911 1911
 	if gretap.Link != 0 {
1912
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_LINK, nl.Uint32Attr(gretap.Link))
1912
+		data.AddRtAttr(nl.IFLA_GRE_LINK, nl.Uint32Attr(gretap.Link))
1913 1913
 	}
1914 1914
 
1915
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gretap.PMtuDisc))
1916
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_TTL, nl.Uint8Attr(gretap.Ttl))
1917
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_TOS, nl.Uint8Attr(gretap.Tos))
1918
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gretap.EncapType))
1919
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gretap.EncapFlags))
1920
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_SPORT, htons(gretap.EncapSport))
1921
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_DPORT, htons(gretap.EncapDport))
1915
+	data.AddRtAttr(nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gretap.PMtuDisc))
1916
+	data.AddRtAttr(nl.IFLA_GRE_TTL, nl.Uint8Attr(gretap.Ttl))
1917
+	data.AddRtAttr(nl.IFLA_GRE_TOS, nl.Uint8Attr(gretap.Tos))
1918
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gretap.EncapType))
1919
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gretap.EncapFlags))
1920
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_SPORT, htons(gretap.EncapSport))
1921
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_DPORT, htons(gretap.EncapDport))
1922 1922
 }
1923 1923
 
1924 1924
 func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) {
... ...
@@ -1930,9 +2385,9 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) {
1930 1930
 		case nl.IFLA_GRE_IKEY:
1931 1931
 			gre.OKey = ntohl(datum.Value[0:4])
1932 1932
 		case nl.IFLA_GRE_LOCAL:
1933
-			gre.Local = net.IP(datum.Value[0:16])
1933
+			gre.Local = net.IP(datum.Value)
1934 1934
 		case nl.IFLA_GRE_REMOTE:
1935
-			gre.Remote = net.IP(datum.Value[0:16])
1935
+			gre.Remote = net.IP(datum.Value)
1936 1936
 		case nl.IFLA_GRE_ENCAP_SPORT:
1937 1937
 			gre.EncapSport = ntohs(datum.Value[0:2])
1938 1938
 		case nl.IFLA_GRE_ENCAP_DPORT:
... ...
@@ -1941,7 +2396,6 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) {
1941 1941
 			gre.IFlags = ntohs(datum.Value[0:2])
1942 1942
 		case nl.IFLA_GRE_OFLAGS:
1943 1943
 			gre.OFlags = ntohs(datum.Value[0:2])
1944
-
1945 1944
 		case nl.IFLA_GRE_TTL:
1946 1945
 			gre.Ttl = uint8(datum.Value[0])
1947 1946
 		case nl.IFLA_GRE_TOS:
... ...
@@ -1953,73 +2407,70 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) {
1953 1953
 		case nl.IFLA_GRE_ENCAP_FLAGS:
1954 1954
 			gre.EncapFlags = native.Uint16(datum.Value[0:2])
1955 1955
 		case nl.IFLA_GRE_COLLECT_METADATA:
1956
-			if len(datum.Value) > 0 {
1957
-				gre.FlowBased = int8(datum.Value[0]) != 0
1958
-			}
1956
+			gre.FlowBased = true
1959 1957
 		}
1960 1958
 	}
1961 1959
 }
1962 1960
 
1963 1961
 func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) {
1964
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
1962
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
1965 1963
 
1966 1964
 	if ip := gre.Local; ip != nil {
1967 1965
 		if ip.To4() != nil {
1968 1966
 			ip = ip.To4()
1969 1967
 		}
1970
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_LOCAL, []byte(ip))
1968
+		data.AddRtAttr(nl.IFLA_GRE_LOCAL, []byte(ip))
1971 1969
 	}
1972 1970
 
1973 1971
 	if ip := gre.Remote; ip != nil {
1974 1972
 		if ip.To4() != nil {
1975 1973
 			ip = ip.To4()
1976 1974
 		}
1977
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_REMOTE, []byte(ip))
1975
+		data.AddRtAttr(nl.IFLA_GRE_REMOTE, []byte(ip))
1978 1976
 	}
1979 1977
 
1980 1978
 	if gre.IKey != 0 {
1981
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_IKEY, htonl(gre.IKey))
1979
+		data.AddRtAttr(nl.IFLA_GRE_IKEY, htonl(gre.IKey))
1982 1980
 		gre.IFlags |= uint16(nl.GRE_KEY)
1983 1981
 	}
1984 1982
 
1985 1983
 	if gre.OKey != 0 {
1986
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_OKEY, htonl(gre.OKey))
1984
+		data.AddRtAttr(nl.IFLA_GRE_OKEY, htonl(gre.OKey))
1987 1985
 		gre.OFlags |= uint16(nl.GRE_KEY)
1988 1986
 	}
1989 1987
 
1990
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_IFLAGS, htons(gre.IFlags))
1991
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_OFLAGS, htons(gre.OFlags))
1988
+	data.AddRtAttr(nl.IFLA_GRE_IFLAGS, htons(gre.IFlags))
1989
+	data.AddRtAttr(nl.IFLA_GRE_OFLAGS, htons(gre.OFlags))
1992 1990
 
1993 1991
 	if gre.Link != 0 {
1994
-		nl.NewRtAttrChild(data, nl.IFLA_GRE_LINK, nl.Uint32Attr(gre.Link))
1992
+		data.AddRtAttr(nl.IFLA_GRE_LINK, nl.Uint32Attr(gre.Link))
1995 1993
 	}
1996 1994
 
1997
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gre.PMtuDisc))
1998
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_TTL, nl.Uint8Attr(gre.Ttl))
1999
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_TOS, nl.Uint8Attr(gre.Tos))
2000
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gre.EncapType))
2001
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gre.EncapFlags))
2002
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_SPORT, htons(gre.EncapSport))
2003
-	nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_DPORT, htons(gre.EncapDport))
1995
+	data.AddRtAttr(nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gre.PMtuDisc))
1996
+	data.AddRtAttr(nl.IFLA_GRE_TTL, nl.Uint8Attr(gre.Ttl))
1997
+	data.AddRtAttr(nl.IFLA_GRE_TOS, nl.Uint8Attr(gre.Tos))
1998
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gre.EncapType))
1999
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gre.EncapFlags))
2000
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_SPORT, htons(gre.EncapSport))
2001
+	data.AddRtAttr(nl.IFLA_GRE_ENCAP_DPORT, htons(gre.EncapDport))
2004 2002
 }
2005 2003
 
2006 2004
 func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) {
2007 2005
 	gre := link.(*Gretun)
2008 2006
 	for _, datum := range data {
2009 2007
 		switch datum.Attr.Type {
2010
-		case nl.IFLA_GRE_OKEY:
2011
-			gre.IKey = ntohl(datum.Value[0:4])
2012 2008
 		case nl.IFLA_GRE_IKEY:
2009
+			gre.IKey = ntohl(datum.Value[0:4])
2010
+		case nl.IFLA_GRE_OKEY:
2013 2011
 			gre.OKey = ntohl(datum.Value[0:4])
2014 2012
 		case nl.IFLA_GRE_LOCAL:
2015
-			gre.Local = net.IP(datum.Value[0:16])
2013
+			gre.Local = net.IP(datum.Value)
2016 2014
 		case nl.IFLA_GRE_REMOTE:
2017
-			gre.Remote = net.IP(datum.Value[0:16])
2015
+			gre.Remote = net.IP(datum.Value)
2018 2016
 		case nl.IFLA_GRE_IFLAGS:
2019 2017
 			gre.IFlags = ntohs(datum.Value[0:2])
2020 2018
 		case nl.IFLA_GRE_OFLAGS:
2021 2019
 			gre.OFlags = ntohs(datum.Value[0:2])
2022
-
2023 2020
 		case nl.IFLA_GRE_TTL:
2024 2021
 			gre.Ttl = uint8(datum.Value[0])
2025 2022
 		case nl.IFLA_GRE_TOS:
... ...
@@ -2038,23 +2489,15 @@ func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) {
2038 2038
 	}
2039 2039
 }
2040 2040
 
2041
-func parseLinkStats32(data []byte) *LinkStatistics {
2042
-	return (*LinkStatistics)((*LinkStatistics32)(unsafe.Pointer(&data[0:SizeofLinkStats32][0])).to64())
2043
-}
2044
-
2045
-func parseLinkStats64(data []byte) *LinkStatistics {
2046
-	return (*LinkStatistics)((*LinkStatistics64)(unsafe.Pointer(&data[0:SizeofLinkStats64][0])))
2047
-}
2048
-
2049 2041
 func addXdpAttrs(xdp *LinkXdp, req *nl.NetlinkRequest) {
2050 2042
 	attrs := nl.NewRtAttr(unix.IFLA_XDP|unix.NLA_F_NESTED, nil)
2051 2043
 	b := make([]byte, 4)
2052 2044
 	native.PutUint32(b, uint32(xdp.Fd))
2053
-	nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FD, b)
2045
+	attrs.AddRtAttr(nl.IFLA_XDP_FD, b)
2054 2046
 	if xdp.Flags != 0 {
2055 2047
 		b := make([]byte, 4)
2056 2048
 		native.PutUint32(b, xdp.Flags)
2057
-		nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FLAGS, b)
2049
+		attrs.AddRtAttr(nl.IFLA_XDP_FLAGS, b)
2058 2050
 	}
2059 2051
 	req.AddData(attrs)
2060 2052
 }
... ...
@@ -2083,32 +2526,32 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) {
2083 2083
 func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) {
2084 2084
 	if iptun.FlowBased {
2085 2085
 		// In flow based mode, no other attributes need to be configured
2086
-		nl.NewRtAttrChild(linkInfo, nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased))
2086
+		linkInfo.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased))
2087 2087
 		return
2088 2088
 	}
2089 2089
 
2090
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
2090
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2091 2091
 
2092 2092
 	ip := iptun.Local.To4()
2093 2093
 	if ip != nil {
2094
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_LOCAL, []byte(ip))
2094
+		data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip))
2095 2095
 	}
2096 2096
 
2097 2097
 	ip = iptun.Remote.To4()
2098 2098
 	if ip != nil {
2099
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_REMOTE, []byte(ip))
2099
+		data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip))
2100 2100
 	}
2101 2101
 
2102 2102
 	if iptun.Link != 0 {
2103
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_LINK, nl.Uint32Attr(iptun.Link))
2103
+		data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(iptun.Link))
2104 2104
 	}
2105
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(iptun.PMtuDisc))
2106
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_TTL, nl.Uint8Attr(iptun.Ttl))
2107
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_TOS, nl.Uint8Attr(iptun.Tos))
2108
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(iptun.EncapType))
2109
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags))
2110
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport))
2111
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport))
2105
+	data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(iptun.PMtuDisc))
2106
+	data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(iptun.Ttl))
2107
+	data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(iptun.Tos))
2108
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(iptun.EncapType))
2109
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags))
2110
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport))
2111
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport))
2112 2112
 }
2113 2113
 
2114 2114
 func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) {
... ...
@@ -2139,34 +2582,83 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) {
2139 2139
 	}
2140 2140
 }
2141 2141
 
2142
+func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) {
2143
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2144
+
2145
+	if ip6tnl.Link != 0 {
2146
+		data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(ip6tnl.Link))
2147
+	}
2148
+
2149
+	ip := ip6tnl.Local.To16()
2150
+	if ip != nil {
2151
+		data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip))
2152
+	}
2153
+
2154
+	ip = ip6tnl.Remote.To16()
2155
+	if ip != nil {
2156
+		data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip))
2157
+	}
2158
+
2159
+	data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(ip6tnl.Ttl))
2160
+	data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(ip6tnl.Tos))
2161
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit))
2162
+	data.AddRtAttr(nl.IFLA_IPTUN_FLAGS, nl.Uint32Attr(ip6tnl.Flags))
2163
+	data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(ip6tnl.Proto))
2164
+	data.AddRtAttr(nl.IFLA_IPTUN_FLOWINFO, nl.Uint32Attr(ip6tnl.FlowInfo))
2165
+}
2166
+
2167
+func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) {
2168
+	ip6tnl := link.(*Ip6tnl)
2169
+	for _, datum := range data {
2170
+		switch datum.Attr.Type {
2171
+		case nl.IFLA_IPTUN_LOCAL:
2172
+			ip6tnl.Local = net.IP(datum.Value[:16])
2173
+		case nl.IFLA_IPTUN_REMOTE:
2174
+			ip6tnl.Remote = net.IP(datum.Value[:16])
2175
+		case nl.IFLA_IPTUN_TTL:
2176
+			ip6tnl.Ttl = uint8(datum.Value[0])
2177
+		case nl.IFLA_IPTUN_TOS:
2178
+			ip6tnl.Tos = uint8(datum.Value[0])
2179
+		case nl.IFLA_IPTUN_ENCAP_LIMIT:
2180
+			ip6tnl.EncapLimit = uint8(datum.Value[0])
2181
+		case nl.IFLA_IPTUN_FLAGS:
2182
+			ip6tnl.Flags = native.Uint32(datum.Value[:4])
2183
+		case nl.IFLA_IPTUN_PROTO:
2184
+			ip6tnl.Proto = uint8(datum.Value[0])
2185
+		case nl.IFLA_IPTUN_FLOWINFO:
2186
+			ip6tnl.FlowInfo = native.Uint32(datum.Value[:4])
2187
+		}
2188
+	}
2189
+}
2190
+
2142 2191
 func addSittunAttrs(sittun *Sittun, linkInfo *nl.RtAttr) {
2143
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
2192
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2144 2193
 
2145 2194
 	if sittun.Link != 0 {
2146
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_LINK, nl.Uint32Attr(sittun.Link))
2195
+		data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(sittun.Link))
2147 2196
 	}
2148 2197
 
2149 2198
 	ip := sittun.Local.To4()
2150 2199
 	if ip != nil {
2151
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_LOCAL, []byte(ip))
2200
+		data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip))
2152 2201
 	}
2153 2202
 
2154 2203
 	ip = sittun.Remote.To4()
2155 2204
 	if ip != nil {
2156
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_REMOTE, []byte(ip))
2205
+		data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip))
2157 2206
 	}
2158 2207
 
2159 2208
 	if sittun.Ttl > 0 {
2160 2209
 		// Would otherwise fail on 3.10 kernel
2161
-		nl.NewRtAttrChild(data, nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl))
2210
+		data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl))
2162 2211
 	}
2163 2212
 
2164
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos))
2165
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc))
2166
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType))
2167
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags))
2168
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport))
2169
-	nl.NewRtAttrChild(data, nl.IFLA_IPTUN_ENCAP_DPORT, htons(sittun.EncapDport))
2213
+	data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos))
2214
+	data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc))
2215
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType))
2216
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags))
2217
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport))
2218
+	data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(sittun.EncapDport))
2170 2219
 }
2171 2220
 
2172 2221
 func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) {
... ...
@@ -2196,24 +2688,39 @@ func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) {
2196 2196
 }
2197 2197
 
2198 2198
 func addVtiAttrs(vti *Vti, linkInfo *nl.RtAttr) {
2199
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
2199
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2200
+
2201
+	family := FAMILY_V4
2202
+	if vti.Local.To4() == nil {
2203
+		family = FAMILY_V6
2204
+	}
2200 2205
 
2201
-	ip := vti.Local.To4()
2206
+	var ip net.IP
2207
+
2208
+	if family == FAMILY_V4 {
2209
+		ip = vti.Local.To4()
2210
+	} else {
2211
+		ip = vti.Local
2212
+	}
2202 2213
 	if ip != nil {
2203
-		nl.NewRtAttrChild(data, nl.IFLA_VTI_LOCAL, []byte(ip))
2214
+		data.AddRtAttr(nl.IFLA_VTI_LOCAL, []byte(ip))
2204 2215
 	}
2205 2216
 
2206
-	ip = vti.Remote.To4()
2217
+	if family == FAMILY_V4 {
2218
+		ip = vti.Remote.To4()
2219
+	} else {
2220
+		ip = vti.Remote
2221
+	}
2207 2222
 	if ip != nil {
2208
-		nl.NewRtAttrChild(data, nl.IFLA_VTI_REMOTE, []byte(ip))
2223
+		data.AddRtAttr(nl.IFLA_VTI_REMOTE, []byte(ip))
2209 2224
 	}
2210 2225
 
2211 2226
 	if vti.Link != 0 {
2212
-		nl.NewRtAttrChild(data, nl.IFLA_VTI_LINK, nl.Uint32Attr(vti.Link))
2227
+		data.AddRtAttr(nl.IFLA_VTI_LINK, nl.Uint32Attr(vti.Link))
2213 2228
 	}
2214 2229
 
2215
-	nl.NewRtAttrChild(data, nl.IFLA_VTI_IKEY, htonl(vti.IKey))
2216
-	nl.NewRtAttrChild(data, nl.IFLA_VTI_OKEY, htonl(vti.OKey))
2230
+	data.AddRtAttr(nl.IFLA_VTI_IKEY, htonl(vti.IKey))
2231
+	data.AddRtAttr(nl.IFLA_VTI_OKEY, htonl(vti.OKey))
2217 2232
 }
2218 2233
 
2219 2234
 func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) {
... ...
@@ -2221,9 +2728,9 @@ func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) {
2221 2221
 	for _, datum := range data {
2222 2222
 		switch datum.Attr.Type {
2223 2223
 		case nl.IFLA_VTI_LOCAL:
2224
-			vti.Local = net.IP(datum.Value[0:4])
2224
+			vti.Local = net.IP(datum.Value)
2225 2225
 		case nl.IFLA_VTI_REMOTE:
2226
-			vti.Remote = net.IP(datum.Value[0:4])
2226
+			vti.Remote = net.IP(datum.Value)
2227 2227
 		case nl.IFLA_VTI_IKEY:
2228 2228
 			vti.IKey = ntohl(datum.Value[0:4])
2229 2229
 		case nl.IFLA_VTI_OKEY:
... ...
@@ -2233,10 +2740,10 @@ func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) {
2233 2233
 }
2234 2234
 
2235 2235
 func addVrfAttrs(vrf *Vrf, linkInfo *nl.RtAttr) {
2236
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
2236
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2237 2237
 	b := make([]byte, 4)
2238 2238
 	native.PutUint32(b, uint32(vrf.Table))
2239
-	nl.NewRtAttrChild(data, nl.IFLA_VRF_TABLE, b)
2239
+	data.AddRtAttr(nl.IFLA_VRF_TABLE, b)
2240 2240
 }
2241 2241
 
2242 2242
 func parseVrfData(link Link, data []syscall.NetlinkRouteAttr) {
... ...
@@ -2250,12 +2757,15 @@ func parseVrfData(link Link, data []syscall.NetlinkRouteAttr) {
2250 2250
 }
2251 2251
 
2252 2252
 func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) {
2253
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
2253
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2254 2254
 	if bridge.MulticastSnooping != nil {
2255
-		nl.NewRtAttrChild(data, nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping))
2255
+		data.AddRtAttr(nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping))
2256 2256
 	}
2257 2257
 	if bridge.HelloTime != nil {
2258
-		nl.NewRtAttrChild(data, nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime))
2258
+		data.AddRtAttr(nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime))
2259
+	}
2260
+	if bridge.VlanFiltering != nil {
2261
+		data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering))
2259 2262
 	}
2260 2263
 }
2261 2264
 
... ...
@@ -2269,17 +2779,20 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) {
2269 2269
 		case nl.IFLA_BR_MCAST_SNOOPING:
2270 2270
 			mcastSnooping := datum.Value[0] == 1
2271 2271
 			br.MulticastSnooping = &mcastSnooping
2272
+		case nl.IFLA_BR_VLAN_FILTERING:
2273
+			vlanFiltering := datum.Value[0] == 1
2274
+			br.VlanFiltering = &vlanFiltering
2272 2275
 		}
2273 2276
 	}
2274 2277
 }
2275 2278
 
2276 2279
 func addGTPAttrs(gtp *GTP, linkInfo *nl.RtAttr) {
2277
-	data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil)
2278
-	nl.NewRtAttrChild(data, nl.IFLA_GTP_FD0, nl.Uint32Attr(uint32(gtp.FD0)))
2279
-	nl.NewRtAttrChild(data, nl.IFLA_GTP_FD1, nl.Uint32Attr(uint32(gtp.FD1)))
2280
-	nl.NewRtAttrChild(data, nl.IFLA_GTP_PDP_HASHSIZE, nl.Uint32Attr(131072))
2280
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2281
+	data.AddRtAttr(nl.IFLA_GTP_FD0, nl.Uint32Attr(uint32(gtp.FD0)))
2282
+	data.AddRtAttr(nl.IFLA_GTP_FD1, nl.Uint32Attr(uint32(gtp.FD1)))
2283
+	data.AddRtAttr(nl.IFLA_GTP_PDP_HASHSIZE, nl.Uint32Attr(131072))
2281 2284
 	if gtp.Role != nl.GTP_ROLE_GGSN {
2282
-		nl.NewRtAttrChild(data, nl.IFLA_GTP_ROLE, nl.Uint32Attr(uint32(gtp.Role)))
2285
+		data.AddRtAttr(nl.IFLA_GTP_ROLE, nl.Uint32Attr(uint32(gtp.Role)))
2283 2286
 	}
2284 2287
 }
2285 2288
 
... ...
@@ -2299,6 +2812,70 @@ func parseGTPData(link Link, data []syscall.NetlinkRouteAttr) {
2299 2299
 	}
2300 2300
 }
2301 2301
 
2302
+func parseVfInfoList(data []syscall.NetlinkRouteAttr) ([]VfInfo, error) {
2303
+	var vfs []VfInfo
2304
+
2305
+	for i, element := range data {
2306
+		if element.Attr.Type != nl.IFLA_VF_INFO {
2307
+			return nil, fmt.Errorf("Incorrect element type in vf info list: %d", element.Attr.Type)
2308
+		}
2309
+		vfAttrs, err := nl.ParseRouteAttr(element.Value)
2310
+		if err != nil {
2311
+			return nil, err
2312
+		}
2313
+		vfs = append(vfs, parseVfInfo(vfAttrs, i))
2314
+	}
2315
+	return vfs, nil
2316
+}
2317
+
2318
+func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo {
2319
+	vf := VfInfo{ID: id}
2320
+	for _, element := range data {
2321
+		switch element.Attr.Type {
2322
+		case nl.IFLA_VF_MAC:
2323
+			mac := nl.DeserializeVfMac(element.Value[:])
2324
+			vf.Mac = mac.Mac[:6]
2325
+		case nl.IFLA_VF_VLAN:
2326
+			vl := nl.DeserializeVfVlan(element.Value[:])
2327
+			vf.Vlan = int(vl.Vlan)
2328
+			vf.Qos = int(vl.Qos)
2329
+		case nl.IFLA_VF_TX_RATE:
2330
+			txr := nl.DeserializeVfTxRate(element.Value[:])
2331
+			vf.TxRate = int(txr.Rate)
2332
+		case nl.IFLA_VF_SPOOFCHK:
2333
+			sp := nl.DeserializeVfSpoofchk(element.Value[:])
2334
+			vf.Spoofchk = sp.Setting != 0
2335
+		case nl.IFLA_VF_LINK_STATE:
2336
+			ls := nl.DeserializeVfLinkState(element.Value[:])
2337
+			vf.LinkState = ls.LinkState
2338
+		case nl.IFLA_VF_RATE:
2339
+			vfr := nl.DeserializeVfRate(element.Value[:])
2340
+			vf.MaxTxRate = vfr.MaxTxRate
2341
+			vf.MinTxRate = vfr.MinTxRate
2342
+		}
2343
+	}
2344
+	return vf
2345
+}
2346
+
2347
+func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) {
2348
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2349
+	data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex)))
2350
+	data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid))
2351
+
2352
+}
2353
+
2354
+func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) {
2355
+	xfrmi := link.(*Xfrmi)
2356
+	for _, datum := range data {
2357
+		switch datum.Attr.Type {
2358
+		case nl.IFLA_XFRM_LINK:
2359
+			xfrmi.ParentIndex = int(native.Uint32(datum.Value))
2360
+		case nl.IFLA_XFRM_IF_ID:
2361
+			xfrmi.Ifid = native.Uint32(datum.Value)
2362
+		}
2363
+	}
2364
+}
2365
+
2302 2366
 // LinkSetBondSlave add slave to bond link via ioctl interface.
2303 2367
 func LinkSetBondSlave(link Link, master *Bond) error {
2304 2368
 	fd, err := getSocketUDP()
... ...
@@ -2316,6 +2893,52 @@ func LinkSetBondSlave(link Link, master *Bond) error {
2316 2316
 	return nil
2317 2317
 }
2318 2318
 
2319
+// LinkSetBondSlaveQueueId modifies the bond slave queue-id.
2320
+func (h *Handle) LinkSetBondSlaveQueueId(link Link, queueId uint16) error {
2321
+	base := link.Attrs()
2322
+	h.ensureIndex(base)
2323
+	req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK)
2324
+
2325
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
2326
+	msg.Index = int32(base.Index)
2327
+	req.AddData(msg)
2328
+
2329
+	linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil)
2330
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_SLAVE_DATA, nil)
2331
+	data.AddRtAttr(nl.IFLA_BOND_SLAVE_QUEUE_ID, nl.Uint16Attr(queueId))
2332
+
2333
+	req.AddData(linkInfo)
2334
+	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
2335
+	return err
2336
+}
2337
+
2338
+// LinkSetBondSlaveQueueId modifies the bond slave queue-id.
2339
+func LinkSetBondSlaveQueueId(link Link, queueId uint16) error {
2340
+	return pkgHandle.LinkSetBondSlaveQueueId(link, queueId)
2341
+}
2342
+
2343
+func vethStatsSerialize(stats ethtoolStats) ([]byte, error) {
2344
+	statsSize := int(unsafe.Sizeof(stats)) + int(stats.nStats)*int(unsafe.Sizeof(uint64(0)))
2345
+	b := make([]byte, 0, statsSize)
2346
+	buf := bytes.NewBuffer(b)
2347
+	err := binary.Write(buf, nl.NativeEndian(), stats)
2348
+	return buf.Bytes()[:statsSize], err
2349
+}
2350
+
2351
+type vethEthtoolStats struct {
2352
+	Cmd    uint32
2353
+	NStats uint32
2354
+	Peer   uint64
2355
+	// Newer kernels have XDP stats in here, but we only care
2356
+	// to extract the peer ifindex here.
2357
+}
2358
+
2359
+func vethStatsDeserialize(b []byte) (vethEthtoolStats, error) {
2360
+	var stats = vethEthtoolStats{}
2361
+	err := binary.Read(bytes.NewReader(b), nl.NativeEndian(), &stats)
2362
+	return stats, err
2363
+}
2364
+
2319 2365
 // VethPeerIndex get veth peer index.
2320 2366
 func VethPeerIndex(link *Veth) (int, error) {
2321 2367
 	fd, err := getSocketUDP()
... ...
@@ -2330,25 +2953,66 @@ func VethPeerIndex(link *Veth) (int, error) {
2330 2330
 		return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno)
2331 2331
 	}
2332 2332
 
2333
-	gstrings := &ethtoolGstrings{
2334
-		cmd:       ETHTOOL_GSTRINGS,
2335
-		stringSet: ETH_SS_STATS,
2336
-		length:    sSet.data[0],
2333
+	stats := ethtoolStats{
2334
+		cmd:    ETHTOOL_GSTATS,
2335
+		nStats: sSet.data[0],
2336
+	}
2337
+
2338
+	buffer, err := vethStatsSerialize(stats)
2339
+	if err != nil {
2340
+		return -1, err
2337 2341
 	}
2338
-	ifreq.Data = uintptr(unsafe.Pointer(gstrings))
2342
+
2343
+	ifreq.Data = uintptr(unsafe.Pointer(&buffer[0]))
2339 2344
 	_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), SIOCETHTOOL, uintptr(unsafe.Pointer(ifreq)))
2340 2345
 	if errno != 0 {
2341 2346
 		return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno)
2342 2347
 	}
2343 2348
 
2344
-	stats := &ethtoolStats{
2345
-		cmd:    ETHTOOL_GSTATS,
2346
-		nStats: gstrings.length,
2349
+	vstats, err := vethStatsDeserialize(buffer)
2350
+	if err != nil {
2351
+		return -1, err
2347 2352
 	}
2348
-	ifreq.Data = uintptr(unsafe.Pointer(stats))
2349
-	_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), SIOCETHTOOL, uintptr(unsafe.Pointer(ifreq)))
2350
-	if errno != 0 {
2351
-		return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno)
2353
+
2354
+	return int(vstats.Peer), nil
2355
+}
2356
+
2357
+func parseTuntapData(link Link, data []syscall.NetlinkRouteAttr) {
2358
+	tuntap := link.(*Tuntap)
2359
+	for _, datum := range data {
2360
+		switch datum.Attr.Type {
2361
+		case nl.IFLA_TUN_OWNER:
2362
+			tuntap.Owner = native.Uint32(datum.Value)
2363
+		case nl.IFLA_TUN_GROUP:
2364
+			tuntap.Group = native.Uint32(datum.Value)
2365
+		case nl.IFLA_TUN_TYPE:
2366
+			tuntap.Mode = TuntapMode(uint8(datum.Value[0]))
2367
+		case nl.IFLA_TUN_PERSIST:
2368
+			tuntap.NonPersist = false
2369
+			if uint8(datum.Value[0]) == 0 {
2370
+				tuntap.NonPersist = true
2371
+			}
2372
+		}
2373
+	}
2374
+}
2375
+
2376
+func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) {
2377
+	ipoib := link.(*IPoIB)
2378
+	for _, datum := range data {
2379
+		switch datum.Attr.Type {
2380
+		case nl.IFLA_IPOIB_PKEY:
2381
+			ipoib.Pkey = uint16(native.Uint16(datum.Value))
2382
+		case nl.IFLA_IPOIB_MODE:
2383
+			ipoib.Mode = IPoIBMode(native.Uint16(datum.Value))
2384
+		case nl.IFLA_IPOIB_UMCAST:
2385
+			ipoib.Umcast = uint16(native.Uint16(datum.Value))
2386
+		}
2352 2387
 	}
2353
-	return int(stats.data[0]), nil
2388
+}
2389
+
2390
+func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) {
2391
+	data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
2392
+	data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey)))
2393
+	data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode)))
2394
+	data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast)))
2354 2395
 }
... ...
@@ -17,9 +17,16 @@ type Neigh struct {
17 17
 	LLIPAddr     net.IP //Used in the case of NHRP
18 18
 	Vlan         int
19 19
 	VNI          int
20
+	MasterIndex  int
20 21
 }
21 22
 
22 23
 // String returns $ip/$hwaddr $label
23 24
 func (neigh *Neigh) String() string {
24 25
 	return fmt.Sprintf("%s %s", neigh.IP, neigh.HardwareAddr)
25 26
 }
27
+
28
+// NeighUpdate is sent when a neighbor changes - type is RTM_NEWNEIGH or RTM_DELNEIGH.
29
+type NeighUpdate struct {
30
+	Type uint16
31
+	Neigh
32
+}
... ...
@@ -1,10 +1,13 @@
1 1
 package netlink
2 2
 
3 3
 import (
4
+	"fmt"
4 5
 	"net"
6
+	"syscall"
5 7
 	"unsafe"
6 8
 
7 9
 	"github.com/vishvananda/netlink/nl"
10
+	"github.com/vishvananda/netns"
8 11
 	"golang.org/x/sys/unix"
9 12
 )
10 13
 
... ...
@@ -18,7 +21,10 @@ const (
18 18
 	NDA_PORT
19 19
 	NDA_VNI
20 20
 	NDA_IFINDEX
21
-	NDA_MAX = NDA_IFINDEX
21
+	NDA_MASTER
22
+	NDA_LINK_NETNSID
23
+	NDA_SRC_VNI
24
+	NDA_MAX = NDA_SRC_VNI
22 25
 )
23 26
 
24 27
 // Neighbor Cache Entry States.
... ...
@@ -43,6 +49,7 @@ const (
43 43
 	NTF_ROUTER = 0x80
44 44
 )
45 45
 
46
+// Ndmsg is for adding, removing or receiving information about a neighbor table entry
46 47
 type Ndmsg struct {
47 48
 	Family uint8
48 49
 	Index  uint32
... ...
@@ -170,45 +177,58 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
170 170
 		req.AddData(vniData)
171 171
 	}
172 172
 
173
+	if neigh.MasterIndex != 0 {
174
+		masterData := nl.NewRtAttr(NDA_MASTER, nl.Uint32Attr(uint32(neigh.MasterIndex)))
175
+		req.AddData(masterData)
176
+	}
177
+
173 178
 	_, err := req.Execute(unix.NETLINK_ROUTE, 0)
174 179
 	return err
175 180
 }
176 181
 
177
-// NeighList gets a list of IP-MAC mappings in the system (ARP table).
182
+// NeighList returns a list of IP-MAC mappings in the system (ARP table).
178 183
 // Equivalent to: `ip neighbor show`.
179 184
 // The list can be filtered by link and ip family.
180 185
 func NeighList(linkIndex, family int) ([]Neigh, error) {
181 186
 	return pkgHandle.NeighList(linkIndex, family)
182 187
 }
183 188
 
184
-// NeighProxyList gets a list of neighbor proxies in the system.
189
+// NeighProxyList returns a list of neighbor proxies in the system.
185 190
 // Equivalent to: `ip neighbor show proxy`.
186 191
 // The list can be filtered by link and ip family.
187 192
 func NeighProxyList(linkIndex, family int) ([]Neigh, error) {
188 193
 	return pkgHandle.NeighProxyList(linkIndex, family)
189 194
 }
190 195
 
191
-// NeighList gets a list of IP-MAC mappings in the system (ARP table).
196
+// NeighList returns a list of IP-MAC mappings in the system (ARP table).
192 197
 // Equivalent to: `ip neighbor show`.
193 198
 // The list can be filtered by link and ip family.
194 199
 func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) {
195
-	return h.neighList(linkIndex, family, 0)
200
+	return h.NeighListExecute(Ndmsg{
201
+		Family: uint8(family),
202
+		Index:  uint32(linkIndex),
203
+	})
196 204
 }
197 205
 
198
-// NeighProxyList gets a list of neighbor proxies in the system.
206
+// NeighProxyList returns a list of neighbor proxies in the system.
199 207
 // Equivalent to: `ip neighbor show proxy`.
200 208
 // The list can be filtered by link, ip family.
201 209
 func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
202
-	return h.neighList(linkIndex, family, NTF_PROXY)
210
+	return h.NeighListExecute(Ndmsg{
211
+		Family: uint8(family),
212
+		Index:  uint32(linkIndex),
213
+		Flags:  NTF_PROXY,
214
+	})
215
+}
216
+
217
+// NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state.
218
+func NeighListExecute(msg Ndmsg) ([]Neigh, error) {
219
+	return pkgHandle.NeighListExecute(msg)
203 220
 }
204 221
 
205
-func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) {
222
+// NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state.
223
+func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) {
206 224
 	req := h.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP)
207
-	msg := Ndmsg{
208
-		Family: uint8(family),
209
-		Index:  uint32(linkIndex),
210
-		Flags:  uint8(flags),
211
-	}
212 225
 	req.AddData(&msg)
213 226
 
214 227
 	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNEIGH)
... ...
@@ -219,7 +239,7 @@ func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) {
219 219
 	var res []Neigh
220 220
 	for _, m := range msgs {
221 221
 		ndm := deserializeNdmsg(m)
222
-		if linkIndex != 0 && int(ndm.Index) != linkIndex {
222
+		if msg.Index != 0 && ndm.Index != msg.Index {
223 223
 			// Ignore messages from other interfaces
224 224
 			continue
225 225
 		}
... ...
@@ -251,14 +271,6 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
251 251
 		return nil, err
252 252
 	}
253 253
 
254
-	// This should be cached for perfomance
255
-	// once per table dump
256
-	link, err := LinkByIndex(neigh.LinkIndex)
257
-	if err != nil {
258
-		return nil, err
259
-	}
260
-	encapType := link.Attrs().EncapType
261
-
262 254
 	for _, attr := range attrs {
263 255
 		switch attr.Attr.Type {
264 256
 		case NDA_DST:
... ...
@@ -268,13 +280,16 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
268 268
 			// #define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
269 269
 			// #define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
270 270
 			attrLen := attr.Attr.Len - unix.SizeofRtAttr
271
-			if attrLen == 4 && (encapType == "ipip" ||
272
-				encapType == "sit" ||
273
-				encapType == "gre") {
271
+			if attrLen == 4 {
274 272
 				neigh.LLIPAddr = net.IP(attr.Value)
275
-			} else if attrLen == 16 &&
276
-				encapType == "tunnel6" {
277
-				neigh.IP = net.IP(attr.Value)
273
+			} else if attrLen == 16 {
274
+				// Can be IPv6 or FireWire HWAddr
275
+				link, err := LinkByIndex(neigh.LinkIndex)
276
+				if err == nil && link.Attrs().EncapType == "tunnel6" {
277
+					neigh.IP = net.IP(attr.Value)
278
+				} else {
279
+					neigh.HardwareAddr = net.HardwareAddr(attr.Value)
280
+				}
278 281
 			} else {
279 282
 				neigh.HardwareAddr = net.HardwareAddr(attr.Value)
280 283
 			}
... ...
@@ -282,8 +297,126 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
282 282
 			neigh.Vlan = int(native.Uint16(attr.Value[0:2]))
283 283
 		case NDA_VNI:
284 284
 			neigh.VNI = int(native.Uint32(attr.Value[0:4]))
285
+		case NDA_MASTER:
286
+			neigh.MasterIndex = int(native.Uint32(attr.Value[0:4]))
285 287
 		}
286 288
 	}
287 289
 
288 290
 	return &neigh, nil
289 291
 }
292
+
293
+// NeighSubscribe takes a chan down which notifications will be sent
294
+// when neighbors are added or deleted. Close the 'done' chan to stop subscription.
295
+func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error {
296
+	return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false)
297
+}
298
+
299
+// NeighSubscribeAt works like NeighSubscribe plus it allows the caller
300
+// to choose the network namespace in which to subscribe (ns).
301
+func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error {
302
+	return neighSubscribeAt(ns, netns.None(), ch, done, nil, false)
303
+}
304
+
305
+// NeighSubscribeOptions contains a set of options to use with
306
+// NeighSubscribeWithOptions.
307
+type NeighSubscribeOptions struct {
308
+	Namespace     *netns.NsHandle
309
+	ErrorCallback func(error)
310
+	ListExisting  bool
311
+}
312
+
313
+// NeighSubscribeWithOptions works like NeighSubscribe but allows the caller to
314
+// provide additional options to modify the behavior. Currently, the
315
+// namespace can be provided as well as an error callback.
316
+func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, options NeighSubscribeOptions) error {
317
+	if options.Namespace == nil {
318
+		none := netns.None()
319
+		options.Namespace = &none
320
+	}
321
+	return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting)
322
+}
323
+
324
+func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error {
325
+	s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH)
326
+	makeRequest := func(family int) error {
327
+		req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH,
328
+			unix.NLM_F_DUMP)
329
+		infmsg := nl.NewIfInfomsg(family)
330
+		req.AddData(infmsg)
331
+		if err := s.Send(req); err != nil {
332
+			return err
333
+		}
334
+		return nil
335
+	}
336
+	if err != nil {
337
+		return err
338
+	}
339
+	if done != nil {
340
+		go func() {
341
+			<-done
342
+			s.Close()
343
+		}()
344
+	}
345
+	if listExisting {
346
+		if err := makeRequest(unix.AF_UNSPEC); err != nil {
347
+			return err
348
+		}
349
+		// We have to wait for NLMSG_DONE before making AF_BRIDGE request
350
+	}
351
+	go func() {
352
+		defer close(ch)
353
+		for {
354
+			msgs, from, err := s.Receive()
355
+			if err != nil {
356
+				if cberr != nil {
357
+					cberr(err)
358
+				}
359
+				return
360
+			}
361
+			if from.Pid != nl.PidKernel {
362
+				if cberr != nil {
363
+					cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel))
364
+				}
365
+				continue
366
+			}
367
+			for _, m := range msgs {
368
+				if m.Header.Type == unix.NLMSG_DONE {
369
+					if listExisting {
370
+						// This will be called after handling AF_UNSPEC
371
+						// list request, we have to wait for NLMSG_DONE
372
+						// before making another request
373
+						if err := makeRequest(unix.AF_BRIDGE); err != nil {
374
+							if cberr != nil {
375
+								cberr(err)
376
+							}
377
+							return
378
+						}
379
+						listExisting = false
380
+					}
381
+					continue
382
+				}
383
+				if m.Header.Type == unix.NLMSG_ERROR {
384
+					native := nl.NativeEndian()
385
+					error := int32(native.Uint32(m.Data[0:4]))
386
+					if error == 0 {
387
+						continue
388
+					}
389
+					if cberr != nil {
390
+						cberr(syscall.Errno(-error))
391
+					}
392
+					return
393
+				}
394
+				neigh, err := NeighDeserialize(m.Data)
395
+				if err != nil {
396
+					if cberr != nil {
397
+						cberr(err)
398
+					}
399
+					return
400
+				}
401
+				ch <- NeighUpdate{Type: m.Header.Type, Neigh: *neigh}
402
+			}
403
+		}
404
+	}()
405
+
406
+	return nil
407
+}
... ...
@@ -27,7 +27,8 @@ func ParseIPNet(s string) (*net.IPNet, error) {
27 27
 	if err != nil {
28 28
 		return nil, err
29 29
 	}
30
-	return &net.IPNet{IP: ip, Mask: ipNet.Mask}, nil
30
+	ipNet.IP = ip
31
+	return ipNet, nil
31 32
 }
32 33
 
33 34
 // NewIPNet generates an IPNet from an ip address using a netmask of 32 or 128.
... ...
@@ -48,10 +48,18 @@ func LinkSetVfVlan(link Link, vf, vlan int) error {
48 48
 	return ErrNotImplemented
49 49
 }
50 50
 
51
+func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
52
+	return ErrNotImplemented
53
+}
54
+
51 55
 func LinkSetVfTxRate(link Link, vf, rate int) error {
52 56
 	return ErrNotImplemented
53 57
 }
54 58
 
59
+func LinkSetVfRate(link Link, vf, minRate, maxRate int) error {
60
+	return ErrNotImplemented
61
+}
62
+
55 63
 func LinkSetNoMaster(link Link) error {
56 64
 	return ErrNotImplemented
57 65
 }
... ...
@@ -152,6 +160,10 @@ func AddrAdd(link Link, addr *Addr) error {
152 152
 	return ErrNotImplemented
153 153
 }
154 154
 
155
+func AddrReplace(link Link, addr *Addr) error {
156
+	return ErrNotImplemented
157
+}
158
+
155 159
 func AddrDel(link Link, addr *Addr) error {
156 160
 	return ErrNotImplemented
157 161
 }
158 162
new file mode 100644
... ...
@@ -0,0 +1,141 @@
0
+package netlink
1
+
2
+// Network namespace ID functions
3
+//
4
+// The kernel has a weird concept called the network namespace ID.
5
+// This is different from the file reference in proc (and any bind-mounted
6
+// namespaces, etc.)
7
+//
8
+// Instead, namespaces can be assigned a numeric ID at any time. Once set,
9
+// the ID is fixed. The ID can either be set manually by the user, or
10
+// automatically, triggered by certain kernel actions. The most common kernel
11
+// action that triggers namespace ID creation is moving one end of a veth pair
12
+// into that namespace.
13
+
14
+import (
15
+	"fmt"
16
+
17
+	"github.com/vishvananda/netlink/nl"
18
+	"golang.org/x/sys/unix"
19
+)
20
+
21
+// These can be replaced by the values from sys/unix when it is next released.
22
+const (
23
+	_ = iota
24
+	NETNSA_NSID
25
+	NETNSA_PID
26
+	NETNSA_FD
27
+)
28
+
29
+// GetNetNsIdByPid looks up the network namespace ID for a given pid (really thread id).
30
+// Returns -1 if the namespace does not have an ID set.
31
+func (h *Handle) GetNetNsIdByPid(pid int) (int, error) {
32
+	return h.getNetNsId(NETNSA_PID, uint32(pid))
33
+}
34
+
35
+// GetNetNsIdByPid looks up the network namespace ID for a given pid (really thread id).
36
+// Returns -1 if the namespace does not have an ID set.
37
+func GetNetNsIdByPid(pid int) (int, error) {
38
+	return pkgHandle.GetNetNsIdByPid(pid)
39
+}
40
+
41
+// SetNetNsIdByPid sets the ID of the network namespace for a given pid (really thread id).
42
+// The ID can only be set for namespaces without an ID already set.
43
+func (h *Handle) SetNetNsIdByPid(pid, nsid int) error {
44
+	return h.setNetNsId(NETNSA_PID, uint32(pid), uint32(nsid))
45
+}
46
+
47
+// SetNetNsIdByPid sets the ID of the network namespace for a given pid (really thread id).
48
+// The ID can only be set for namespaces without an ID already set.
49
+func SetNetNsIdByPid(pid, nsid int) error {
50
+	return pkgHandle.SetNetNsIdByPid(pid, nsid)
51
+}
52
+
53
+// GetNetNsIdByFd looks up the network namespace ID for a given fd.
54
+// fd must be an open file descriptor to a namespace file.
55
+// Returns -1 if the namespace does not have an ID set.
56
+func (h *Handle) GetNetNsIdByFd(fd int) (int, error) {
57
+	return h.getNetNsId(NETNSA_FD, uint32(fd))
58
+}
59
+
60
+// GetNetNsIdByFd looks up the network namespace ID for a given fd.
61
+// fd must be an open file descriptor to a namespace file.
62
+// Returns -1 if the namespace does not have an ID set.
63
+func GetNetNsIdByFd(fd int) (int, error) {
64
+	return pkgHandle.GetNetNsIdByFd(fd)
65
+}
66
+
67
+// SetNetNsIdByFd sets the ID of the network namespace for a given fd.
68
+// fd must be an open file descriptor to a namespace file.
69
+// The ID can only be set for namespaces without an ID already set.
70
+func (h *Handle) SetNetNsIdByFd(fd, nsid int) error {
71
+	return h.setNetNsId(NETNSA_FD, uint32(fd), uint32(nsid))
72
+}
73
+
74
+// SetNetNsIdByFd sets the ID of the network namespace for a given fd.
75
+// fd must be an open file descriptor to a namespace file.
76
+// The ID can only be set for namespaces without an ID already set.
77
+func SetNetNsIdByFd(fd, nsid int) error {
78
+	return pkgHandle.SetNetNsIdByFd(fd, nsid)
79
+}
80
+
81
+// getNetNsId requests the netnsid for a given type-val pair
82
+// type should be either NETNSA_PID or NETNSA_FD
83
+func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) {
84
+	req := h.newNetlinkRequest(unix.RTM_GETNSID, unix.NLM_F_REQUEST)
85
+
86
+	rtgen := nl.NewRtGenMsg()
87
+	req.AddData(rtgen)
88
+
89
+	b := make([]byte, 4, 4)
90
+	native.PutUint32(b, val)
91
+	attr := nl.NewRtAttr(attrType, b)
92
+	req.AddData(attr)
93
+
94
+	msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNSID)
95
+
96
+	if err != nil {
97
+		return 0, err
98
+	}
99
+
100
+	for _, m := range msgs {
101
+		msg := nl.DeserializeRtGenMsg(m)
102
+
103
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
104
+		if err != nil {
105
+			return 0, err
106
+		}
107
+
108
+		for _, attr := range attrs {
109
+			switch attr.Attr.Type {
110
+			case NETNSA_NSID:
111
+				return int(int32(native.Uint32(attr.Value))), nil
112
+			}
113
+		}
114
+	}
115
+
116
+	return 0, fmt.Errorf("unexpected empty result")
117
+}
118
+
119
+// setNetNsId sets the netnsid for a given type-val pair
120
+// type should be either NETNSA_PID or NETNSA_FD
121
+// The ID can only be set for namespaces without an ID already set
122
+func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error {
123
+	req := h.newNetlinkRequest(unix.RTM_NEWNSID, unix.NLM_F_REQUEST|unix.NLM_F_ACK)
124
+
125
+	rtgen := nl.NewRtGenMsg()
126
+	req.AddData(rtgen)
127
+
128
+	b := make([]byte, 4, 4)
129
+	native.PutUint32(b, val)
130
+	attr := nl.NewRtAttr(attrType, b)
131
+	req.AddData(attr)
132
+
133
+	b1 := make([]byte, 4, 4)
134
+	native.PutUint32(b1, newnsid)
135
+	attr1 := nl.NewRtAttr(NETNSA_NSID, b1)
136
+	req.AddData(attr1)
137
+
138
+	_, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNSID)
139
+	return err
140
+}
0 141
new file mode 100644
... ...
@@ -0,0 +1,19 @@
0
+// +build !linux
1
+
2
+package netlink
3
+
4
+func GetNetNsIdByPid(pid int) (int, error) {
5
+	return 0, ErrNotImplemented
6
+}
7
+
8
+func SetNetNsIdByPid(pid, nsid int) error {
9
+	return ErrNotImplemented
10
+}
11
+
12
+func GetNetNsIdByFd(fd int) (int, error) {
13
+	return 0, ErrNotImplemented
14
+}
15
+
16
+func SetNetNsIdByFd(fd, nsid int) error {
17
+	return ErrNotImplemented
18
+}
... ...
@@ -11,8 +11,8 @@ const (
11 11
 
12 12
 /* Bridge Flags */
13 13
 const (
14
-	BRIDGE_FLAGS_MASTER = iota /* Bridge command to/from master */
15
-	BRIDGE_FLAGS_SELF          /* Bridge command to/from lowerdev */
14
+	BRIDGE_FLAGS_MASTER = iota + 1 /* Bridge command to/from master */
15
+	BRIDGE_FLAGS_SELF              /* Bridge command to/from lowerdev */
16 16
 )
17 17
 
18 18
 /* Bridge management nested attributes
... ...
@@ -76,12 +76,17 @@ const (
76 76
 // 	__CTA_MAX
77 77
 // };
78 78
 const (
79
-	CTA_TUPLE_ORIG  = 1
80
-	CTA_TUPLE_REPLY = 2
81
-	CTA_STATUS      = 3
82
-	CTA_TIMEOUT     = 7
83
-	CTA_MARK        = 8
84
-	CTA_PROTOINFO   = 4
79
+	CTA_TUPLE_ORIG     = 1
80
+	CTA_TUPLE_REPLY    = 2
81
+	CTA_STATUS         = 3
82
+	CTA_PROTOINFO      = 4
83
+	CTA_TIMEOUT        = 7
84
+	CTA_MARK           = 8
85
+	CTA_COUNTERS_ORIG  = 9
86
+	CTA_COUNTERS_REPLY = 10
87
+	CTA_USE            = 11
88
+	CTA_ID             = 12
89
+	CTA_TIMESTAMP      = 20
85 90
 )
86 91
 
87 92
 // enum ctattr_tuple {
... ...
@@ -163,6 +168,29 @@ const (
163 163
 	CTA_PROTOINFO_TCP_FLAGS_REPLY     = 5
164 164
 )
165 165
 
166
+// enum ctattr_counters {
167
+// 	CTA_COUNTERS_UNSPEC,
168
+// 	CTA_COUNTERS_PACKETS,		/* 64bit counters */
169
+// 	CTA_COUNTERS_BYTES,		/* 64bit counters */
170
+// 	CTA_COUNTERS32_PACKETS,		/* old 32bit counters, unused */
171
+// 	CTA_COUNTERS32_BYTES,		/* old 32bit counters, unused */
172
+// 	CTA_COUNTERS_PAD,
173
+// 	__CTA_COUNTERS_MAX
174
+// };
175
+// #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
176
+const (
177
+	CTA_COUNTERS_PACKETS = 1
178
+	CTA_COUNTERS_BYTES   = 2
179
+)
180
+
181
+// enum CTA TIMESTAMP TLVs
182
+// CTA_TIMESTAMP_START       /* 64bit value */
183
+// CTA_TIMESTAMP_STOP        /* 64bit value */
184
+const (
185
+	CTA_TIMESTAMP_START = 1
186
+	CTA_TIMESTAMP_STOP  = 2
187
+)
188
+
166 189
 // /* General form of address family dependent message.
167 190
 //  */
168 191
 // struct nfgenmsg {
169 192
new file mode 100644
... ...
@@ -0,0 +1,40 @@
0
+package nl
1
+
2
+// All the following constants are coming from:
3
+// https://github.com/torvalds/linux/blob/master/include/uapi/linux/devlink.h
4
+
5
+const (
6
+	GENL_DEVLINK_VERSION = 1
7
+	GENL_DEVLINK_NAME    = "devlink"
8
+)
9
+
10
+const (
11
+	DEVLINK_CMD_GET         = 1
12
+	DEVLINK_CMD_ESWITCH_GET = 29
13
+	DEVLINK_CMD_ESWITCH_SET = 30
14
+)
15
+
16
+const (
17
+	DEVLINK_ATTR_BUS_NAME            = 1
18
+	DEVLINK_ATTR_DEV_NAME            = 2
19
+	DEVLINK_ATTR_ESWITCH_MODE        = 25
20
+	DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26
21
+	DEVLINK_ATTR_ESWITCH_ENCAP_MODE  = 62
22
+)
23
+
24
+const (
25
+	DEVLINK_ESWITCH_MODE_LEGACY    = 0
26
+	DEVLINK_ESWITCH_MODE_SWITCHDEV = 1
27
+)
28
+
29
+const (
30
+	DEVLINK_ESWITCH_INLINE_MODE_NONE      = 0
31
+	DEVLINK_ESWITCH_INLINE_MODE_LINK      = 1
32
+	DEVLINK_ESWITCH_INLINE_MODE_NETWORK   = 2
33
+	DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT = 3
34
+)
35
+
36
+const (
37
+	DEVLINK_ESWITCH_ENCAP_MODE_NONE  = 0
38
+	DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1
39
+)
... ...
@@ -13,7 +13,9 @@ const (
13 13
 	IFLA_INFO_KIND
14 14
 	IFLA_INFO_DATA
15 15
 	IFLA_INFO_XSTATS
16
-	IFLA_INFO_MAX = IFLA_INFO_XSTATS
16
+	IFLA_INFO_SLAVE_KIND
17
+	IFLA_INFO_SLAVE_DATA
18
+	IFLA_INFO_MAX = IFLA_INFO_SLAVE_DATA
17 19
 )
18 20
 
19 21
 const (
... ...
@@ -87,7 +89,8 @@ const (
87 87
 const (
88 88
 	IFLA_IPVLAN_UNSPEC = iota
89 89
 	IFLA_IPVLAN_MODE
90
-	IFLA_IPVLAN_MAX = IFLA_IPVLAN_MODE
90
+	IFLA_IPVLAN_FLAG
91
+	IFLA_IPVLAN_MAX = IFLA_IPVLAN_FLAG
91 92
 )
92 93
 
93 94
 const (
... ...
@@ -164,6 +167,8 @@ const (
164 164
 	IFLA_BOND_SLAVE_PERM_HWADDR
165 165
 	IFLA_BOND_SLAVE_QUEUE_ID
166 166
 	IFLA_BOND_SLAVE_AD_AGGREGATOR_ID
167
+	IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE
168
+	IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE
167 169
 )
168 170
 
169 171
 const (
... ...
@@ -217,9 +222,11 @@ const (
217 217
 	IFLA_VF_RSS_QUERY_EN /* RSS Redirection Table and Hash Key query
218 218
 	 * on/off switch
219 219
 	 */
220
-	IFLA_VF_STATS /* network device statistics */
221
-	IFLA_VF_TRUST /* Trust state of VF */
222
-	IFLA_VF_MAX   = IFLA_VF_TRUST
220
+	IFLA_VF_STATS        /* network device statistics */
221
+	IFLA_VF_TRUST        /* Trust state of VF */
222
+	IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */
223
+	IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */
224
+	IFLA_VF_MAX          = IFLA_VF_IB_PORT_GUID
223 225
 )
224 226
 
225 227
 const (
... ...
@@ -248,6 +255,7 @@ const (
248 248
 	SizeofVfLinkState  = 0x08
249 249
 	SizeofVfRssQueryEn = 0x08
250 250
 	SizeofVfTrust      = 0x08
251
+	SizeofVfGUID       = 0x10
251 252
 )
252 253
 
253 254
 // struct ifla_vf_mac {
... ...
@@ -430,6 +438,30 @@ func (msg *VfTrust) Serialize() []byte {
430 430
 	return (*(*[SizeofVfTrust]byte)(unsafe.Pointer(msg)))[:]
431 431
 }
432 432
 
433
+// struct ifla_vf_guid {
434
+//   __u32 vf;
435
+//   __u32 rsvd;
436
+//   __u64 guid;
437
+// };
438
+
439
+type VfGUID struct {
440
+	Vf   uint32
441
+	Rsvd uint32
442
+	GUID uint64
443
+}
444
+
445
+func (msg *VfGUID) Len() int {
446
+	return SizeofVfGUID
447
+}
448
+
449
+func DeserializeVfGUID(b []byte) *VfGUID {
450
+	return (*VfGUID)(unsafe.Pointer(&b[0:SizeofVfGUID][0]))
451
+}
452
+
453
+func (msg *VfGUID) Serialize() []byte {
454
+	return (*(*[SizeofVfGUID]byte)(unsafe.Pointer(msg)))[:]
455
+}
456
+
433 457
 const (
434 458
 	XDP_FLAGS_UPDATE_IF_NOEXIST = 1 << iota
435 459
 	XDP_FLAGS_SKB_MODE
... ...
@@ -546,3 +578,33 @@ const (
546 546
 	GTP_ROLE_GGSN = iota
547 547
 	GTP_ROLE_SGSN
548 548
 )
549
+
550
+const (
551
+	IFLA_XFRM_UNSPEC = iota
552
+	IFLA_XFRM_LINK
553
+	IFLA_XFRM_IF_ID
554
+
555
+	IFLA_XFRM_MAX = iota - 1
556
+)
557
+
558
+const (
559
+	IFLA_TUN_UNSPEC = iota
560
+	IFLA_TUN_OWNER
561
+	IFLA_TUN_GROUP
562
+	IFLA_TUN_TYPE
563
+	IFLA_TUN_PI
564
+	IFLA_TUN_VNET_HDR
565
+	IFLA_TUN_PERSIST
566
+	IFLA_TUN_MULTI_QUEUE
567
+	IFLA_TUN_NUM_QUEUES
568
+	IFLA_TUN_NUM_DISABLED_QUEUES
569
+	IFLA_TUN_MAX = IFLA_TUN_NUM_DISABLED_QUEUES
570
+)
571
+
572
+const (
573
+	IFLA_IPOIB_UNSPEC = iota
574
+	IFLA_IPOIB_PKEY
575
+	IFLA_IPOIB_MODE
576
+	IFLA_IPOIB_UMCAST
577
+	IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST
578
+)
... ...
@@ -21,7 +21,13 @@ const (
21 21
 	FAMILY_ALL  = unix.AF_UNSPEC
22 22
 	FAMILY_V4   = unix.AF_INET
23 23
 	FAMILY_V6   = unix.AF_INET6
24
-	FAMILY_MPLS = AF_MPLS
24
+	FAMILY_MPLS = unix.AF_MPLS
25
+	// Arbitrary set value (greater than default 4k) to allow receiving
26
+	// from kernel more verbose messages e.g. for statistics,
27
+	// tc rules or filters, or other more memory requiring data.
28
+	RECEIVE_BUFFER_SIZE = 65536
29
+	// Kernel netlink pid
30
+	PidKernel uint32 = 0
25 31
 )
26 32
 
27 33
 // SupportedNlFamilies contains the list of netlink families this netlink package supports
... ...
@@ -42,7 +48,7 @@ func GetIPFamily(ip net.IP) int {
42 42
 
43 43
 var nativeEndian binary.ByteOrder
44 44
 
45
-// Get native endianness for the system
45
+// NativeEndian gets native endianness for the system
46 46
 func NativeEndian() binary.ByteOrder {
47 47
 	if nativeEndian == nil {
48 48
 		var x uint32 = 0x01020304
... ...
@@ -271,15 +277,22 @@ func NewRtAttr(attrType int, data []byte) *RtAttr {
271 271
 	}
272 272
 }
273 273
 
274
-// Create a new RtAttr obj anc add it as a child of an existing object
274
+// NewRtAttrChild adds an RtAttr as a child to the parent and returns the new attribute
275
+//
276
+// Deprecated: Use AddRtAttr() on the parent object
275 277
 func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
278
+	return parent.AddRtAttr(attrType, data)
279
+}
280
+
281
+// AddRtAttr adds an RtAttr as a child and returns the new attribute
282
+func (a *RtAttr) AddRtAttr(attrType int, data []byte) *RtAttr {
276 283
 	attr := NewRtAttr(attrType, data)
277
-	parent.children = append(parent.children, attr)
284
+	a.children = append(a.children, attr)
278 285
 	return attr
279 286
 }
280 287
 
281
-// AddChild adds an existing RtAttr as a child.
282
-func (a *RtAttr) AddChild(attr *RtAttr) {
288
+// AddChild adds an existing NetlinkRequestData as a child.
289
+func (a *RtAttr) AddChild(attr NetlinkRequestData) {
283 290
 	a.children = append(a.children, attr)
284 291
 }
285 292
 
... ...
@@ -360,16 +373,12 @@ func (req *NetlinkRequest) Serialize() []byte {
360 360
 }
361 361
 
362 362
 func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
363
-	if data != nil {
364
-		req.Data = append(req.Data, data)
365
-	}
363
+	req.Data = append(req.Data, data)
366 364
 }
367 365
 
368 366
 // AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization
369 367
 func (req *NetlinkRequest) AddRawData(data []byte) {
370
-	if data != nil {
371
-		req.RawData = append(req.RawData, data...)
372
-	}
368
+	req.RawData = append(req.RawData, data...)
373 369
 }
374 370
 
375 371
 // Execute the request against the given sockType.
... ...
@@ -413,10 +422,13 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
413 413
 
414 414
 done:
415 415
 	for {
416
-		msgs, err := s.Receive()
416
+		msgs, from, err := s.Receive()
417 417
 		if err != nil {
418 418
 			return nil, err
419 419
 		}
420
+		if from.Pid != PidKernel {
421
+			return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel)
422
+		}
420 423
 		for _, m := range msgs {
421 424
 			if m.Header.Seq != req.Seq {
422 425
 				if sharedSocket {
... ...
@@ -425,7 +437,7 @@ done:
425 425
 				return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
426 426
 			}
427 427
 			if m.Header.Pid != pid {
428
-				return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
428
+				continue
429 429
 			}
430 430
 			if m.Header.Type == unix.NLMSG_DONE {
431 431
 				break done
... ...
@@ -610,21 +622,31 @@ func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
610 610
 	return nil
611 611
 }
612 612
 
613
-func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
613
+func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) {
614 614
 	fd := int(atomic.LoadInt32(&s.fd))
615 615
 	if fd < 0 {
616
-		return nil, fmt.Errorf("Receive called on a closed socket")
616
+		return nil, nil, fmt.Errorf("Receive called on a closed socket")
617 617
 	}
618
-	rb := make([]byte, unix.Getpagesize())
619
-	nr, _, err := unix.Recvfrom(fd, rb, 0)
618
+	var fromAddr *unix.SockaddrNetlink
619
+	var rb [RECEIVE_BUFFER_SIZE]byte
620
+	nr, from, err := unix.Recvfrom(fd, rb[:], 0)
620 621
 	if err != nil {
621
-		return nil, err
622
+		return nil, nil, err
623
+	}
624
+	fromAddr, ok := from.(*unix.SockaddrNetlink)
625
+	if !ok {
626
+		return nil, nil, fmt.Errorf("Error converting to netlink sockaddr")
622 627
 	}
623 628
 	if nr < unix.NLMSG_HDRLEN {
624
-		return nil, fmt.Errorf("Got short response from netlink")
629
+		return nil, nil, fmt.Errorf("Got short response from netlink")
630
+	}
631
+	rb2 := make([]byte, nr)
632
+	copy(rb2, rb[:nr])
633
+	nl, err := syscall.ParseNetlinkMessage(rb2)
634
+	if err != nil {
635
+		return nil, nil, err
625 636
 	}
626
-	rb = rb[:nr]
627
-	return syscall.ParseNetlinkMessage(rb)
637
+	return nl, fromAddr, nil
628 638
 }
629 639
 
630 640
 // SetSendTimeout allows to set a send timeout on the socket
631 641
new file mode 100644
... ...
@@ -0,0 +1,35 @@
0
+package nl
1
+
2
+const (
3
+	RDMA_NL_GET_CLIENT_SHIFT = 10
4
+)
5
+
6
+const (
7
+	RDMA_NL_NLDEV = 5
8
+)
9
+
10
+const (
11
+	RDMA_NLDEV_CMD_GET     = 1
12
+	RDMA_NLDEV_CMD_SET     = 2
13
+	RDMA_NLDEV_CMD_SYS_GET = 6
14
+	RDMA_NLDEV_CMD_SYS_SET = 7
15
+)
16
+
17
+const (
18
+	RDMA_NLDEV_ATTR_DEV_INDEX       = 1
19
+	RDMA_NLDEV_ATTR_DEV_NAME        = 2
20
+	RDMA_NLDEV_ATTR_PORT_INDEX      = 3
21
+	RDMA_NLDEV_ATTR_CAP_FLAGS       = 4
22
+	RDMA_NLDEV_ATTR_FW_VERSION      = 5
23
+	RDMA_NLDEV_ATTR_NODE_GUID       = 6
24
+	RDMA_NLDEV_ATTR_SYS_IMAGE_GUID  = 7
25
+	RDMA_NLDEV_ATTR_SUBNET_PREFIX   = 8
26
+	RDMA_NLDEV_ATTR_LID             = 9
27
+	RDMA_NLDEV_ATTR_SM_LID          = 10
28
+	RDMA_NLDEV_ATTR_LMC             = 11
29
+	RDMA_NLDEV_ATTR_PORT_STATE      = 12
30
+	RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13
31
+	RDMA_NLDEV_ATTR_DEV_NODE_TYPE   = 14
32
+	RDMA_NLDEV_SYS_ATTR_NETNS_MODE  = 66
33
+	RDMA_NLDEV_NET_NS_FD            = 68
34
+)
... ...
@@ -79,3 +79,29 @@ func (msg *RtNexthop) Serialize() []byte {
79 79
 	}
80 80
 	return buf
81 81
 }
82
+
83
+type RtGenMsg struct {
84
+	unix.RtGenmsg
85
+}
86
+
87
+func NewRtGenMsg() *RtGenMsg {
88
+	return &RtGenMsg{
89
+		RtGenmsg: unix.RtGenmsg{
90
+			Family: unix.AF_UNSPEC,
91
+		},
92
+	}
93
+}
94
+
95
+func (msg *RtGenMsg) Len() int {
96
+	return rtaAlignOf(unix.SizeofRtGenmsg)
97
+}
98
+
99
+func DeserializeRtGenMsg(b []byte) *RtGenMsg {
100
+	return &RtGenMsg{RtGenmsg: unix.RtGenmsg{Family: b[0]}}
101
+}
102
+
103
+func (msg *RtGenMsg) Serialize() []byte {
104
+	out := make([]byte, msg.Len())
105
+	out[0] = msg.Family
106
+	return out
107
+}
... ...
@@ -99,6 +99,49 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) {
99 99
 	return mode, srh.Segments, nil
100 100
 }
101 101
 
102
// DecodeSEG6Srh extracts the segment list from a serialized IPv6 Segment
// Routing Header.
//
// buf must hold the 8-byte fixed header followed by zero or more 16-byte
// segments. The fixed header fields are neither validated nor returned.
// Each returned net.IP aliases buf rather than copying it, and the result
// is nil when the header carries no segments.
//
// An error is returned when buf is shorter than the fixed header or when the
// bytes following the header are not a whole number of segments.
func DecodeSEG6Srh(buf []byte) ([]net.IP, error) {
	const (
		fixedHdrLen = 8  // nextHdr, hdrLen, routingType, segmentsLeft, firstSegment, flags, reserved
		segmentLen  = 16 // one segment per 16 bytes
	)
	// Truncated input used to trigger an index-out-of-range panic while the
	// fixed header was being read; reject it explicitly.
	if len(buf) < fixedHdrLen {
		return nil, fmt.Errorf("DecodeSEG6Srh: error parsing header (buf len: %d)", len(buf))
	}
	buf = buf[fixedHdrLen:]
	if len(buf)%segmentLen != 0 {
		err := fmt.Errorf("DecodeSEG6Srh: error parsing Segment List (buf len: %d)", len(buf))
		return nil, err
	}
	var segments []net.IP
	for len(buf) > 0 {
		segments = append(segments, net.IP(buf[:segmentLen]))
		buf = buf[segmentLen:]
	}
	return segments, nil
}
124
+func EncodeSEG6Srh(segments []net.IP) ([]byte, error) {
125
+	nsegs := len(segments) // nsegs: number of segments
126
+	if nsegs == 0 {
127
+		return nil, errors.New("EncodeSEG6Srh: No Segments")
128
+	}
129
+	b := make([]byte, 8, 8+len(segments)*16)
130
+	native := NativeEndian()
131
+	b[0] = 0                      // srh.nextHdr (0 when calling netlink)
132
+	b[1] = uint8(16 * nsegs >> 3) // srh.hdrLen (in 8-octets unit)
133
+	b[2] = IPV6_SRCRT_TYPE_4      // srh.routingType (assigned by IANA)
134
+	b[3] = uint8(nsegs - 1)       // srh.segmentsLeft
135
+	b[4] = uint8(nsegs - 1)       // srh.firstSegment
136
+	b[5] = 0                      // srh.flags (SR6_FLAG1_HMAC for srh_hmac)
137
+	// srh.reserved: Defined as "Tag" in draft-ietf-6man-segment-routing-header-07
138
+	native.PutUint16(b[6:], 0) // srh.reserved
139
+	for _, netIP := range segments {
140
+		b = append(b, netIP...) // srh.Segments
141
+	}
142
+	return b, nil
143
+}
144
+
102 145
 // Helper functions
103 146
 func SEG6EncapModeString(mode int) string {
104 147
 	switch mode {
105 148
new file mode 100644
... ...
@@ -0,0 +1,76 @@
0
+package nl
1
+
2
+import ()
3
+
4
+// seg6local parameters
5
+const (
6
+	SEG6_LOCAL_UNSPEC = iota
7
+	SEG6_LOCAL_ACTION
8
+	SEG6_LOCAL_SRH
9
+	SEG6_LOCAL_TABLE
10
+	SEG6_LOCAL_NH4
11
+	SEG6_LOCAL_NH6
12
+	SEG6_LOCAL_IIF
13
+	SEG6_LOCAL_OIF
14
+	__SEG6_LOCAL_MAX
15
+)
16
+const (
17
+	SEG6_LOCAL_MAX = __SEG6_LOCAL_MAX
18
+)
19
+
20
// seg6local actions
const (
	SEG6_LOCAL_ACTION_END           = iota + 1 // 1
	SEG6_LOCAL_ACTION_END_X                    // 2
	SEG6_LOCAL_ACTION_END_T                    // 3
	SEG6_LOCAL_ACTION_END_DX2                  // 4
	SEG6_LOCAL_ACTION_END_DX6                  // 5
	SEG6_LOCAL_ACTION_END_DX4                  // 6
	SEG6_LOCAL_ACTION_END_DT6                  // 7
	SEG6_LOCAL_ACTION_END_DT4                  // 8
	SEG6_LOCAL_ACTION_END_B6                   // 9
	SEG6_LOCAL_ACTION_END_B6_ENCAPS            // 10
	SEG6_LOCAL_ACTION_END_BM                   // 11
	SEG6_LOCAL_ACTION_END_S                    // 12
	SEG6_LOCAL_ACTION_END_AS                   // 13
	SEG6_LOCAL_ACTION_END_AM                   // 14
	__SEG6_LOCAL_ACTION_MAX
)
const (
	SEG6_LOCAL_ACTION_MAX = __SEG6_LOCAL_ACTION_MAX - 1
)

// Helper functions

// SEG6LocalActionString returns the display name of a seg6local action
// (for example "End.DX6"), or "unknown" for any value outside the
// SEG6_LOCAL_ACTION_END..SEG6_LOCAL_ACTION_MAX range.
func SEG6LocalActionString(action int) string {
	// Indexed by action value; slot 0 stays empty because the action
	// constants start at 1.
	labels := [...]string{
		SEG6_LOCAL_ACTION_END:           "End",
		SEG6_LOCAL_ACTION_END_X:         "End.X",
		SEG6_LOCAL_ACTION_END_T:         "End.T",
		SEG6_LOCAL_ACTION_END_DX2:       "End.DX2",
		SEG6_LOCAL_ACTION_END_DX6:       "End.DX6",
		SEG6_LOCAL_ACTION_END_DX4:       "End.DX4",
		SEG6_LOCAL_ACTION_END_DT6:       "End.DT6",
		SEG6_LOCAL_ACTION_END_DT4:       "End.DT4",
		SEG6_LOCAL_ACTION_END_B6:        "End.B6",
		SEG6_LOCAL_ACTION_END_B6_ENCAPS: "End.B6.Encaps",
		SEG6_LOCAL_ACTION_END_BM:        "End.BM",
		SEG6_LOCAL_ACTION_END_S:         "End.S",
		SEG6_LOCAL_ACTION_END_AS:        "End.AS",
		SEG6_LOCAL_ACTION_END_AM:        "End.AM",
	}
	if action < SEG6_LOCAL_ACTION_END || action > SEG6_LOCAL_ACTION_MAX {
		return "unknown"
	}
	return labels[action]
}
... ...
@@ -42,16 +42,6 @@ const (
42 42
 	TCPDIAG_NOCOOKIE    = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/
43 43
 )
44 44
 
45
-const (
46
-	AF_MPLS = 28
47
-)
48
-
49
-const (
50
-	RTA_NEWDST     = 0x13
51
-	RTA_ENCAP_TYPE = 0x15
52
-	RTA_ENCAP      = 0x16
53
-)
54
-
55 45
 // RTA_ENCAP subtype
56 46
 const (
57 47
 	MPLS_IPTUNNEL_UNSPEC = iota
... ...
@@ -67,6 +57,7 @@ const (
67 67
 	LWTUNNEL_ENCAP_IP6
68 68
 	LWTUNNEL_ENCAP_SEG6
69 69
 	LWTUNNEL_ENCAP_BPF
70
+	LWTUNNEL_ENCAP_SEG6_LOCAL
70 71
 )
71 72
 
72 73
 // routing header types
... ...
@@ -1,6 +1,7 @@
1 1
 package nl
2 2
 
3 3
 import (
4
+	"encoding/binary"
4 5
 	"unsafe"
5 6
 )
6 7
 
... ...
@@ -65,6 +66,15 @@ const (
65 65
 )
66 66
 
67 67
 const (
68
+	TCA_STATS_UNSPEC = iota
69
+	TCA_STATS_BASIC
70
+	TCA_STATS_RATE_EST
71
+	TCA_STATS_QUEUE
72
+	TCA_STATS_APP
73
+	TCA_STATS_MAX = TCA_STATS_APP
74
+)
75
+
76
+const (
68 77
 	SizeofTcMsg          = 0x14
69 78
 	SizeofTcActionMsg    = 0x04
70 79
 	SizeofTcPrioMap      = 0x14
... ...
@@ -79,7 +89,10 @@ const (
79 79
 	SizeofTcU32Key       = 0x10
80 80
 	SizeofTcU32Sel       = 0x10 // without keys
81 81
 	SizeofTcGen          = 0x14
82
+	SizeofTcConnmark     = SizeofTcGen + 0x04
82 83
 	SizeofTcMirred       = SizeofTcGen + 0x08
84
+	SizeofTcTunnelKey    = SizeofTcGen + 0x04
85
+	SizeofTcSkbEdit      = SizeofTcGen
83 86
 	SizeofTcPolice       = 2*SizeofTcRateSpec + 0x20
84 87
 )
85 88
 
... ...
@@ -412,6 +425,57 @@ func (x *TcHtbGlob) Serialize() []byte {
412 412
 	return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:]
413 413
 }
414 414
 
415
// HFSC

// Curve holds the three 32-bit parameters of an HFSC curve. The fields are
// unexported; use Attrs and Set to read and write them.
type Curve struct {
	m1 uint32
	d  uint32
	m2 uint32
}

// HfscCopt bundles the Rsc, Fsc and Usc curves.
type HfscCopt struct {
	Rsc Curve
	Fsc Curve
	Usc Curve
}

// Attrs returns the curve parameters in the order m1, d, m2.
func (c *Curve) Attrs() (uint32, uint32, uint32) {
	return c.m1, c.d, c.m2
}

// Set overwrites all three curve parameters.
func (c *Curve) Set(m1 uint32, d uint32, m2 uint32) {
	c.m1 = m1
	c.d = d
	c.m2 = m2
}

// DeserializeHfscCurve decodes a Curve from the first 12 bytes of b, read as
// three little-endian uint32 values in the order m1, d, m2. It panics if b
// holds fewer than 12 bytes.
func DeserializeHfscCurve(b []byte) *Curve {
	return &Curve{
		m1: binary.LittleEndian.Uint32(b[0:4]),
		d:  binary.LittleEndian.Uint32(b[4:8]),
		m2: binary.LittleEndian.Uint32(b[8:12]),
	}
}

// SerializeHfscCurve encodes c as 12 bytes: m1, d and m2 as consecutive
// little-endian uint32 values, mirroring DeserializeHfscCurve.
//
// NOTE(review): the byte order is hard-coded to little-endian, while
// TcHfscOpt.Serialize in this file exposes raw struct memory; confirm the
// intended behavior on big-endian hosts.
func SerializeHfscCurve(c *Curve) (b []byte) {
	// Write straight into a buffer of the exact size rather than going
	// through a scratch slice dimensioned by a varint bound.
	b = make([]byte, 12)
	binary.LittleEndian.PutUint32(b[0:4], c.m1)
	binary.LittleEndian.PutUint32(b[4:8], c.d)
	binary.LittleEndian.PutUint32(b[8:12], c.m2)
	return b
}
457
+
458
// TcHfscOpt is the HFSC qdisc option payload; Defcls is its only field.
type TcHfscOpt struct {
	Defcls uint16
}

// Serialize returns the raw in-memory bytes of x. The slice is a view onto x
// itself, not a copy, so later changes to x show through it.
func (x *TcHfscOpt) Serialize() []byte {
	raw := (*[unsafe.Sizeof(TcHfscOpt{})]byte)(unsafe.Pointer(x))
	return raw[:]
}
465
+
415 466
 const (
416 467
 	TCA_U32_UNSPEC = iota
417 468
 	TCA_U32_CLASSID
... ...
@@ -586,12 +650,48 @@ const (
586 586
 	TCA_BPF_FD
587 587
 	TCA_BPF_NAME
588 588
 	TCA_BPF_FLAGS
589
-	TCA_BPF_MAX = TCA_BPF_FLAGS
589
+	TCA_BPF_FLAGS_GEN
590
+	TCA_BPF_TAG
591
+	TCA_BPF_ID
592
+	TCA_BPF_MAX = TCA_BPF_ID
590 593
 )
591 594
 
592 595
 type TcBpf TcGen
593 596
 
594 597
 const (
598
+	TCA_ACT_CONNMARK = 14
599
+)
600
+
601
+const (
602
+	TCA_CONNMARK_UNSPEC = iota
603
+	TCA_CONNMARK_PARMS
604
+	TCA_CONNMARK_TM
605
+	TCA_CONNMARK_MAX = TCA_CONNMARK_TM
606
+)
607
+
608
+// struct tc_connmark {
609
+//   tc_gen;
610
+//   __u16 zone;
611
+// };
612
+
613
+type TcConnmark struct {
614
+	TcGen
615
+	Zone uint16
616
+}
617
+
618
+func (msg *TcConnmark) Len() int {
619
+	return SizeofTcConnmark
620
+}
621
+
622
+func DeserializeTcConnmark(b []byte) *TcConnmark {
623
+	return (*TcConnmark)(unsafe.Pointer(&b[0:SizeofTcConnmark][0]))
624
+}
625
+
626
+func (x *TcConnmark) Serialize() []byte {
627
+	return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:]
628
+}
629
+
630
+const (
595 631
 	TCA_ACT_MIRRED = 8
596 632
 )
597 633
 
... ...
@@ -626,6 +726,63 @@ func (x *TcMirred) Serialize() []byte {
626 626
 	return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
627 627
 }
628 628
 
629
+const (
630
+	TCA_TUNNEL_KEY_UNSPEC = iota
631
+	TCA_TUNNEL_KEY_TM
632
+	TCA_TUNNEL_KEY_PARMS
633
+	TCA_TUNNEL_KEY_ENC_IPV4_SRC
634
+	TCA_TUNNEL_KEY_ENC_IPV4_DST
635
+	TCA_TUNNEL_KEY_ENC_IPV6_SRC
636
+	TCA_TUNNEL_KEY_ENC_IPV6_DST
637
+	TCA_TUNNEL_KEY_ENC_KEY_ID
638
+	TCA_TUNNEL_KEY_MAX = TCA_TUNNEL_KEY_ENC_KEY_ID
639
+)
640
+
641
+type TcTunnelKey struct {
642
+	TcGen
643
+	Action int32
644
+}
645
+
646
+func (x *TcTunnelKey) Len() int {
647
+	return SizeofTcTunnelKey
648
+}
649
+
650
+func DeserializeTunnelKey(b []byte) *TcTunnelKey {
651
+	return (*TcTunnelKey)(unsafe.Pointer(&b[0:SizeofTcTunnelKey][0]))
652
+}
653
+
654
+func (x *TcTunnelKey) Serialize() []byte {
655
+	return (*(*[SizeofTcTunnelKey]byte)(unsafe.Pointer(x)))[:]
656
+}
657
+
658
// skbedit action attribute types. TCA_SKBEDIT_MAX names the highest
// attribute in the list, so it has to follow the last entry rather than stay
// pinned to TCA_SKBEDIT_MARK.
const (
	TCA_SKBEDIT_UNSPEC = iota
	TCA_SKBEDIT_TM
	TCA_SKBEDIT_PARMS
	TCA_SKBEDIT_PRIORITY
	TCA_SKBEDIT_QUEUE_MAPPING
	TCA_SKBEDIT_MARK
	TCA_SKBEDIT_PAD
	TCA_SKBEDIT_PTYPE
	TCA_SKBEDIT_MAX = TCA_SKBEDIT_PTYPE
)
669
+
670
+type TcSkbEdit struct {
671
+	TcGen
672
+}
673
+
674
+func (x *TcSkbEdit) Len() int {
675
+	return SizeofTcSkbEdit
676
+}
677
+
678
+func DeserializeSkbEdit(b []byte) *TcSkbEdit {
679
+	return (*TcSkbEdit)(unsafe.Pointer(&b[0:SizeofTcSkbEdit][0]))
680
+}
681
+
682
+func (x *TcSkbEdit) Serialize() []byte {
683
+	return (*(*[SizeofTcSkbEdit]byte)(unsafe.Pointer(x)))[:]
684
+}
685
+
629 686
 // struct tc_police {
630 687
 // 	__u32			index;
631 688
 // 	int			action;
... ...
@@ -708,3 +865,10 @@ const (
708 708
 	TCA_FQ_CODEL_DROP_BATCH_SIZE
709 709
 	TCA_FQ_CODEL_MEMORY_LIMIT
710 710
 )
711
+
712
+const (
713
+	TCA_HFSC_UNSPEC = iota
714
+	TCA_HFSC_RSC
715
+	TCA_HFSC_FSC
716
+	TCA_HFSC_USC
717
+)
... ...
@@ -50,34 +50,44 @@ const (
50 50
 // Attribute types
51 51
 const (
52 52
 	/* Netlink message attributes.  */
53
-	XFRMA_UNSPEC         = 0x00
54
-	XFRMA_ALG_AUTH       = 0x01 /* struct xfrm_algo */
55
-	XFRMA_ALG_CRYPT      = 0x02 /* struct xfrm_algo */
56
-	XFRMA_ALG_COMP       = 0x03 /* struct xfrm_algo */
57
-	XFRMA_ENCAP          = 0x04 /* struct xfrm_algo + struct xfrm_encap_tmpl */
58
-	XFRMA_TMPL           = 0x05 /* 1 or more struct xfrm_user_tmpl */
59
-	XFRMA_SA             = 0x06 /* struct xfrm_usersa_info  */
60
-	XFRMA_POLICY         = 0x07 /* struct xfrm_userpolicy_info */
61
-	XFRMA_SEC_CTX        = 0x08 /* struct xfrm_sec_ctx */
62
-	XFRMA_LTIME_VAL      = 0x09
63
-	XFRMA_REPLAY_VAL     = 0x0a
64
-	XFRMA_REPLAY_THRESH  = 0x0b
65
-	XFRMA_ETIMER_THRESH  = 0x0c
66
-	XFRMA_SRCADDR        = 0x0d /* xfrm_address_t */
67
-	XFRMA_COADDR         = 0x0e /* xfrm_address_t */
68
-	XFRMA_LASTUSED       = 0x0f /* unsigned long  */
69
-	XFRMA_POLICY_TYPE    = 0x10 /* struct xfrm_userpolicy_type */
70
-	XFRMA_MIGRATE        = 0x11
71
-	XFRMA_ALG_AEAD       = 0x12 /* struct xfrm_algo_aead */
72
-	XFRMA_KMADDRESS      = 0x13 /* struct xfrm_user_kmaddress */
73
-	XFRMA_ALG_AUTH_TRUNC = 0x14 /* struct xfrm_algo_auth */
74
-	XFRMA_MARK           = 0x15 /* struct xfrm_mark */
75
-	XFRMA_TFCPAD         = 0x16 /* __u32 */
76
-	XFRMA_REPLAY_ESN_VAL = 0x17 /* struct xfrm_replay_esn */
77
-	XFRMA_SA_EXTRA_FLAGS = 0x18 /* __u32 */
78
-	XFRMA_MAX            = 0x18
53
+	XFRMA_UNSPEC    = iota
54
+	XFRMA_ALG_AUTH  /* struct xfrm_algo */
55
+	XFRMA_ALG_CRYPT /* struct xfrm_algo */
56
+	XFRMA_ALG_COMP  /* struct xfrm_algo */
57
+	XFRMA_ENCAP     /* struct xfrm_algo + struct xfrm_encap_tmpl */
58
+	XFRMA_TMPL      /* 1 or more struct xfrm_user_tmpl */
59
+	XFRMA_SA        /* struct xfrm_usersa_info  */
60
+	XFRMA_POLICY    /* struct xfrm_userpolicy_info */
61
+	XFRMA_SEC_CTX   /* struct xfrm_sec_ctx */
62
+	XFRMA_LTIME_VAL
63
+	XFRMA_REPLAY_VAL
64
+	XFRMA_REPLAY_THRESH
65
+	XFRMA_ETIMER_THRESH
66
+	XFRMA_SRCADDR     /* xfrm_address_t */
67
+	XFRMA_COADDR      /* xfrm_address_t */
68
+	XFRMA_LASTUSED    /* unsigned long  */
69
+	XFRMA_POLICY_TYPE /* struct xfrm_userpolicy_type */
70
+	XFRMA_MIGRATE
71
+	XFRMA_ALG_AEAD       /* struct xfrm_algo_aead */
72
+	XFRMA_KMADDRESS      /* struct xfrm_user_kmaddress */
73
+	XFRMA_ALG_AUTH_TRUNC /* struct xfrm_algo_auth */
74
+	XFRMA_MARK           /* struct xfrm_mark */
75
+	XFRMA_TFCPAD         /* __u32 */
76
+	XFRMA_REPLAY_ESN_VAL /* struct xfrm_replay_esn */
77
+	XFRMA_SA_EXTRA_FLAGS /* __u32 */
78
+	XFRMA_PROTO          /* __u8 */
79
+	XFRMA_ADDRESS_FILTER /* struct xfrm_address_filter */
80
+	XFRMA_PAD
81
+	XFRMA_OFFLOAD_DEV   /* struct xfrm_state_offload */
82
+	XFRMA_SET_MARK      /* __u32 */
83
+	XFRMA_SET_MARK_MASK /* __u32 */
84
+	XFRMA_IF_ID         /* __u32 */
85
+
86
+	XFRMA_MAX = iota - 1
79 87
 )
80 88
 
89
+const XFRMA_OUTPUT_MARK = XFRMA_SET_MARK
90
+
81 91
 const (
82 92
 	SizeofXfrmAddress     = 0x10
83 93
 	SizeofXfrmSelector    = 0x38
... ...
@@ -18,6 +18,10 @@ type Protinfo struct {
18 18
 
19 19
 // String returns a list of enabled flags
20 20
 func (prot *Protinfo) String() string {
21
+	if prot == nil {
22
+		return "<nil>"
23
+	}
24
+
21 25
 	var boolStrings []string
22 26
 	if prot.Hairpin {
23 27
 		boolStrings = append(boolStrings, "Hairpin")
... ...
@@ -41,7 +41,7 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
41 41
 			if err != nil {
42 42
 				return pi, err
43 43
 			}
44
-			pi = *parseProtinfo(infos)
44
+			pi = parseProtinfo(infos)
45 45
 
46 46
 			return pi, nil
47 47
 		}
... ...
@@ -49,8 +49,7 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
49 49
 	return pi, fmt.Errorf("Device with index %d not found", base.Index)
50 50
 }
51 51
 
52
-func parseProtinfo(infos []syscall.NetlinkRouteAttr) *Protinfo {
53
-	var pi Protinfo
52
+func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) {
54 53
 	for _, info := range infos {
55 54
 		switch info.Attr.Type {
56 55
 		case nl.IFLA_BRPORT_MODE:
... ...
@@ -71,5 +70,5 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) *Protinfo {
71 71
 			pi.ProxyArpWiFi = byteToBool(info.Value[0])
72 72
 		}
73 73
 	}
74
-	return &pi
74
+	return
75 75
 }
... ...
@@ -176,6 +176,13 @@ type Netem struct {
176 176
 	CorruptCorr   uint32
177 177
 }
178 178
 
179
+func (netem *Netem) String() string {
180
+	return fmt.Sprintf(
181
+		"{Latency: %v, Limit: %v, Loss: %v, Gap: %v, Duplicate: %v, Jitter: %v}",
182
+		netem.Latency, netem.Limit, netem.Loss, netem.Gap, netem.Duplicate, netem.Jitter,
183
+	)
184
+}
185
+
179 186
 func (qdisc *Netem) Attrs() *QdiscAttrs {
180 187
 	return &qdisc.QdiscAttrs
181 188
 }
... ...
@@ -231,6 +238,33 @@ func (qdisc *GenericQdisc) Type() string {
231 231
 	return qdisc.QdiscType
232 232
 }
233 233
 
234
+type Hfsc struct {
235
+	QdiscAttrs
236
+	Defcls uint16
237
+}
238
+
239
+func NewHfsc(attrs QdiscAttrs) *Hfsc {
240
+	return &Hfsc{
241
+		QdiscAttrs: attrs,
242
+		Defcls:     1,
243
+	}
244
+}
245
+
246
+func (hfsc *Hfsc) Attrs() *QdiscAttrs {
247
+	return &hfsc.QdiscAttrs
248
+}
249
+
250
+func (hfsc *Hfsc) Type() string {
251
+	return "hfsc"
252
+}
253
+
254
+func (hfsc *Hfsc) String() string {
255
+	return fmt.Sprintf(
256
+		"{%v -- default: %d}",
257
+		hfsc.Attrs(), hfsc.Defcls,
258
+	)
259
+}
260
+
234 261
 // Fq is a classless packet scheduler meant to be mostly used for locally generated traffic.
235 262
 type Fq struct {
236 263
 	QdiscAttrs
... ...
@@ -249,6 +283,13 @@ type Fq struct {
249 249
 	LowRateThreshold uint32
250 250
 }
251 251
 
252
+func (fq *Fq) String() string {
253
+	return fmt.Sprintf(
254
+		"{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v,  LowRateThreshold: %v}",
255
+		fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold,
256
+	)
257
+}
258
+
252 259
 func NewFq(attrs QdiscAttrs) *Fq {
253 260
 	return &Fq{
254 261
 		QdiscAttrs: attrs,
... ...
@@ -276,6 +317,13 @@ type FqCodel struct {
276 276
 	// There are some more attributes here, but support for them seems not ubiquitous
277 277
 }
278 278
 
279
+func (fqcodel *FqCodel) String() string {
280
+	return fmt.Sprintf(
281
+		"{%v -- Target: %v, Limit: %v, Interval: %v, ECM: %v, Flows: %v, Quantum: %v}",
282
+		fqcodel.Attrs(), fqcodel.Target, fqcodel.Limit, fqcodel.Interval, fqcodel.ECN, fqcodel.Flows, fqcodel.Quantum,
283
+	)
284
+}
285
+
279 286
 func NewFqCodel(attrs QdiscAttrs) *FqCodel {
280 287
 	return &FqCodel{
281 288
 		QdiscAttrs: attrs,
... ...
@@ -175,15 +175,15 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
175 175
 		opt.Peakrate.Rate = uint32(qdisc.Peakrate)
176 176
 		opt.Limit = qdisc.Limit
177 177
 		opt.Buffer = qdisc.Buffer
178
-		nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
178
+		options.AddRtAttr(nl.TCA_TBF_PARMS, opt.Serialize())
179 179
 		if qdisc.Rate >= uint64(1<<32) {
180
-			nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate))
180
+			options.AddRtAttr(nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate))
181 181
 		}
182 182
 		if qdisc.Peakrate >= uint64(1<<32) {
183
-			nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate))
183
+			options.AddRtAttr(nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate))
184 184
 		}
185 185
 		if qdisc.Peakrate > 0 {
186
-			nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst))
186
+			options.AddRtAttr(nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst))
187 187
 		}
188 188
 	case *Htb:
189 189
 		opt := nl.TcHtbGlob{}
... ...
@@ -193,8 +193,12 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
193 193
 		// TODO: Handle Debug properly. For now default to 0
194 194
 		opt.Debug = qdisc.Debug
195 195
 		opt.DirectPkts = qdisc.DirectPkts
196
-		nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
197
-		// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
196
+		options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize())
197
+		// options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
198
+	case *Hfsc:
199
+		opt := nl.TcHfscOpt{}
200
+		opt.Defcls = qdisc.Defcls
201
+		options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
198 202
 	case *Netem:
199 203
 		opt := nl.TcNetemQopt{}
200 204
 		opt.Latency = qdisc.Latency
... ...
@@ -211,21 +215,21 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
211 211
 		corr.DupCorr = qdisc.DuplicateCorr
212 212
 
213 213
 		if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
214
-			nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
214
+			options.AddRtAttr(nl.TCA_NETEM_CORR, corr.Serialize())
215 215
 		}
216 216
 		// Corruption
217 217
 		corruption := nl.TcNetemCorrupt{}
218 218
 		corruption.Probability = qdisc.CorruptProb
219 219
 		corruption.Correlation = qdisc.CorruptCorr
220 220
 		if corruption.Probability > 0 {
221
-			nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
221
+			options.AddRtAttr(nl.TCA_NETEM_CORRUPT, corruption.Serialize())
222 222
 		}
223 223
 		// Reorder
224 224
 		reorder := nl.TcNetemReorder{}
225 225
 		reorder.Probability = qdisc.ReorderProb
226 226
 		reorder.Correlation = qdisc.ReorderCorr
227 227
 		if reorder.Probability > 0 {
228
-			nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
228
+			options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize())
229 229
 		}
230 230
 	case *Ingress:
231 231
 		// ingress filters must use the proper handle
... ...
@@ -233,50 +237,54 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
233 233
 			return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
234 234
 		}
235 235
 	case *FqCodel:
236
-		nl.NewRtAttrChild(options, nl.TCA_FQ_CODEL_ECN, nl.Uint32Attr((uint32(qdisc.ECN))))
236
+		options.AddRtAttr(nl.TCA_FQ_CODEL_ECN, nl.Uint32Attr((uint32(qdisc.ECN))))
237 237
 		if qdisc.Limit > 0 {
238
-			nl.NewRtAttrChild(options, nl.TCA_FQ_CODEL_LIMIT, nl.Uint32Attr((uint32(qdisc.Limit))))
238
+			options.AddRtAttr(nl.TCA_FQ_CODEL_LIMIT, nl.Uint32Attr((uint32(qdisc.Limit))))
239 239
 		}
240 240
 		if qdisc.Interval > 0 {
241
-			nl.NewRtAttrChild(options, nl.TCA_FQ_CODEL_INTERVAL, nl.Uint32Attr((uint32(qdisc.Interval))))
241
+			options.AddRtAttr(nl.TCA_FQ_CODEL_INTERVAL, nl.Uint32Attr((uint32(qdisc.Interval))))
242 242
 		}
243 243
 		if qdisc.Flows > 0 {
244
-			nl.NewRtAttrChild(options, nl.TCA_FQ_CODEL_FLOWS, nl.Uint32Attr((uint32(qdisc.Flows))))
244
+			options.AddRtAttr(nl.TCA_FQ_CODEL_FLOWS, nl.Uint32Attr((uint32(qdisc.Flows))))
245 245
 		}
246 246
 		if qdisc.Quantum > 0 {
247
-			nl.NewRtAttrChild(options, nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum))))
247
+			options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum))))
248 248
 		}
249 249
 
250 250
 	case *Fq:
251
-		nl.NewRtAttrChild(options, nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing))))
251
+		options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing))))
252 252
 
253 253
 		if qdisc.Buckets > 0 {
254
-			nl.NewRtAttrChild(options, nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets))))
254
+			options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets))))
255 255
 		}
256 256
 		if qdisc.LowRateThreshold > 0 {
257
-			nl.NewRtAttrChild(options, nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold))))
257
+			options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold))))
258 258
 		}
259 259
 		if qdisc.Quantum > 0 {
260
-			nl.NewRtAttrChild(options, nl.TCA_FQ_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum))))
260
+			options.AddRtAttr(nl.TCA_FQ_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum))))
261 261
 		}
262 262
 		if qdisc.InitialQuantum > 0 {
263
-			nl.NewRtAttrChild(options, nl.TCA_FQ_INITIAL_QUANTUM, nl.Uint32Attr((uint32(qdisc.InitialQuantum))))
263
+			options.AddRtAttr(nl.TCA_FQ_INITIAL_QUANTUM, nl.Uint32Attr((uint32(qdisc.InitialQuantum))))
264 264
 		}
265 265
 		if qdisc.FlowRefillDelay > 0 {
266
-			nl.NewRtAttrChild(options, nl.TCA_FQ_FLOW_REFILL_DELAY, nl.Uint32Attr((uint32(qdisc.FlowRefillDelay))))
266
+			options.AddRtAttr(nl.TCA_FQ_FLOW_REFILL_DELAY, nl.Uint32Attr((uint32(qdisc.FlowRefillDelay))))
267 267
 		}
268 268
 		if qdisc.FlowPacketLimit > 0 {
269
-			nl.NewRtAttrChild(options, nl.TCA_FQ_FLOW_PLIMIT, nl.Uint32Attr((uint32(qdisc.FlowPacketLimit))))
269
+			options.AddRtAttr(nl.TCA_FQ_FLOW_PLIMIT, nl.Uint32Attr((uint32(qdisc.FlowPacketLimit))))
270 270
 		}
271 271
 		if qdisc.FlowMaxRate > 0 {
272
-			nl.NewRtAttrChild(options, nl.TCA_FQ_FLOW_MAX_RATE, nl.Uint32Attr((uint32(qdisc.FlowMaxRate))))
272
+			options.AddRtAttr(nl.TCA_FQ_FLOW_MAX_RATE, nl.Uint32Attr((uint32(qdisc.FlowMaxRate))))
273 273
 		}
274 274
 		if qdisc.FlowDefaultRate > 0 {
275
-			nl.NewRtAttrChild(options, nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate))))
275
+			options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate))))
276 276
 		}
277
+	default:
278
+		options = nil
277 279
 	}
278 280
 
279
-	req.AddData(options)
281
+	if options != nil {
282
+		req.AddData(options)
283
+	}
280 284
 	return nil
281 285
 }
282 286
 
... ...
@@ -348,6 +356,8 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
348 348
 					qdisc = &Htb{}
349 349
 				case "fq":
350 350
 					qdisc = &Fq{}
351
+				case "hfsc":
352
+					qdisc = &Hfsc{}
351 353
 				case "fq_codel":
352 354
 					qdisc = &FqCodel{}
353 355
 				case "netem":
... ...
@@ -375,6 +385,10 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
375 375
 					if err := parseTbfData(qdisc, data); err != nil {
376 376
 						return nil, err
377 377
 					}
378
+				case "hfsc":
379
+					if err := parseHfscData(qdisc, attr.Value); err != nil {
380
+						return nil, err
381
+					}
378 382
 				case "htb":
379 383
 					data, err := nl.ParseRouteAttr(attr.Value)
380 384
 					if err != nil {
... ...
@@ -474,6 +488,13 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
474 474
 	return nil
475 475
 }
476 476
 
477
+func parseHfscData(qdisc Qdisc, data []byte) error {
478
+	Hfsc := qdisc.(*Hfsc)
479
+	native = nl.NativeEndian()
480
+	Hfsc.Defcls = native.Uint16(data)
481
+	return nil
482
+}
483
+
477 484
 func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
478 485
 	native = nl.NativeEndian()
479 486
 	fq := qdisc.(*Fq)
480 487
new file mode 100644
... ...
@@ -0,0 +1,264 @@
0
+package netlink
1
+
2
+import (
3
+	"bytes"
4
+	"encoding/binary"
5
+	"fmt"
6
+	"net"
7
+
8
+	"github.com/vishvananda/netlink/nl"
9
+	"golang.org/x/sys/unix"
10
+)
11
+
12
+// RdmaLinkAttrs holds the attributes of an RDMA device (index, name, firmware version and GUIDs).
13
+type RdmaLinkAttrs struct {
14
+	Index           uint32
15
+	Name            string
16
+	FirmwareVersion string
17
+	NodeGuid        string
18
+	SysImageGuid    string
19
+}
20
+
21
+// RdmaLink represents an RDMA device from netlink.
22
+type RdmaLink struct {
23
+	Attrs RdmaLinkAttrs
24
+}
25
+
26
+func getProtoField(clientType int, op int) int {
27
+	return ((clientType << nl.RDMA_NL_GET_CLIENT_SHIFT) | op)
28
+}
29
+
30
+func uint64ToGuidString(guid uint64) string {
31
+	//Convert to byte array
32
+	sysGuidBytes := new(bytes.Buffer)
33
+	binary.Write(sysGuidBytes, binary.LittleEndian, guid)
34
+
35
+	//Convert to HardwareAddr
36
+	sysGuidNet := net.HardwareAddr(sysGuidBytes.Bytes())
37
+
38
+	//Get the String
39
+	return sysGuidNet.String()
40
+}
41
+
42
+func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) {
43
+
44
+	link := RdmaLink{}
45
+
46
+	reader := bytes.NewReader(data)
47
+	for reader.Len() >= 4 {
48
+		_, attrType, len, value := parseNfAttrTLV(reader)
49
+
50
+		switch attrType {
51
+		case nl.RDMA_NLDEV_ATTR_DEV_INDEX:
52
+			var Index uint32
53
+			r := bytes.NewReader(value)
54
+			binary.Read(r, nl.NativeEndian(), &Index)
55
+			link.Attrs.Index = Index
56
+		case nl.RDMA_NLDEV_ATTR_DEV_NAME:
57
+			link.Attrs.Name = string(value[0 : len-1])
58
+		case nl.RDMA_NLDEV_ATTR_FW_VERSION:
59
+			link.Attrs.FirmwareVersion = string(value[0 : len-1])
60
+		case nl.RDMA_NLDEV_ATTR_NODE_GUID:
61
+			var guid uint64
62
+			r := bytes.NewReader(value)
63
+			binary.Read(r, nl.NativeEndian(), &guid)
64
+			link.Attrs.NodeGuid = uint64ToGuidString(guid)
65
+		case nl.RDMA_NLDEV_ATTR_SYS_IMAGE_GUID:
66
+			var sysGuid uint64
67
+			r := bytes.NewReader(value)
68
+			binary.Read(r, nl.NativeEndian(), &sysGuid)
69
+			link.Attrs.SysImageGuid = uint64ToGuidString(sysGuid)
70
+		}
71
+		if (len % 4) != 0 {
72
+			// Skip pad bytes
73
+			reader.Seek(int64(4-(len%4)), seekCurrent)
74
+		}
75
+	}
76
+	return &link, nil
77
+}
78
+
79
+func execRdmaGetLink(req *nl.NetlinkRequest, name string) (*RdmaLink, error) {
80
+
81
+	msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
82
+	if err != nil {
83
+		return nil, err
84
+	}
85
+	for _, m := range msgs {
86
+		link, err := executeOneGetRdmaLink(m)
87
+		if err != nil {
88
+			return nil, err
89
+		}
90
+		if link.Attrs.Name == name {
91
+			return link, nil
92
+		}
93
+	}
94
+	return nil, fmt.Errorf("Rdma device %v not found", name)
95
+}
96
+
97
+func execRdmaSetLink(req *nl.NetlinkRequest) error {
98
+
99
+	_, err := req.Execute(unix.NETLINK_RDMA, 0)
100
+	return err
101
+}
102
+
103
+// RdmaLinkByName finds a link by name and returns a pointer to the object if
104
+// found and nil error, otherwise returns error code.
105
+func RdmaLinkByName(name string) (*RdmaLink, error) {
106
+	return pkgHandle.RdmaLinkByName(name)
107
+}
108
+
109
+// RdmaLinkByName finds a link by name and returns a pointer to the object if
110
+// found and nil error, otherwise returns error code.
111
+func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) {
112
+
113
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET)
114
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP)
115
+
116
+	return execRdmaGetLink(req, name)
117
+}
118
+
119
+// RdmaLinkSetName sets the name of the rdma link device. Return nil on success
120
+// or error otherwise.
121
+// Equivalent to: `rdma dev set $old_devname name $name`
122
+func RdmaLinkSetName(link *RdmaLink, name string) error {
123
+	return pkgHandle.RdmaLinkSetName(link, name)
124
+}
125
+
126
+// RdmaLinkSetName sets the name of the rdma link device. Return nil on success
127
+// or error otherwise.
128
+// Equivalent to: `rdma dev set $old_devname name $name`
129
+func (h *Handle) RdmaLinkSetName(link *RdmaLink, name string) error {
130
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SET)
131
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
132
+
133
+	b := make([]byte, 4)
134
+	native.PutUint32(b, uint32(link.Attrs.Index))
135
+	data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)
136
+	req.AddData(data)
137
+
138
+	b = make([]byte, len(name)+1)
139
+	copy(b, name)
140
+	data = nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, b)
141
+	req.AddData(data)
142
+
143
+	return execRdmaSetLink(req)
144
+}
145
+
146
+func netnsModeToString(mode uint8) string {
147
+	switch mode {
148
+	case 0:
149
+		return "exclusive"
150
+	case 1:
151
+		return "shared"
152
+	default:
153
+		return "unknown"
154
+	}
155
+}
156
+
157
+func executeOneGetRdmaNetnsMode(data []byte) (string, error) {
158
+	reader := bytes.NewReader(data)
159
+	for reader.Len() >= 4 {
160
+		_, attrType, len, value := parseNfAttrTLV(reader)
161
+
162
+		switch attrType {
163
+		case nl.RDMA_NLDEV_SYS_ATTR_NETNS_MODE:
164
+			var mode uint8
165
+			r := bytes.NewReader(value)
166
+			binary.Read(r, nl.NativeEndian(), &mode)
167
+			return netnsModeToString(mode), nil
168
+		}
169
+		if (len % 4) != 0 {
170
+			// Skip pad bytes
171
+			reader.Seek(int64(4-(len%4)), seekCurrent)
172
+		}
173
+	}
174
+	return "", fmt.Errorf("Invalid netns mode")
175
+}
176
+
177
+// RdmaSystemGetNetnsMode gets the net namespace mode for RDMA subsystem
178
+// Returns mode string and error status as nil on success or returns error
179
+// otherwise.
180
+// Equivalent to: `rdma system show netns`
181
+func RdmaSystemGetNetnsMode() (string, error) {
182
+	return pkgHandle.RdmaSystemGetNetnsMode()
183
+}
184
+
185
+// RdmaSystemGetNetnsMode gets the net namespace mode for RDMA subsystem
186
+// Returns mode string and error status as nil on success or returns error
187
+// otherwise.
188
+// Equivalent to: `rdma system show netns`
189
+func (h *Handle) RdmaSystemGetNetnsMode() (string, error) {
190
+
191
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SYS_GET)
192
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
193
+
194
+	msgs, err := req.Execute(unix.NETLINK_RDMA, 0)
195
+	if err != nil {
196
+		return "", err
197
+	}
198
+	if len(msgs) == 0 {
199
+		return "", fmt.Errorf("No valid response from kernel")
200
+	}
201
+	return executeOneGetRdmaNetnsMode(msgs[0])
202
+}
203
+
204
+func netnsModeStringToUint8(mode string) (uint8, error) {
205
+	switch mode {
206
+	case "exclusive":
207
+		return 0, nil
208
+	case "shared":
209
+		return 1, nil
210
+	default:
211
+		return 0, fmt.Errorf("Invalid mode; %q", mode)
212
+	}
213
+}
214
+
215
+// RdmaSystemSetNetnsMode sets the net namespace mode for RDMA subsystem
216
+// Returns nil on success or appropriate error code.
217
+// Equivalent to: `rdma system set netns { shared | exclusive }`
218
+func RdmaSystemSetNetnsMode(NewMode string) error {
219
+	return pkgHandle.RdmaSystemSetNetnsMode(NewMode)
220
+}
221
+
222
+// RdmaSystemSetNetnsMode sets the net namespace mode for RDMA subsystem
223
+// Returns nil on success or appropriate error code.
224
+// Equivalent to: `rdma system set netns { shared | exclusive }`
225
+func (h *Handle) RdmaSystemSetNetnsMode(NewMode string) error {
226
+	value, err := netnsModeStringToUint8(NewMode)
227
+	if err != nil {
228
+		return err
229
+	}
230
+
231
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SYS_SET)
232
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
233
+
234
+	data := nl.NewRtAttr(nl.RDMA_NLDEV_SYS_ATTR_NETNS_MODE, []byte{value})
235
+	req.AddData(data)
236
+
237
+	_, err = req.Execute(unix.NETLINK_RDMA, 0)
238
+	return err
239
+}
240
+
241
+// RdmaLinkSetNsFd puts the RDMA device into a new network namespace. The
242
+// fd must be an open file descriptor to a network namespace.
243
+// Similar to: `rdma dev set $dev netns $ns`
244
+func RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error {
245
+	return pkgHandle.RdmaLinkSetNsFd(link, fd)
246
+}
247
+
248
+// RdmaLinkSetNsFd puts the RDMA device into a new network namespace. The
249
+// fd must be an open file descriptor to a network namespace.
250
+// Similar to: `rdma dev set $dev netns $ns`
251
+func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error {
252
+	proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SET)
253
+	req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
254
+
255
+	data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX,
256
+		nl.Uint32Attr(link.Attrs.Index))
257
+	req.AddData(data)
258
+
259
+	data = nl.NewRtAttr(nl.RDMA_NLDEV_NET_NS_FD, nl.Uint32Attr(fd))
260
+	req.AddData(data)
261
+
262
+	return execRdmaSetLink(req)
263
+}
... ...
@@ -47,6 +47,7 @@ type Route struct {
47 47
 	Encap      Encap
48 48
 	MTU        int
49 49
 	AdvMSS     int
50
+	Hoplimit   int
50 51
 }
51 52
 
52 53
 func (r Route) String() string {
... ...
@@ -89,6 +90,7 @@ func (r Route) Equal(x Route) bool {
89 89
 		r.Table == x.Table &&
90 90
 		r.Type == x.Type &&
91 91
 		r.Tos == x.Tos &&
92
+		r.Hoplimit == x.Hoplimit &&
92 93
 		r.Flags == x.Flags &&
93 94
 		(r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) &&
94 95
 		(r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) &&
... ...
@@ -32,6 +32,7 @@ const (
32 32
 	RT_FILTER_SRC
33 33
 	RT_FILTER_GW
34 34
 	RT_FILTER_TABLE
35
+	RT_FILTER_HOPLIMIT
35 36
 )
36 37
 
37 38
 const (
... ...
@@ -207,6 +208,7 @@ func (e *SEG6Encap) Decode(buf []byte) error {
207 207
 	}
208 208
 	buf = buf[:l] // make sure buf size upper limit is Length
209 209
 	typ := native.Uint16(buf[2:])
210
+	// LWTUNNEL_ENCAP_SEG6 has only one attr type SEG6_IPTUNNEL_SRH
210 211
 	if typ != nl.SEG6_IPTUNNEL_SRH {
211 212
 		return fmt.Errorf("unknown SEG6 Type: %d", typ)
212 213
 	}
... ...
@@ -259,6 +261,188 @@ func (e *SEG6Encap) Equal(x Encap) bool {
259 259
 	return true
260 260
 }
261 261
 
262
+// SEG6LocalEncap definitions
263
+type SEG6LocalEncap struct {
264
+	Flags    [nl.SEG6_LOCAL_MAX]bool
265
+	Action   int
266
+	Segments []net.IP // from SRH in seg6_local_lwt
267
+	Table    int      // table id for End.T and End.DT6
268
+	InAddr   net.IP
269
+	In6Addr  net.IP
270
+	Iif      int
271
+	Oif      int
272
+}
273
+
274
+func (e *SEG6LocalEncap) Type() int {
275
+	return nl.LWTUNNEL_ENCAP_SEG6_LOCAL
276
+}
277
+func (e *SEG6LocalEncap) Decode(buf []byte) error {
278
+	attrs, err := nl.ParseRouteAttr(buf)
279
+	if err != nil {
280
+		return err
281
+	}
282
+	native := nl.NativeEndian()
283
+	for _, attr := range attrs {
284
+		switch attr.Attr.Type {
285
+		case nl.SEG6_LOCAL_ACTION:
286
+			e.Action = int(native.Uint32(attr.Value[0:4]))
287
+			e.Flags[nl.SEG6_LOCAL_ACTION] = true
288
+		case nl.SEG6_LOCAL_SRH:
289
+			e.Segments, err = nl.DecodeSEG6Srh(attr.Value[:])
290
+			e.Flags[nl.SEG6_LOCAL_SRH] = true
291
+		case nl.SEG6_LOCAL_TABLE:
292
+			e.Table = int(native.Uint32(attr.Value[0:4]))
293
+			e.Flags[nl.SEG6_LOCAL_TABLE] = true
294
+		case nl.SEG6_LOCAL_NH4:
295
+			e.InAddr = net.IP(attr.Value[0:4])
296
+			e.Flags[nl.SEG6_LOCAL_NH4] = true
297
+		case nl.SEG6_LOCAL_NH6:
298
+			e.In6Addr = net.IP(attr.Value[0:16])
299
+			e.Flags[nl.SEG6_LOCAL_NH6] = true
300
+		case nl.SEG6_LOCAL_IIF:
301
+			e.Iif = int(native.Uint32(attr.Value[0:4]))
302
+			e.Flags[nl.SEG6_LOCAL_IIF] = true
303
+		case nl.SEG6_LOCAL_OIF:
304
+			e.Oif = int(native.Uint32(attr.Value[0:4]))
305
+			e.Flags[nl.SEG6_LOCAL_OIF] = true
306
+		}
307
+	}
308
+	return err
309
+}
310
+func (e *SEG6LocalEncap) Encode() ([]byte, error) {
311
+	var err error
312
+	native := nl.NativeEndian()
313
+	res := make([]byte, 8)
314
+	native.PutUint16(res, 8) // length
315
+	native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION)
316
+	native.PutUint32(res[4:], uint32(e.Action))
317
+	if e.Flags[nl.SEG6_LOCAL_SRH] {
318
+		srh, err := nl.EncodeSEG6Srh(e.Segments)
319
+		if err != nil {
320
+			return nil, err
321
+		}
322
+		attr := make([]byte, 4)
323
+		native.PutUint16(attr, uint16(len(srh)+4))
324
+		native.PutUint16(attr[2:], nl.SEG6_LOCAL_SRH)
325
+		attr = append(attr, srh...)
326
+		res = append(res, attr...)
327
+	}
328
+	if e.Flags[nl.SEG6_LOCAL_TABLE] {
329
+		attr := make([]byte, 8)
330
+		native.PutUint16(attr, 8)
331
+		native.PutUint16(attr[2:], nl.SEG6_LOCAL_TABLE)
332
+		native.PutUint32(attr[4:], uint32(e.Table))
333
+		res = append(res, attr...)
334
+	}
335
+	if e.Flags[nl.SEG6_LOCAL_NH4] {
336
+		attr := make([]byte, 4)
337
+		native.PutUint16(attr, 8)
338
+		native.PutUint16(attr[2:], nl.SEG6_LOCAL_NH4)
339
+		ipv4 := e.InAddr.To4()
340
+		if ipv4 == nil {
341
+			err = fmt.Errorf("SEG6_LOCAL_NH4 has invalid IPv4 address")
342
+			return nil, err
343
+		}
344
+		attr = append(attr, ipv4...)
345
+		res = append(res, attr...)
346
+	}
347
+	if e.Flags[nl.SEG6_LOCAL_NH6] {
348
+		attr := make([]byte, 4)
349
+		native.PutUint16(attr, 20)
350
+		native.PutUint16(attr[2:], nl.SEG6_LOCAL_NH6)
351
+		attr = append(attr, e.In6Addr...)
352
+		res = append(res, attr...)
353
+	}
354
+	if e.Flags[nl.SEG6_LOCAL_IIF] {
355
+		attr := make([]byte, 8)
356
+		native.PutUint16(attr, 8)
357
+		native.PutUint16(attr[2:], nl.SEG6_LOCAL_IIF)
358
+		native.PutUint32(attr[4:], uint32(e.Iif))
359
+		res = append(res, attr...)
360
+	}
361
+	if e.Flags[nl.SEG6_LOCAL_OIF] {
362
+		attr := make([]byte, 8)
363
+		native.PutUint16(attr, 8)
364
+		native.PutUint16(attr[2:], nl.SEG6_LOCAL_OIF)
365
+		native.PutUint32(attr[4:], uint32(e.Oif))
366
+		res = append(res, attr...)
367
+	}
368
+	return res, err
369
+}
370
+func (e *SEG6LocalEncap) String() string {
371
+	strs := make([]string, 0, nl.SEG6_LOCAL_MAX)
372
+	strs = append(strs, fmt.Sprintf("action %s", nl.SEG6LocalActionString(e.Action)))
373
+
374
+	if e.Flags[nl.SEG6_LOCAL_TABLE] {
375
+		strs = append(strs, fmt.Sprintf("table %d", e.Table))
376
+	}
377
+	if e.Flags[nl.SEG6_LOCAL_NH4] {
378
+		strs = append(strs, fmt.Sprintf("nh4 %s", e.InAddr))
379
+	}
380
+	if e.Flags[nl.SEG6_LOCAL_NH6] {
381
+		strs = append(strs, fmt.Sprintf("nh6 %s", e.In6Addr))
382
+	}
383
+	if e.Flags[nl.SEG6_LOCAL_IIF] {
384
+		link, err := LinkByIndex(e.Iif)
385
+		if err != nil {
386
+			strs = append(strs, fmt.Sprintf("iif %d", e.Iif))
387
+		} else {
388
+			strs = append(strs, fmt.Sprintf("iif %s", link.Attrs().Name))
389
+		}
390
+	}
391
+	if e.Flags[nl.SEG6_LOCAL_OIF] {
392
+		link, err := LinkByIndex(e.Oif)
393
+		if err != nil {
394
+			strs = append(strs, fmt.Sprintf("oif %d", e.Oif))
395
+		} else {
396
+			strs = append(strs, fmt.Sprintf("oif %s", link.Attrs().Name))
397
+		}
398
+	}
399
+	if e.Flags[nl.SEG6_LOCAL_SRH] {
400
+		segs := make([]string, 0, len(e.Segments))
401
+		// append segments backwards (from index n-1 down to 0) since seg#0 is the last segment.
402
+		for i := len(e.Segments); i > 0; i-- {
403
+			segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1]))
404
+		}
405
+		strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " ")))
406
+	}
407
+	return strings.Join(strs, " ")
408
+}
409
+func (e *SEG6LocalEncap) Equal(x Encap) bool {
410
+	o, ok := x.(*SEG6LocalEncap)
411
+	if !ok {
412
+		return false
413
+	}
414
+	if e == o {
415
+		return true
416
+	}
417
+	if e == nil || o == nil {
418
+		return false
419
+	}
420
+	// compare all arrays first
421
+	for i := range e.Flags {
422
+		if e.Flags[i] != o.Flags[i] {
423
+			return false
424
+		}
425
+	}
426
+	if len(e.Segments) != len(o.Segments) {
427
+		return false
428
+	}
429
+	for i := range e.Segments {
430
+		if !e.Segments[i].Equal(o.Segments[i]) {
431
+			return false
432
+		}
433
+	}
434
+	// compare values
435
+	if !e.InAddr.Equal(o.InAddr) || !e.In6Addr.Equal(o.In6Addr) {
436
+		return false
437
+	}
438
+	if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif {
439
+		return false
440
+	}
441
+	return true
442
+}
443
+
262 444
 // RouteAdd will add a route to the system.
263 445
 // Equivalent to: `ip route add $route`
264 446
 func RouteAdd(route *Route) error {
... ...
@@ -335,18 +519,18 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
335 335
 		if err != nil {
336 336
 			return err
337 337
 		}
338
-		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_NEWDST, buf))
338
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_NEWDST, buf))
339 339
 	}
340 340
 
341 341
 	if route.Encap != nil {
342 342
 		buf := make([]byte, 2)
343 343
 		native.PutUint16(buf, uint16(route.Encap.Type()))
344
-		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
344
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf))
345 345
 		buf, err := route.Encap.Encode()
346 346
 		if err != nil {
347 347
 			return err
348 348
 		}
349
-		rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP, buf))
349
+		rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf))
350 350
 	}
351 351
 
352 352
 	if route.Src != nil {
... ...
@@ -410,17 +594,17 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
410 410
 				if err != nil {
411 411
 					return err
412 412
 				}
413
-				children = append(children, nl.NewRtAttr(nl.RTA_NEWDST, buf))
413
+				children = append(children, nl.NewRtAttr(unix.RTA_NEWDST, buf))
414 414
 			}
415 415
 			if nh.Encap != nil {
416 416
 				buf := make([]byte, 2)
417 417
 				native.PutUint16(buf, uint16(nh.Encap.Type()))
418
-				rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf))
418
+				children = append(children, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf))
419 419
 				buf, err := nh.Encap.Encode()
420 420
 				if err != nil {
421 421
 					return err
422 422
 				}
423
-				children = append(children, nl.NewRtAttr(nl.RTA_ENCAP, buf))
423
+				children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf))
424 424
 			}
425 425
 			rtnh.Children = children
426 426
 			buf = append(buf, rtnh.Serialize()...)
... ...
@@ -464,6 +648,10 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
464 464
 		b := nl.Uint32Attr(uint32(route.AdvMSS))
465 465
 		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b))
466 466
 	}
467
+	if route.Hoplimit > 0 {
468
+		b := nl.Uint32Attr(uint32(route.Hoplimit))
469
+		metrics = append(metrics, nl.NewRtAttr(unix.RTAX_HOPLIMIT, b))
470
+	}
467 471
 
468 472
 	if metrics != nil {
469 473
 		attr := nl.NewRtAttr(unix.RTA_METRICS, nil)
... ...
@@ -574,6 +762,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
574 574
 						continue
575 575
 					}
576 576
 				}
577
+			case filterMask&RT_FILTER_HOPLIMIT != 0 && route.Hoplimit != filter.Hoplimit:
578
+				continue
577 579
 			}
578 580
 		}
579 581
 		res = append(res, route)
... ...
@@ -649,7 +839,7 @@ func deserializeRoute(m []byte) (Route, error) {
649 649
 					switch attr.Attr.Type {
650 650
 					case unix.RTA_GATEWAY:
651 651
 						info.Gw = net.IP(attr.Value)
652
-					case nl.RTA_NEWDST:
652
+					case unix.RTA_NEWDST:
653 653
 						var d Destination
654 654
 						switch msg.Family {
655 655
 						case nl.FAMILY_MPLS:
... ...
@@ -659,9 +849,9 @@ func deserializeRoute(m []byte) (Route, error) {
659 659
 							return nil, nil, err
660 660
 						}
661 661
 						info.NewDst = d
662
-					case nl.RTA_ENCAP_TYPE:
662
+					case unix.RTA_ENCAP_TYPE:
663 663
 						encapType = attr
664
-					case nl.RTA_ENCAP:
664
+					case unix.RTA_ENCAP:
665 665
 						encap = attr
666 666
 					}
667 667
 				}
... ...
@@ -690,7 +880,7 @@ func deserializeRoute(m []byte) (Route, error) {
690 690
 				route.MultiPath = append(route.MultiPath, info)
691 691
 				rest = buf
692 692
 			}
693
-		case nl.RTA_NEWDST:
693
+		case unix.RTA_NEWDST:
694 694
 			var d Destination
695 695
 			switch msg.Family {
696 696
 			case nl.FAMILY_MPLS:
... ...
@@ -700,9 +890,9 @@ func deserializeRoute(m []byte) (Route, error) {
700 700
 				return route, err
701 701
 			}
702 702
 			route.NewDst = d
703
-		case nl.RTA_ENCAP_TYPE:
703
+		case unix.RTA_ENCAP_TYPE:
704 704
 			encapType = attr
705
-		case nl.RTA_ENCAP:
705
+		case unix.RTA_ENCAP:
706 706
 			encap = attr
707 707
 		case unix.RTA_METRICS:
708 708
 			metrics, err := nl.ParseRouteAttr(attr.Value)
... ...
@@ -715,6 +905,8 @@ func deserializeRoute(m []byte) (Route, error) {
715 715
 					route.MTU = int(native.Uint32(metric.Value[0:4]))
716 716
 				case unix.RTAX_ADVMSS:
717 717
 					route.AdvMSS = int(native.Uint32(metric.Value[0:4]))
718
+				case unix.RTAX_HOPLIMIT:
719
+					route.Hoplimit = int(native.Uint32(metric.Value[0:4]))
718 720
 				}
719 721
 			}
720 722
 		}
... ...
@@ -734,6 +926,11 @@ func deserializeRoute(m []byte) (Route, error) {
734 734
 			if err := e.Decode(encap.Value); err != nil {
735 735
 				return route, err
736 736
 			}
737
+		case nl.LWTUNNEL_ENCAP_SEG6_LOCAL:
738
+			e = &SEG6LocalEncap{}
739
+			if err := e.Decode(encap.Value); err != nil {
740
+				return route, err
741
+			}
737 742
 		}
738 743
 		route.Encap = e
739 744
 	}
... ...
@@ -840,13 +1037,19 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
840 840
 	go func() {
841 841
 		defer close(ch)
842 842
 		for {
843
-			msgs, err := s.Receive()
843
+			msgs, from, err := s.Receive()
844 844
 			if err != nil {
845 845
 				if cberr != nil {
846 846
 					cberr(err)
847 847
 				}
848 848
 				return
849 849
 			}
850
+			if from.Pid != nl.PidKernel {
851
+				if cberr != nil {
852
+					cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel))
853
+				}
854
+				continue
855
+			}
850 856
 			for _, m := range msgs {
851 857
 				if m.Header.Type == unix.NLMSG_DONE {
852 858
 					continue
... ...
@@ -144,7 +144,7 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
144 144
 		req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName)))
145 145
 	}
146 146
 	if rule.Goto >= 0 {
147
-		msg.Type = nl.FR_ACT_NOP
147
+		msg.Type = nl.FR_ACT_GOTO
148 148
 		b := make([]byte, 4)
149 149
 		native.PutUint32(b, uint32(rule.Goto))
150 150
 		req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b))
... ...
@@ -141,10 +141,13 @@ func SocketGet(local, remote net.Addr) (*Socket, error) {
141 141
 		},
142 142
 	})
143 143
 	s.Send(req)
144
-	msgs, err := s.Receive()
144
+	msgs, from, err := s.Receive()
145 145
 	if err != nil {
146 146
 		return nil, err
147 147
 	}
148
+	if from.Pid != nl.PidKernel {
149
+		return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
150
+	}
148 151
 	if len(msgs) == 0 {
149 152
 		return nil, errors.New("no message nor error from netlink")
150 153
 	}
... ...
@@ -54,11 +54,15 @@ func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error
54 54
 	go func() {
55 55
 		defer close(ch)
56 56
 		for {
57
-			msgs, err := s.Receive()
57
+			msgs, from, err := s.Receive()
58 58
 			if err != nil {
59 59
 				errorChan <- err
60 60
 				return
61 61
 			}
62
+			if from.Pid != nl.PidKernel {
63
+				errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
64
+				return
65
+			}
62 66
 			for _, m := range msgs {
63 67
 				switch m.Header.Type {
64 68
 				case nl.XFRM_MSG_EXPIRE:
... ...
@@ -35,6 +35,25 @@ func (d Dir) String() string {
35 35
 	return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN)
36 36
 }
37 37
 
38
+// PolicyAction is an enum representing an ipsec policy action.
39
+type PolicyAction uint8
40
+
41
+const (
42
+	XFRM_POLICY_ALLOW PolicyAction = 0
43
+	XFRM_POLICY_BLOCK PolicyAction = 1
44
+)
45
+
46
+func (a PolicyAction) String() string {
47
+	switch a {
48
+	case XFRM_POLICY_ALLOW:
49
+		return "allow"
50
+	case XFRM_POLICY_BLOCK:
51
+		return "block"
52
+	default:
53
+		return fmt.Sprintf("action %d", a)
54
+	}
55
+}
56
+
38 57
 // XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec
39 58
 // policy. These rules are matched with XfrmState to determine encryption
40 59
 // and authentication algorithms.
... ...
@@ -64,11 +83,14 @@ type XfrmPolicy struct {
64 64
 	Dir      Dir
65 65
 	Priority int
66 66
 	Index    int
67
+	Action   PolicyAction
68
+	Ifindex  int
69
+	Ifid     int
67 70
 	Mark     *XfrmMark
68 71
 	Tmpls    []XfrmPolicyTmpl
69 72
 }
70 73
 
71 74
 func (p XfrmPolicy) String() string {
72
-	return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Mark: %s, Tmpls: %s}",
73
-		p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Mark, p.Tmpls)
75
+	return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}",
76
+		p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls)
74 77
 }
... ...
@@ -27,6 +27,7 @@ func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) {
27 27
 	if sel.Sport != 0 {
28 28
 		sel.SportMask = ^uint16(0)
29 29
 	}
30
+	sel.Ifindex = int32(policy.Ifindex)
30 31
 }
31 32
 
32 33
 // XfrmPolicyAdd will add an xfrm policy to the system.
... ...
@@ -61,6 +62,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
61 61
 	msg.Priority = uint32(policy.Priority)
62 62
 	msg.Index = uint32(policy.Index)
63 63
 	msg.Dir = uint8(policy.Dir)
64
+	msg.Action = uint8(policy.Action)
64 65
 	msg.Lft.SoftByteLimit = nl.XFRM_INF
65 66
 	msg.Lft.HardByteLimit = nl.XFRM_INF
66 67
 	msg.Lft.SoftPacketLimit = nl.XFRM_INF
... ...
@@ -90,6 +92,9 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
90 90
 		req.AddData(out)
91 91
 	}
92 92
 
93
+	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
94
+	req.AddData(ifId)
95
+
93 96
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
94 97
 	return err
95 98
 }
... ...
@@ -183,6 +188,9 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo
183 183
 		req.AddData(out)
184 184
 	}
185 185
 
186
+	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid)))
187
+	req.AddData(ifId)
188
+
186 189
 	resType := nl.XFRM_MSG_NEWPOLICY
187 190
 	if nlProto == nl.XFRM_MSG_DELPOLICY {
188 191
 		resType = 0
... ...
@@ -197,12 +205,7 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo
197 197
 		return nil, err
198 198
 	}
199 199
 
200
-	p, err := parseXfrmPolicy(msgs[0], FAMILY_ALL)
201
-	if err != nil {
202
-		return nil, err
203
-	}
204
-
205
-	return p, nil
200
+	return parseXfrmPolicy(msgs[0], FAMILY_ALL)
206 201
 }
207 202
 
208 203
 func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) {
... ...
@@ -220,9 +223,11 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) {
220 220
 	policy.Proto = Proto(msg.Sel.Proto)
221 221
 	policy.DstPort = int(nl.Swap16(msg.Sel.Dport))
222 222
 	policy.SrcPort = int(nl.Swap16(msg.Sel.Sport))
223
+	policy.Ifindex = int(msg.Sel.Ifindex)
223 224
 	policy.Priority = int(msg.Priority)
224 225
 	policy.Index = int(msg.Index)
225 226
 	policy.Dir = Dir(msg.Dir)
227
+	policy.Action = PolicyAction(msg.Action)
226 228
 
227 229
 	attrs, err := nl.ParseRouteAttr(m[msg.Len():])
228 230
 	if err != nil {
... ...
@@ -249,6 +254,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) {
249 249
 			policy.Mark = new(XfrmMark)
250 250
 			policy.Mark.Value = mark.Value
251 251
 			policy.Mark.Mask = mark.Mask
252
+		case nl.XFRMA_IF_ID:
253
+			policy.Ifid = int(native.Uint32(attr.Value))
252 254
 		}
253 255
 	}
254 256
 
... ...
@@ -94,6 +94,8 @@ type XfrmState struct {
94 94
 	Limits       XfrmStateLimits
95 95
 	Statistics   XfrmStateStats
96 96
 	Mark         *XfrmMark
97
+	OutputMark   int
98
+	Ifid         int
97 99
 	Auth         *XfrmStateAlgo
98 100
 	Crypt        *XfrmStateAlgo
99 101
 	Aead         *XfrmStateAlgo
... ...
@@ -102,8 +104,8 @@ type XfrmState struct {
102 102
 }
103 103
 
104 104
 func (sa XfrmState) String() string {
105
-	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
106
-		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN)
105
+	return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %d, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t",
106
+		sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN)
107 107
 }
108 108
 func (sa XfrmState) Print(stats bool) string {
109 109
 	if !stats {
... ...
@@ -158,6 +158,13 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
158 158
 		out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow))
159 159
 		req.AddData(out)
160 160
 	}
161
+	if state.OutputMark != 0 {
162
+		out := nl.NewRtAttr(nl.XFRMA_OUTPUT_MARK, nl.Uint32Attr(uint32(state.OutputMark)))
163
+		req.AddData(out)
164
+	}
165
+
166
+	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
167
+	req.AddData(ifId)
161 168
 
162 169
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
163 170
 	return err
... ...
@@ -184,12 +191,7 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
184 184
 		return nil, err
185 185
 	}
186 186
 
187
-	s, err := parseXfrmState(msgs[0], FAMILY_ALL)
188
-	if err != nil {
189
-		return nil, err
190
-	}
191
-
192
-	return s, err
187
+	return parseXfrmState(msgs[0], FAMILY_ALL)
193 188
 }
194 189
 
195 190
 // XfrmStateDel will delete an xfrm state from the system. Note that
... ...
@@ -275,6 +277,9 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState
275 275
 		req.AddData(out)
276 276
 	}
277 277
 
278
+	ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid)))
279
+	req.AddData(ifId)
280
+
278 281
 	resType := nl.XFRM_MSG_NEWSA
279 282
 	if nlProto == nl.XFRM_MSG_DELSA {
280 283
 		resType = 0
... ...
@@ -372,6 +377,10 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) {
372 372
 			state.Mark = new(XfrmMark)
373 373
 			state.Mark.Value = mark.Value
374 374
 			state.Mark.Mask = mark.Mask
375
+		case nl.XFRMA_OUTPUT_MARK:
376
+			state.OutputMark = int(native.Uint32(attr.Value))
377
+		case nl.XFRMA_IF_ID:
378
+			state.Ifid = int(native.Uint32(attr.Value))
375 379
 		}
376 380
 	}
377 381
 
... ...
@@ -394,11 +403,7 @@ func (h *Handle) XfrmStateFlush(proto Proto) error {
394 394
 	req.AddData(&nl.XfrmUsersaFlush{Proto: uint8(proto)})
395 395
 
396 396
 	_, err := req.Execute(unix.NETLINK_XFRM, 0)
397
-	if err != nil {
398
-		return err
399
-	}
400
-
401
-	return nil
397
+	return err
402 398
 }
403 399
 
404 400
 func limitsToLft(lmts XfrmStateLimits, lft *nl.XfrmLifetimeCfg) {
405 401
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+module github.com/vishvananda/netns
1
+
2
+go 1.12