Browse code

Vendoring libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2015/12/08 08:46:14
Showing 34 changed files
... ...
@@ -22,14 +22,14 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
22 22
 clone git golang.org/x/net 47990a1ba55743e6ef1affd3a14e5bac8553615d https://github.com/golang/net.git
23 23
 
24 24
 #get libnetwork packages
25
-clone git github.com/docker/libnetwork 04cc1fa0a89f8c407b7be8cab883d4b17531ea7d
25
+clone git github.com/docker/libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90
26 26
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
27 27
 clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
28 28
 clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
29 29
 clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2
30 30
 clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410
31 31
 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
32
-clone git github.com/vishvananda/netlink 4b5dce31de6d42af5bb9811c6d265472199e0fec
32
+clone git github.com/vishvananda/netlink 8e810149a2e531fed9b837c0c7d8a8922d2bedf7
33 33
 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
34 34
 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
35 35
 clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d
... ...
@@ -569,6 +569,9 @@ func (s *DockerDaemonSuite) TestDockerNetworkNoDiscoveryDefaultBridgeNetwork(c *
569 569
 	out, err = s.d.Cmd("network", "connect", network, cid1)
570 570
 	c.Assert(err, check.IsNil, check.Commentf(out))
571 571
 
572
+	hosts, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
573
+	c.Assert(err, checker.IsNil)
574
+
572 575
 	hostsPost, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
573 576
 	c.Assert(err, checker.IsNil)
574 577
 	c.Assert(string(hosts), checker.Equals, string(hostsPost),
... ...
@@ -631,6 +634,9 @@ func (s *DockerNetworkSuite) TestDockerNetworkAnonymousEndpoint(c *check.C) {
631 631
 
632 632
 	dockerCmd(c, "network", "connect", cstmBridgeNw1, cid2)
633 633
 
634
+	hosts2, err = readContainerFileWithExec(cid2, hostsFile)
635
+	c.Assert(err, checker.IsNil)
636
+
634 637
 	hosts1post, err = readContainerFileWithExec(cid1, hostsFile)
635 638
 	c.Assert(err, checker.IsNil)
636 639
 	c.Assert(string(hosts1), checker.Equals, string(hosts1post),
... ...
@@ -1,5 +1,52 @@
1
-Alessandro Boch <aboch@docker.com> (@aboch)
2
-Alexandr Morozov <lk4d4@docker.com> (@LK4D4)
3
-Arnaud Porterie <arnaud@docker.com> (@icecrime)
4
-Jana Radhakrishnan <mrjana@docker.com> (@mrjana)
5
-Madhu Venugopal <madhu@docker.com> (@mavenugo)
1
+# Libnetwork maintainers file
2
+#
3
+# This file describes who runs the docker/libnetwork project and how.
4
+# This is a living document - if you see something out of date or missing, speak up!
5
+#
6
+# It is structured to be consumable by both humans and programs.
7
+# To extract its contents programmatically, use any TOML-compliant parser.
8
+#
9
+# This file is compiled into the MAINTAINERS file in docker/opensource.
10
+#
11
+[Org]
12
+	[Org."Core maintainers"]
13
+		people = [
14
+			"aboch",
15
+			"LK4D4",
16
+			"icecrime",
17
+			"mrjana",
18
+			"mavenugo",
19
+		]
20
+
21
+[people]
22
+
23
+# A reference list of all people associated with the project.
24
+# All other sections should refer to people by their canonical key
25
+# in the people section.
26
+
27
+	# ADD YOURSELF HERE IN ALPHABETICAL ORDER
28
+
29
+	[people.aboch]
30
+	Name = "Alessandro Boch"
31
+	Email = "aboch@docker.com"
32
+	GitHub = "aboch"
33
+
34
+	[people.LK4D4]
35
+	Name = "Alexandr Morozov"
36
+	Email = "lk4d4@docker.com"
37
+	GitHub = "LK4D4"
38
+
39
+	[people.icecrime]
40
+	Name = "Arnaud Porterie"
41
+	Email = "arnaud@docker.com"
42
+	GitHub = "icecrime"
43
+
44
+	[people.mrjana]
45
+	Name = "Jana Radhakrishnan"
46
+	Email = "mrjana@docker.com"
47
+	GitHub = "mrjana"
48
+
49
+	[people.mavenugo]
50
+	Name = "Madhu Venugopal"
51
+	Email = "madhu@docker.com"
52
+	GitHub = "mavenugo"
... ...
@@ -6,8 +6,6 @@ Libnetwork provides a native Go implementation for connecting containers
6 6
 
7 7
 The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications.
8 8
 
9
-**NOTE**: libnetwork project is under heavy development and is not ready for general use.
10
-
11 9
 #### Design
12 10
 Please refer to the [design](docs/design.md) for more information.
13 11
 
... ...
@@ -67,10 +65,6 @@ There are many networking solutions available to suit a broad range of use-cases
67 67
 		}
68 68
 	}
69 69
 ```
70
-#### Current Status
71
-Please watch this space for updates on the progress.
72
-
73
-Currently libnetwork is nothing more than an attempt to modularize the Docker platform's networking subsystem by moving it into libnetwork as a library.
74 70
 
75 71
 ## Future
76 72
 Please refer to [roadmap](ROADMAP.md) for more information.
... ...
@@ -24,7 +24,10 @@ const (
24 24
 )
25 25
 
26 26
 var (
27
-	errNoBitAvailable = fmt.Errorf("no bit available")
27
+	// ErrNoBitAvailable is returned when no more bits are available to set
28
+	ErrNoBitAvailable = fmt.Errorf("no bit available")
29
+	// ErrBitAllocated is returned when the specific bit requested is already set
30
+	ErrBitAllocated = fmt.Errorf("requested bit is already allocated")
28 31
 )
29 32
 
30 33
 // Handle contains the sequece representing the bitmask and its identifier
... ...
@@ -94,7 +97,7 @@ func (s *sequence) toString() string {
94 94
 // GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence
95 95
 func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) {
96 96
 	if s.block == blockMAX || s.count == 0 {
97
-		return invalidPos, invalidPos, errNoBitAvailable
97
+		return invalidPos, invalidPos, ErrNoBitAvailable
98 98
 	}
99 99
 	bits := from
100 100
 	bitSel := blockFirstBit >> from
... ...
@@ -197,7 +200,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
197 197
 		return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end)
198 198
 	}
199 199
 	if h.Unselected() == 0 {
200
-		return invalidPos, errNoBitAvailable
200
+		return invalidPos, ErrNoBitAvailable
201 201
 	}
202 202
 	return h.set(0, start, end, true, false)
203 203
 }
... ...
@@ -205,7 +208,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
205 205
 // SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal
206 206
 func (h *Handle) SetAny() (uint64, error) {
207 207
 	if h.Unselected() == 0 {
208
-		return invalidPos, errNoBitAvailable
208
+		return invalidPos, ErrNoBitAvailable
209 209
 	}
210 210
 	return h.set(0, 0, h.bits-1, true, false)
211 211
 }
... ...
@@ -250,8 +253,12 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
250 250
 	)
251 251
 
252 252
 	for {
253
-		if h.store != nil {
254
-			if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
253
+		var store datastore.DataStore
254
+		h.Lock()
255
+		store = h.store
256
+		h.Unlock()
257
+		if store != nil {
258
+			if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
255 259
 				return ret, err
256 260
 			}
257 261
 		}
... ...
@@ -265,7 +272,7 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
265 265
 				bytePos, bitPos, err = getFirstAvailable(h.head, start)
266 266
 				ret = posToOrdinal(bytePos, bitPos)
267 267
 				if end < ret {
268
-					err = errNoBitAvailable
268
+					err = ErrNoBitAvailable
269 269
 				}
270 270
 			} else {
271 271
 				bytePos, bitPos, err = checkIfAvailable(h.head, ordinal)
... ...
@@ -445,7 +452,7 @@ func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) {
445 445
 		byteOffset += current.count * blockBytes
446 446
 		current = current.next
447 447
 	}
448
-	return invalidPos, invalidPos, errNoBitAvailable
448
+	return invalidPos, invalidPos, ErrNoBitAvailable
449 449
 }
450 450
 
451 451
 // checkIfAvailable checks if the bit correspondent to the specified ordinal is unset
... ...
@@ -463,7 +470,7 @@ func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) {
463 463
 		}
464 464
 	}
465 465
 
466
-	return invalidPos, invalidPos, fmt.Errorf("requested bit is not available")
466
+	return invalidPos, invalidPos, ErrBitAllocated
467 467
 }
468 468
 
469 469
 // Given the byte position and the sequences list head, return the pointer to the
... ...
@@ -4,7 +4,7 @@ machine:
4 4
 
5 5
 dependencies:
6 6
   override:
7
-    - sudo apt-get update; sudo apt-get install -y iptables zookeeperd
7
+    - sudo apt-get update; sudo apt-get install -y iptables zookeeperd 
8 8
     - go get golang.org/x/tools/cmd/vet
9 9
     - go get golang.org/x/tools/cmd/goimports
10 10
     - go get golang.org/x/tools/cmd/cover
... ...
@@ -315,17 +315,17 @@ func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error
315 315
 	_, ok := c.ipamDrivers[name]
316 316
 	c.Unlock()
317 317
 	if ok {
318
-		return driverapi.ErrActiveRegistration(name)
318
+		return types.ForbiddenErrorf("ipam driver %q already registered", name)
319 319
 	}
320 320
 	locAS, glbAS, err := driver.GetDefaultAddressSpaces()
321 321
 	if err != nil {
322
-		return fmt.Errorf("ipam driver %s failed to return default address spaces: %v", name, err)
322
+		return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err)
323 323
 	}
324 324
 	c.Lock()
325 325
 	c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS}
326 326
 	c.Unlock()
327 327
 
328
-	log.Debugf("Registering ipam provider: %s", name)
328
+	log.Debugf("Registering ipam driver: %q", name)
329 329
 
330 330
 	return nil
331 331
 }
... ...
@@ -667,7 +667,7 @@ func (c *controller) loadIpamDriver(name string) (*ipamData, error) {
667 667
 	id, ok := c.ipamDrivers[name]
668 668
 	c.Unlock()
669 669
 	if !ok {
670
-		return nil, ErrInvalidNetworkDriver(name)
670
+		return nil, types.BadRequestErrorf("invalid ipam driver: %q", name)
671 671
 	}
672 672
 	return id, nil
673 673
 }
... ...
@@ -103,10 +103,18 @@ func (sb *sandbox) needDefaultGW() bool {
103 103
 		if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
104 104
 			continue
105 105
 		}
106
+		if ep.joinInfo.disableGatewayService {
107
+			return false
108
+		}
106 109
 		// TODO v6 needs to be handled.
107 110
 		if len(ep.Gateway()) > 0 {
108 111
 			return false
109 112
 		}
113
+		for _, r := range ep.StaticRoutes() {
114
+			if r.Destination.String() == "0.0.0.0/0" {
115
+				return false
116
+			}
117
+		}
110 118
 		needGW = true
111 119
 	}
112 120
 	return needGW
... ...
@@ -91,6 +91,9 @@ type JoinInfo interface {
91 91
 	// AddStaticRoute adds a routes to the sandbox.
92 92
 	// It may be used in addtion to or instead of a default gateway (as above).
93 93
 	AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error
94
+
95
+	// DisableGatewayService tells libnetwork not to provide Default GW for the container
96
+	DisableGatewayService()
94 97
 }
95 98
 
96 99
 // DriverCallback provides a Callback interface for Drivers into LibNetwork
... ...
@@ -151,6 +151,10 @@ func (d *driver) processQuery(q *serf.Query) {
151 151
 }
152 152
 
153 153
 func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
154
+	if d.serfInstance == nil {
155
+		return nil, nil, nil, fmt.Errorf("could not resolve peer: serf instance not initialized")
156
+	}
157
+
154 158
 	qPayload := fmt.Sprintf("%s %s", string(nid), peerIP.String())
155 159
 	resp, err := d.serfInstance.Query("peerlookup", []byte(qPayload), nil)
156 160
 	if err != nil {
... ...
@@ -134,10 +134,11 @@ type StaticRoute struct {
134 134
 // JoinResponse is the response to a JoinRequest.
135 135
 type JoinResponse struct {
136 136
 	Response
137
-	InterfaceName *InterfaceName
138
-	Gateway       string
139
-	GatewayIPv6   string
140
-	StaticRoutes  []StaticRoute
137
+	InterfaceName         *InterfaceName
138
+	Gateway               string
139
+	GatewayIPv6           string
140
+	StaticRoutes          []StaticRoute
141
+	DisableGatewayService bool
141 142
 }
142 143
 
143 144
 // LeaveRequest describes the API for detaching an endpoint from a sandbox.
... ...
@@ -231,6 +231,9 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
231 231
 			}
232 232
 		}
233 233
 	}
234
+	if res.DisableGatewayService {
235
+		jinfo.DisableGatewayService()
236
+	}
234 237
 	return nil
235 238
 }
236 239
 
... ...
@@ -60,6 +60,8 @@ type endpoint struct {
60 60
 	anonymous     bool
61 61
 	generic       map[string]interface{}
62 62
 	joinLeaveDone chan struct{}
63
+	prefAddress   net.IP
64
+	ipamOptions   map[string]string
63 65
 	dbIndex       uint64
64 66
 	dbExists      bool
65 67
 	sync.Mutex
... ...
@@ -386,6 +388,9 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
386 386
 		}
387 387
 	}()
388 388
 
389
+	// Watch for service records
390
+	network.getController().watchSvcRecord(ep)
391
+
389 392
 	address := ""
390 393
 	if ip := ep.getFirstInterfaceAddress(); ip != nil {
391 394
 		address = ip.String()
... ...
@@ -394,9 +399,6 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
394 394
 		return err
395 395
 	}
396 396
 
397
-	// Watch for service records
398
-	network.getController().watchSvcRecord(ep)
399
-
400 397
 	if err = sb.updateDNS(network.enableIPv6); err != nil {
401 398
 		return err
402 399
 	}
... ...
@@ -559,7 +561,7 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error {
559 559
 
560 560
 	sb.deleteHostsEntries(n.getSvcRecords(ep))
561 561
 
562
-	if sb.needDefaultGW() {
562
+	if !sb.inDelete && sb.needDefaultGW() {
563 563
 		ep := sb.getEPwithoutGateway()
564 564
 		if ep == nil {
565 565
 			return fmt.Errorf("endpoint without GW expected, but not found")
... ...
@@ -685,6 +687,14 @@ func EndpointOptionGeneric(generic map[string]interface{}) EndpointOption {
685 685
 	}
686 686
 }
687 687
 
688
+// CreateOptionIpam function returns an option setter for the ipam configuration for this endpoint
689
+func CreateOptionIpam(prefAddress net.IP, ipamOptions map[string]string) EndpointOption {
690
+	return func(ep *endpoint) {
691
+		ep.prefAddress = prefAddress
692
+		ep.ipamOptions = ipamOptions
693
+	}
694
+}
695
+
688 696
 // CreateOptionExposedPorts function returns an option setter for the container exposed
689 697
 // ports option to be passed to network.CreateEndpoint() method.
690 698
 func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption {
... ...
@@ -799,7 +809,7 @@ func (ep *endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error {
799 799
 		if *address != nil {
800 800
 			prefIP = (*address).IP
801 801
 		}
802
-		addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, nil)
802
+		addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, ep.ipamOptions)
803 803
 		if err == nil {
804 804
 			ep.Lock()
805 805
 			*address = addr
... ...
@@ -25,6 +25,10 @@ type EndpointInfo interface {
25 25
 	// This will only return a valid value if a container has joined the endpoint.
26 26
 	GatewayIPv6() net.IP
27 27
 
28
+	// StaticRoutes returns the list of static routes configured by the network
29
+	// driver when the container joins a network
30
+	StaticRoutes() []*types.StaticRoute
31
+
28 32
 	// Sandbox returns the attached sandbox if there, nil otherwise.
29 33
 	Sandbox() Sandbox
30 34
 }
... ...
@@ -136,9 +140,10 @@ func (epi *endpointInterface) CopyTo(dstEpi *endpointInterface) error {
136 136
 }
137 137
 
138 138
 type endpointJoinInfo struct {
139
-	gw           net.IP
140
-	gw6          net.IP
141
-	StaticRoutes []*types.StaticRoute
139
+	gw                    net.IP
140
+	gw6                   net.IP
141
+	StaticRoutes          []*types.StaticRoute
142
+	disableGatewayService bool
142 143
 }
143 144
 
144 145
 func (ep *endpoint) Info() EndpointInfo {
... ...
@@ -295,6 +300,17 @@ func (ep *endpoint) Sandbox() Sandbox {
295 295
 	return cnt
296 296
 }
297 297
 
298
+func (ep *endpoint) StaticRoutes() []*types.StaticRoute {
299
+	ep.Lock()
300
+	defer ep.Unlock()
301
+
302
+	if ep.joinInfo == nil {
303
+		return nil
304
+	}
305
+
306
+	return ep.joinInfo.StaticRoutes
307
+}
308
+
298 309
 func (ep *endpoint) Gateway() net.IP {
299 310
 	ep.Lock()
300 311
 	defer ep.Unlock()
... ...
@@ -340,3 +356,10 @@ func (ep *endpoint) retrieveFromStore() (*endpoint, error) {
340 340
 	}
341 341
 	return n.getEndpointFromStore(ep.ID())
342 342
 }
343
+
344
+func (ep *endpoint) DisableGatewayService() {
345
+	ep.Lock()
346
+	defer ep.Unlock()
347
+
348
+	ep.joinInfo.disableGatewayService = true
349
+}
... ...
@@ -1,6 +1,7 @@
1 1
 package etchosts
2 2
 
3 3
 import (
4
+	"bufio"
4 5
 	"bytes"
5 6
 	"fmt"
6 7
 	"io"
... ...
@@ -138,19 +139,36 @@ func Delete(path string, recs []Record) error {
138 138
 	if len(recs) == 0 {
139 139
 		return nil
140 140
 	}
141
-
142
-	old, err := ioutil.ReadFile(path)
141
+	old, err := os.Open(path)
143 142
 	if err != nil {
144 143
 		return err
145 144
 	}
146 145
 
147
-	regexpStr := fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(recs[0].Hosts))
148
-	for _, r := range recs[1:] {
149
-		regexpStr = regexpStr + "|" + fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(r.Hosts))
146
+	var buf bytes.Buffer
147
+
148
+	s := bufio.NewScanner(old)
149
+	eol := []byte{'\n'}
150
+loop:
151
+	for s.Scan() {
152
+		b := s.Bytes()
153
+		if b[0] == '#' {
154
+			buf.Write(b)
155
+			buf.Write(eol)
156
+			continue
157
+		}
158
+		for _, r := range recs {
159
+			if bytes.HasSuffix(b, []byte("\t"+r.Hosts)) {
160
+				continue loop
161
+			}
162
+		}
163
+		buf.Write(b)
164
+		buf.Write(eol)
150 165
 	}
151
-
152
-	var re = regexp.MustCompile(regexpStr)
153
-	return ioutil.WriteFile(path, re.ReplaceAll(old, []byte("")), 0644)
166
+	old.Close()
167
+	if err := s.Err(); err != nil {
168
+		return err
169
+	}
170
+	return ioutil.WriteFile(path, buf.Bytes(), 0644)
154 171
 }
155 172
 
156 173
 // Update all IP addresses where hostname matches.
... ...
@@ -76,8 +76,7 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) {
76 76
 func (a *Allocator) refresh(as string) error {
77 77
 	aSpace, err := a.getAddressSpaceFromStore(as)
78 78
 	if err != nil {
79
-		return fmt.Errorf("error getting pools config from store during init: %v",
80
-			err)
79
+		return types.InternalErrorf("error getting pools config from store: %v", err)
81 80
 	}
82 81
 
83 82
 	if aSpace == nil {
... ...
@@ -239,7 +238,7 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error {
239 239
 
240 240
 	store := a.getStore(key.AddressSpace)
241 241
 	if store == nil {
242
-		return fmt.Errorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
242
+		return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
243 243
 	}
244 244
 
245 245
 	ipVer := getAddressVersion(pool.IP)
... ...
@@ -279,7 +278,7 @@ func (a *Allocator) retrieveBitmask(k SubnetKey, n *net.IPNet) (*bitseq.Handle,
279 279
 	if !ok {
280 280
 		log.Debugf("Retrieving bitmask (%s, %s)", k.String(), n.String())
281 281
 		if err := a.insertBitMask(k, n); err != nil {
282
-			return nil, fmt.Errorf("could not find bitmask in datastore for %s", k.String())
282
+			return nil, types.InternalErrorf("could not find bitmask in datastore for %s", k.String())
283 283
 		}
284 284
 		a.Lock()
285 285
 		bm = a.addresses[k]
... ...
@@ -306,7 +305,7 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error)
306 306
 	}
307 307
 
308 308
 	if as != localAddressSpace && as != globalAddressSpace {
309
-		return nil, fmt.Errorf("no default pool availbale for non-default addresss spaces")
309
+		return nil, types.NotImplementedErrorf("no default pool availbale for non-default addresss spaces")
310 310
 	}
311 311
 
312 312
 	aSpace, err := a.getAddrSpace(as)
... ...
@@ -378,7 +377,7 @@ func (a *Allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[s
378 378
 
379 379
 	bm, err := a.retrieveBitmask(k, c.Pool)
380 380
 	if err != nil {
381
-		return nil, nil, fmt.Errorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
381
+		return nil, nil, types.InternalErrorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
382 382
 			k.String(), prefAddress, poolID, err)
383 383
 	}
384 384
 	ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range)
... ...
@@ -410,12 +409,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
410 410
 	p, ok := aSpace.subnets[k]
411 411
 	if !ok {
412 412
 		aSpace.Unlock()
413
-		return ipamapi.ErrBadPool
413
+		return types.NotFoundErrorf("cannot find address pool for poolID:%s", poolID)
414 414
 	}
415 415
 
416 416
 	if address == nil {
417 417
 		aSpace.Unlock()
418
-		return ipamapi.ErrInvalidRequest
418
+		return types.BadRequestErrorf("invalid address: nil")
419 419
 	}
420 420
 
421 421
 	if !p.Pool.Contains(address) {
... ...
@@ -434,12 +433,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
434 434
 
435 435
 	h, err := types.GetHostPartIP(address, mask)
436 436
 	if err != nil {
437
-		return fmt.Errorf("failed to release address %s: %v", address.String(), err)
437
+		return types.InternalErrorf("failed to release address %s: %v", address.String(), err)
438 438
 	}
439 439
 
440 440
 	bm, err := a.retrieveBitmask(k, c.Pool)
441 441
 	if err != nil {
442
-		return fmt.Errorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
442
+		return types.InternalErrorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
443 443
 			k.String(), address, poolID, err)
444 444
 	}
445 445
 
... ...
@@ -463,19 +462,25 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres
463 463
 	} else if prefAddress != nil {
464 464
 		hostPart, e := types.GetHostPartIP(prefAddress, base.Mask)
465 465
 		if e != nil {
466
-			return nil, fmt.Errorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
466
+			return nil, types.InternalErrorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
467 467
 		}
468 468
 		ordinal = ipToUint64(types.GetMinimalIP(hostPart))
469 469
 		err = bitmask.Set(ordinal)
470 470
 	} else {
471 471
 		ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End)
472 472
 	}
473
-	if err != nil {
473
+
474
+	switch err {
475
+	case nil:
476
+		// Convert IP ordinal for this subnet into IP address
477
+		return generateAddress(ordinal, base), nil
478
+	case bitseq.ErrBitAllocated:
479
+		return nil, ipamapi.ErrIPAlreadyAllocated
480
+	case bitseq.ErrNoBitAvailable:
474 481
 		return nil, ipamapi.ErrNoAvailableIPs
482
+	default:
483
+		return nil, err
475 484
 	}
476
-
477
-	// Convert IP ordinal for this subnet into IP address
478
-	return generateAddress(ordinal, base), nil
479 485
 }
480 486
 
481 487
 // DumpDatabase dumps the internal info
... ...
@@ -2,7 +2,6 @@ package ipam
2 2
 
3 3
 import (
4 4
 	"encoding/json"
5
-	"fmt"
6 5
 
7 6
 	log "github.com/Sirupsen/logrus"
8 7
 	"github.com/docker/libnetwork/datastore"
... ...
@@ -84,7 +83,7 @@ func (a *Allocator) getStore(as string) datastore.DataStore {
84 84
 func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
85 85
 	store := a.getStore(as)
86 86
 	if store == nil {
87
-		return nil, fmt.Errorf("store for address space %s not found", as)
87
+		return nil, types.InternalErrorf("store for address space %s not found", as)
88 88
 	}
89 89
 
90 90
 	pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a}
... ...
@@ -93,7 +92,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
93 93
 			return nil, nil
94 94
 		}
95 95
 
96
-		return nil, fmt.Errorf("could not get pools config from store: %v", err)
96
+		return nil, types.InternalErrorf("could not get pools config from store: %v", err)
97 97
 	}
98 98
 
99 99
 	return pc, nil
... ...
@@ -102,7 +101,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
102 102
 func (a *Allocator) writeToStore(aSpace *addrSpace) error {
103 103
 	store := aSpace.store()
104 104
 	if store == nil {
105
-		return fmt.Errorf("invalid store while trying to write %s address space", aSpace.DataScope())
105
+		return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope())
106 106
 	}
107 107
 
108 108
 	err := store.PutObjectAtomic(aSpace)
... ...
@@ -116,7 +115,7 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error {
116 116
 func (a *Allocator) deleteFromStore(aSpace *addrSpace) error {
117 117
 	store := aSpace.store()
118 118
 	if store == nil {
119
-		return fmt.Errorf("invalid store while trying to delete %s address space", aSpace.DataScope())
119
+		return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope())
120 120
 	}
121 121
 
122 122
 	return store.DeleteObjectAtomic(aSpace)
... ...
@@ -88,12 +88,12 @@ func (s *SubnetKey) String() string {
88 88
 // FromString populate the SubnetKey object reading it from string
89 89
 func (s *SubnetKey) FromString(str string) error {
90 90
 	if str == "" || !strings.Contains(str, "/") {
91
-		return fmt.Errorf("invalid string form for subnetkey: %s", str)
91
+		return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
92 92
 	}
93 93
 
94 94
 	p := strings.Split(str, "/")
95 95
 	if len(p) != 3 && len(p) != 5 {
96
-		return fmt.Errorf("invalid string form for subnetkey: %s", str)
96
+		return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
97 97
 	}
98 98
 	s.AddressSpace = p[0]
99 99
 	s.Subnet = fmt.Sprintf("%s/%s", p[1], p[2])
... ...
@@ -317,7 +317,7 @@ func (aSpace *addrSpace) updatePoolDBOnRemoval(k SubnetKey) (func() error, error
317 317
 				return func() error {
318 318
 					bm, err := aSpace.alloc.retrieveBitmask(k, c.Pool)
319 319
 					if err != nil {
320
-						return fmt.Errorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
320
+						return types.InternalErrorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
321 321
 					}
322 322
 					return bm.Destroy()
323 323
 				}, nil
... ...
@@ -2,8 +2,9 @@
2 2
 package ipamapi
3 3
 
4 4
 import (
5
-	"errors"
6 5
 	"net"
6
+
7
+	"github.com/docker/libnetwork/types"
7 8
 )
8 9
 
9 10
 /********************
... ...
@@ -15,6 +16,8 @@ const (
15 15
 	DefaultIPAM = "default"
16 16
 	// PluginEndpointType represents the Endpoint Type used by Plugin system
17 17
 	PluginEndpointType = "IpamDriver"
18
+	// RequestAddressType represents the Address Type used when requesting an address
19
+	RequestAddressType = "RequestAddressType"
18 20
 )
19 21
 
20 22
 // Callback provides a Callback interface for registering an IPAM instance into LibNetwork
... ...
@@ -29,22 +32,19 @@ type Callback interface {
29 29
 
30 30
 // Weel-known errors returned by IPAM
31 31
 var (
32
-	ErrInvalidIpamService       = errors.New("Invalid IPAM Service")
33
-	ErrInvalidIpamConfigService = errors.New("Invalid IPAM Config Service")
34
-	ErrIpamNotAvailable         = errors.New("IPAM Service not available")
35
-	ErrIpamInternalError        = errors.New("IPAM Internal Error")
36
-	ErrInvalidAddressSpace      = errors.New("Invalid Address Space")
37
-	ErrInvalidPool              = errors.New("Invalid Address Pool")
38
-	ErrInvalidSubPool           = errors.New("Invalid Address SubPool")
39
-	ErrInvalidRequest           = errors.New("Invalid Request")
40
-	ErrPoolNotFound             = errors.New("Address Pool not found")
41
-	ErrOverlapPool              = errors.New("Address pool overlaps with existing pool on this address space")
42
-	ErrNoAvailablePool          = errors.New("No available pool")
43
-	ErrNoAvailableIPs           = errors.New("No available addresses on this pool")
44
-	ErrIPAlreadyAllocated       = errors.New("Address already in use")
45
-	ErrIPOutOfRange             = errors.New("Requested address is out of range")
46
-	ErrPoolOverlap              = errors.New("Pool overlaps with other one on this address space")
47
-	ErrBadPool                  = errors.New("Address space does not contain specified address pool")
32
+	ErrIpamInternalError   = types.InternalErrorf("IPAM Internal Error")
33
+	ErrInvalidAddressSpace = types.BadRequestErrorf("Invalid Address Space")
34
+	ErrInvalidPool         = types.BadRequestErrorf("Invalid Address Pool")
35
+	ErrInvalidSubPool      = types.BadRequestErrorf("Invalid Address SubPool")
36
+	ErrInvalidRequest      = types.BadRequestErrorf("Invalid Request")
37
+	ErrPoolNotFound        = types.BadRequestErrorf("Address Pool not found")
38
+	ErrOverlapPool         = types.ForbiddenErrorf("Address pool overlaps with existing pool on this address space")
39
+	ErrNoAvailablePool     = types.NoServiceErrorf("No available pool")
40
+	ErrNoAvailableIPs      = types.NoServiceErrorf("No available addresses on this pool")
41
+	ErrIPAlreadyAllocated  = types.ForbiddenErrorf("Address already in use")
42
+	ErrIPOutOfRange        = types.BadRequestErrorf("Requested address is out of range")
43
+	ErrPoolOverlap         = types.ForbiddenErrorf("Pool overlaps with other one on this address space")
44
+	ErrBadPool             = types.BadRequestErrorf("Address space does not contain specified address pool")
48 45
 )
49 46
 
50 47
 /*******************************
... ...
@@ -603,12 +603,13 @@ func (n *network) Delete() error {
603 603
 	if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
604 604
 		return fmt.Errorf("error deleting network endpoint count from store: %v", err)
605 605
 	}
606
+
607
+	n.ipamRelease()
608
+
606 609
 	if err = n.getController().deleteFromStore(n); err != nil {
607 610
 		return fmt.Errorf("error deleting network from store: %v", err)
608 611
 	}
609 612
 
610
-	n.ipamRelease()
611
-
612 613
 	return nil
613 614
 }
614 615
 
... ...
@@ -970,7 +971,10 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error {
970 970
 		// irrespective of whether ipam driver returned a gateway already.
971 971
 		// If none of the above is true, libnetwork will allocate one.
972 972
 		if cfg.Gateway != "" || d.Gateway == nil {
973
-			if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), nil); err != nil {
973
+			var gatewayOpts = map[string]string{
974
+				ipamapi.RequestAddressType: netlabel.Gateway,
975
+			}
976
+			if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), gatewayOpts); err != nil {
974 977
 				return types.InternalErrorf("failed to allocate gateway (%v): %v", cfg.Gateway, err)
975 978
 			}
976 979
 		}
... ...
@@ -62,7 +62,6 @@ type sandbox struct {
62 62
 	osSbox        osl.Sandbox
63 63
 	controller    *controller
64 64
 	refCnt        int
65
-	hostsOnce     sync.Once
66 65
 	endpoints     epHeap
67 66
 	epPriority    map[string]int
68 67
 	joinLeaveDone chan struct{}
... ...
@@ -601,41 +600,21 @@ func (sb *sandbox) buildHostsFile() error {
601 601
 }
602 602
 
603 603
 func (sb *sandbox) updateHostsFile(ifaceIP string, svcRecords []etchosts.Record) error {
604
-	var err error
604
+	var mhost string
605 605
 
606 606
 	if sb.config.originHostsPath != "" {
607 607
 		return nil
608 608
 	}
609 609
 
610
-	max := func(a, b int) int {
611
-		if a < b {
612
-			return b
613
-		}
614
-
615
-		return a
616
-	}
617
-
618
-	extraContent := make([]etchosts.Record, 0,
619
-		max(len(sb.config.extraHosts), len(svcRecords)))
620
-
621
-	sb.hostsOnce.Do(func() {
622
-		// Rebuild the hosts file accounting for the passed
623
-		// interface IP and service records
624
-
625
-		for _, extraHost := range sb.config.extraHosts {
626
-			extraContent = append(extraContent,
627
-				etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
628
-		}
629
-
630
-		err = etchosts.Build(sb.config.hostsPath, ifaceIP,
631
-			sb.config.hostName, sb.config.domainName, extraContent)
632
-	})
633
-
634
-	if err != nil {
635
-		return err
610
+	if sb.config.domainName != "" {
611
+		mhost = fmt.Sprintf("%s.%s %s", sb.config.hostName, sb.config.domainName,
612
+			sb.config.hostName)
613
+	} else {
614
+		mhost = sb.config.hostName
636 615
 	}
637 616
 
638
-	extraContent = extraContent[:0]
617
+	extraContent := make([]etchosts.Record, 0, len(svcRecords)+1)
618
+	extraContent = append(extraContent, etchosts.Record{Hosts: mhost, IP: ifaceIP})
639 619
 	for _, svc := range svcRecords {
640 620
 		extraContent = append(extraContent, svc)
641 621
 	}
642 622
new file mode 100644
... ...
@@ -0,0 +1,110 @@
0
+package netlink
1
+
2
+import (
3
+	"fmt"
4
+)
5
+
6
+type Class interface {
7
+	Attrs() *ClassAttrs
8
+	Type() string
9
+}
10
+
11
+// Class represents a netlink class. A filter is associated with a link,
12
+// has a handle and a parent. The root filter of a device should have a
13
+// parent == HANDLE_ROOT.
14
+type ClassAttrs struct {
15
+	LinkIndex int
16
+	Handle    uint32
17
+	Parent    uint32
18
+	Leaf      uint32
19
+}
20
+
21
+func (q ClassAttrs) String() string {
22
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
23
+}
24
+
25
+type HtbClassAttrs struct {
26
+	// TODO handle all attributes
27
+	Rate    uint64
28
+	Ceil    uint64
29
+	Buffer  uint32
30
+	Cbuffer uint32
31
+	Quantum uint32
32
+	Level   uint32
33
+	Prio    uint32
34
+}
35
+
36
+func (q HtbClassAttrs) String() string {
37
+	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
38
+}
39
+
40
+// Htb class
41
+type HtbClass struct {
42
+	ClassAttrs
43
+	Rate    uint64
44
+	Ceil    uint64
45
+	Buffer  uint32
46
+	Cbuffer uint32
47
+	Quantum uint32
48
+	Level   uint32
49
+	Prio    uint32
50
+}
51
+
52
+func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
53
+	mtu := 1600
54
+	rate := cattrs.Rate / 8
55
+	ceil := cattrs.Ceil / 8
56
+	buffer := cattrs.Buffer
57
+	cbuffer := cattrs.Cbuffer
58
+	if ceil == 0 {
59
+		ceil = rate
60
+	}
61
+
62
+	if buffer == 0 {
63
+		buffer = uint32(float64(rate)/Hz() + float64(mtu))
64
+	}
65
+	buffer = uint32(Xmittime(rate, buffer))
66
+
67
+	if cbuffer == 0 {
68
+		cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
69
+	}
70
+	cbuffer = uint32(Xmittime(ceil, cbuffer))
71
+
72
+	return &HtbClass{
73
+		ClassAttrs: attrs,
74
+		Rate:       rate,
75
+		Ceil:       ceil,
76
+		Buffer:     buffer,
77
+		Cbuffer:    cbuffer,
78
+		Quantum:    10,
79
+		Level:      0,
80
+		Prio:       0,
81
+	}
82
+}
83
+
84
+func (q HtbClass) String() string {
85
+	return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
86
+}
87
+
88
+func (class *HtbClass) Attrs() *ClassAttrs {
89
+	return &class.ClassAttrs
90
+}
91
+
92
+func (class *HtbClass) Type() string {
93
+	return "htb"
94
+}
95
+
96
+// GenericClass classes represent types that are not currently understood
97
+// by this netlink library.
98
+type GenericClass struct {
99
+	ClassAttrs
100
+	ClassType string
101
+}
102
+
103
+func (class *GenericClass) Attrs() *ClassAttrs {
104
+	return &class.ClassAttrs
105
+}
106
+
107
+func (class *GenericClass) Type() string {
108
+	return class.ClassType
109
+}
0 110
new file mode 100644
... ...
@@ -0,0 +1,144 @@
0
+package netlink
1
+
2
+import (
3
+	"syscall"
4
+
5
+	"github.com/vishvananda/netlink/nl"
6
+)
7
+
8
+// ClassDel will delete a class from the system.
9
+// Equivalent to: `tc class del $class`
10
+func ClassDel(class Class) error {
11
+	req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK)
12
+	base := class.Attrs()
13
+	msg := &nl.TcMsg{
14
+		Family:  nl.FAMILY_ALL,
15
+		Ifindex: int32(base.LinkIndex),
16
+		Handle:  base.Handle,
17
+		Parent:  base.Parent,
18
+	}
19
+	req.AddData(msg)
20
+
21
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
22
+	return err
23
+}
24
+
25
+// ClassAdd will add a class to the system.
26
+// Equivalent to: `tc class add $class`
27
+func ClassAdd(class Class) error {
28
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
29
+	base := class.Attrs()
30
+	msg := &nl.TcMsg{
31
+		Family:  nl.FAMILY_ALL,
32
+		Ifindex: int32(base.LinkIndex),
33
+		Handle:  base.Handle,
34
+		Parent:  base.Parent,
35
+	}
36
+	req.AddData(msg)
37
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
38
+
39
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
40
+	if htb, ok := class.(*HtbClass); ok {
41
+		opt := nl.TcHtbCopt{}
42
+		opt.Rate.Rate = uint32(htb.Rate)
43
+		opt.Ceil.Rate = uint32(htb.Ceil)
44
+		opt.Buffer = htb.Buffer
45
+		opt.Cbuffer = htb.Cbuffer
46
+		opt.Quantum = htb.Quantum
47
+		opt.Level = htb.Level
48
+		opt.Prio = htb.Prio
49
+		// TODO: Handle Debug properly. For now default to 0
50
+		nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
51
+	}
52
+	req.AddData(options)
53
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
54
+	return err
55
+}
56
+
57
+// ClassList gets a list of classes in the system.
58
+// Equivalent to: `tc class show`.
59
+// Generally retunrs nothing if link and parent are not specified.
60
+func ClassList(link Link, parent uint32) ([]Class, error) {
61
+	req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
62
+	msg := &nl.TcMsg{
63
+		Family: nl.FAMILY_ALL,
64
+		Parent: parent,
65
+	}
66
+	if link != nil {
67
+		base := link.Attrs()
68
+		ensureIndex(base)
69
+		msg.Ifindex = int32(base.Index)
70
+	}
71
+	req.AddData(msg)
72
+
73
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
74
+	if err != nil {
75
+		return nil, err
76
+	}
77
+
78
+	var res []Class
79
+	for _, m := range msgs {
80
+		msg := nl.DeserializeTcMsg(m)
81
+
82
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
83
+		if err != nil {
84
+			return nil, err
85
+		}
86
+
87
+		base := ClassAttrs{
88
+			LinkIndex: int(msg.Ifindex),
89
+			Handle:    msg.Handle,
90
+			Parent:    msg.Parent,
91
+		}
92
+
93
+		var class Class
94
+		classType := ""
95
+		for _, attr := range attrs {
96
+			switch attr.Attr.Type {
97
+			case nl.TCA_KIND:
98
+				classType = string(attr.Value[:len(attr.Value)-1])
99
+				switch classType {
100
+				case "htb":
101
+					class = &HtbClass{}
102
+				default:
103
+					class = &GenericClass{ClassType: classType}
104
+				}
105
+			case nl.TCA_OPTIONS:
106
+				switch classType {
107
+				case "htb":
108
+					data, err := nl.ParseRouteAttr(attr.Value)
109
+					if err != nil {
110
+						return nil, err
111
+					}
112
+					_, err = parseHtbClassData(class, data)
113
+					if err != nil {
114
+						return nil, err
115
+					}
116
+				}
117
+			}
118
+		}
119
+		*class.Attrs() = base
120
+		res = append(res, class)
121
+	}
122
+
123
+	return res, nil
124
+}
125
+
126
+func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
127
+	htb := class.(*HtbClass)
128
+	detailed := false
129
+	for _, datum := range data {
130
+		switch datum.Attr.Type {
131
+		case nl.TCA_HTB_PARMS:
132
+			opt := nl.DeserializeTcHtbCopt(datum.Value)
133
+			htb.Rate = uint64(opt.Rate.Rate)
134
+			htb.Ceil = uint64(opt.Ceil.Rate)
135
+			htb.Buffer = opt.Buffer
136
+			htb.Cbuffer = opt.Cbuffer
137
+			htb.Quantum = opt.Quantum
138
+			htb.Level = opt.Level
139
+			htb.Prio = opt.Prio
140
+		}
141
+	}
142
+	return detailed, nil
143
+}
0 144
new file mode 100644
... ...
@@ -0,0 +1,140 @@
0
+package netlink
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"github.com/vishvananda/netlink/nl"
6
+)
7
+
8
+type Filter interface {
9
+	Attrs() *FilterAttrs
10
+	Type() string
11
+}
12
+
13
+// Filter represents a netlink filter. A filter is associated with a link,
14
+// has a handle and a parent. The root filter of a device should have a
15
+// parent == HANDLE_ROOT.
16
+type FilterAttrs struct {
17
+	LinkIndex int
18
+	Handle    uint32
19
+	Parent    uint32
20
+	Priority  uint16 // lower is higher priority
21
+	Protocol  uint16 // syscall.ETH_P_*
22
+}
23
+
24
+func (q FilterAttrs) String() string {
25
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
26
+}
27
+
28
+// U32 filters on many packet related properties
29
+type U32 struct {
30
+	FilterAttrs
31
+	// Currently only supports redirecting to another interface
32
+	RedirIndex int
33
+}
34
+
35
+func (filter *U32) Attrs() *FilterAttrs {
36
+	return &filter.FilterAttrs
37
+}
38
+
39
+func (filter *U32) Type() string {
40
+	return "u32"
41
+}
42
+
43
+type FilterFwAttrs struct {
44
+	ClassId   uint32
45
+	InDev     string
46
+	Mask      uint32
47
+	Index     uint32
48
+	Buffer    uint32
49
+	Mtu       uint32
50
+	Mpu       uint16
51
+	Rate      uint32
52
+	AvRate    uint32
53
+	PeakRate  uint32
54
+	Action    int
55
+	Overhead  uint16
56
+	LinkLayer int
57
+}
58
+
59
+// FwFilter filters on firewall marks
60
+type Fw struct {
61
+	FilterAttrs
62
+	ClassId uint32
63
+	Police  nl.TcPolice
64
+	InDev   string
65
+	// TODO Action
66
+	Mask   uint32
67
+	AvRate uint32
68
+	Rtab   [256]uint32
69
+	Ptab   [256]uint32
70
+}
71
+
72
+func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
73
+	var rtab [256]uint32
74
+	var ptab [256]uint32
75
+	rcell_log := -1
76
+	pcell_log := -1
77
+	avrate := fattrs.AvRate / 8
78
+	police := nl.TcPolice{}
79
+	police.Rate.Rate = fattrs.Rate / 8
80
+	police.PeakRate.Rate = fattrs.PeakRate / 8
81
+	buffer := fattrs.Buffer
82
+	linklayer := nl.LINKLAYER_ETHERNET
83
+
84
+	if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
85
+		linklayer = fattrs.LinkLayer
86
+	}
87
+
88
+	police.Action = int32(fattrs.Action)
89
+	if police.Rate.Rate != 0 {
90
+		police.Rate.Mpu = fattrs.Mpu
91
+		police.Rate.Overhead = fattrs.Overhead
92
+		if CalcRtable(&police.Rate, rtab, rcell_log, fattrs.Mtu, linklayer) < 0 {
93
+			return nil, errors.New("TBF: failed to calculate rate table.")
94
+		}
95
+		police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
96
+	}
97
+	police.Mtu = fattrs.Mtu
98
+	if police.PeakRate.Rate != 0 {
99
+		police.PeakRate.Mpu = fattrs.Mpu
100
+		police.PeakRate.Overhead = fattrs.Overhead
101
+		if CalcRtable(&police.PeakRate, ptab, pcell_log, fattrs.Mtu, linklayer) < 0 {
102
+			return nil, errors.New("POLICE: failed to calculate peak rate table.")
103
+		}
104
+	}
105
+
106
+	return &Fw{
107
+		FilterAttrs: attrs,
108
+		ClassId:     fattrs.ClassId,
109
+		InDev:       fattrs.InDev,
110
+		Mask:        fattrs.Mask,
111
+		Police:      police,
112
+		AvRate:      avrate,
113
+		Rtab:        rtab,
114
+		Ptab:        ptab,
115
+	}, nil
116
+}
117
+
118
+func (filter *Fw) Attrs() *FilterAttrs {
119
+	return &filter.FilterAttrs
120
+}
121
+
122
+func (filter *Fw) Type() string {
123
+	return "fw"
124
+}
125
+
126
+// GenericFilter filters represent types that are not currently understood
127
+// by this netlink library.
128
+type GenericFilter struct {
129
+	FilterAttrs
130
+	FilterType string
131
+}
132
+
133
+func (filter *GenericFilter) Attrs() *FilterAttrs {
134
+	return &filter.FilterAttrs
135
+}
136
+
137
+func (filter *GenericFilter) Type() string {
138
+	return filter.FilterType
139
+}
0 140
new file mode 100644
... ...
@@ -0,0 +1,322 @@
0
+package netlink
1
+
2
+import (
3
+	"bytes"
4
+	"encoding/binary"
5
+	"fmt"
6
+	"syscall"
7
+
8
+	"github.com/vishvananda/netlink/nl"
9
+)
10
+
11
+// FilterDel will delete a filter from the system.
12
+// Equivalent to: `tc filter del $filter`
13
+func FilterDel(filter Filter) error {
14
+	req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
15
+	base := filter.Attrs()
16
+	msg := &nl.TcMsg{
17
+		Family:  nl.FAMILY_ALL,
18
+		Ifindex: int32(base.LinkIndex),
19
+		Handle:  base.Handle,
20
+		Parent:  base.Parent,
21
+		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
22
+	}
23
+	req.AddData(msg)
24
+
25
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
26
+	return err
27
+}
28
+
29
+// FilterAdd will add a filter to the system.
30
+// Equivalent to: `tc filter add $filter`
31
+func FilterAdd(filter Filter) error {
32
+	native = nl.NativeEndian()
33
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
34
+	base := filter.Attrs()
35
+	msg := &nl.TcMsg{
36
+		Family:  nl.FAMILY_ALL,
37
+		Ifindex: int32(base.LinkIndex),
38
+		Handle:  base.Handle,
39
+		Parent:  base.Parent,
40
+		Info:    MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
41
+	}
42
+	req.AddData(msg)
43
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
44
+
45
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
46
+	if u32, ok := filter.(*U32); ok {
47
+		// match all
48
+		sel := nl.TcU32Sel{
49
+			Nkeys: 1,
50
+			Flags: nl.TC_U32_TERMINAL,
51
+		}
52
+		sel.Keys = append(sel.Keys, nl.TcU32Key{})
53
+		nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
54
+		actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
55
+		table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil)
56
+		nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred"))
57
+		// redirect to other interface
58
+		mir := nl.TcMirred{
59
+			Action:  nl.TC_ACT_STOLEN,
60
+			Eaction: nl.TCA_EGRESS_REDIR,
61
+			Ifindex: uint32(u32.RedirIndex),
62
+		}
63
+		aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil)
64
+		nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize())
65
+	} else if fw, ok := filter.(*Fw); ok {
66
+		if fw.Mask != 0 {
67
+			b := make([]byte, 4)
68
+			native.PutUint32(b, fw.Mask)
69
+			nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
70
+		}
71
+		if fw.InDev != "" {
72
+			nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
73
+		}
74
+		if (fw.Police != nl.TcPolice{}) {
75
+
76
+			police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
77
+			nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
78
+			if (fw.Police.Rate != nl.TcRateSpec{}) {
79
+				payload := SerializeRtab(fw.Rtab)
80
+				nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
81
+			}
82
+			if (fw.Police.PeakRate != nl.TcRateSpec{}) {
83
+				payload := SerializeRtab(fw.Ptab)
84
+				nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
85
+			}
86
+		}
87
+		if fw.ClassId != 0 {
88
+			b := make([]byte, 4)
89
+			native.PutUint32(b, fw.ClassId)
90
+			nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
91
+		}
92
+	}
93
+
94
+	req.AddData(options)
95
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
96
+	return err
97
+}
98
+
99
+// FilterList gets a list of filters in the system.
100
+// Equivalent to: `tc filter show`.
101
+// Generally retunrs nothing if link and parent are not specified.
102
+func FilterList(link Link, parent uint32) ([]Filter, error) {
103
+	req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
104
+	msg := &nl.TcMsg{
105
+		Family: nl.FAMILY_ALL,
106
+		Parent: parent,
107
+	}
108
+	if link != nil {
109
+		base := link.Attrs()
110
+		ensureIndex(base)
111
+		msg.Ifindex = int32(base.Index)
112
+	}
113
+	req.AddData(msg)
114
+
115
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
116
+	if err != nil {
117
+		return nil, err
118
+	}
119
+
120
+	var res []Filter
121
+	for _, m := range msgs {
122
+		msg := nl.DeserializeTcMsg(m)
123
+
124
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
125
+		if err != nil {
126
+			return nil, err
127
+		}
128
+
129
+		base := FilterAttrs{
130
+			LinkIndex: int(msg.Ifindex),
131
+			Handle:    msg.Handle,
132
+			Parent:    msg.Parent,
133
+		}
134
+		base.Priority, base.Protocol = MajorMinor(msg.Info)
135
+		base.Protocol = nl.Swap16(base.Protocol)
136
+
137
+		var filter Filter
138
+		filterType := ""
139
+		detailed := false
140
+		for _, attr := range attrs {
141
+			switch attr.Attr.Type {
142
+			case nl.TCA_KIND:
143
+				filterType = string(attr.Value[:len(attr.Value)-1])
144
+				switch filterType {
145
+				case "u32":
146
+					filter = &U32{}
147
+				case "fw":
148
+					filter = &Fw{}
149
+				default:
150
+					filter = &GenericFilter{FilterType: filterType}
151
+				}
152
+			case nl.TCA_OPTIONS:
153
+				switch filterType {
154
+				case "u32":
155
+					data, err := nl.ParseRouteAttr(attr.Value)
156
+					if err != nil {
157
+						return nil, err
158
+					}
159
+					detailed, err = parseU32Data(filter, data)
160
+					if err != nil {
161
+						return nil, err
162
+					}
163
+				case "fw":
164
+					data, err := nl.ParseRouteAttr(attr.Value)
165
+					if err != nil {
166
+						return nil, err
167
+					}
168
+					detailed, err = parseFwData(filter, data)
169
+					if err != nil {
170
+						return nil, err
171
+					}
172
+				}
173
+			}
174
+		}
175
+		// only return the detailed version of the filter
176
+		if detailed {
177
+			*filter.Attrs() = base
178
+			res = append(res, filter)
179
+		}
180
+	}
181
+
182
+	return res, nil
183
+}
184
+
185
+func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
186
+	native = nl.NativeEndian()
187
+	u32 := filter.(*U32)
188
+	detailed := false
189
+	for _, datum := range data {
190
+		switch datum.Attr.Type {
191
+		case nl.TCA_U32_SEL:
192
+			detailed = true
193
+			sel := nl.DeserializeTcU32Sel(datum.Value)
194
+			// only parse if we have a very basic redirect
195
+			if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
196
+				return detailed, nil
197
+			}
198
+		case nl.TCA_U32_ACT:
199
+			table, err := nl.ParseRouteAttr(datum.Value)
200
+			if err != nil {
201
+				return detailed, err
202
+			}
203
+			if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB {
204
+				return detailed, fmt.Errorf("Action table not formed properly")
205
+			}
206
+			aattrs, err := nl.ParseRouteAttr(table[0].Value)
207
+			for _, aattr := range aattrs {
208
+				switch aattr.Attr.Type {
209
+				case nl.TCA_KIND:
210
+					actionType := string(aattr.Value[:len(aattr.Value)-1])
211
+					// only parse if the action is mirred
212
+					if actionType != "mirred" {
213
+						return detailed, nil
214
+					}
215
+				case nl.TCA_OPTIONS:
216
+					adata, err := nl.ParseRouteAttr(aattr.Value)
217
+					if err != nil {
218
+						return detailed, err
219
+					}
220
+					for _, adatum := range adata {
221
+						switch adatum.Attr.Type {
222
+						case nl.TCA_MIRRED_PARMS:
223
+							mir := nl.DeserializeTcMirred(adatum.Value)
224
+							u32.RedirIndex = int(mir.Ifindex)
225
+						}
226
+					}
227
+				}
228
+			}
229
+		}
230
+	}
231
+	return detailed, nil
232
+}
233
+
234
+func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
235
+	native = nl.NativeEndian()
236
+	fw := filter.(*Fw)
237
+	detailed := true
238
+	for _, datum := range data {
239
+		switch datum.Attr.Type {
240
+		case nl.TCA_FW_MASK:
241
+			fw.Mask = native.Uint32(datum.Value[0:4])
242
+		case nl.TCA_FW_CLASSID:
243
+			fw.ClassId = native.Uint32(datum.Value[0:4])
244
+		case nl.TCA_FW_INDEV:
245
+			fw.InDev = string(datum.Value[:len(datum.Value)-1])
246
+		case nl.TCA_FW_POLICE:
247
+			adata, _ := nl.ParseRouteAttr(datum.Value)
248
+			for _, aattr := range adata {
249
+				switch aattr.Attr.Type {
250
+				case nl.TCA_POLICE_TBF:
251
+					fw.Police = *nl.DeserializeTcPolice(aattr.Value)
252
+				case nl.TCA_POLICE_RATE:
253
+					fw.Rtab = DeserializeRtab(aattr.Value)
254
+				case nl.TCA_POLICE_PEAKRATE:
255
+					fw.Ptab = DeserializeRtab(aattr.Value)
256
+				}
257
+			}
258
+		}
259
+	}
260
+	return detailed, nil
261
+}
262
+
263
+func AlignToAtm(size uint) uint {
264
+	var linksize, cells int
265
+	cells = int(size / nl.ATM_CELL_PAYLOAD)
266
+	if (size % nl.ATM_CELL_PAYLOAD) > 0 {
267
+		cells++
268
+	}
269
+	linksize = cells * nl.ATM_CELL_SIZE
270
+	return uint(linksize)
271
+}
272
+
273
+func AdjustSize(sz uint, mpu uint, linklayer int) uint {
274
+	if sz < mpu {
275
+		sz = mpu
276
+	}
277
+	switch linklayer {
278
+	case nl.LINKLAYER_ATM:
279
+		return AlignToAtm(sz)
280
+	default:
281
+		return sz
282
+	}
283
+}
284
+
285
+func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cell_log int, mtu uint32, linklayer int) int {
286
+	bps := rate.Rate
287
+	mpu := rate.Mpu
288
+	var sz uint
289
+	if mtu == 0 {
290
+		mtu = 2047
291
+	}
292
+	if cell_log < 0 {
293
+		cell_log = 0
294
+		for (mtu >> uint(cell_log)) > 255 {
295
+			cell_log++
296
+		}
297
+	}
298
+	for i := 0; i < 256; i++ {
299
+		sz = AdjustSize(uint((i+1)<<uint32(cell_log)), uint(mpu), linklayer)
300
+		rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
301
+	}
302
+	rate.CellAlign = -1
303
+	rate.CellLog = uint8(cell_log)
304
+	rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
305
+	return cell_log
306
+}
307
+
308
+func DeserializeRtab(b []byte) [256]uint32 {
309
+	var rtab [256]uint32
310
+	native := nl.NativeEndian()
311
+	r := bytes.NewReader(b)
312
+	_ = binary.Read(r, native, &rtab)
313
+	return rtab
314
+}
315
+
316
+func SerializeRtab(rtab [256]uint32) []byte {
317
+	native := nl.NativeEndian()
318
+	var w bytes.Buffer
319
+	_ = binary.Write(&w, native, rtab)
320
+	return w.Bytes()
321
+}
... ...
@@ -1,6 +1,9 @@
1 1
 package netlink
2 2
 
3
-import "net"
3
+import (
4
+	"net"
5
+	"syscall"
6
+)
4 7
 
5 8
 // Link represents a link device from netlink. Shared link attributes
6 9
 // like name may be retrieved using the Attrs() method. Unique data
... ...
@@ -62,6 +65,19 @@ func (dummy *Dummy) Type() string {
62 62
 	return "dummy"
63 63
 }
64 64
 
65
+// Ifb links are advanced dummy devices for packet filtering
66
+type Ifb struct {
67
+	LinkAttrs
68
+}
69
+
70
+func (ifb *Ifb) Attrs() *LinkAttrs {
71
+	return &ifb.LinkAttrs
72
+}
73
+
74
+func (ifb *Ifb) Type() string {
75
+	return "ifb"
76
+}
77
+
65 78
 // Bridge links are simple linux bridges
66 79
 type Bridge struct {
67 80
 	LinkAttrs
... ...
@@ -114,6 +130,36 @@ func (macvlan *Macvlan) Type() string {
114 114
 	return "macvlan"
115 115
 }
116 116
 
117
+// Macvtap - macvtap is a virtual interfaces based on macvlan
118
+type Macvtap struct {
119
+	Macvlan
120
+}
121
+
122
+func (macvtap Macvtap) Type() string {
123
+	return "macvtap"
124
+}
125
+
126
+type TuntapMode uint16
127
+
128
+const (
129
+	TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
130
+	TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
131
+)
132
+
133
+// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
134
+type Tuntap struct {
135
+	LinkAttrs
136
+	Mode TuntapMode
137
+}
138
+
139
+func (tuntap *Tuntap) Attrs() *LinkAttrs {
140
+	return &tuntap.LinkAttrs
141
+}
142
+
143
+func (tuntap *Tuntap) Type() string {
144
+	return "tuntap"
145
+}
146
+
117 147
 // Veth devices must specify PeerName on create
118 148
 type Veth struct {
119 149
 	LinkAttrs
... ...
@@ -128,18 +174,18 @@ func (veth *Veth) Type() string {
128 128
 	return "veth"
129 129
 }
130 130
 
131
-// Generic links represent types that are not currently understood
131
+// GenericLink links represent types that are not currently understood
132 132
 // by this netlink library.
133
-type Generic struct {
133
+type GenericLink struct {
134 134
 	LinkAttrs
135 135
 	LinkType string
136 136
 }
137 137
 
138
-func (generic *Generic) Attrs() *LinkAttrs {
138
+func (generic *GenericLink) Attrs() *LinkAttrs {
139 139
 	return &generic.LinkAttrs
140 140
 }
141 141
 
142
-func (generic *Generic) Type() string {
142
+func (generic *GenericLink) Type() string {
143 143
 	return generic.LinkType
144 144
 }
145 145
 
... ...
@@ -5,7 +5,9 @@ import (
5 5
 	"encoding/binary"
6 6
 	"fmt"
7 7
 	"net"
8
+	"os"
8 9
 	"syscall"
10
+	"unsafe"
9 11
 
10 12
 	"github.com/vishvananda/netlink/nl"
11 13
 )
... ...
@@ -285,6 +287,44 @@ func LinkAdd(link Link) error {
285 285
 		return fmt.Errorf("LinkAttrs.Name cannot be empty!")
286 286
 	}
287 287
 
288
+	if tuntap, ok := link.(*Tuntap); ok {
289
+		// TODO: support user
290
+		// TODO: support group
291
+		// TODO: support non- one_queue
292
+		// TODO: support pi | vnet_hdr | multi_queue
293
+		// TODO: support non- exclusive
294
+		// TODO: support non- persistent
295
+		if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP {
296
+			return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
297
+		}
298
+		file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
299
+		if err != nil {
300
+			return err
301
+		}
302
+		defer file.Close()
303
+		var req ifReq
304
+		req.Flags |= syscall.IFF_ONE_QUEUE
305
+		req.Flags |= syscall.IFF_TUN_EXCL
306
+		copy(req.Name[:15], base.Name)
307
+		req.Flags |= uint16(tuntap.Mode)
308
+		_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req)))
309
+		if errno != 0 {
310
+			return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno)
311
+		}
312
+		_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1)
313
+		if errno != 0 {
314
+			return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
315
+		}
316
+		ensureIndex(base)
317
+
318
+		// can't set master during create, so set it afterwards
319
+		if base.MasterIndex != 0 {
320
+			// TODO: verify MasterIndex is actually a bridge?
321
+			return LinkSetMasterByIndex(link, base.MasterIndex)
322
+		}
323
+		return nil
324
+	}
325
+
288 326
 	req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
289 327
 
290 328
 	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
... ...
@@ -493,6 +533,8 @@ func linkDeserialize(m []byte) (Link, error) {
493 493
 					switch linkType {
494 494
 					case "dummy":
495 495
 						link = &Dummy{}
496
+					case "ifb":
497
+						link = &Ifb{}
496 498
 					case "bridge":
497 499
 						link = &Bridge{}
498 500
 					case "vlan":
... ...
@@ -505,8 +547,10 @@ func linkDeserialize(m []byte) (Link, error) {
505 505
 						link = &IPVlan{}
506 506
 					case "macvlan":
507 507
 						link = &Macvlan{}
508
+					case "macvtap":
509
+						link = &Macvtap{}
508 510
 					default:
509
-						link = &Generic{LinkType: linkType}
511
+						link = &GenericLink{LinkType: linkType}
510 512
 					}
511 513
 				case nl.IFLA_INFO_DATA:
512 514
 					data, err := nl.ParseRouteAttr(info.Value)
... ...
@@ -522,6 +566,8 @@ func linkDeserialize(m []byte) (Link, error) {
522 522
 						parseIPVlanData(link, data)
523 523
 					case "macvlan":
524 524
 						parseMacvlanData(link, data)
525
+					case "macvtap":
526
+						parseMacvtapData(link, data)
525 527
 					}
526 528
 				}
527 529
 			}
... ...
@@ -583,6 +629,46 @@ func LinkList() ([]Link, error) {
583 583
 	return res, nil
584 584
 }
585 585
 
586
+// LinkUpdate is used to pass information back from LinkSubscribe()
587
+type LinkUpdate struct {
588
+	nl.IfInfomsg
589
+	Link
590
+}
591
+
592
+// LinkSubscribe takes a chan down which notifications will be sent
593
+// when links change.  Close the 'done' chan to stop subscription.
594
+func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
595
+	s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
596
+	if err != nil {
597
+		return err
598
+	}
599
+	if done != nil {
600
+		go func() {
601
+			<-done
602
+			s.Close()
603
+		}()
604
+	}
605
+	go func() {
606
+		defer close(ch)
607
+		for {
608
+			msgs, err := s.Receive()
609
+			if err != nil {
610
+				return
611
+			}
612
+			for _, m := range msgs {
613
+				ifmsg := nl.DeserializeIfInfomsg(m.Data)
614
+				link, err := linkDeserialize(m.Data)
615
+				if err != nil {
616
+					return
617
+				}
618
+				ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
619
+			}
620
+		}
621
+	}()
622
+
623
+	return nil
624
+}
625
+
586 626
 func LinkSetHairpin(link Link, mode bool) error {
587 627
 	return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
588 628
 }
... ...
@@ -696,6 +782,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
696 696
 	}
697 697
 }
698 698
 
699
+func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
700
+	macv := link.(*Macvtap)
701
+	parseMacvlanData(&macv.Macvlan, data)
702
+}
703
+
699 704
 func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) {
700 705
 	macv := link.(*Macvlan)
701 706
 	for _, datum := range data {
702 707
new file mode 100644
... ...
@@ -0,0 +1,14 @@
0
+package netlink
1
+
2
+// ideally golang.org/x/sys/unix would define IfReq but it only has
3
+// IFNAMSIZ, hence this minimalistic implementation
4
+const (
5
+	SizeOfIfReq = 40
6
+	IFNAMSIZ    = 16
7
+)
8
+
9
+type ifReq struct {
10
+	Name  [IFNAMSIZ]byte
11
+	Flags uint16
12
+	pad   [SizeOfIfReq - IFNAMSIZ - 2]byte
13
+}
... ...
@@ -142,7 +142,7 @@ func (a *RtAttr) Len() int {
142 142
 }
143 143
 
144 144
 // Serialize the RtAttr into a byte array
145
-// This can't ust unsafe.cast because it must iterate through children.
145
+// This can't just unsafe.cast because it must iterate through children.
146 146
 func (a *RtAttr) Serialize() []byte {
147 147
 	native := NativeEndian()
148 148
 
149 149
new file mode 100644
... ...
@@ -0,0 +1,508 @@
0
+package nl
1
+
2
+import (
3
+	"unsafe"
4
+)
5
+
6
+// LinkLayer
7
+const (
8
+	LINKLAYER_UNSPEC = iota
9
+	LINKLAYER_ETHERNET
10
+	LINKLAYER_ATM
11
+)
12
+
13
+// ATM
14
+const (
15
+	ATM_CELL_PAYLOAD = 48
16
+	ATM_CELL_SIZE    = 53
17
+)
18
+
19
+const TC_LINKLAYER_MASK = 0x0F
20
+
21
+// Police
22
+const (
23
+	TCA_POLICE_UNSPEC = iota
24
+	TCA_POLICE_TBF
25
+	TCA_POLICE_RATE
26
+	TCA_POLICE_PEAKRATE
27
+	TCA_POLICE_AVRATE
28
+	TCA_POLICE_RESULT
29
+	TCA_POLICE_MAX = TCA_POLICE_RESULT
30
+)
31
+
32
+// Message types
33
+const (
34
+	TCA_UNSPEC = iota
35
+	TCA_KIND
36
+	TCA_OPTIONS
37
+	TCA_STATS
38
+	TCA_XSTATS
39
+	TCA_RATE
40
+	TCA_FCNT
41
+	TCA_STATS2
42
+	TCA_STAB
43
+	TCA_MAX = TCA_STAB
44
+)
45
+
46
+const (
47
+	TCA_ACT_TAB = 1
48
+	TCAA_MAX    = 1
49
+)
50
+
51
+const (
52
+	TCA_PRIO_UNSPEC = iota
53
+	TCA_PRIO_MQ
54
+	TCA_PRIO_MAX = TCA_PRIO_MQ
55
+)
56
+
57
+const (
58
+	SizeofTcMsg       = 0x14
59
+	SizeofTcActionMsg = 0x04
60
+	SizeofTcPrioMap   = 0x14
61
+	SizeofTcRateSpec  = 0x0c
62
+	SizeofTcTbfQopt   = 2*SizeofTcRateSpec + 0x0c
63
+	SizeofTcHtbCopt   = 2*SizeofTcRateSpec + 0x14
64
+	SizeofTcHtbGlob   = 0x14
65
+	SizeofTcU32Key    = 0x10
66
+	SizeofTcU32Sel    = 0x10 // without keys
67
+	SizeofTcMirred    = 0x1c
68
+	SizeofTcPolice    = 2*SizeofTcRateSpec + 0x20
69
+)
70
+
71
+// struct tcmsg {
72
+//   unsigned char tcm_family;
73
+//   unsigned char tcm__pad1;
74
+//   unsigned short  tcm__pad2;
75
+//   int   tcm_ifindex;
76
+//   __u32   tcm_handle;
77
+//   __u32   tcm_parent;
78
+//   __u32   tcm_info;
79
+// };
80
+
81
+type TcMsg struct {
82
+	Family  uint8
83
+	Pad     [3]byte
84
+	Ifindex int32
85
+	Handle  uint32
86
+	Parent  uint32
87
+	Info    uint32
88
+}
89
+
90
+func (msg *TcMsg) Len() int {
91
+	return SizeofTcMsg
92
+}
93
+
94
+func DeserializeTcMsg(b []byte) *TcMsg {
95
+	return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0]))
96
+}
97
+
98
+func (x *TcMsg) Serialize() []byte {
99
+	return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:]
100
+}
101
+
102
+// struct tcamsg {
103
+//   unsigned char tca_family;
104
+//   unsigned char tca__pad1;
105
+//   unsigned short  tca__pad2;
106
+// };
107
+
108
+type TcActionMsg struct {
109
+	Family uint8
110
+	Pad    [3]byte
111
+}
112
+
113
+func (msg *TcActionMsg) Len() int {
114
+	return SizeofTcActionMsg
115
+}
116
+
117
+func DeserializeTcActionMsg(b []byte) *TcActionMsg {
118
+	return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0]))
119
+}
120
+
121
+func (x *TcActionMsg) Serialize() []byte {
122
+	return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:]
123
+}
124
+
125
+const (
126
+	TC_PRIO_MAX = 15
127
+)
128
+
129
+// struct tc_prio_qopt {
130
+// 	int bands;      /* Number of bands */
131
+// 	__u8  priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
132
+// };
133
+
134
+type TcPrioMap struct {
135
+	Bands   int32
136
+	Priomap [TC_PRIO_MAX + 1]uint8
137
+}
138
+
139
+func (msg *TcPrioMap) Len() int {
140
+	return SizeofTcPrioMap
141
+}
142
+
143
+func DeserializeTcPrioMap(b []byte) *TcPrioMap {
144
+	return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0]))
145
+}
146
+
147
+func (x *TcPrioMap) Serialize() []byte {
148
+	return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:]
149
+}
150
+
151
+const (
152
+	TCA_TBF_UNSPEC = iota
153
+	TCA_TBF_PARMS
154
+	TCA_TBF_RTAB
155
+	TCA_TBF_PTAB
156
+	TCA_TBF_RATE64
157
+	TCA_TBF_PRATE64
158
+	TCA_TBF_BURST
159
+	TCA_TBF_PBURST
160
+	TCA_TBF_MAX = TCA_TBF_PBURST
161
+)
162
+
163
+// struct tc_ratespec {
164
+//   unsigned char cell_log;
165
+//   __u8    linklayer; /* lower 4 bits */
166
+//   unsigned short  overhead;
167
+//   short   cell_align;
168
+//   unsigned short  mpu;
169
+//   __u32   rate;
170
+// };
171
+
172
+type TcRateSpec struct {
173
+	CellLog   uint8
174
+	Linklayer uint8
175
+	Overhead  uint16
176
+	CellAlign int16
177
+	Mpu       uint16
178
+	Rate      uint32
179
+}
180
+
181
+func (msg *TcRateSpec) Len() int {
182
+	return SizeofTcRateSpec
183
+}
184
+
185
+func DeserializeTcRateSpec(b []byte) *TcRateSpec {
186
+	return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0]))
187
+}
188
+
189
+func (x *TcRateSpec) Serialize() []byte {
190
+	return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:]
191
+}
192
+
193
+// struct tc_tbf_qopt {
194
+//   struct tc_ratespec rate;
195
+//   struct tc_ratespec peakrate;
196
+//   __u32   limit;
197
+//   __u32   buffer;
198
+//   __u32   mtu;
199
+// };
200
+
201
+type TcTbfQopt struct {
202
+	Rate     TcRateSpec
203
+	Peakrate TcRateSpec
204
+	Limit    uint32
205
+	Buffer   uint32
206
+	Mtu      uint32
207
+}
208
+
209
+func (msg *TcTbfQopt) Len() int {
210
+	return SizeofTcTbfQopt
211
+}
212
+
213
+func DeserializeTcTbfQopt(b []byte) *TcTbfQopt {
214
+	return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0]))
215
+}
216
+
217
+func (x *TcTbfQopt) Serialize() []byte {
218
+	return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:]
219
+}
220
+
221
+const (
222
+	TCA_HTB_UNSPEC = iota
223
+	TCA_HTB_PARMS
224
+	TCA_HTB_INIT
225
+	TCA_HTB_CTAB
226
+	TCA_HTB_RTAB
227
+	TCA_HTB_DIRECT_QLEN
228
+	TCA_HTB_RATE64
229
+	TCA_HTB_CEIL64
230
+	TCA_HTB_MAX = TCA_HTB_CEIL64
231
+)
232
+
233
+//struct tc_htb_opt {
234
+//	struct tc_ratespec	rate;
235
+//	struct tc_ratespec	ceil;
236
+//	__u32	buffer;
237
+//	__u32	cbuffer;
238
+//	__u32	quantum;
239
+//	__u32	level;		/* out only */
240
+//	__u32	prio;
241
+//};
242
+
243
+type TcHtbCopt struct {
244
+	Rate    TcRateSpec
245
+	Ceil    TcRateSpec
246
+	Buffer  uint32
247
+	Cbuffer uint32
248
+	Quantum uint32
249
+	Level   uint32
250
+	Prio    uint32
251
+}
252
+
253
+func (msg *TcHtbCopt) Len() int {
254
+	return SizeofTcHtbCopt
255
+}
256
+
257
+func DeserializeTcHtbCopt(b []byte) *TcHtbCopt {
258
+	return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0]))
259
+}
260
+
261
+func (x *TcHtbCopt) Serialize() []byte {
262
+	return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:]
263
+}
264
+
265
+type TcHtbGlob struct {
266
+	Version      uint32
267
+	Rate2Quantum uint32
268
+	Defcls       uint32
269
+	Debug        uint32
270
+	DirectPkts   uint32
271
+}
272
+
273
+func (msg *TcHtbGlob) Len() int {
274
+	return SizeofTcHtbGlob
275
+}
276
+
277
+func DeserializeTcHtbGlob(b []byte) *TcHtbGlob {
278
+	return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0]))
279
+}
280
+
281
+func (x *TcHtbGlob) Serialize() []byte {
282
+	return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:]
283
+}
284
+
285
+const (
286
+	TCA_U32_UNSPEC = iota
287
+	TCA_U32_CLASSID
288
+	TCA_U32_HASH
289
+	TCA_U32_LINK
290
+	TCA_U32_DIVISOR
291
+	TCA_U32_SEL
292
+	TCA_U32_POLICE
293
+	TCA_U32_ACT
294
+	TCA_U32_INDEV
295
+	TCA_U32_PCNT
296
+	TCA_U32_MARK
297
+	TCA_U32_MAX = TCA_U32_MARK
298
+)
299
+
300
+// struct tc_u32_key {
301
+//   __be32    mask;
302
+//   __be32    val;
303
+//   int   off;
304
+//   int   offmask;
305
+// };
306
+
307
+type TcU32Key struct {
308
+	Mask    uint32 // big endian
309
+	Val     uint32 // big endian
310
+	Off     int32
311
+	OffMask int32
312
+}
313
+
314
+func (msg *TcU32Key) Len() int {
315
+	return SizeofTcU32Key
316
+}
317
+
318
+func DeserializeTcU32Key(b []byte) *TcU32Key {
319
+	return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0]))
320
+}
321
+
322
+func (x *TcU32Key) Serialize() []byte {
323
+	return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:]
324
+}
325
+
326
+// struct tc_u32_sel {
327
+//   unsigned char   flags;
328
+//   unsigned char   offshift;
329
+//   unsigned char   nkeys;
330
+//
331
+//   __be16      offmask;
332
+//   __u16     off;
333
+//   short     offoff;
334
+//
335
+//   short     hoff;
336
+//   __be32      hmask;
337
+//   struct tc_u32_key keys[0];
338
+// };
339
+
340
+const (
341
+	TC_U32_TERMINAL  = 1 << iota
342
+	TC_U32_OFFSET    = 1 << iota
343
+	TC_U32_VAROFFSET = 1 << iota
344
+	TC_U32_EAT       = 1 << iota
345
+)
346
+
347
+type TcU32Sel struct {
348
+	Flags    uint8
349
+	Offshift uint8
350
+	Nkeys    uint8
351
+	Pad      uint8
352
+	Offmask  uint16 // big endian
353
+	Off      uint16
354
+	Offoff   int16
355
+	Hoff     int16
356
+	Hmask    uint32 // big endian
357
+	Keys     []TcU32Key
358
+}
359
+
360
+func (msg *TcU32Sel) Len() int {
361
+	return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key
362
+}
363
+
364
+func DeserializeTcU32Sel(b []byte) *TcU32Sel {
365
+	x := &TcU32Sel{}
366
+	copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b)
367
+	next := SizeofTcU32Sel
368
+	var i uint8
369
+	for i = 0; i < x.Nkeys; i++ {
370
+		x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:]))
371
+		next += SizeofTcU32Key
372
+	}
373
+	return x
374
+}
375
+
376
+func (x *TcU32Sel) Serialize() []byte {
377
+	// This can't just unsafe.cast because it must iterate through keys.
378
+	buf := make([]byte, x.Len())
379
+	copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:])
380
+	next := SizeofTcU32Sel
381
+	for _, key := range x.Keys {
382
+		keyBuf := key.Serialize()
383
+		copy(buf[next:], keyBuf)
384
+		next += SizeofTcU32Key
385
+	}
386
+	return buf
387
+}
388
+
389
+const (
390
+	TCA_ACT_MIRRED = 8
391
+)
392
+
393
+const (
394
+	TCA_MIRRED_UNSPEC = iota
395
+	TCA_MIRRED_TM
396
+	TCA_MIRRED_PARMS
397
+	TCA_MIRRED_MAX = TCA_MIRRED_PARMS
398
+)
399
+
400
+const (
401
+	TCA_EGRESS_REDIR   = 1 /* packet redirect to EGRESS*/
402
+	TCA_EGRESS_MIRROR  = 2 /* mirror packet to EGRESS */
403
+	TCA_INGRESS_REDIR  = 3 /* packet redirect to INGRESS*/
404
+	TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */
405
+)
406
+
407
+const (
408
+	TC_ACT_UNSPEC     = int32(-1)
409
+	TC_ACT_OK         = 0
410
+	TC_ACT_RECLASSIFY = 1
411
+	TC_ACT_SHOT       = 2
412
+	TC_ACT_PIPE       = 3
413
+	TC_ACT_STOLEN     = 4
414
+	TC_ACT_QUEUED     = 5
415
+	TC_ACT_REPEAT     = 6
416
+	TC_ACT_JUMP       = 0x10000000
417
+)
418
+
419
+// #define tc_gen \
420
+//   __u32                 index; \
421
+//   __u32                 capab; \
422
+//   int                   action; \
423
+//   int                   refcnt; \
424
+//   int                   bindcnt
425
+// struct tc_mirred {
426
+// 	tc_gen;
427
+// 	int                     eaction;   /* one of IN/EGRESS_MIRROR/REDIR */
428
+// 	__u32                   ifindex;  /* ifindex of egress port */
429
+// };
430
+
431
+type TcMirred struct {
432
+	Index   uint32
433
+	Capab   uint32
434
+	Action  int32
435
+	Refcnt  int32
436
+	Bindcnt int32
437
+	Eaction int32
438
+	Ifindex uint32
439
+}
440
+
441
+func (msg *TcMirred) Len() int {
442
+	return SizeofTcMirred
443
+}
444
+
445
+func DeserializeTcMirred(b []byte) *TcMirred {
446
+	return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0]))
447
+}
448
+
449
+func (x *TcMirred) Serialize() []byte {
450
+	return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
451
+}
452
+
453
+const (
454
+	TC_POLICE_UNSPEC     = TC_ACT_UNSPEC
455
+	TC_POLICE_OK         = TC_ACT_OK
456
+	TC_POLICE_RECLASSIFY = TC_ACT_RECLASSIFY
457
+	TC_POLICE_SHOT       = TC_ACT_SHOT
458
+	TC_POLICE_PIPE       = TC_ACT_PIPE
459
+)
460
+
461
+// struct tc_police {
462
+// 	__u32			index;
463
+// 	int			action;
464
+// 	__u32			limit;
465
+// 	__u32			burst;
466
+// 	__u32			mtu;
467
+// 	struct tc_ratespec	rate;
468
+// 	struct tc_ratespec	peakrate;
469
+// 	int				refcnt;
470
+// 	int				bindcnt;
471
+// 	__u32			capab;
472
+// };
473
+
474
+type TcPolice struct {
475
+	Index    uint32
476
+	Action   int32
477
+	Limit    uint32
478
+	Burst    uint32
479
+	Mtu      uint32
480
+	Rate     TcRateSpec
481
+	PeakRate TcRateSpec
482
+	Refcnt   int32
483
+	Bindcnt  int32
484
+	Capab    uint32
485
+}
486
+
487
+func (msg *TcPolice) Len() int {
488
+	return SizeofTcPolice
489
+}
490
+
491
+func DeserializeTcPolice(b []byte) *TcPolice {
492
+	return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0]))
493
+}
494
+
495
+func (x *TcPolice) Serialize() []byte {
496
+	return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:]
497
+}
498
+
499
+const (
500
+	TCA_FW_UNSPEC = iota
501
+	TCA_FW_CLASSID
502
+	TCA_FW_POLICE
503
+	TCA_FW_INDEV
504
+	TCA_FW_ACT
505
+	TCA_FW_MASK
506
+	TCA_FW_MAX = TCA_FW_MASK
507
+)
0 508
new file mode 100644
... ...
@@ -0,0 +1,167 @@
0
+package netlink
1
+
2
+import (
3
+	"fmt"
4
+)
5
+
6
+const (
7
+	HANDLE_NONE      = 0
8
+	HANDLE_INGRESS   = 0xFFFFFFF1
9
+	HANDLE_ROOT      = 0xFFFFFFFF
10
+	PRIORITY_MAP_LEN = 16
11
+)
12
+
13
+type Qdisc interface {
14
+	Attrs() *QdiscAttrs
15
+	Type() string
16
+}
17
+
18
+// Qdisc represents a netlink qdisc. A qdisc is associated with a link,
19
+// has a handle, a parent and a refcnt. The root qdisc of a device should
20
+// have parent == HANDLE_ROOT.
21
+type QdiscAttrs struct {
22
+	LinkIndex int
23
+	Handle    uint32
24
+	Parent    uint32
25
+	Refcnt    uint32 // read only
26
+}
27
+
28
+func (q QdiscAttrs) String() string {
29
+	return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
30
+}
31
+
32
+func MakeHandle(major, minor uint16) uint32 {
33
+	return (uint32(major) << 16) | uint32(minor)
34
+}
35
+
36
+func MajorMinor(handle uint32) (uint16, uint16) {
37
+	return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF)
38
+}
39
+
40
+func HandleStr(handle uint32) string {
41
+	switch handle {
42
+	case HANDLE_NONE:
43
+		return "none"
44
+	case HANDLE_INGRESS:
45
+		return "ingress"
46
+	case HANDLE_ROOT:
47
+		return "root"
48
+	default:
49
+		major, minor := MajorMinor(handle)
50
+		return fmt.Sprintf("%x:%x", major, minor)
51
+	}
52
+}
53
+
54
+// PfifoFast is the default qdisc created by the kernel if one has not
55
+// been defined for the interface
56
+type PfifoFast struct {
57
+	QdiscAttrs
58
+	Bands       uint8
59
+	PriorityMap [PRIORITY_MAP_LEN]uint8
60
+}
61
+
62
+func (qdisc *PfifoFast) Attrs() *QdiscAttrs {
63
+	return &qdisc.QdiscAttrs
64
+}
65
+
66
+func (qdisc *PfifoFast) Type() string {
67
+	return "pfifo_fast"
68
+}
69
+
70
+// Prio is a basic qdisc that works just like PfifoFast
71
+type Prio struct {
72
+	QdiscAttrs
73
+	Bands       uint8
74
+	PriorityMap [PRIORITY_MAP_LEN]uint8
75
+}
76
+
77
+func NewPrio(attrs QdiscAttrs) *Prio {
78
+	return &Prio{
79
+		QdiscAttrs:  attrs,
80
+		Bands:       3,
81
+		PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
82
+	}
83
+}
84
+
85
+func (qdisc *Prio) Attrs() *QdiscAttrs {
86
+	return &qdisc.QdiscAttrs
87
+}
88
+
89
+func (qdisc *Prio) Type() string {
90
+	return "prio"
91
+}
92
+
93
+// Htb is a classful qdisc that rate limits based on tokens
94
+type Htb struct {
95
+	QdiscAttrs
96
+	Version      uint32
97
+	Rate2Quantum uint32
98
+	Defcls       uint32
99
+	Debug        uint32
100
+	DirectPkts   uint32
101
+}
102
+
103
+func NewHtb(attrs QdiscAttrs) *Htb {
104
+	return &Htb{
105
+		QdiscAttrs:   attrs,
106
+		Version:      3,
107
+		Defcls:       0,
108
+		Rate2Quantum: 10,
109
+		Debug:        0,
110
+		DirectPkts:   0,
111
+	}
112
+}
113
+
114
+func (qdisc *Htb) Attrs() *QdiscAttrs {
115
+	return &qdisc.QdiscAttrs
116
+}
117
+
118
+func (qdisc *Htb) Type() string {
119
+	return "htb"
120
+}
121
+
122
+// Tbf is a classless qdisc that rate limits based on tokens
123
+type Tbf struct {
124
+	QdiscAttrs
125
+	// TODO: handle 64bit rate properly
126
+	Rate   uint64
127
+	Limit  uint32
128
+	Buffer uint32
129
+	// TODO: handle other settings
130
+}
131
+
132
+func (qdisc *Tbf) Attrs() *QdiscAttrs {
133
+	return &qdisc.QdiscAttrs
134
+}
135
+
136
+func (qdisc *Tbf) Type() string {
137
+	return "tbf"
138
+}
139
+
140
+// Ingress is a qdisc for adding ingress filters
141
+type Ingress struct {
142
+	QdiscAttrs
143
+}
144
+
145
+func (qdisc *Ingress) Attrs() *QdiscAttrs {
146
+	return &qdisc.QdiscAttrs
147
+}
148
+
149
+func (qdisc *Ingress) Type() string {
150
+	return "ingress"
151
+}
152
+
153
+// GenericQdisc qdiscs represent types that are not currently understood
154
+// by this netlink library.
155
+type GenericQdisc struct {
156
+	QdiscAttrs
157
+	QdiscType string
158
+}
159
+
160
+func (qdisc *GenericQdisc) Attrs() *QdiscAttrs {
161
+	return &qdisc.QdiscAttrs
162
+}
163
+
164
+func (qdisc *GenericQdisc) Type() string {
165
+	return qdisc.QdiscType
166
+}
0 167
new file mode 100644
... ...
@@ -0,0 +1,316 @@
0
+package netlink
1
+
2
+import (
3
+	"fmt"
4
+	"io/ioutil"
5
+	"strconv"
6
+	"strings"
7
+	"syscall"
8
+
9
+	"github.com/vishvananda/netlink/nl"
10
+)
11
+
12
+// QdiscDel will delete a qdisc from the system.
13
+// Equivalent to: `tc qdisc del $qdisc`
14
+func QdiscDel(qdisc Qdisc) error {
15
+	req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK)
16
+	base := qdisc.Attrs()
17
+	msg := &nl.TcMsg{
18
+		Family:  nl.FAMILY_ALL,
19
+		Ifindex: int32(base.LinkIndex),
20
+		Handle:  base.Handle,
21
+		Parent:  base.Parent,
22
+	}
23
+	req.AddData(msg)
24
+
25
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
26
+	return err
27
+}
28
+
29
+// QdiscAdd will add a qdisc to the system.
30
+// Equivalent to: `tc qdisc add $qdisc`
31
+func QdiscAdd(qdisc Qdisc) error {
32
+	req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
33
+	base := qdisc.Attrs()
34
+	msg := &nl.TcMsg{
35
+		Family:  nl.FAMILY_ALL,
36
+		Ifindex: int32(base.LinkIndex),
37
+		Handle:  base.Handle,
38
+		Parent:  base.Parent,
39
+	}
40
+	req.AddData(msg)
41
+	req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
42
+
43
+	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
44
+	if prio, ok := qdisc.(*Prio); ok {
45
+		tcmap := nl.TcPrioMap{
46
+			Bands:   int32(prio.Bands),
47
+			Priomap: prio.PriorityMap,
48
+		}
49
+		options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
50
+	} else if tbf, ok := qdisc.(*Tbf); ok {
51
+		opt := nl.TcTbfQopt{}
52
+		// TODO: handle rate > uint32
53
+		opt.Rate.Rate = uint32(tbf.Rate)
54
+		opt.Limit = tbf.Limit
55
+		opt.Buffer = tbf.Buffer
56
+		nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
57
+	} else if htb, ok := qdisc.(*Htb); ok {
58
+		opt := nl.TcHtbGlob{}
59
+		opt.Version = htb.Version
60
+		opt.Rate2Quantum = htb.Rate2Quantum
61
+		opt.Defcls = htb.Defcls
62
+		// TODO: Handle Debug properly. For now default to 0
63
+		opt.Debug = htb.Debug
64
+		opt.DirectPkts = htb.DirectPkts
65
+		nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
66
+		// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
67
+	} else if _, ok := qdisc.(*Ingress); ok {
68
+		// ingress filters must use the proper handle
69
+		if msg.Parent != HANDLE_INGRESS {
70
+			return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
71
+		}
72
+	}
73
+	req.AddData(options)
74
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
75
+	return err
76
+}
77
+
78
+// QdiscList gets a list of qdiscs in the system.
79
+// Equivalent to: `tc qdisc show`.
80
+// The list can be filtered by link.
81
+func QdiscList(link Link) ([]Qdisc, error) {
82
+	req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
83
+	index := int32(0)
84
+	if link != nil {
85
+		base := link.Attrs()
86
+		ensureIndex(base)
87
+		index = int32(base.Index)
88
+	}
89
+	msg := &nl.TcMsg{
90
+		Family:  nl.FAMILY_ALL,
91
+		Ifindex: index,
92
+	}
93
+	req.AddData(msg)
94
+
95
+	msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
96
+	if err != nil {
97
+		return nil, err
98
+	}
99
+
100
+	var res []Qdisc
101
+	for _, m := range msgs {
102
+		msg := nl.DeserializeTcMsg(m)
103
+
104
+		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
105
+		if err != nil {
106
+			return nil, err
107
+		}
108
+
109
+		// skip qdiscs from other interfaces
110
+		if link != nil && msg.Ifindex != index {
111
+			continue
112
+		}
113
+
114
+		base := QdiscAttrs{
115
+			LinkIndex: int(msg.Ifindex),
116
+			Handle:    msg.Handle,
117
+			Parent:    msg.Parent,
118
+			Refcnt:    msg.Info,
119
+		}
120
+		var qdisc Qdisc
121
+		qdiscType := ""
122
+		for _, attr := range attrs {
123
+			switch attr.Attr.Type {
124
+			case nl.TCA_KIND:
125
+				qdiscType = string(attr.Value[:len(attr.Value)-1])
126
+				switch qdiscType {
127
+				case "pfifo_fast":
128
+					qdisc = &PfifoFast{}
129
+				case "prio":
130
+					qdisc = &Prio{}
131
+				case "tbf":
132
+					qdisc = &Tbf{}
133
+				case "ingress":
134
+					qdisc = &Ingress{}
135
+				case "htb":
136
+					qdisc = &Htb{}
137
+				default:
138
+					qdisc = &GenericQdisc{QdiscType: qdiscType}
139
+				}
140
+			case nl.TCA_OPTIONS:
141
+				switch qdiscType {
142
+				case "pfifo_fast":
143
+					// pfifo returns TcPrioMap directly without wrapping it in rtattr
144
+					if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
145
+						return nil, err
146
+					}
147
+				case "prio":
148
+					// prio returns TcPrioMap directly without wrapping it in rtattr
149
+					if err := parsePrioData(qdisc, attr.Value); err != nil {
150
+						return nil, err
151
+					}
152
+				case "tbf":
153
+					data, err := nl.ParseRouteAttr(attr.Value)
154
+					if err != nil {
155
+						return nil, err
156
+					}
157
+					if err := parseTbfData(qdisc, data); err != nil {
158
+						return nil, err
159
+					}
160
+				case "htb":
161
+					data, err := nl.ParseRouteAttr(attr.Value)
162
+					if err != nil {
163
+						return nil, err
164
+					}
165
+					if err := parseHtbData(qdisc, data); err != nil {
166
+						return nil, err
167
+					}
168
+
169
+					// no options for ingress
170
+				}
171
+			}
172
+		}
173
+		*qdisc.Attrs() = base
174
+		res = append(res, qdisc)
175
+	}
176
+
177
+	return res, nil
178
+}
179
+
180
+func parsePfifoFastData(qdisc Qdisc, value []byte) error {
181
+	pfifo := qdisc.(*PfifoFast)
182
+	tcmap := nl.DeserializeTcPrioMap(value)
183
+	pfifo.PriorityMap = tcmap.Priomap
184
+	pfifo.Bands = uint8(tcmap.Bands)
185
+	return nil
186
+}
187
+
188
+func parsePrioData(qdisc Qdisc, value []byte) error {
189
+	prio := qdisc.(*Prio)
190
+	tcmap := nl.DeserializeTcPrioMap(value)
191
+	prio.PriorityMap = tcmap.Priomap
192
+	prio.Bands = uint8(tcmap.Bands)
193
+	return nil
194
+}
195
+
196
+func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
197
+	native = nl.NativeEndian()
198
+	htb := qdisc.(*Htb)
199
+	for _, datum := range data {
200
+		switch datum.Attr.Type {
201
+		case nl.TCA_HTB_INIT:
202
+			opt := nl.DeserializeTcHtbGlob(datum.Value)
203
+			htb.Version = opt.Version
204
+			htb.Rate2Quantum = opt.Rate2Quantum
205
+			htb.Defcls = opt.Defcls
206
+			htb.Debug = opt.Debug
207
+			htb.DirectPkts = opt.DirectPkts
208
+		case nl.TCA_HTB_DIRECT_QLEN:
209
+			// TODO
210
+			//htb.DirectQlen = native.uint32(datum.Value)
211
+		}
212
+	}
213
+	return nil
214
+}
215
+func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
216
+	native = nl.NativeEndian()
217
+	tbf := qdisc.(*Tbf)
218
+	for _, datum := range data {
219
+		switch datum.Attr.Type {
220
+		case nl.TCA_TBF_PARMS:
221
+			opt := nl.DeserializeTcTbfQopt(datum.Value)
222
+			tbf.Rate = uint64(opt.Rate.Rate)
223
+			tbf.Limit = opt.Limit
224
+			tbf.Buffer = opt.Buffer
225
+		case nl.TCA_TBF_RATE64:
226
+			tbf.Rate = native.Uint64(datum.Value[0:4])
227
+		}
228
+	}
229
+	return nil
230
+}
231
+
232
+const (
233
+	TIME_UNITS_PER_SEC = 1000000
234
+)
235
+
236
+var (
237
+	tickInUsec  float64 = 0.0
238
+	clockFactor float64 = 0.0
239
+	hz          float64 = 0.0
240
+)
241
+
242
+func initClock() {
243
+	data, err := ioutil.ReadFile("/proc/net/psched")
244
+	if err != nil {
245
+		return
246
+	}
247
+	parts := strings.Split(strings.TrimSpace(string(data)), " ")
248
+	if len(parts) < 3 {
249
+		return
250
+	}
251
+	var vals [3]uint64
252
+	for i := range vals {
253
+		val, err := strconv.ParseUint(parts[i], 16, 32)
254
+		if err != nil {
255
+			return
256
+		}
257
+		vals[i] = val
258
+	}
259
+	// compatibility
260
+	if vals[2] == 1000000000 {
261
+		vals[0] = vals[1]
262
+	}
263
+	clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
264
+	tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
265
+	hz = float64(vals[0])
266
+}
267
+
268
+func TickInUsec() float64 {
269
+	if tickInUsec == 0.0 {
270
+		initClock()
271
+	}
272
+	return tickInUsec
273
+}
274
+
275
+func ClockFactor() float64 {
276
+	if clockFactor == 0.0 {
277
+		initClock()
278
+	}
279
+	return clockFactor
280
+}
281
+
282
+func Hz() float64 {
283
+	if hz == 0.0 {
284
+		initClock()
285
+	}
286
+	return hz
287
+}
288
+
289
+func time2Tick(time uint32) uint32 {
290
+	return uint32(float64(time) * TickInUsec())
291
+}
292
+
293
+func tick2Time(tick uint32) uint32 {
294
+	return uint32(float64(tick) / TickInUsec())
295
+}
296
+
297
+func time2Ktime(time uint32) uint32 {
298
+	return uint32(float64(time) * ClockFactor())
299
+}
300
+
301
+func ktime2Time(ktime uint32) uint32 {
302
+	return uint32(float64(ktime) / ClockFactor())
303
+}
304
+
305
+func burst(rate uint64, buffer uint32) uint32 {
306
+	return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
307
+}
308
+
309
+func latency(rate uint64, limit, buffer uint32) float64 {
310
+	return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
311
+}
312
+
313
+func Xmittime(rate uint64, size uint32) float64 {
314
+	return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
315
+}
... ...
@@ -17,6 +17,13 @@ const (
17 17
 	SCOPE_NOWHERE  Scope = syscall.RT_SCOPE_NOWHERE
18 18
 )
19 19
 
20
+type NextHopFlag int
21
+
22
+const (
23
+	FLAG_ONLINK    NextHopFlag = syscall.RTNH_F_ONLINK
24
+	FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
25
+)
26
+
20 27
 // Route represents a netlink route. A route is associated with a link,
21 28
 // has a destination network, an optional source ip, and optional
22 29
 // gateway. Advanced route parameters and non-main routing tables are
... ...
@@ -27,9 +34,44 @@ type Route struct {
27 27
 	Dst       *net.IPNet
28 28
 	Src       net.IP
29 29
 	Gw        net.IP
30
+	Flags     int
30 31
 }
31 32
 
32 33
 func (r Route) String() string {
33
-	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst,
34
-		r.Src, r.Gw)
34
+	return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s}", r.LinkIndex, r.Dst,
35
+		r.Src, r.Gw, r.ListFlags())
36
+}
37
+
38
+func (r *Route) SetFlag(flag NextHopFlag) {
39
+	r.Flags |= int(flag)
40
+}
41
+
42
+func (r *Route) ClearFlag(flag NextHopFlag) {
43
+	r.Flags &^= int(flag)
44
+}
45
+
46
+type flagString struct {
47
+	f NextHopFlag
48
+	s string
49
+}
50
+
51
+var testFlags = []flagString{
52
+	flagString{f: FLAG_ONLINK, s: "onlink"},
53
+	flagString{f: FLAG_PERVASIVE, s: "pervasive"},
54
+}
55
+
56
+func (r *Route) ListFlags() []string {
57
+	var flags []string
58
+	for _, tf := range testFlags {
59
+		if r.Flags&int(tf.f) != 0 {
60
+			flags = append(flags, tf.s)
61
+		}
62
+	}
63
+	return flags
64
+}
65
+
66
+// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
67
+type RouteUpdate struct {
68
+	Type uint16
69
+	Route
35 70
 }
... ...
@@ -17,7 +17,7 @@ func RouteAdd(route *Route) error {
17 17
 	return routeHandle(route, req, nl.NewRtMsg())
18 18
 }
19 19
 
20
-// RouteAdd will delete a route from the system.
20
+// RouteDel will delete a route from the system.
21 21
 // Equivalent to: `ip route del $route`
22 22
 func RouteDel(route *Route) error {
23 23
 	req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
... ...
@@ -30,6 +30,7 @@ func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
30 30
 	}
31 31
 
32 32
 	msg.Scope = uint8(route.Scope)
33
+	msg.Flags = uint32(route.Flags)
33 34
 	family := -1
34 35
 	var rtAttrs []*nl.RtAttr
35 36
 
... ...
@@ -117,7 +118,6 @@ func RouteList(link Link, family int) ([]Route, error) {
117 117
 		index = base.Index
118 118
 	}
119 119
 
120
-	native := nl.NativeEndian()
121 120
 	var res []Route
122 121
 	for _, m := range msgs {
123 122
 		msg := nl.DeserializeRtMsg(m)
... ...
@@ -132,31 +132,14 @@ func RouteList(link Link, family int) ([]Route, error) {
132 132
 			continue
133 133
 		}
134 134
 
135
-		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
135
+		route, err := deserializeRoute(m)
136 136
 		if err != nil {
137 137
 			return nil, err
138 138
 		}
139 139
 
140
-		route := Route{Scope: Scope(msg.Scope)}
141
-		for _, attr := range attrs {
142
-			switch attr.Attr.Type {
143
-			case syscall.RTA_GATEWAY:
144
-				route.Gw = net.IP(attr.Value)
145
-			case syscall.RTA_PREFSRC:
146
-				route.Src = net.IP(attr.Value)
147
-			case syscall.RTA_DST:
148
-				route.Dst = &net.IPNet{
149
-					IP:   attr.Value,
150
-					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
151
-				}
152
-			case syscall.RTA_OIF:
153
-				routeIndex := int(native.Uint32(attr.Value[0:4]))
154
-				if link != nil && routeIndex != index {
155
-					// Ignore routes from other interfaces
156
-					continue
157
-				}
158
-				route.LinkIndex = routeIndex
159
-			}
140
+		if link != nil && route.LinkIndex != index {
141
+			// Ignore routes from other interfaces
142
+			continue
160 143
 		}
161 144
 		res = append(res, route)
162 145
 	}
... ...
@@ -164,6 +147,37 @@ func RouteList(link Link, family int) ([]Route, error) {
164 164
 	return res, nil
165 165
 }
166 166
 
167
+// deserializeRoute decodes a binary netlink message into a Route struct
168
+func deserializeRoute(m []byte) (Route, error) {
169
+	route := Route{}
170
+	msg := nl.DeserializeRtMsg(m)
171
+	attrs, err := nl.ParseRouteAttr(m[msg.Len():])
172
+	if err != nil {
173
+		return route, err
174
+	}
175
+	route.Scope = Scope(msg.Scope)
176
+	route.Flags = int(msg.Flags)
177
+
178
+	native := nl.NativeEndian()
179
+	for _, attr := range attrs {
180
+		switch attr.Attr.Type {
181
+		case syscall.RTA_GATEWAY:
182
+			route.Gw = net.IP(attr.Value)
183
+		case syscall.RTA_PREFSRC:
184
+			route.Src = net.IP(attr.Value)
185
+		case syscall.RTA_DST:
186
+			route.Dst = &net.IPNet{
187
+				IP:   attr.Value,
188
+				Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
189
+			}
190
+		case syscall.RTA_OIF:
191
+			routeIndex := int(native.Uint32(attr.Value[0:4]))
192
+			route.LinkIndex = routeIndex
193
+		}
194
+	}
195
+	return route, nil
196
+}
197
+
167 198
 // RouteGet gets a route to a specific destination from the host system.
168 199
 // Equivalent to: 'ip route get'.
169 200
 func RouteGet(destination net.IP) ([]Route, error) {
... ...
@@ -191,34 +205,47 @@ func RouteGet(destination net.IP) ([]Route, error) {
191 191
 		return nil, err
192 192
 	}
193 193
 
194
-	native := nl.NativeEndian()
195 194
 	var res []Route
196 195
 	for _, m := range msgs {
197
-		msg := nl.DeserializeRtMsg(m)
198
-		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
196
+		route, err := deserializeRoute(m)
199 197
 		if err != nil {
200 198
 			return nil, err
201 199
 		}
200
+		res = append(res, route)
201
+	}
202
+	return res, nil
203
+
204
+}
202 205
 
203
-		route := Route{}
204
-		for _, attr := range attrs {
205
-			switch attr.Attr.Type {
206
-			case syscall.RTA_GATEWAY:
207
-				route.Gw = net.IP(attr.Value)
208
-			case syscall.RTA_PREFSRC:
209
-				route.Src = net.IP(attr.Value)
210
-			case syscall.RTA_DST:
211
-				route.Dst = &net.IPNet{
212
-					IP:   attr.Value,
213
-					Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
206
+// RouteSubscribe takes a chan down which notifications will be sent
207
+// when routes are added or deleted. Close the 'done' chan to stop subscription.
208
+func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
209
+	s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
210
+	if err != nil {
211
+		return err
212
+	}
213
+	if done != nil {
214
+		go func() {
215
+			<-done
216
+			s.Close()
217
+		}()
218
+	}
219
+	go func() {
220
+		defer close(ch)
221
+		for {
222
+			msgs, err := s.Receive()
223
+			if err != nil {
224
+				return
225
+			}
226
+			for _, m := range msgs {
227
+				route, err := deserializeRoute(m.Data)
228
+				if err != nil {
229
+					return
214 230
 				}
215
-			case syscall.RTA_OIF:
216
-				routeIndex := int(native.Uint32(attr.Value[0:4]))
217
-				route.LinkIndex = routeIndex
231
+				ch <- RouteUpdate{Type: m.Header.Type, Route: route}
218 232
 			}
219 233
 		}
220
-		res = append(res, route)
221
-	}
222
-	return res, nil
234
+	}()
223 235
 
236
+	return nil
224 237
 }