Browse code

Merge pull request #8293 from crosbymichael/update-libcontainer-sep8

Update libcontainer to c744f6470e37be5ce1f1ae09b842c15c1bee120d

Tianon Gravi authored on 2014/09/30 09:09:25
Showing 16 changed files
... ...
@@ -30,6 +30,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
30 30
 	container.Cgroups.Name = c.ID
31 31
 	container.Cgroups.AllowedDevices = c.AllowedDevices
32 32
 	container.MountConfig.DeviceNodes = c.AutoCreatedDevices
33
+	container.RootFs = c.Rootfs
33 34
 
34 35
 	// check to see if we are running in ramdisk to disable pivot root
35 36
 	container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
... ...
@@ -100,7 +100,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
100 100
 		return -1, err
101 101
 	}
102 102
 
103
-	return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, c.Rootfs, dataPath, args, func(container *libcontainer.Config, console, rootfs, dataPath, init string, child *os.File, args []string) *exec.Cmd {
103
+	return namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
104 104
 		c.ProcessConfig.Path = d.initPath
105 105
 		c.ProcessConfig.Args = append([]string{
106 106
 			DriverName,
... ...
@@ -117,7 +117,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
117 117
 		c.ProcessConfig.ExtraFiles = []*os.File{child}
118 118
 
119 119
 		c.ProcessConfig.Env = container.Env
120
-		c.ProcessConfig.Dir = c.Rootfs
120
+		c.ProcessConfig.Dir = container.RootFs
121 121
 
122 122
 		return &c.ProcessConfig.Cmd
123 123
 	}, func() {
... ...
@@ -64,7 +64,7 @@ if [ "$1" = '--go' ]; then
64 64
 	mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar
65 65
 fi
66 66
 
67
-clone git github.com/docker/libcontainer 185328a42654f6dc9a41814e57882f69d65f6ab7
67
+clone git github.com/docker/libcontainer c744f6470e37be5ce1f1ae09b842c15c1bee120d
68 68
 # see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file)
69 69
 rm -rf src/github.com/docker/libcontainer/vendor
70 70
 eval "$(grep '^clone ' src/github.com/docker/libcontainer/update-vendor.sh | grep -v 'github.com/codegangsta/cli')"
... ...
@@ -40,7 +40,7 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
40 40
 		return err
41 41
 	}
42 42
 
43
-	totalUsage, err := getCgroupParamInt(path, "cpuacct.usage")
43
+	totalUsage, err := getCgroupParamUint(path, "cpuacct.usage")
44 44
 	if err != nil {
45 45
 		return err
46 46
 	}
... ...
@@ -2,6 +2,7 @@ package fs
2 2
 
3 3
 import (
4 4
 	"bufio"
5
+	"fmt"
5 6
 	"os"
6 7
 	"path/filepath"
7 8
 	"strconv"
... ...
@@ -66,25 +67,25 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
66 66
 	for sc.Scan() {
67 67
 		t, v, err := getCgroupParamKeyValue(sc.Text())
68 68
 		if err != nil {
69
-			return err
69
+			return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
70 70
 		}
71 71
 		stats.MemoryStats.Stats[t] = v
72 72
 	}
73 73
 
74 74
 	// Set memory usage and max historical usage.
75
-	value, err := getCgroupParamInt(path, "memory.usage_in_bytes")
75
+	value, err := getCgroupParamUint(path, "memory.usage_in_bytes")
76 76
 	if err != nil {
77
-		return err
77
+		return fmt.Errorf("failed to parse memory.usage_in_bytes - %v", err)
78 78
 	}
79 79
 	stats.MemoryStats.Usage = value
80
-	value, err = getCgroupParamInt(path, "memory.max_usage_in_bytes")
80
+	value, err = getCgroupParamUint(path, "memory.max_usage_in_bytes")
81 81
 	if err != nil {
82
-		return err
82
+		return fmt.Errorf("failed to parse memory.max_usage_in_bytes - %v", err)
83 83
 	}
84 84
 	stats.MemoryStats.MaxUsage = value
85
-	value, err = getCgroupParamInt(path, "memory.failcnt")
85
+	value, err = getCgroupParamUint(path, "memory.failcnt")
86 86
 	if err != nil {
87
-		return err
87
+		return fmt.Errorf("failed to parse memory.failcnt - %v", err)
88 88
 	}
89 89
 	stats.MemoryStats.Failcnt = value
90 90
 
... ...
@@ -14,27 +14,49 @@ var (
14 14
 	ErrNotValidFormat = errors.New("line is not a valid key value format")
15 15
 )
16 16
 
17
+// Saturates negative values at zero and returns a uint64.
18
+// Due to kernel bugs, some of the memory cgroup stats can be negative.
19
+func parseUint(s string, base, bitSize int) (uint64, error) {
20
+	value, err := strconv.ParseUint(s, base, bitSize)
21
+	if err != nil {
22
+		intValue, intErr := strconv.ParseInt(s, base, bitSize)
23
+		// 1. Handle negative values greater than MinInt64 (and)
24
+		// 2. Handle negative values lesser than MinInt64
25
+		if intErr == nil && intValue < 0 {
26
+			return 0, nil
27
+		} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
28
+			return 0, nil
29
+		}
30
+
31
+		return value, err
32
+	}
33
+
34
+	return value, nil
35
+}
36
+
17 37
 // Parses a cgroup param and returns as name, value
18 38
 //  i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
19 39
 func getCgroupParamKeyValue(t string) (string, uint64, error) {
20 40
 	parts := strings.Fields(t)
21 41
 	switch len(parts) {
22 42
 	case 2:
23
-		value, err := strconv.ParseUint(parts[1], 10, 64)
43
+		value, err := parseUint(parts[1], 10, 64)
24 44
 		if err != nil {
25
-			return "", 0, fmt.Errorf("Unable to convert param value to uint64: %s", err)
45
+			return "", 0, fmt.Errorf("Unable to convert param value (%q) to uint64: %v", parts[1], err)
26 46
 		}
47
+
27 48
 		return parts[0], value, nil
28 49
 	default:
29 50
 		return "", 0, ErrNotValidFormat
30 51
 	}
31 52
 }
32 53
 
33
-// Gets a single int64 value from the specified cgroup file.
34
-func getCgroupParamInt(cgroupPath, cgroupFile string) (uint64, error) {
54
+// Gets a single uint64 value from the specified cgroup file.
55
+func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
35 56
 	contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
36 57
 	if err != nil {
37 58
 		return 0, err
38 59
 	}
39
-	return strconv.ParseUint(strings.TrimSpace(string(contents)), 10, 64)
60
+
61
+	return parseUint(strings.TrimSpace(string(contents)), 10, 64)
40 62
 }
... ...
@@ -2,8 +2,10 @@ package fs
2 2
 
3 3
 import (
4 4
 	"io/ioutil"
5
+	"math"
5 6
 	"os"
6 7
 	"path/filepath"
8
+	"strconv"
7 9
 	"testing"
8 10
 )
9 11
 
... ...
@@ -27,7 +29,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
27 27
 	if err != nil {
28 28
 		t.Fatal(err)
29 29
 	}
30
-	value, err := getCgroupParamInt(tempDir, cgroupFile)
30
+	value, err := getCgroupParamUint(tempDir, cgroupFile)
31 31
 	if err != nil {
32 32
 		t.Fatal(err)
33 33
 	} else if value != floatValue {
... ...
@@ -39,19 +41,44 @@ func TestGetCgroupParamsInt(t *testing.T) {
39 39
 	if err != nil {
40 40
 		t.Fatal(err)
41 41
 	}
42
-	value, err = getCgroupParamInt(tempDir, cgroupFile)
42
+	value, err = getCgroupParamUint(tempDir, cgroupFile)
43 43
 	if err != nil {
44 44
 		t.Fatal(err)
45 45
 	} else if value != floatValue {
46 46
 		t.Fatalf("Expected %d to equal %f", value, floatValue)
47 47
 	}
48 48
 
49
+	// Success with negative values
50
+	err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
51
+	if err != nil {
52
+		t.Fatal(err)
53
+	}
54
+	value, err = getCgroupParamUint(tempDir, cgroupFile)
55
+	if err != nil {
56
+		t.Fatal(err)
57
+	} else if value != 0 {
58
+		t.Fatalf("Expected %d to equal %f", value, 0)
59
+	}
60
+
61
+	// Success with negative values lesser than min int64
62
+	s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
63
+	err = ioutil.WriteFile(tempFile, []byte(s), 0755)
64
+	if err != nil {
65
+		t.Fatal(err)
66
+	}
67
+	value, err = getCgroupParamUint(tempDir, cgroupFile)
68
+	if err != nil {
69
+		t.Fatal(err)
70
+	} else if value != 0 {
71
+		t.Fatalf("Expected %d to equal %f", value, 0)
72
+	}
73
+
49 74
 	// Not a float.
50 75
 	err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
51 76
 	if err != nil {
52 77
 		t.Fatal(err)
53 78
 	}
54
-	_, err = getCgroupParamInt(tempDir, cgroupFile)
79
+	_, err = getCgroupParamUint(tempDir, cgroupFile)
55 80
 	if err == nil {
56 81
 		t.Fatal("Expecting error, got none")
57 82
 	}
... ...
@@ -61,7 +88,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
61 61
 	if err != nil {
62 62
 		t.Fatal(err)
63 63
 	}
64
-	_, err = getCgroupParamInt(tempDir, cgroupFile)
64
+	_, err = getCgroupParamUint(tempDir, cgroupFile)
65 65
 	if err == nil {
66 66
 		t.Fatal("Expecting error, got none")
67 67
 	}
... ...
@@ -15,6 +15,9 @@ type Config struct {
15 15
 	// Mount specific options.
16 16
 	MountConfig *MountConfig `json:"mount_config,omitempty"`
17 17
 
18
+	// Pathname to container's root filesystem
19
+	RootFs string `json:"root_fs,omitempty"`
20
+
18 21
 	// Hostname optionally sets the container's hostname if provided
19 22
 	Hostname string `json:"hostname,omitempty"`
20 23
 
... ...
@@ -7,4 +7,4 @@ import (
7 7
 	"github.com/docker/libcontainer"
8 8
 )
9 9
 
10
-type CreateCommand func(container *libcontainer.Config, console, rootfs, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd
10
+type CreateCommand func(container *libcontainer.Config, console, dataPath, init string, childPipe *os.File, args []string) *exec.Cmd
... ...
@@ -21,7 +21,7 @@ import (
21 21
 // Move this to libcontainer package.
22 22
 // Exec performs setup outside of a namespace so that a container can be
23 23
 // executed.  Exec is a high level function for working with container namespaces.
24
-func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console string, rootfs, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
24
+func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
25 25
 	var (
26 26
 		err error
27 27
 	)
... ...
@@ -34,7 +34,7 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
34 34
 	}
35 35
 	defer syncPipe.Close()
36 36
 
37
-	command := createCommand(container, console, rootfs, dataPath, os.Args[0], syncPipe.Child(), args)
37
+	command := createCommand(container, console, dataPath, os.Args[0], syncPipe.Child(), args)
38 38
 	// Note: these are only used in non-tty mode
39 39
 	// if there is a tty for the container it will be opened within the namespace and the
40 40
 	// fds will be duped to stdin, stdout, and stderr
... ...
@@ -121,7 +121,7 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
121 121
 // root: the path to the container json file and information
122 122
 // pipe: sync pipe to synchronize the parent and child processes
123 123
 // args: the arguments to pass to the container to run as the user's program
124
-func DefaultCreateCommand(container *libcontainer.Config, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
124
+func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
125 125
 	// get our binary name from arg0 so we can always reexec ourself
126 126
 	env := []string{
127 127
 		"console=" + console,
... ...
@@ -141,7 +141,7 @@ func DefaultCreateCommand(container *libcontainer.Config, console, rootfs, dataP
141 141
 
142 142
 	command := exec.Command(init, append([]string{"init", "--"}, args...)...)
143 143
 	// make sure the process is executed inside the context of the rootfs
144
-	command.Dir = rootfs
144
+	command.Dir = container.RootFs
145 145
 	command.Env = append(os.Environ(), env...)
146 146
 
147 147
 	if command.SysProcAttr == nil {
... ...
@@ -12,15 +12,25 @@ import (
12 12
 )
13 13
 
14 14
 const (
15
-	IFNAMSIZ       = 16
16
-	DEFAULT_CHANGE = 0xFFFFFFFF
17
-	IFLA_INFO_KIND = 1
18
-	IFLA_INFO_DATA = 2
19
-	VETH_INFO_PEER = 1
20
-	IFLA_NET_NS_FD = 28
21
-	SIOC_BRADDBR   = 0x89a0
22
-	SIOC_BRDELBR   = 0x89a1
23
-	SIOC_BRADDIF   = 0x89a2
15
+	IFNAMSIZ          = 16
16
+	DEFAULT_CHANGE    = 0xFFFFFFFF
17
+	IFLA_INFO_KIND    = 1
18
+	IFLA_INFO_DATA    = 2
19
+	VETH_INFO_PEER    = 1
20
+	IFLA_MACVLAN_MODE = 1
21
+	IFLA_VLAN_ID      = 1
22
+	IFLA_NET_NS_FD    = 28
23
+	IFLA_ADDRESS      = 1
24
+	SIOC_BRADDBR      = 0x89a0
25
+	SIOC_BRDELBR      = 0x89a1
26
+	SIOC_BRADDIF      = 0x89a2
27
+)
28
+
29
+const (
30
+	MACVLAN_MODE_PRIVATE = 1 << iota
31
+	MACVLAN_MODE_VEPA
32
+	MACVLAN_MODE_BRIDGE
33
+	MACVLAN_MODE_PASSTHRU
24 34
 )
25 35
 
26 36
 var nextSeqNr uint32
... ...
@@ -375,10 +385,19 @@ outer:
375 375
 	return nil
376 376
 }
377 377
 
378
-// Add a new route table entry.
379
-func AddRoute(destination, source, gateway, device string) error {
380
-	if destination == "" && source == "" && gateway == "" {
381
-		return fmt.Errorf("one of destination, source or gateway must not be blank")
378
+func zeroTerminated(s string) []byte {
379
+	return []byte(s + "\000")
380
+}
381
+
382
+func nonZeroTerminated(s string) []byte {
383
+	return []byte(s)
384
+}
385
+
386
+// Add a new network link of a specified type.
387
+// This is identical to running: ip link add $name type $linkType
388
+func NetworkLinkAdd(name string, linkType string) error {
389
+	if name == "" || linkType == "" {
390
+		return fmt.Errorf("Neither link name nor link type can be empty!")
382 391
 	}
383 392
 
384 393
 	s, err := getNetlinkSocket()
... ...
@@ -387,101 +406,58 @@ func AddRoute(destination, source, gateway, device string) error {
387 387
 	}
388 388
 	defer s.Close()
389 389
 
390
-	wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
391
-	msg := newRtMsg()
392
-	currentFamily := -1
393
-	var rtAttrs []*RtAttr
390
+	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
394 391
 
395
-	if destination != "" {
396
-		destIP, destNet, err := net.ParseCIDR(destination)
397
-		if err != nil {
398
-			return fmt.Errorf("destination CIDR %s couldn't be parsed", destination)
399
-		}
400
-		destFamily := getIpFamily(destIP)
401
-		currentFamily = destFamily
402
-		destLen, bits := destNet.Mask.Size()
403
-		if destLen == 0 && bits == 0 {
404
-			return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination)
405
-		}
406
-		msg.Family = uint8(destFamily)
407
-		msg.Dst_len = uint8(destLen)
408
-		var destData []byte
409
-		if destFamily == syscall.AF_INET {
410
-			destData = destIP.To4()
411
-		} else {
412
-			destData = destIP.To16()
413
-		}
414
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData))
415
-	}
392
+	msg := newIfInfomsg(syscall.AF_UNSPEC)
393
+	wb.AddData(msg)
416 394
 
417
-	if source != "" {
418
-		srcIP, srcNet, err := net.ParseCIDR(source)
419
-		if err != nil {
420
-			return fmt.Errorf("source CIDR %s couldn't be parsed", source)
421
-		}
422
-		srcFamily := getIpFamily(srcIP)
423
-		if currentFamily != -1 && currentFamily != srcFamily {
424
-			return fmt.Errorf("source and destination ip were not the same IP family")
425
-		}
426
-		currentFamily = srcFamily
427
-		srcLen, bits := srcNet.Mask.Size()
428
-		if srcLen == 0 && bits == 0 {
429
-			return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source)
430
-		}
431
-		msg.Family = uint8(srcFamily)
432
-		msg.Src_len = uint8(srcLen)
433
-		var srcData []byte
434
-		if srcFamily == syscall.AF_INET {
435
-			srcData = srcIP.To4()
436
-		} else {
437
-			srcData = srcIP.To16()
438
-		}
439
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData))
395
+	linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil)
396
+	newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType))
397
+	wb.AddData(linkInfo)
398
+
399
+	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name))
400
+	wb.AddData(nameData)
401
+
402
+	if err := s.Send(wb); err != nil {
403
+		return err
440 404
 	}
441 405
 
442
-	if gateway != "" {
443
-		gwIP := net.ParseIP(gateway)
444
-		if gwIP == nil {
445
-			return fmt.Errorf("gateway IP %s couldn't be parsed", gateway)
446
-		}
447
-		gwFamily := getIpFamily(gwIP)
448
-		if currentFamily != -1 && currentFamily != gwFamily {
449
-			return fmt.Errorf("gateway, source, and destination ip were not the same IP family")
450
-		}
451
-		msg.Family = uint8(gwFamily)
452
-		var gwData []byte
453
-		if gwFamily == syscall.AF_INET {
454
-			gwData = gwIP.To4()
455
-		} else {
456
-			gwData = gwIP.To16()
457
-		}
458
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData))
406
+	return s.HandleAck(wb.Seq)
407
+}
408
+
409
+// Delete a network link.
410
+// This is identical to running: ip link del $name
411
+func NetworkLinkDel(name string) error {
412
+	if name == "" {
413
+		return fmt.Errorf("Network link name can not be empty!")
459 414
 	}
460 415
 
461
-	wb.AddData(msg)
462
-	for _, attr := range rtAttrs {
463
-		wb.AddData(attr)
416
+	s, err := getNetlinkSocket()
417
+	if err != nil {
418
+		return err
464 419
 	}
420
+	defer s.Close()
465 421
 
466
-	iface, err := net.InterfaceByName(device)
422
+	iface, err := net.InterfaceByName(name)
467 423
 	if err != nil {
468 424
 		return err
469 425
 	}
470
-	wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index)))
426
+
427
+	wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
428
+
429
+	msg := newIfInfomsg(syscall.AF_UNSPEC)
430
+	msg.Index = int32(iface.Index)
431
+	wb.AddData(msg)
471 432
 
472 433
 	if err := s.Send(wb); err != nil {
473 434
 		return err
474 435
 	}
475
-	return s.HandleAck(wb.Seq)
476
-}
477 436
 
478
-// Add a new default gateway. Identical to:
479
-// ip route add default via $ip
480
-func AddDefaultGw(ip, device string) error {
481
-	return AddRoute("", "", ip, device)
437
+	return s.HandleAck(wb.Seq)
482 438
 }
483 439
 
484
-// Bring up a particular network interface
440
+// Bring up a particular network interface.
441
+// This is identical to running: ip link set dev $name up
485 442
 func NetworkLinkUp(iface *net.Interface) error {
486 443
 	s, err := getNetlinkSocket()
487 444
 	if err != nil {
... ...
@@ -492,9 +468,9 @@ func NetworkLinkUp(iface *net.Interface) error {
492 492
 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
493 493
 
494 494
 	msg := newIfInfomsg(syscall.AF_UNSPEC)
495
-	msg.Change = syscall.IFF_UP
496
-	msg.Flags = syscall.IFF_UP
497 495
 	msg.Index = int32(iface.Index)
496
+	msg.Flags = syscall.IFF_UP
497
+	msg.Change = syscall.IFF_UP
498 498
 	wb.AddData(msg)
499 499
 
500 500
 	if err := s.Send(wb); err != nil {
... ...
@@ -504,6 +480,8 @@ func NetworkLinkUp(iface *net.Interface) error {
504 504
 	return s.HandleAck(wb.Seq)
505 505
 }
506 506
 
507
+// Bring down a particular network interface.
508
+// This is identical to running: ip link set $name down
507 509
 func NetworkLinkDown(iface *net.Interface) error {
508 510
 	s, err := getNetlinkSocket()
509 511
 	if err != nil {
... ...
@@ -514,9 +492,9 @@ func NetworkLinkDown(iface *net.Interface) error {
514 514
 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
515 515
 
516 516
 	msg := newIfInfomsg(syscall.AF_UNSPEC)
517
-	msg.Change = syscall.IFF_UP
518
-	msg.Flags = 0 & ^syscall.IFF_UP
519 517
 	msg.Index = int32(iface.Index)
518
+	msg.Flags = 0 & ^syscall.IFF_UP
519
+	msg.Change = DEFAULT_CHANGE
520 520
 	wb.AddData(msg)
521 521
 
522 522
 	if err := s.Send(wb); err != nil {
... ...
@@ -526,22 +504,40 @@ func NetworkLinkDown(iface *net.Interface) error {
526 526
 	return s.HandleAck(wb.Seq)
527 527
 }
528 528
 
529
-func NetworkSetMTU(iface *net.Interface, mtu int) error {
529
+// Set the link layer address, i.e. the MAC address.
530
+// This is identical to running: ip link set dev $name address $macaddress
531
+func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
530 532
 	s, err := getNetlinkSocket()
531 533
 	if err != nil {
532 534
 		return err
533 535
 	}
534 536
 	defer s.Close()
535 537
 
538
+	hwaddr, err := net.ParseMAC(macaddr)
539
+	if err != nil {
540
+		return err
541
+	}
542
+
543
+	var (
544
+		MULTICAST byte = 0x1
545
+		LOCALOUI  byte = 0x2
546
+	)
547
+
548
+	if hwaddr[0]&0x1 == MULTICAST || hwaddr[0]&0x2 != LOCALOUI {
549
+		return fmt.Errorf("Incorrect Local MAC Address specified: %s", macaddr)
550
+	}
551
+
536 552
 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
537 553
 
538 554
 	msg := newIfInfomsg(syscall.AF_UNSPEC)
539
-	msg.Type = syscall.RTM_SETLINK
540
-	msg.Flags = syscall.NLM_F_REQUEST
541 555
 	msg.Index = int32(iface.Index)
542 556
 	msg.Change = DEFAULT_CHANGE
543 557
 	wb.AddData(msg)
544
-	wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu)))
558
+
559
+	macdata := make([]byte, 6)
560
+	copy(macdata, hwaddr)
561
+	data := newRtAttr(IFLA_ADDRESS, macdata)
562
+	wb.AddData(data)
545 563
 
546 564
 	if err := s.Send(wb); err != nil {
547 565
 		return err
... ...
@@ -549,8 +545,13 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
549 549
 	return s.HandleAck(wb.Seq)
550 550
 }
551 551
 
552
-// same as ip link set $name master $master
553
-func NetworkSetMaster(iface, master *net.Interface) error {
552
+// Set link Maximum Transmission Unit
553
+// This is identical to running: ip link set dev $name mtu $MTU
554
+// Bridges are troublesome here, see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=292088
555
+// https://bugzilla.redhat.com/show_bug.cgi?id=697021
556
+// There is a discussion about how to deal with ifcs joining bridge with MTU > 1500
557
+// Regular network interfaces do seem to work though!
558
+func NetworkSetMTU(iface *net.Interface, mtu int) error {
554 559
 	s, err := getNetlinkSocket()
555 560
 	if err != nil {
556 561
 		return err
... ...
@@ -565,16 +566,15 @@ func NetworkSetMaster(iface, master *net.Interface) error {
565 565
 	msg.Index = int32(iface.Index)
566 566
 	msg.Change = DEFAULT_CHANGE
567 567
 	wb.AddData(msg)
568
-	wb.AddData(uint32Attr(syscall.IFLA_MASTER, uint32(master.Index)))
568
+	wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu)))
569 569
 
570 570
 	if err := s.Send(wb); err != nil {
571 571
 		return err
572 572
 	}
573
-
574 573
 	return s.HandleAck(wb.Seq)
575 574
 }
576 575
 
577
-func NetworkSetNsPid(iface *net.Interface, nspid int) error {
576
+func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
578 577
 	s, err := getNetlinkSocket()
579 578
 	if err != nil {
580 579
 		return err
... ...
@@ -589,7 +589,41 @@ func NetworkSetNsPid(iface *net.Interface, nspid int) error {
589 589
 	msg.Index = int32(iface.Index)
590 590
 	msg.Change = DEFAULT_CHANGE
591 591
 	wb.AddData(msg)
592
-	wb.AddData(uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid)))
592
+	wb.AddData(rtattr)
593
+
594
+	if err := s.Send(wb); err != nil {
595
+		return err
596
+	}
597
+
598
+	return s.HandleAck(wb.Seq)
599
+}
600
+
601
+// Add an interface to bridge.
602
+// This is identical to running: ip link set $name master $master
603
+func NetworkSetMaster(iface, master *net.Interface) error {
604
+	data := uint32Attr(syscall.IFLA_MASTER, uint32(master.Index))
605
+	return networkMasterAction(iface, data)
606
+}
607
+
608
+// Remove an interface from the bridge
609
+// This is identical to running: ip link set $name nomaster
610
+func NetworkSetNoMaster(iface *net.Interface) error {
611
+	data := uint32Attr(syscall.IFLA_MASTER, 0)
612
+	return networkMasterAction(iface, data)
613
+}
614
+
615
+func networkSetNsAction(iface *net.Interface, rtattr *RtAttr) error {
616
+	s, err := getNetlinkSocket()
617
+	if err != nil {
618
+		return err
619
+	}
620
+	defer s.Close()
621
+
622
+	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
623
+	msg := newIfInfomsg(syscall.AF_UNSPEC)
624
+	msg.Index = int32(iface.Index)
625
+	wb.AddData(msg)
626
+	wb.AddData(rtattr)
593 627
 
594 628
 	if err := s.Send(wb); err != nil {
595 629
 		return err
... ...
@@ -598,7 +632,29 @@ func NetworkSetNsPid(iface *net.Interface, nspid int) error {
598 598
 	return s.HandleAck(wb.Seq)
599 599
 }
600 600
 
601
+// Move a particular network interface to a particular network namespace
602
+// specified by PID. This is identical to running: ip link set dev $name netns $pid
603
+func NetworkSetNsPid(iface *net.Interface, nspid int) error {
604
+	data := uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid))
605
+	return networkSetNsAction(iface, data)
606
+}
607
+
608
+// Move a particular network interface to a particular mounted
609
+// network namespace specified by file descriptor.
610
+// This is identical to running: ip link set dev $name netns $fd
601 611
 func NetworkSetNsFd(iface *net.Interface, fd int) error {
612
+	data := uint32Attr(IFLA_NET_NS_FD, uint32(fd))
613
+	return networkSetNsAction(iface, data)
614
+}
615
+
616
+// Rename a particular interface to a different name
617
+// !!! Note that you can't rename an active interface. You need to bring it down before renaming it.
618
+// This is identical to running: ip link set dev ${oldName} name ${newName}
619
+func NetworkChangeName(iface *net.Interface, newName string) error {
620
+	if len(newName) >= IFNAMSIZ {
621
+		return fmt.Errorf("Interface name %s too long", newName)
622
+	}
623
+
602 624
 	s, err := getNetlinkSocket()
603 625
 	if err != nil {
604 626
 		return err
... ...
@@ -608,12 +664,12 @@ func NetworkSetNsFd(iface *net.Interface, fd int) error {
608 608
 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
609 609
 
610 610
 	msg := newIfInfomsg(syscall.AF_UNSPEC)
611
-	msg.Type = syscall.RTM_SETLINK
612
-	msg.Flags = syscall.NLM_F_REQUEST
613 611
 	msg.Index = int32(iface.Index)
614 612
 	msg.Change = DEFAULT_CHANGE
615 613
 	wb.AddData(msg)
616
-	wb.AddData(uint32Attr(IFLA_NET_NS_FD, uint32(fd)))
614
+
615
+	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(newName))
616
+	wb.AddData(nameData)
617 617
 
618 618
 	if err := s.Send(wb); err != nil {
619 619
 		return err
... ...
@@ -622,127 +678,152 @@ func NetworkSetNsFd(iface *net.Interface, fd int) error {
622 622
 	return s.HandleAck(wb.Seq)
623 623
 }
624 624
 
625
-func networkLinkIpAction(action, flags int, ifa IfAddr) error {
625
+// Add a new VETH pair link on the host
626
+// This is identical to running: ip link add name $name type veth peer name $peername
627
+func NetworkCreateVethPair(name1, name2 string) error {
626 628
 	s, err := getNetlinkSocket()
627 629
 	if err != nil {
628 630
 		return err
629 631
 	}
630 632
 	defer s.Close()
631 633
 
632
-	family := getIpFamily(ifa.IP)
633
-
634
-	wb := newNetlinkRequest(action, flags)
634
+	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
635 635
 
636
-	msg := newIfAddrmsg(family)
637
-	msg.Index = uint32(ifa.Iface.Index)
638
-	prefixLen, _ := ifa.IPNet.Mask.Size()
639
-	msg.Prefixlen = uint8(prefixLen)
636
+	msg := newIfInfomsg(syscall.AF_UNSPEC)
640 637
 	wb.AddData(msg)
641 638
 
642
-	var ipData []byte
643
-	if family == syscall.AF_INET {
644
-		ipData = ifa.IP.To4()
645
-	} else {
646
-		ipData = ifa.IP.To16()
647
-	}
639
+	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1))
640
+	wb.AddData(nameData)
648 641
 
649
-	localData := newRtAttr(syscall.IFA_LOCAL, ipData)
650
-	wb.AddData(localData)
642
+	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
643
+	newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth"))
644
+	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
645
+	nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil)
651 646
 
652
-	addrData := newRtAttr(syscall.IFA_ADDRESS, ipData)
653
-	wb.AddData(addrData)
647
+	newIfInfomsgChild(nest3, syscall.AF_UNSPEC)
648
+	newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2))
649
+
650
+	wb.AddData(nest1)
654 651
 
655 652
 	if err := s.Send(wb); err != nil {
656 653
 		return err
657 654
 	}
658
-
659 655
 	return s.HandleAck(wb.Seq)
660 656
 }
661 657
 
662
-// Delete an IP address from an interface. This is identical to:
663
-// ip addr del $ip/$ipNet dev $iface
664
-func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
665
-	return networkLinkIpAction(
666
-		syscall.RTM_DELADDR,
667
-		syscall.NLM_F_ACK,
668
-		IfAddr{iface, ip, ipNet},
669
-	)
670
-}
658
+// Add a new VLAN interface with masterDev as its upper device
659
+// This is identical to running:
660
+// ip link add name $name link $masterdev type vlan id $id
661
+func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
662
+	s, err := getNetlinkSocket()
663
+	if err != nil {
664
+		return err
665
+	}
666
+	defer s.Close()
671 667
 
672
-// Add an Ip address to an interface. This is identical to:
673
-// ip addr add $ip/$ipNet dev $iface
674
-func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
675
-	return networkLinkIpAction(
676
-		syscall.RTM_NEWADDR,
677
-		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK,
678
-		IfAddr{iface, ip, ipNet},
679
-	)
680
-}
668
+	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
681 669
 
682
-func zeroTerminated(s string) []byte {
683
-	return []byte(s + "\000")
684
-}
670
+	masterDevIfc, err := net.InterfaceByName(masterDev)
671
+	if err != nil {
672
+		return err
673
+	}
685 674
 
686
-func nonZeroTerminated(s string) []byte {
687
-	return []byte(s)
688
-}
675
+	msg := newIfInfomsg(syscall.AF_UNSPEC)
676
+	wb.AddData(msg)
689 677
 
690
-// Add a new network link of a specified type. This is identical to
691
-// running: ip add link $name type $linkType
692
-func NetworkLinkAdd(name string, linkType string) error {
693
-	if name == "" || linkType == "" {
694
-		return fmt.Errorf("Neither link name nor link type can be empty!")
678
+	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
679
+	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("vlan"))
680
+
681
+	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
682
+	vlanData := make([]byte, 2)
683
+	native.PutUint16(vlanData, vlanId)
684
+	newRtAttrChild(nest2, IFLA_VLAN_ID, vlanData)
685
+	wb.AddData(nest1)
686
+
687
+	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
688
+	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(vlanDev)))
689
+
690
+	if err := s.Send(wb); err != nil {
691
+		return err
695 692
 	}
693
+	return s.HandleAck(wb.Seq)
694
+}
696 695
 
696
+// Add MAC VLAN network interface with masterDev as its upper device
697
+// This is identical to running:
698
+// ip link add name $name link $masterdev type macvlan mode $mode
699
+func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
697 700
 	s, err := getNetlinkSocket()
698 701
 	if err != nil {
699 702
 		return err
700 703
 	}
701 704
 	defer s.Close()
702 705
 
706
+	macVlan := map[string]uint32{
707
+		"private":  MACVLAN_MODE_PRIVATE,
708
+		"vepa":     MACVLAN_MODE_VEPA,
709
+		"bridge":   MACVLAN_MODE_BRIDGE,
710
+		"passthru": MACVLAN_MODE_PASSTHRU,
711
+	}
712
+
703 713
 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
704 714
 
715
+	masterDevIfc, err := net.InterfaceByName(masterDev)
716
+	if err != nil {
717
+		return err
718
+	}
719
+
705 720
 	msg := newIfInfomsg(syscall.AF_UNSPEC)
706 721
 	wb.AddData(msg)
707 722
 
708
-	linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil)
709
-	newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType))
710
-	wb.AddData(linkInfo)
723
+	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
724
+	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("macvlan"))
711 725
 
712
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name))
713
-	wb.AddData(nameData)
726
+	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
727
+	macVlanData := make([]byte, 4)
728
+	native.PutUint32(macVlanData, macVlan[mode])
729
+	newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
730
+	wb.AddData(nest1)
731
+
732
+	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
733
+	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(macVlanDev)))
714 734
 
715 735
 	if err := s.Send(wb); err != nil {
716 736
 		return err
717 737
 	}
718
-
719 738
 	return s.HandleAck(wb.Seq)
720 739
 }
721 740
 
722
-// Delete a network link. This is identical to
723
-// running: ip link del $name
724
-func NetworkLinkDel(name string) error {
725
-	if name == "" {
726
-		return fmt.Errorf("Network link name can not be empty!")
727
-	}
728
-
741
+func networkLinkIpAction(action, flags int, ifa IfAddr) error {
729 742
 	s, err := getNetlinkSocket()
730 743
 	if err != nil {
731 744
 		return err
732 745
 	}
733 746
 	defer s.Close()
734 747
 
735
-	iface, err := net.InterfaceByName(name)
736
-	if err != nil {
737
-		return err
738
-	}
748
+	family := getIpFamily(ifa.IP)
739 749
 
740
-	wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
750
+	wb := newNetlinkRequest(action, flags)
741 751
 
742
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
743
-	msg.Index = int32(iface.Index)
752
+	msg := newIfAddrmsg(family)
753
+	msg.Index = uint32(ifa.Iface.Index)
754
+	prefixLen, _ := ifa.IPNet.Mask.Size()
755
+	msg.Prefixlen = uint8(prefixLen)
744 756
 	wb.AddData(msg)
745 757
 
758
+	var ipData []byte
759
+	if family == syscall.AF_INET {
760
+		ipData = ifa.IP.To4()
761
+	} else {
762
+		ipData = ifa.IP.To16()
763
+	}
764
+
765
+	localData := newRtAttr(syscall.IFA_LOCAL, ipData)
766
+	wb.AddData(localData)
767
+
768
+	addrData := newRtAttr(syscall.IFA_ADDRESS, ipData)
769
+	wb.AddData(addrData)
770
+
746 771
 	if err := s.Send(wb); err != nil {
747 772
 		return err
748 773
 	}
... ...
@@ -750,6 +831,26 @@ func NetworkLinkDel(name string) error {
750 750
 	return s.HandleAck(wb.Seq)
751 751
 }
752 752
 
753
+// Delete an IP address from an interface. This is identical to:
754
+// ip addr del $ip/$ipNet dev $iface
755
+func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
756
+	return networkLinkIpAction(
757
+		syscall.RTM_DELADDR,
758
+		syscall.NLM_F_ACK,
759
+		IfAddr{iface, ip, ipNet},
760
+	)
761
+}
762
+
763
+// Add an Ip address to an interface. This is identical to:
764
+// ip addr add $ip/$ipNet dev $iface
765
+func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
766
+	return networkLinkIpAction(
767
+		syscall.RTM_NEWADDR,
768
+		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK,
769
+		IfAddr{iface, ip, ipNet},
770
+	)
771
+}
772
+
753 773
 // Returns an array of IPNet for all the currently routed subnets on ipv4
754 774
 // This is similar to the first column of "ip route" output
755 775
 func NetworkGetRoutes() ([]Route, error) {
... ...
@@ -842,69 +943,99 @@ outer:
842 842
 	return res, nil
843 843
 }
844 844
 
845
-func getIfSocket() (fd int, err error) {
846
-	for _, socket := range []int{
847
-		syscall.AF_INET,
848
-		syscall.AF_PACKET,
849
-		syscall.AF_INET6,
850
-	} {
851
-		if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil {
852
-			break
853
-		}
854
-	}
855
-	if err == nil {
856
-		return fd, nil
857
-	}
858
-	return -1, err
859
-}
860
-
861
-func NetworkChangeName(iface *net.Interface, newName string) error {
862
-	if len(newName) >= IFNAMSIZ {
863
-		return fmt.Errorf("Interface name %s too long", newName)
845
+// Add a new route table entry.
846
+func AddRoute(destination, source, gateway, device string) error {
847
+	if destination == "" && source == "" && gateway == "" {
848
+		return fmt.Errorf("one of destination, source or gateway must not be blank")
864 849
 	}
865 850
 
866
-	fd, err := getIfSocket()
851
+	s, err := getNetlinkSocket()
867 852
 	if err != nil {
868 853
 		return err
869 854
 	}
870
-	defer syscall.Close(fd)
855
+	defer s.Close()
871 856
 
872
-	data := [IFNAMSIZ * 2]byte{}
873
-	// the "-1"s here are very important for ensuring we get proper null
874
-	// termination of our new C strings
875
-	copy(data[:IFNAMSIZ-1], iface.Name)
876
-	copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName)
857
+	wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
858
+	msg := newRtMsg()
859
+	currentFamily := -1
860
+	var rtAttrs []*RtAttr
877 861
 
878
-	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
879
-		return errno
862
+	if destination != "" {
863
+		destIP, destNet, err := net.ParseCIDR(destination)
864
+		if err != nil {
865
+			return fmt.Errorf("destination CIDR %s couldn't be parsed", destination)
866
+		}
867
+		destFamily := getIpFamily(destIP)
868
+		currentFamily = destFamily
869
+		destLen, bits := destNet.Mask.Size()
870
+		if destLen == 0 && bits == 0 {
871
+			return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination)
872
+		}
873
+		msg.Family = uint8(destFamily)
874
+		msg.Dst_len = uint8(destLen)
875
+		var destData []byte
876
+		if destFamily == syscall.AF_INET {
877
+			destData = destIP.To4()
878
+		} else {
879
+			destData = destIP.To16()
880
+		}
881
+		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData))
880 882
 	}
881
-	return nil
882
-}
883 883
 
884
-func NetworkCreateVethPair(name1, name2 string) error {
885
-	s, err := getNetlinkSocket()
886
-	if err != nil {
887
-		return err
884
+	if source != "" {
885
+		srcIP, srcNet, err := net.ParseCIDR(source)
886
+		if err != nil {
887
+			return fmt.Errorf("source CIDR %s couldn't be parsed", source)
888
+		}
889
+		srcFamily := getIpFamily(srcIP)
890
+		if currentFamily != -1 && currentFamily != srcFamily {
891
+			return fmt.Errorf("source and destination ip were not the same IP family")
892
+		}
893
+		currentFamily = srcFamily
894
+		srcLen, bits := srcNet.Mask.Size()
895
+		if srcLen == 0 && bits == 0 {
896
+			return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source)
897
+		}
898
+		msg.Family = uint8(srcFamily)
899
+		msg.Src_len = uint8(srcLen)
900
+		var srcData []byte
901
+		if srcFamily == syscall.AF_INET {
902
+			srcData = srcIP.To4()
903
+		} else {
904
+			srcData = srcIP.To16()
905
+		}
906
+		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData))
888 907
 	}
889
-	defer s.Close()
890 908
 
891
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
909
+	if gateway != "" {
910
+		gwIP := net.ParseIP(gateway)
911
+		if gwIP == nil {
912
+			return fmt.Errorf("gateway IP %s couldn't be parsed", gateway)
913
+		}
914
+		gwFamily := getIpFamily(gwIP)
915
+		if currentFamily != -1 && currentFamily != gwFamily {
916
+			return fmt.Errorf("gateway, source, and destination ip were not the same IP family")
917
+		}
918
+		msg.Family = uint8(gwFamily)
919
+		var gwData []byte
920
+		if gwFamily == syscall.AF_INET {
921
+			gwData = gwIP.To4()
922
+		} else {
923
+			gwData = gwIP.To16()
924
+		}
925
+		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData))
926
+	}
892 927
 
893
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
894 928
 	wb.AddData(msg)
929
+	for _, attr := range rtAttrs {
930
+		wb.AddData(attr)
931
+	}
895 932
 
896
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1))
897
-	wb.AddData(nameData)
898
-
899
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
900
-	newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth"))
901
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
902
-	nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil)
903
-
904
-	newIfInfomsgChild(nest3, syscall.AF_UNSPEC)
905
-	newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2))
906
-
907
-	wb.AddData(nest1)
933
+	iface, err := net.InterfaceByName(device)
934
+	if err != nil {
935
+		return err
936
+	}
937
+	wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index)))
908 938
 
909 939
 	if err := s.Send(wb); err != nil {
910 940
 		return err
... ...
@@ -912,6 +1043,31 @@ func NetworkCreateVethPair(name1, name2 string) error {
912 912
 	return s.HandleAck(wb.Seq)
913 913
 }
914 914
 
915
+// Add a new default gateway. Identical to:
916
+// ip route add default via $ip
917
+func AddDefaultGw(ip, device string) error {
918
+	return AddRoute("", "", ip, device)
919
+}
920
+
921
+// THIS CODE DOES NOT COMMUNICATE WITH KERNEL VIA RTNETLINK INTERFACE
922
+// IT IS HERE FOR BACKWARDS COMPATIBILITY WITH OLDER LINUX KERNELS
923
+// WHICH SHIP WITH OLDER NOT ENTIRELY FUNCTIONAL VERSION OF NETLINK
924
+func getIfSocket() (fd int, err error) {
925
+	for _, socket := range []int{
926
+		syscall.AF_INET,
927
+		syscall.AF_PACKET,
928
+		syscall.AF_INET6,
929
+	} {
930
+		if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil {
931
+			break
932
+		}
933
+	}
934
+	if err == nil {
935
+		return fd, nil
936
+	}
937
+	return -1, err
938
+}
939
+
915 940
 // Create the actual bridge device.  This is more backward-compatible than
916 941
 // netlink.NetworkLinkAdd and works on RHEL 6.
917 942
 func CreateBridge(name string, setMacAddr bool) error {
... ...
@@ -933,7 +1089,7 @@ func CreateBridge(name string, setMacAddr bool) error {
933 933
 		return err
934 934
 	}
935 935
 	if setMacAddr {
936
-		return NetworkSetMacAddress(name, randMacAddr())
936
+		return SetMacAddress(name, randMacAddr())
937 937
 	}
938 938
 	return nil
939 939
 }
... ...
@@ -999,7 +1155,7 @@ func randMacAddr() string {
999 999
 	return hw.String()
1000 1000
 }
1001 1001
 
1002
-func NetworkSetMacAddress(name, addr string) error {
1002
+func SetMacAddress(name, addr string) error {
1003 1003
 	if len(name) >= IFNAMSIZ {
1004 1004
 		return fmt.Errorf("Interface name %s too long", name)
1005 1005
 	}
... ...
@@ -1028,3 +1184,26 @@ func NetworkSetMacAddress(name, addr string) error {
1028 1028
 	}
1029 1029
 	return nil
1030 1030
 }
1031
+
1032
+func ChangeName(iface *net.Interface, newName string) error {
1033
+	if len(newName) >= IFNAMSIZ {
1034
+		return fmt.Errorf("Interface name %s too long", newName)
1035
+	}
1036
+
1037
+	fd, err := getIfSocket()
1038
+	if err != nil {
1039
+		return err
1040
+	}
1041
+	defer syscall.Close(fd)
1042
+
1043
+	data := [IFNAMSIZ * 2]byte{}
1044
+	// the "-1"s here are very important for ensuring we get proper null
1045
+	// termination of our new C strings
1046
+	copy(data[:IFNAMSIZ-1], iface.Name)
1047
+	copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName)
1048
+
1049
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
1050
+		return errno
1051
+	}
1052
+	return nil
1053
+}
... ...
@@ -3,9 +3,50 @@ package netlink
3 3
 import (
4 4
 	"net"
5 5
 	"strings"
6
+	"syscall"
6 7
 	"testing"
7 8
 )
8 9
 
10
+type testLink struct {
11
+	name     string
12
+	linkType string
13
+}
14
+
15
+func addLink(t *testing.T, name string, linkType string) {
16
+	if err := NetworkLinkAdd(name, linkType); err != nil {
17
+		t.Fatalf("Unable to create %s link: %s", name, err)
18
+	}
19
+}
20
+
21
+func readLink(t *testing.T, name string) *net.Interface {
22
+	iface, err := net.InterfaceByName(name)
23
+	if err != nil {
24
+		t.Fatalf("Could not find %s interface: %s", name, err)
25
+	}
26
+
27
+	return iface
28
+}
29
+
30
+func deleteLink(t *testing.T, name string) {
31
+	if err := NetworkLinkDel(name); err != nil {
32
+		t.Fatalf("Unable to delete %s link: %s", name, err)
33
+	}
34
+}
35
+
36
+func upLink(t *testing.T, name string) {
37
+	iface := readLink(t, name)
38
+	if err := NetworkLinkUp(iface); err != nil {
39
+		t.Fatalf("Could not bring UP %#v interface: %s", iface, err)
40
+	}
41
+}
42
+
43
+func downLink(t *testing.T, name string) {
44
+	iface := readLink(t, name)
45
+	if err := NetworkLinkDown(iface); err != nil {
46
+		t.Fatalf("Could not bring DOWN %#v interface: %s", iface, err)
47
+	}
48
+}
49
+
9 50
 func ipAssigned(iface *net.Interface, ip net.IP) bool {
10 51
 	addrs, _ := iface.Addrs()
11 52
 
... ...
@@ -19,87 +60,224 @@ func ipAssigned(iface *net.Interface, ip net.IP) bool {
19 19
 	return false
20 20
 }
21 21
 
22
-func TestAddDelNetworkIp(t *testing.T) {
22
+func TestNetworkLinkAddDel(t *testing.T) {
23 23
 	if testing.Short() {
24 24
 		return
25 25
 	}
26 26
 
27
-	ifaceName := "lo"
28
-	ip := net.ParseIP("127.0.1.1")
29
-	mask := net.IPv4Mask(255, 255, 255, 255)
30
-	ipNet := &net.IPNet{IP: ip, Mask: mask}
27
+	testLinks := []testLink{
28
+		{"tstEth", "dummy"},
29
+		{"tstBr", "bridge"},
30
+	}
31 31
 
32
-	iface, err := net.InterfaceByName(ifaceName)
33
-	if err != nil {
34
-		t.Skip("No 'lo' interface; skipping tests")
32
+	for _, tl := range testLinks {
33
+		addLink(t, tl.name, tl.linkType)
34
+		defer deleteLink(t, tl.name)
35
+		readLink(t, tl.name)
35 36
 	}
37
+}
36 38
 
37
-	if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
38
-		t.Fatal(err)
39
+func TestNetworkLinkUpDown(t *testing.T) {
40
+	if testing.Short() {
41
+		return
39 42
 	}
40 43
 
41
-	if !ipAssigned(iface, ip) {
42
-		t.Fatalf("Could not locate address '%s' in lo address list.", ip.String())
44
+	tl := testLink{name: "tstEth", linkType: "dummy"}
45
+
46
+	addLink(t, tl.name, tl.linkType)
47
+	defer deleteLink(t, tl.name)
48
+
49
+	upLink(t, tl.name)
50
+	ifcAfterUp := readLink(t, tl.name)
51
+
52
+	if (ifcAfterUp.Flags & syscall.IFF_UP) != syscall.IFF_UP {
53
+		t.Fatalf("Could not bring UP %#v initerface", tl)
43 54
 	}
44 55
 
45
-	if err := NetworkLinkDelIp(iface, ip, ipNet); err != nil {
46
-		t.Fatal(err)
56
+	downLink(t, tl.name)
57
+	ifcAfterDown := readLink(t, tl.name)
58
+
59
+	if (ifcAfterDown.Flags & syscall.IFF_UP) == syscall.IFF_UP {
60
+		t.Fatalf("Could not bring DOWN %#v initerface", tl)
47 61
 	}
62
+}
48 63
 
49
-	if ipAssigned(iface, ip) {
50
-		t.Fatalf("Located address '%s' in lo address list after removal.", ip.String())
64
+func TestNetworkSetMacAddress(t *testing.T) {
65
+	if testing.Short() {
66
+		return
67
+	}
68
+
69
+	tl := testLink{name: "tstEth", linkType: "dummy"}
70
+	macaddr := "22:ce:e0:99:63:6f"
71
+
72
+	addLink(t, tl.name, tl.linkType)
73
+	defer deleteLink(t, tl.name)
74
+
75
+	ifcBeforeSet := readLink(t, tl.name)
76
+
77
+	if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil {
78
+		t.Fatalf("Could not set %s MAC address on %#v interface: err", macaddr, tl, err)
79
+	}
80
+
81
+	ifcAfterSet := readLink(t, tl.name)
82
+
83
+	if ifcAfterSet.HardwareAddr.String() != macaddr {
84
+		t.Fatalf("Could not set %s MAC address on %#v interface", macaddr, tl)
51 85
 	}
52 86
 }
53 87
 
54
-func TestCreateBridgeWithMac(t *testing.T) {
88
+func TestNetworkSetMTU(t *testing.T) {
55 89
 	if testing.Short() {
56 90
 		return
57 91
 	}
58 92
 
59
-	name := "testbridge"
93
+	tl := testLink{name: "tstEth", linkType: "dummy"}
94
+	mtu := 1400
60 95
 
61
-	if err := CreateBridge(name, true); err != nil {
62
-		t.Fatal(err)
96
+	addLink(t, tl.name, tl.linkType)
97
+	defer deleteLink(t, tl.name)
98
+
99
+	ifcBeforeSet := readLink(t, tl.name)
100
+
101
+	if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil {
102
+		t.Fatalf("Could not set %d MTU on %#v interface: err", mtu, tl, err)
63 103
 	}
64 104
 
65
-	if _, err := net.InterfaceByName(name); err != nil {
66
-		t.Fatal(err)
105
+	ifcAfterSet := readLink(t, tl.name)
106
+
107
+	if ifcAfterSet.MTU != mtu {
108
+		t.Fatalf("Could not set %d MTU on %#v interface", mtu, tl)
67 109
 	}
110
+}
68 111
 
69
-	// cleanup and tests
112
+func TestNetworkSetMasterNoMaster(t *testing.T) {
113
+	if testing.Short() {
114
+		return
115
+	}
70 116
 
71
-	if err := DeleteBridge(name); err != nil {
72
-		t.Fatal(err)
117
+	master := testLink{"tstBr", "bridge"}
118
+	slave := testLink{"tstEth", "dummy"}
119
+	testLinks := []testLink{master, slave}
120
+
121
+	for _, tl := range testLinks {
122
+		addLink(t, tl.name, tl.linkType)
123
+		defer deleteLink(t, tl.name)
124
+		upLink(t, tl.name)
73 125
 	}
74 126
 
75
-	if _, err := net.InterfaceByName(name); err == nil {
76
-		t.Fatalf("expected error getting interface because %s bridge was deleted", name)
127
+	masterIfc := readLink(t, master.name)
128
+	slaveIfc := readLink(t, slave.name)
129
+	if err := NetworkSetMaster(slaveIfc, masterIfc); err != nil {
130
+		t.Fatalf("Could not set %#v to be the master of %#v: %s", master, slave, err)
131
+	}
132
+
133
+	// Trying to figure out a way to test which will not break on RHEL6.
134
+	// We could check for existence of /sys/class/net/tstEth/upper_tstBr
135
+	// which should point to the ../tstBr which is the UPPER device i.e. network bridge
136
+
137
+	if err := NetworkSetNoMaster(slaveIfc); err != nil {
138
+		t.Fatalf("Could not UNset %#v master of %#v: %s", master, slave, err)
77 139
 	}
78 140
 }
79 141
 
80
-func TestCreateBridgeLink(t *testing.T) {
142
+func TestNetworkChangeName(t *testing.T) {
81 143
 	if testing.Short() {
82 144
 		return
83 145
 	}
84 146
 
85
-	name := "mybrlink"
147
+	tl := testLink{"tstEth", "dummy"}
148
+	newName := "newTst"
86 149
 
87
-	if err := NetworkLinkAdd(name, "bridge"); err != nil {
88
-		t.Fatal(err)
150
+	addLink(t, tl.name, tl.linkType)
151
+
152
+	linkIfc := readLink(t, tl.name)
153
+	if err := NetworkChangeName(linkIfc, newName); err != nil {
154
+		deleteLink(t, tl.name)
155
+		t.Fatalf("Could not change %#v interface name to %s: %s", tl, newName, err)
89 156
 	}
90 157
 
91
-	if _, err := net.InterfaceByName(name); err != nil {
92
-		t.Fatal(err)
158
+	readLink(t, newName)
159
+	deleteLink(t, newName)
160
+}
161
+
162
+func TestNetworkLinkAddVlan(t *testing.T) {
163
+	if testing.Short() {
164
+		return
93 165
 	}
94 166
 
95
-	if err := NetworkLinkDel(name); err != nil {
96
-		t.Fatal(err)
167
+	tl := struct {
168
+		name string
169
+		id   uint16
170
+	}{
171
+		name: "tstVlan",
172
+		id:   32,
97 173
 	}
174
+	masterLink := testLink{"tstEth", "dummy"}
98 175
 
99
-	if _, err := net.InterfaceByName(name); err == nil {
100
-		t.Fatalf("expected error getting interface because %s bridge was deleted", name)
176
+	addLink(t, masterLink.name, masterLink.linkType)
177
+	defer deleteLink(t, masterLink.name)
178
+
179
+	if err := NetworkLinkAddVlan(masterLink.name, tl.name, tl.id); err != nil {
180
+		t.Fatalf("Unable to create %#v VLAN interface: %s", tl, err)
101 181
 	}
102 182
 
183
+	readLink(t, tl.name)
184
+}
185
+
186
+func TestNetworkLinkAddMacVlan(t *testing.T) {
187
+	if testing.Short() {
188
+		return
189
+	}
190
+
191
+	tl := struct {
192
+		name string
193
+		mode string
194
+	}{
195
+		name: "tstVlan",
196
+		mode: "private",
197
+	}
198
+	masterLink := testLink{"tstEth", "dummy"}
199
+
200
+	addLink(t, masterLink.name, masterLink.linkType)
201
+	defer deleteLink(t, masterLink.name)
202
+
203
+	if err := NetworkLinkAddMacVlan(masterLink.name, tl.name, tl.mode); err != nil {
204
+		t.Fatalf("Unable to create %#v MAC VLAN interface: %s", tl, err)
205
+	}
206
+
207
+	readLink(t, tl.name)
208
+}
209
+
210
+func TestAddDelNetworkIp(t *testing.T) {
211
+	if testing.Short() {
212
+		return
213
+	}
214
+
215
+	ifaceName := "lo"
216
+	ip := net.ParseIP("127.0.1.1")
217
+	mask := net.IPv4Mask(255, 255, 255, 255)
218
+	ipNet := &net.IPNet{IP: ip, Mask: mask}
219
+
220
+	iface, err := net.InterfaceByName(ifaceName)
221
+	if err != nil {
222
+		t.Skip("No 'lo' interface; skipping tests")
223
+	}
224
+
225
+	if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
226
+		t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err)
227
+	}
228
+
229
+	if !ipAssigned(iface, ip) {
230
+		t.Fatalf("Could not locate address '%s' in lo address list.", ip.String())
231
+	}
232
+
233
+	if err := NetworkLinkDelIp(iface, ip, ipNet); err != nil {
234
+		t.Fatalf("Could not delete IP address %s from interface %#v: %s", ip.String(), iface, err)
235
+	}
236
+
237
+	if ipAssigned(iface, ip) {
238
+		t.Fatalf("Located address '%s' in lo address list after removal.", ip.String())
239
+	}
103 240
 }
104 241
 
105 242
 func TestCreateVethPair(t *testing.T) {
... ...
@@ -113,17 +291,41 @@ func TestCreateVethPair(t *testing.T) {
113 113
 	)
114 114
 
115 115
 	if err := NetworkCreateVethPair(name1, name2); err != nil {
116
-		t.Fatal(err)
116
+		t.Fatalf("Could not create veth pair %s %s: %s", name1, name2, err)
117 117
 	}
118 118
 	defer NetworkLinkDel(name1)
119 119
 
120
-	if _, err := net.InterfaceByName(name1); err != nil {
120
+	readLink(t, name1)
121
+	readLink(t, name2)
122
+}
123
+
124
+//
125
+// netlink package tests which do not use RTNETLINK
126
+//
127
+func TestCreateBridgeWithMac(t *testing.T) {
128
+	if testing.Short() {
129
+		return
130
+	}
131
+
132
+	name := "testbridge"
133
+
134
+	if err := CreateBridge(name, true); err != nil {
121 135
 		t.Fatal(err)
122 136
 	}
123 137
 
124
-	if _, err := net.InterfaceByName(name2); err != nil {
138
+	if _, err := net.InterfaceByName(name); err != nil {
125 139
 		t.Fatal(err)
126 140
 	}
141
+
142
+	// cleanup and tests
143
+
144
+	if err := DeleteBridge(name); err != nil {
145
+		t.Fatal(err)
146
+	}
147
+
148
+	if _, err := net.InterfaceByName(name); err == nil {
149
+		t.Fatalf("expected error getting interface because %s bridge was deleted", name)
150
+	}
127 151
 }
128 152
 
129 153
 func TestSetMACAddress(t *testing.T) {
... ...
@@ -139,7 +341,7 @@ func TestSetMACAddress(t *testing.T) {
139 139
 	}
140 140
 	defer NetworkLinkDel(name)
141 141
 
142
-	if err := NetworkSetMacAddress(name, mac); err != nil {
142
+	if err := SetMacAddress(name, mac); err != nil {
143 143
 		t.Fatal(err)
144 144
 	}
145 145
 
... ...
@@ -30,8 +30,10 @@ func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
30 30
 	}
31 31
 
32 32
 	if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil {
33
+		f.Close()
33 34
 		return fmt.Errorf("failed to setns current network namespace: %v", err)
34 35
 	}
35 36
 
37
+	f.Close()
36 38
 	return nil
37 39
 }
... ...
@@ -17,12 +17,18 @@ type Network struct {
17 17
 	// Prefix for the veth interfaces.
18 18
 	VethPrefix string `json:"veth_prefix,omitempty"`
19 19
 
20
-	// Address contains the IP and mask to set on the network interface
20
+	// Address contains the IPv4 and mask to set on the network interface
21 21
 	Address string `json:"address,omitempty"`
22 22
 
23
+	// IPv6Address contains the IPv6 and mask to set on the network interface
24
+	IPv6Address string `json:"ipv6_address,omitempty"`
25
+
23 26
 	// Gateway sets the gateway address that is used as the default for the interface
24 27
 	Gateway string `json:"gateway,omitempty"`
25 28
 
29
+	// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
30
+	IPv6Gateway string `json:"ipv6_gateway,omitempty"`
31
+
26 32
 	// Mtu sets the mtu value for the interface and will be mirrored on both the host and
27 33
 	// container's interfaces if a pair is created, specifically in the case of type veth
28 34
 	// Note: This does not apply to loopback interfaces.
... ...
@@ -63,6 +63,12 @@ func (v *Veth) Initialize(config *Network, networkState *NetworkState) error {
63 63
 	if err := SetInterfaceIp(defaultDevice, config.Address); err != nil {
64 64
 		return fmt.Errorf("set %s ip %s", defaultDevice, err)
65 65
 	}
66
+	if config.IPv6Address != "" {
67
+		if err := SetInterfaceIp(defaultDevice, config.IPv6Address); err != nil {
68
+			return fmt.Errorf("set %s ipv6 %s", defaultDevice, err)
69
+		}
70
+	}
71
+
66 72
 	if err := SetMtu(defaultDevice, config.Mtu); err != nil {
67 73
 		return fmt.Errorf("set %s mtu to %d %s", defaultDevice, config.Mtu, err)
68 74
 	}
... ...
@@ -74,6 +80,11 @@ func (v *Veth) Initialize(config *Network, networkState *NetworkState) error {
74 74
 			return fmt.Errorf("set gateway to %s on device %s failed with %s", config.Gateway, defaultDevice, err)
75 75
 		}
76 76
 	}
77
+	if config.IPv6Gateway != "" {
78
+		if err := SetDefaultGateway(config.IPv6Gateway, defaultDevice); err != nil {
79
+			return fmt.Errorf("set gateway for ipv6 to %s on device %s failed with %s", config.IPv6Gateway, defaultDevice, err)
80
+		}
81
+	}
77 82
 	return nil
78 83
 }
79 84
 
... ...
@@ -135,8 +135,8 @@ func startContainer(container *libcontainer.Config, dataPath string, args []stri
135 135
 
136 136
 	signal.Notify(sigc)
137 137
 
138
-	createCommand := func(container *libcontainer.Config, console, rootfs, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
139
-		cmd = namespaces.DefaultCreateCommand(container, console, rootfs, dataPath, init, pipe, args)
138
+	createCommand := func(container *libcontainer.Config, console, dataPath, init string, pipe *os.File, args []string) *exec.Cmd {
139
+		cmd = namespaces.DefaultCreateCommand(container, console, dataPath, init, pipe, args)
140 140
 		if logPath != "" {
141 141
 			cmd.Env = append(cmd.Env, fmt.Sprintf("log=%s", logPath))
142 142
 		}
... ...
@@ -189,7 +189,7 @@ func startContainer(container *libcontainer.Config, dataPath string, args []stri
189 189
 		}()
190 190
 	}
191 191
 
192
-	return namespaces.Exec(container, stdin, stdout, stderr, console, "", dataPath, args, createCommand, startCallback)
192
+	return namespaces.Exec(container, stdin, stdout, stderr, console, dataPath, args, createCommand, startCallback)
193 193
 }
194 194
 
195 195
 func resizeTty(master *os.File) {