Browse code

Bump libcontainer to 08b5415ffa3769ff7c1d2f673f613

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>

Michael Crosby authored on 2015/09/22 03:28:51
Showing 15 changed files
... ...
@@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
42 42
 clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
43 43
 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
44 44
 
45
-clone git github.com/opencontainers/runc v0.0.4 # libcontainer
45
+clone git github.com/opencontainers/runc 08b5415ffa3769ff7c1d2f673f61382d69aabb7d # libcontainer
46 46
 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
47 47
 clone git github.com/coreos/go-systemd v3
48 48
 clone git github.com/godbus/dbus v2
... ...
@@ -32,20 +32,20 @@ struct describing how the container is to be created.  A sample would look simil
32 32
 config := &configs.Config{
33 33
     Rootfs: rootfs,
34 34
     Capabilities: []string{
35
-        "CHOWN",
36
-        "DAC_OVERRIDE",
37
-        "FSETID",
38
-        "FOWNER",
39
-        "MKNOD",
40
-        "NET_RAW",
41
-        "SETGID",
42
-        "SETUID",
43
-        "SETFCAP",
44
-        "SETPCAP",
45
-        "NET_BIND_SERVICE",
46
-        "SYS_CHROOT",
47
-        "KILL",
48
-        "AUDIT_WRITE",
35
+        "CAP_CHOWN",
36
+        "CAP_DAC_OVERRIDE",
37
+        "CAP_FSETID",
38
+        "CAP_FOWNER",
39
+        "CAP_MKNOD",
40
+        "CAP_NET_RAW",
41
+        "CAP_SETGID",
42
+        "CAP_SETUID",
43
+        "CAP_SETFCAP",
44
+        "CAP_SETPCAP",
45
+        "CAP_NET_BIND_SERVICE",
46
+        "CAP_SYS_CHROOT",
47
+        "CAP_KILL",
48
+        "CAP_AUDIT_WRITE",
49 49
     },
50 50
     Namespaces: configs.Namespaces([]configs.Namespace{
51 51
         {Type: configs.NEWNS},
... ...
@@ -12,44 +12,44 @@ import (
12 12
 const allCapabilityTypes = capability.CAPS | capability.BOUNDS
13 13
 
14 14
 var capabilityList = map[string]capability.Cap{
15
-	"SETPCAP":          capability.CAP_SETPCAP,
16
-	"SYS_MODULE":       capability.CAP_SYS_MODULE,
17
-	"SYS_RAWIO":        capability.CAP_SYS_RAWIO,
18
-	"SYS_PACCT":        capability.CAP_SYS_PACCT,
19
-	"SYS_ADMIN":        capability.CAP_SYS_ADMIN,
20
-	"SYS_NICE":         capability.CAP_SYS_NICE,
21
-	"SYS_RESOURCE":     capability.CAP_SYS_RESOURCE,
22
-	"SYS_TIME":         capability.CAP_SYS_TIME,
23
-	"SYS_TTY_CONFIG":   capability.CAP_SYS_TTY_CONFIG,
24
-	"MKNOD":            capability.CAP_MKNOD,
25
-	"AUDIT_WRITE":      capability.CAP_AUDIT_WRITE,
26
-	"AUDIT_CONTROL":    capability.CAP_AUDIT_CONTROL,
27
-	"MAC_OVERRIDE":     capability.CAP_MAC_OVERRIDE,
28
-	"MAC_ADMIN":        capability.CAP_MAC_ADMIN,
29
-	"NET_ADMIN":        capability.CAP_NET_ADMIN,
30
-	"SYSLOG":           capability.CAP_SYSLOG,
31
-	"CHOWN":            capability.CAP_CHOWN,
32
-	"NET_RAW":          capability.CAP_NET_RAW,
33
-	"DAC_OVERRIDE":     capability.CAP_DAC_OVERRIDE,
34
-	"FOWNER":           capability.CAP_FOWNER,
35
-	"DAC_READ_SEARCH":  capability.CAP_DAC_READ_SEARCH,
36
-	"FSETID":           capability.CAP_FSETID,
37
-	"KILL":             capability.CAP_KILL,
38
-	"SETGID":           capability.CAP_SETGID,
39
-	"SETUID":           capability.CAP_SETUID,
40
-	"LINUX_IMMUTABLE":  capability.CAP_LINUX_IMMUTABLE,
41
-	"NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
42
-	"NET_BROADCAST":    capability.CAP_NET_BROADCAST,
43
-	"IPC_LOCK":         capability.CAP_IPC_LOCK,
44
-	"IPC_OWNER":        capability.CAP_IPC_OWNER,
45
-	"SYS_CHROOT":       capability.CAP_SYS_CHROOT,
46
-	"SYS_PTRACE":       capability.CAP_SYS_PTRACE,
47
-	"SYS_BOOT":         capability.CAP_SYS_BOOT,
48
-	"LEASE":            capability.CAP_LEASE,
49
-	"SETFCAP":          capability.CAP_SETFCAP,
50
-	"WAKE_ALARM":       capability.CAP_WAKE_ALARM,
51
-	"BLOCK_SUSPEND":    capability.CAP_BLOCK_SUSPEND,
52
-	"AUDIT_READ":       capability.CAP_AUDIT_READ,
15
+	"CAP_SETPCAP":          capability.CAP_SETPCAP,
16
+	"CAP_SYS_MODULE":       capability.CAP_SYS_MODULE,
17
+	"CAP_SYS_RAWIO":        capability.CAP_SYS_RAWIO,
18
+	"CAP_SYS_PACCT":        capability.CAP_SYS_PACCT,
19
+	"CAP_SYS_ADMIN":        capability.CAP_SYS_ADMIN,
20
+	"CAP_SYS_NICE":         capability.CAP_SYS_NICE,
21
+	"CAP_SYS_RESOURCE":     capability.CAP_SYS_RESOURCE,
22
+	"CAP_SYS_TIME":         capability.CAP_SYS_TIME,
23
+	"CAP_SYS_TTY_CONFIG":   capability.CAP_SYS_TTY_CONFIG,
24
+	"CAP_MKNOD":            capability.CAP_MKNOD,
25
+	"CAP_AUDIT_WRITE":      capability.CAP_AUDIT_WRITE,
26
+	"CAP_AUDIT_CONTROL":    capability.CAP_AUDIT_CONTROL,
27
+	"CAP_MAC_OVERRIDE":     capability.CAP_MAC_OVERRIDE,
28
+	"CAP_MAC_ADMIN":        capability.CAP_MAC_ADMIN,
29
+	"CAP_NET_ADMIN":        capability.CAP_NET_ADMIN,
30
+	"CAP_SYSLOG":           capability.CAP_SYSLOG,
31
+	"CAP_CHOWN":            capability.CAP_CHOWN,
32
+	"CAP_NET_RAW":          capability.CAP_NET_RAW,
33
+	"CAP_DAC_OVERRIDE":     capability.CAP_DAC_OVERRIDE,
34
+	"CAP_FOWNER":           capability.CAP_FOWNER,
35
+	"CAP_DAC_READ_SEARCH":  capability.CAP_DAC_READ_SEARCH,
36
+	"CAP_FSETID":           capability.CAP_FSETID,
37
+	"CAP_KILL":             capability.CAP_KILL,
38
+	"CAP_SETGID":           capability.CAP_SETGID,
39
+	"CAP_SETUID":           capability.CAP_SETUID,
40
+	"CAP_LINUX_IMMUTABLE":  capability.CAP_LINUX_IMMUTABLE,
41
+	"CAP_NET_BIND_SERVICE": capability.CAP_NET_BIND_SERVICE,
42
+	"CAP_NET_BROADCAST":    capability.CAP_NET_BROADCAST,
43
+	"CAP_IPC_LOCK":         capability.CAP_IPC_LOCK,
44
+	"CAP_IPC_OWNER":        capability.CAP_IPC_OWNER,
45
+	"CAP_SYS_CHROOT":       capability.CAP_SYS_CHROOT,
46
+	"CAP_SYS_PTRACE":       capability.CAP_SYS_PTRACE,
47
+	"CAP_SYS_BOOT":         capability.CAP_SYS_BOOT,
48
+	"CAP_LEASE":            capability.CAP_LEASE,
49
+	"CAP_SETFCAP":          capability.CAP_SETFCAP,
50
+	"CAP_WAKE_ALARM":       capability.CAP_WAKE_ALARM,
51
+	"CAP_BLOCK_SUSPEND":    capability.CAP_BLOCK_SUSPEND,
52
+	"CAP_AUDIT_READ":       capability.CAP_AUDIT_READ,
53 53
 }
54 54
 
55 55
 func newCapWhitelist(caps []string) (*whitelist, error) {
... ...
@@ -5,7 +5,6 @@ package cgroups
5 5
 import (
6 6
 	"bufio"
7 7
 	"fmt"
8
-	"io"
9 8
 	"io/ioutil"
10 9
 	"os"
11 10
 	"path/filepath"
... ...
@@ -105,12 +104,12 @@ type Mount struct {
105 105
 	Subsystems []string
106 106
 }
107 107
 
108
-func (m Mount) GetThisCgroupDir() (string, error) {
108
+func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
109 109
 	if len(m.Subsystems) == 0 {
110 110
 		return "", fmt.Errorf("no subsystem for mount")
111 111
 	}
112 112
 
113
-	return GetThisCgroupDir(m.Subsystems[0])
113
+	return getControllerPath(m.Subsystems[0], cgroups)
114 114
 }
115 115
 
116 116
 func GetCgroupMounts() ([]Mount, error) {
... ...
@@ -176,23 +175,22 @@ func GetAllSubsystems() ([]string, error) {
176 176
 
177 177
 // Returns the relative path to the cgroup docker is running in.
178 178
 func GetThisCgroupDir(subsystem string) (string, error) {
179
-	f, err := os.Open("/proc/self/cgroup")
179
+	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
180 180
 	if err != nil {
181 181
 		return "", err
182 182
 	}
183
-	defer f.Close()
184 183
 
185
-	return ParseCgroupFile(subsystem, f)
184
+	return getControllerPath(subsystem, cgroups)
186 185
 }
187 186
 
188 187
 func GetInitCgroupDir(subsystem string) (string, error) {
189
-	f, err := os.Open("/proc/1/cgroup")
188
+
189
+	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
190 190
 	if err != nil {
191 191
 		return "", err
192 192
 	}
193
-	defer f.Close()
194 193
 
195
-	return ParseCgroupFile(subsystem, f)
194
+	return getControllerPath(subsystem, cgroups)
196 195
 }
197 196
 
198 197
 func ReadProcsFile(dir string) ([]int, error) {
... ...
@@ -219,23 +217,40 @@ func ReadProcsFile(dir string) ([]int, error) {
219 219
 	return out, nil
220 220
 }
221 221
 
222
-func ParseCgroupFile(subsystem string, r io.Reader) (string, error) {
223
-	s := bufio.NewScanner(r)
222
+func ParseCgroupFile(path string) (map[string]string, error) {
223
+	f, err := os.Open(path)
224
+	if err != nil {
225
+		return nil, err
226
+	}
227
+	defer f.Close()
228
+
229
+	s := bufio.NewScanner(f)
230
+	cgroups := make(map[string]string)
224 231
 
225 232
 	for s.Scan() {
226 233
 		if err := s.Err(); err != nil {
227
-			return "", err
234
+			return nil, err
228 235
 		}
229 236
 
230 237
 		text := s.Text()
231 238
 		parts := strings.Split(text, ":")
232 239
 
233 240
 		for _, subs := range strings.Split(parts[1], ",") {
234
-			if subs == subsystem || subs == cgroupNamePrefix+subsystem {
235
-				return parts[2], nil
236
-			}
241
+			cgroups[subs] = parts[2]
237 242
 		}
238 243
 	}
244
+	return cgroups, nil
245
+}
246
+
247
+func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
248
+
249
+	if p, ok := cgroups[subsystem]; ok {
250
+		return p, nil
251
+	}
252
+
253
+	if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
254
+		return p, nil
255
+	}
239 256
 
240 257
 	return "", NewNotFoundError(subsystem)
241 258
 }
... ...
@@ -1,5 +1,13 @@
1 1
 package configs
2 2
 
3
+import (
4
+	"path/filepath"
5
+	"strings"
6
+	"syscall"
7
+
8
+	"github.com/opencontainers/runc/libcontainer/label"
9
+)
10
+
3 11
 type Mount struct {
4 12
 	// Source path for the mount.
5 13
 	Source string `json:"source"`
... ...
@@ -13,6 +21,9 @@ type Mount struct {
13 13
 	// Mount flags.
14 14
 	Flags int `json:"flags"`
15 15
 
16
+	// Propagation Flags
17
+	PropagationFlags []int `json:"propagation_flags"`
18
+
16 19
 	// Mount data applied to the mount.
17 20
 	Data string `json:"data"`
18 21
 
... ...
@@ -25,3 +36,40 @@ type Mount struct {
25 25
 	// Optional Command to be run after Source is mounted.
26 26
 	PostmountCmds []Command `json:"postmount_cmds"`
27 27
 }
28
+
29
+func (m *Mount) Remount(rootfs string) error {
30
+	var (
31
+		dest = m.Destination
32
+	)
33
+	if !strings.HasPrefix(dest, rootfs) {
34
+		dest = filepath.Join(rootfs, dest)
35
+	}
36
+
37
+	if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
38
+		return err
39
+	}
40
+	return nil
41
+}
42
+
43
+// Do the mount operation followed by additional mounts required to take care
44
+// of propagation flags.
45
+func (m *Mount) MountPropagate(rootfs string, mountLabel string) error {
46
+	var (
47
+		dest = m.Destination
48
+		data = label.FormatMountLabel(m.Data, mountLabel)
49
+	)
50
+	if !strings.HasPrefix(dest, rootfs) {
51
+		dest = filepath.Join(rootfs, dest)
52
+	}
53
+
54
+	if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
55
+		return err
56
+	}
57
+
58
+	for _, pflag := range m.PropagationFlags {
59
+		if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
60
+			return err
61
+		}
62
+	}
63
+	return nil
64
+}
... ...
@@ -423,7 +423,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
423 423
 		return err
424 424
 	}
425 425
 
426
-	err = c.criuSwrk(nil, req, criuOpts)
426
+	err = c.criuSwrk(nil, req, criuOpts, false)
427 427
 	if err != nil {
428 428
 		return err
429 429
 	}
... ...
@@ -516,6 +516,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
516 516
 			FileLocks:      proto.Bool(criuOpts.FileLocks),
517 517
 		},
518 518
 	}
519
+
519 520
 	for _, m := range c.config.Mounts {
520 521
 		switch m.Device {
521 522
 		case "bind":
... ...
@@ -573,14 +574,36 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
573 573
 		}
574 574
 	}
575 575
 
576
-	err = c.criuSwrk(process, req, criuOpts)
576
+	err = c.criuSwrk(process, req, criuOpts, true)
577 577
 	if err != nil {
578 578
 		return err
579 579
 	}
580 580
 	return nil
581 581
 }
582 582
 
583
-func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error {
583
+func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
584
+	if err := c.cgroupManager.Apply(pid); err != nil {
585
+		return err
586
+	}
587
+
588
+	path := fmt.Sprintf("/proc/%d/cgroup", pid)
589
+	cgroupsPaths, err := cgroups.ParseCgroupFile(path)
590
+	if err != nil {
591
+		return err
592
+	}
593
+
594
+	for c, p := range cgroupsPaths {
595
+		cgroupRoot := &criurpc.CgroupRoot{
596
+			Ctrl: proto.String(c),
597
+			Path: proto.String(p),
598
+		}
599
+		req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot)
600
+	}
601
+
602
+	return nil
603
+}
604
+
605
+func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool) error {
584 606
 	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
585 607
 	if err != nil {
586 608
 		return err
... ...
@@ -614,6 +637,13 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
614 614
 		}
615 615
 	}()
616 616
 
617
+	if applyCgroups {
618
+		err := c.criuApplyCgroups(cmd.Process.Pid, req)
619
+		if err != nil {
620
+			return err
621
+		}
622
+	}
623
+
617 624
 	var extFds []string
618 625
 	if process != nil {
619 626
 		extFds, err = getPipeFds(cmd.Process.Pid)
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"encoding/json"
7 7
 	"fmt"
8 8
 	"io/ioutil"
9
+	"net"
9 10
 	"os"
10 11
 	"strconv"
11 12
 	"strings"
... ...
@@ -14,10 +15,10 @@ import (
14 14
 	"github.com/Sirupsen/logrus"
15 15
 	"github.com/opencontainers/runc/libcontainer/cgroups"
16 16
 	"github.com/opencontainers/runc/libcontainer/configs"
17
-	"github.com/opencontainers/runc/libcontainer/netlink"
18 17
 	"github.com/opencontainers/runc/libcontainer/system"
19 18
 	"github.com/opencontainers/runc/libcontainer/user"
20 19
 	"github.com/opencontainers/runc/libcontainer/utils"
20
+	"github.com/vishvananda/netlink"
21 21
 )
22 22
 
23 23
 type initType string
... ...
@@ -186,7 +187,17 @@ func setupUser(config *initConfig) error {
186 186
 			return err
187 187
 		}
188 188
 	}
189
-
189
+	// change the permissions on the STDIO of the current process so that when the user
190
+	// is changed for the container, the STDIO of the process matches the user.
191
+	for _, fd := range []uintptr{
192
+		os.Stdin.Fd(),
193
+		os.Stderr.Fd(),
194
+		os.Stdout.Fd(),
195
+	} {
196
+		if err := syscall.Fchown(int(fd), execUser.Uid, execUser.Gid); err != nil {
197
+			return err
198
+		}
199
+	}
190 200
 	suppGroups := append(execUser.Sgids, addGroups...)
191 201
 	if err := syscall.Setgroups(suppGroups); err != nil {
192 202
 		return err
... ...
@@ -223,7 +234,30 @@ func setupNetwork(config *initConfig) error {
223 223
 
224 224
 func setupRoute(config *configs.Config) error {
225 225
 	for _, config := range config.Routes {
226
-		if err := netlink.AddRoute(config.Destination, config.Source, config.Gateway, config.InterfaceName); err != nil {
226
+		_, dst, err := net.ParseCIDR(config.Destination)
227
+		if err != nil {
228
+			return err
229
+		}
230
+		src := net.ParseIP(config.Source)
231
+		if src == nil {
232
+			return fmt.Errorf("Invalid source for route: %s", config.Source)
233
+		}
234
+		gw := net.ParseIP(config.Gateway)
235
+		if gw == nil {
236
+			return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
237
+		}
238
+		l, err := netlink.LinkByName(config.InterfaceName)
239
+		if err != nil {
240
+			return err
241
+		}
242
+		route := &netlink.Route{
243
+			Scope:     netlink.SCOPE_UNIVERSE,
244
+			Dst:       dst,
245
+			Src:       src,
246
+			Gw:        gw,
247
+			LinkIndex: l.Attrs().Index,
248
+		}
249
+		if err := netlink.RouteAdd(route); err != nil {
227 250
 			return err
228 251
 		}
229 252
 	}
230 253
deleted file mode 100644
... ...
@@ -1,2 +0,0 @@
1
-Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
2
-Guillaume J. Charmes <guillaume@docker.com> (@creack)
3 1
deleted file mode 100644
... ...
@@ -1,31 +0,0 @@
1
-// Package netlink provides access to low-level Netlink sockets and messages.
2
-//
3
-// Actual implementations are in:
4
-// netlink_linux.go
5
-// netlink_darwin.go
6
-package netlink
7
-
8
-import (
9
-	"errors"
10
-	"net"
11
-)
12
-
13
-var (
14
-	ErrWrongSockType   = errors.New("Wrong socket type")
15
-	ErrShortResponse   = errors.New("Got short response from netlink")
16
-	ErrInterfaceExists = errors.New("Network interface already exists")
17
-)
18
-
19
-// A Route is a subnet associated with the interface to reach it.
20
-type Route struct {
21
-	*net.IPNet
22
-	Iface   *net.Interface
23
-	Default bool
24
-}
25
-
26
-// An IfAddr defines IP network settings for a given network interface
27
-type IfAddr struct {
28
-	Iface *net.Interface
29
-	IP    net.IP
30
-	IPNet *net.IPNet
31
-}
32 1
deleted file mode 100644
... ...
@@ -1,1321 +0,0 @@
1
-package netlink
2
-
3
-import (
4
-	"encoding/binary"
5
-	"fmt"
6
-	"io"
7
-	"math/rand"
8
-	"net"
9
-	"os"
10
-	"sync/atomic"
11
-	"syscall"
12
-	"time"
13
-	"unsafe"
14
-)
15
-
16
-const (
17
-	IFNAMSIZ          = 16
18
-	DEFAULT_CHANGE    = 0xFFFFFFFF
19
-	IFLA_INFO_KIND    = 1
20
-	IFLA_INFO_DATA    = 2
21
-	VETH_INFO_PEER    = 1
22
-	IFLA_MACVLAN_MODE = 1
23
-	IFLA_VLAN_ID      = 1
24
-	IFLA_NET_NS_FD    = 28
25
-	IFLA_ADDRESS      = 1
26
-	IFLA_BRPORT_MODE  = 4
27
-	SIOC_BRADDBR      = 0x89a0
28
-	SIOC_BRDELBR      = 0x89a1
29
-	SIOC_BRADDIF      = 0x89a2
30
-	SIOC_BRDELIF      = 0x89a3
31
-)
32
-
33
-const (
34
-	MACVLAN_MODE_PRIVATE = 1 << iota
35
-	MACVLAN_MODE_VEPA
36
-	MACVLAN_MODE_BRIDGE
37
-	MACVLAN_MODE_PASSTHRU
38
-)
39
-
40
-var nextSeqNr uint32
41
-
42
-type ifreqHwaddr struct {
43
-	IfrnName   [IFNAMSIZ]byte
44
-	IfruHwaddr syscall.RawSockaddr
45
-}
46
-
47
-type ifreqIndex struct {
48
-	IfrnName  [IFNAMSIZ]byte
49
-	IfruIndex int32
50
-}
51
-
52
-type ifreqFlags struct {
53
-	IfrnName  [IFNAMSIZ]byte
54
-	Ifruflags uint16
55
-}
56
-
57
-var native binary.ByteOrder
58
-
59
-var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
60
-
61
-func init() {
62
-	var x uint32 = 0x01020304
63
-	if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
64
-		native = binary.BigEndian
65
-	} else {
66
-		native = binary.LittleEndian
67
-	}
68
-}
69
-
70
-func getIpFamily(ip net.IP) int {
71
-	if len(ip) <= net.IPv4len {
72
-		return syscall.AF_INET
73
-	}
74
-	if ip.To4() != nil {
75
-		return syscall.AF_INET
76
-	}
77
-	return syscall.AF_INET6
78
-}
79
-
80
-type NetlinkRequestData interface {
81
-	Len() int
82
-	ToWireFormat() []byte
83
-}
84
-
85
-type IfInfomsg struct {
86
-	syscall.IfInfomsg
87
-}
88
-
89
-func newIfInfomsg(family int) *IfInfomsg {
90
-	return &IfInfomsg{
91
-		IfInfomsg: syscall.IfInfomsg{
92
-			Family: uint8(family),
93
-		},
94
-	}
95
-}
96
-
97
-func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
98
-	msg := newIfInfomsg(family)
99
-	parent.children = append(parent.children, msg)
100
-	return msg
101
-}
102
-
103
-func (msg *IfInfomsg) ToWireFormat() []byte {
104
-	length := syscall.SizeofIfInfomsg
105
-	b := make([]byte, length)
106
-	b[0] = msg.Family
107
-	b[1] = 0
108
-	native.PutUint16(b[2:4], msg.Type)
109
-	native.PutUint32(b[4:8], uint32(msg.Index))
110
-	native.PutUint32(b[8:12], msg.Flags)
111
-	native.PutUint32(b[12:16], msg.Change)
112
-	return b
113
-}
114
-
115
-func (msg *IfInfomsg) Len() int {
116
-	return syscall.SizeofIfInfomsg
117
-}
118
-
119
-type IfAddrmsg struct {
120
-	syscall.IfAddrmsg
121
-}
122
-
123
-func newIfAddrmsg(family int) *IfAddrmsg {
124
-	return &IfAddrmsg{
125
-		IfAddrmsg: syscall.IfAddrmsg{
126
-			Family: uint8(family),
127
-		},
128
-	}
129
-}
130
-
131
-func (msg *IfAddrmsg) ToWireFormat() []byte {
132
-	length := syscall.SizeofIfAddrmsg
133
-	b := make([]byte, length)
134
-	b[0] = msg.Family
135
-	b[1] = msg.Prefixlen
136
-	b[2] = msg.Flags
137
-	b[3] = msg.Scope
138
-	native.PutUint32(b[4:8], msg.Index)
139
-	return b
140
-}
141
-
142
-func (msg *IfAddrmsg) Len() int {
143
-	return syscall.SizeofIfAddrmsg
144
-}
145
-
146
-type RtMsg struct {
147
-	syscall.RtMsg
148
-}
149
-
150
-func newRtMsg() *RtMsg {
151
-	return &RtMsg{
152
-		RtMsg: syscall.RtMsg{
153
-			Table:    syscall.RT_TABLE_MAIN,
154
-			Scope:    syscall.RT_SCOPE_UNIVERSE,
155
-			Protocol: syscall.RTPROT_BOOT,
156
-			Type:     syscall.RTN_UNICAST,
157
-		},
158
-	}
159
-}
160
-
161
-func (msg *RtMsg) ToWireFormat() []byte {
162
-	length := syscall.SizeofRtMsg
163
-	b := make([]byte, length)
164
-	b[0] = msg.Family
165
-	b[1] = msg.Dst_len
166
-	b[2] = msg.Src_len
167
-	b[3] = msg.Tos
168
-	b[4] = msg.Table
169
-	b[5] = msg.Protocol
170
-	b[6] = msg.Scope
171
-	b[7] = msg.Type
172
-	native.PutUint32(b[8:12], msg.Flags)
173
-	return b
174
-}
175
-
176
-func (msg *RtMsg) Len() int {
177
-	return syscall.SizeofRtMsg
178
-}
179
-
180
-func rtaAlignOf(attrlen int) int {
181
-	return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1)
182
-}
183
-
184
-type RtAttr struct {
185
-	syscall.RtAttr
186
-	Data     []byte
187
-	children []NetlinkRequestData
188
-}
189
-
190
-func newRtAttr(attrType int, data []byte) *RtAttr {
191
-	return &RtAttr{
192
-		RtAttr: syscall.RtAttr{
193
-			Type: uint16(attrType),
194
-		},
195
-		children: []NetlinkRequestData{},
196
-		Data:     data,
197
-	}
198
-}
199
-
200
-func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
201
-	attr := newRtAttr(attrType, data)
202
-	parent.children = append(parent.children, attr)
203
-	return attr
204
-}
205
-
206
-func (a *RtAttr) Len() int {
207
-	if len(a.children) == 0 {
208
-		return (syscall.SizeofRtAttr + len(a.Data))
209
-	}
210
-
211
-	l := 0
212
-	for _, child := range a.children {
213
-		l += child.Len()
214
-	}
215
-	l += syscall.SizeofRtAttr
216
-	return rtaAlignOf(l + len(a.Data))
217
-}
218
-
219
-func (a *RtAttr) ToWireFormat() []byte {
220
-	length := a.Len()
221
-	buf := make([]byte, rtaAlignOf(length))
222
-
223
-	if a.Data != nil {
224
-		copy(buf[4:], a.Data)
225
-	} else {
226
-		next := 4
227
-		for _, child := range a.children {
228
-			childBuf := child.ToWireFormat()
229
-			copy(buf[next:], childBuf)
230
-			next += rtaAlignOf(len(childBuf))
231
-		}
232
-	}
233
-
234
-	if l := uint16(length); l != 0 {
235
-		native.PutUint16(buf[0:2], l)
236
-	}
237
-	native.PutUint16(buf[2:4], a.Type)
238
-	return buf
239
-}
240
-
241
-func uint32Attr(t int, n uint32) *RtAttr {
242
-	buf := make([]byte, 4)
243
-	native.PutUint32(buf, n)
244
-	return newRtAttr(t, buf)
245
-}
246
-
247
-type NetlinkRequest struct {
248
-	syscall.NlMsghdr
249
-	Data []NetlinkRequestData
250
-}
251
-
252
-func (rr *NetlinkRequest) ToWireFormat() []byte {
253
-	length := rr.Len
254
-	dataBytes := make([][]byte, len(rr.Data))
255
-	for i, data := range rr.Data {
256
-		dataBytes[i] = data.ToWireFormat()
257
-		length += uint32(len(dataBytes[i]))
258
-	}
259
-	b := make([]byte, length)
260
-	native.PutUint32(b[0:4], length)
261
-	native.PutUint16(b[4:6], rr.Type)
262
-	native.PutUint16(b[6:8], rr.Flags)
263
-	native.PutUint32(b[8:12], rr.Seq)
264
-	native.PutUint32(b[12:16], rr.Pid)
265
-
266
-	next := 16
267
-	for _, data := range dataBytes {
268
-		copy(b[next:], data)
269
-		next += len(data)
270
-	}
271
-	return b
272
-}
273
-
274
-func (rr *NetlinkRequest) AddData(data NetlinkRequestData) {
275
-	if data != nil {
276
-		rr.Data = append(rr.Data, data)
277
-	}
278
-}
279
-
280
-func newNetlinkRequest(proto, flags int) *NetlinkRequest {
281
-	return &NetlinkRequest{
282
-		NlMsghdr: syscall.NlMsghdr{
283
-			Len:   uint32(syscall.NLMSG_HDRLEN),
284
-			Type:  uint16(proto),
285
-			Flags: syscall.NLM_F_REQUEST | uint16(flags),
286
-			Seq:   atomic.AddUint32(&nextSeqNr, 1),
287
-		},
288
-	}
289
-}
290
-
291
-type NetlinkSocket struct {
292
-	fd  int
293
-	lsa syscall.SockaddrNetlink
294
-}
295
-
296
-func getNetlinkSocket() (*NetlinkSocket, error) {
297
-	fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE)
298
-	if err != nil {
299
-		return nil, err
300
-	}
301
-	s := &NetlinkSocket{
302
-		fd: fd,
303
-	}
304
-	s.lsa.Family = syscall.AF_NETLINK
305
-	if err := syscall.Bind(fd, &s.lsa); err != nil {
306
-		syscall.Close(fd)
307
-		return nil, err
308
-	}
309
-
310
-	return s, nil
311
-}
312
-
313
-func (s *NetlinkSocket) Close() {
314
-	syscall.Close(s.fd)
315
-}
316
-
317
-func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
318
-	if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil {
319
-		return err
320
-	}
321
-	return nil
322
-}
323
-
324
-func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
325
-	rb := make([]byte, syscall.Getpagesize())
326
-	nr, _, err := syscall.Recvfrom(s.fd, rb, 0)
327
-	if err != nil {
328
-		return nil, err
329
-	}
330
-	if nr < syscall.NLMSG_HDRLEN {
331
-		return nil, ErrShortResponse
332
-	}
333
-	rb = rb[:nr]
334
-	return syscall.ParseNetlinkMessage(rb)
335
-}
336
-
337
-func (s *NetlinkSocket) GetPid() (uint32, error) {
338
-	lsa, err := syscall.Getsockname(s.fd)
339
-	if err != nil {
340
-		return 0, err
341
-	}
342
-	switch v := lsa.(type) {
343
-	case *syscall.SockaddrNetlink:
344
-		return v.Pid, nil
345
-	}
346
-	return 0, ErrWrongSockType
347
-}
348
-
349
-func (s *NetlinkSocket) CheckMessage(m syscall.NetlinkMessage, seq, pid uint32) error {
350
-	if m.Header.Seq != seq {
351
-		return fmt.Errorf("netlink: invalid seq %d, expected %d", m.Header.Seq, seq)
352
-	}
353
-	if m.Header.Pid != pid {
354
-		return fmt.Errorf("netlink: wrong pid %d, expected %d", m.Header.Pid, pid)
355
-	}
356
-	if m.Header.Type == syscall.NLMSG_DONE {
357
-		return io.EOF
358
-	}
359
-	if m.Header.Type == syscall.NLMSG_ERROR {
360
-		e := int32(native.Uint32(m.Data[0:4]))
361
-		if e == 0 {
362
-			return io.EOF
363
-		}
364
-		return syscall.Errno(-e)
365
-	}
366
-	return nil
367
-}
368
-
369
-func (s *NetlinkSocket) HandleAck(seq uint32) error {
370
-	pid, err := s.GetPid()
371
-	if err != nil {
372
-		return err
373
-	}
374
-
375
-outer:
376
-	for {
377
-		msgs, err := s.Receive()
378
-		if err != nil {
379
-			return err
380
-		}
381
-		for _, m := range msgs {
382
-			if err := s.CheckMessage(m, seq, pid); err != nil {
383
-				if err == io.EOF {
384
-					break outer
385
-				}
386
-				return err
387
-			}
388
-		}
389
-	}
390
-
391
-	return nil
392
-}
393
-
394
-func zeroTerminated(s string) []byte {
395
-	return []byte(s + "\000")
396
-}
397
-
398
-func nonZeroTerminated(s string) []byte {
399
-	return []byte(s)
400
-}
401
-
402
-// Add a new network link of a specified type.
403
-// This is identical to running: ip link add $name type $linkType
404
-func NetworkLinkAdd(name string, linkType string) error {
405
-	if name == "" || linkType == "" {
406
-		return fmt.Errorf("Neither link name nor link type can be empty!")
407
-	}
408
-
409
-	s, err := getNetlinkSocket()
410
-	if err != nil {
411
-		return err
412
-	}
413
-	defer s.Close()
414
-
415
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
416
-
417
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
418
-	wb.AddData(msg)
419
-
420
-	linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil)
421
-	newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType))
422
-	wb.AddData(linkInfo)
423
-
424
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name))
425
-	wb.AddData(nameData)
426
-
427
-	if err := s.Send(wb); err != nil {
428
-		return err
429
-	}
430
-
431
-	return s.HandleAck(wb.Seq)
432
-}
433
-
434
-// Delete a network link.
435
-// This is identical to running: ip link del $name
436
-func NetworkLinkDel(name string) error {
437
-	if name == "" {
438
-		return fmt.Errorf("Network link name can not be empty!")
439
-	}
440
-
441
-	s, err := getNetlinkSocket()
442
-	if err != nil {
443
-		return err
444
-	}
445
-	defer s.Close()
446
-
447
-	iface, err := net.InterfaceByName(name)
448
-	if err != nil {
449
-		return err
450
-	}
451
-
452
-	wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
453
-
454
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
455
-	msg.Index = int32(iface.Index)
456
-	wb.AddData(msg)
457
-
458
-	if err := s.Send(wb); err != nil {
459
-		return err
460
-	}
461
-
462
-	return s.HandleAck(wb.Seq)
463
-}
464
-
465
-// Bring up a particular network interface.
466
-// This is identical to running: ip link set dev $name up
467
-func NetworkLinkUp(iface *net.Interface) error {
468
-	s, err := getNetlinkSocket()
469
-	if err != nil {
470
-		return err
471
-	}
472
-	defer s.Close()
473
-
474
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
475
-
476
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
477
-	msg.Index = int32(iface.Index)
478
-	msg.Flags = syscall.IFF_UP
479
-	msg.Change = syscall.IFF_UP
480
-	wb.AddData(msg)
481
-
482
-	if err := s.Send(wb); err != nil {
483
-		return err
484
-	}
485
-
486
-	return s.HandleAck(wb.Seq)
487
-}
488
-
489
-// Bring down a particular network interface.
490
-// This is identical to running: ip link set $name down
491
-func NetworkLinkDown(iface *net.Interface) error {
492
-	s, err := getNetlinkSocket()
493
-	if err != nil {
494
-		return err
495
-	}
496
-	defer s.Close()
497
-
498
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
499
-
500
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
501
-	msg.Index = int32(iface.Index)
502
-	msg.Flags = 0 & ^syscall.IFF_UP
503
-	msg.Change = DEFAULT_CHANGE
504
-	wb.AddData(msg)
505
-
506
-	if err := s.Send(wb); err != nil {
507
-		return err
508
-	}
509
-
510
-	return s.HandleAck(wb.Seq)
511
-}
512
-
513
-// Set link layer address ie. MAC Address.
514
-// This is identical to running: ip link set dev $name address $macaddress
515
-func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
516
-	s, err := getNetlinkSocket()
517
-	if err != nil {
518
-		return err
519
-	}
520
-	defer s.Close()
521
-
522
-	hwaddr, err := net.ParseMAC(macaddr)
523
-	if err != nil {
524
-		return err
525
-	}
526
-
527
-	var (
528
-		MULTICAST byte = 0x1
529
-	)
530
-
531
-	if hwaddr[0]&0x1 == MULTICAST {
532
-		return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr)
533
-	}
534
-
535
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
536
-
537
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
538
-	msg.Index = int32(iface.Index)
539
-	msg.Change = DEFAULT_CHANGE
540
-	wb.AddData(msg)
541
-
542
-	macdata := make([]byte, 6)
543
-	copy(macdata, hwaddr)
544
-	data := newRtAttr(IFLA_ADDRESS, macdata)
545
-	wb.AddData(data)
546
-
547
-	if err := s.Send(wb); err != nil {
548
-		return err
549
-	}
550
-	return s.HandleAck(wb.Seq)
551
-}
552
-
553
-// Set link Maximum Transmission Unit
554
-// This is identical to running: ip link set dev $name mtu $MTU
555
-// bridge is a bitch here https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=292088
556
-// https://bugzilla.redhat.com/show_bug.cgi?id=697021
557
-// There is a discussion about how to deal with ifcs joining bridge with MTU > 1500
558
-// Regular network interfaces do seem to work though!
559
-func NetworkSetMTU(iface *net.Interface, mtu int) error {
560
-	s, err := getNetlinkSocket()
561
-	if err != nil {
562
-		return err
563
-	}
564
-	defer s.Close()
565
-
566
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
567
-
568
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
569
-	msg.Type = syscall.RTM_SETLINK
570
-	msg.Flags = syscall.NLM_F_REQUEST
571
-	msg.Index = int32(iface.Index)
572
-	msg.Change = DEFAULT_CHANGE
573
-	wb.AddData(msg)
574
-	wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu)))
575
-
576
-	if err := s.Send(wb); err != nil {
577
-		return err
578
-	}
579
-	return s.HandleAck(wb.Seq)
580
-}
581
-
582
-// Set link queue length
583
-// This is identical to running: ip link set dev $name txqueuelen $QLEN
584
-func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
585
-	s, err := getNetlinkSocket()
586
-	if err != nil {
587
-		return err
588
-	}
589
-	defer s.Close()
590
-
591
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
592
-
593
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
594
-	msg.Type = syscall.RTM_SETLINK
595
-	msg.Flags = syscall.NLM_F_REQUEST
596
-	msg.Index = int32(iface.Index)
597
-	msg.Change = DEFAULT_CHANGE
598
-	wb.AddData(msg)
599
-	wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen)))
600
-
601
-	if err := s.Send(wb); err != nil {
602
-		return err
603
-	}
604
-	return s.HandleAck(wb.Seq)
605
-}
606
-
607
-func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
608
-	s, err := getNetlinkSocket()
609
-	if err != nil {
610
-		return err
611
-	}
612
-	defer s.Close()
613
-
614
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
615
-
616
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
617
-	msg.Type = syscall.RTM_SETLINK
618
-	msg.Flags = syscall.NLM_F_REQUEST
619
-	msg.Index = int32(iface.Index)
620
-	msg.Change = DEFAULT_CHANGE
621
-	wb.AddData(msg)
622
-	wb.AddData(rtattr)
623
-
624
-	if err := s.Send(wb); err != nil {
625
-		return err
626
-	}
627
-
628
-	return s.HandleAck(wb.Seq)
629
-}
630
-
631
-// Add an interface to bridge.
632
-// This is identical to running: ip link set $name master $master
633
-func NetworkSetMaster(iface, master *net.Interface) error {
634
-	data := uint32Attr(syscall.IFLA_MASTER, uint32(master.Index))
635
-	return networkMasterAction(iface, data)
636
-}
637
-
638
-// Remove an interface from the bridge
639
-// This is is identical to to running: ip link $name set nomaster
640
-func NetworkSetNoMaster(iface *net.Interface) error {
641
-	data := uint32Attr(syscall.IFLA_MASTER, 0)
642
-	return networkMasterAction(iface, data)
643
-}
644
-
645
-func networkSetNsAction(iface *net.Interface, rtattr *RtAttr) error {
646
-	s, err := getNetlinkSocket()
647
-	if err != nil {
648
-		return err
649
-	}
650
-	defer s.Close()
651
-
652
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
653
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
654
-	msg.Index = int32(iface.Index)
655
-	wb.AddData(msg)
656
-	wb.AddData(rtattr)
657
-
658
-	if err := s.Send(wb); err != nil {
659
-		return err
660
-	}
661
-
662
-	return s.HandleAck(wb.Seq)
663
-}
664
-
665
-// Move a particular network interface to a particular network namespace
666
-// specified by PID. This is identical to running: ip link set dev $name netns $pid
667
-func NetworkSetNsPid(iface *net.Interface, nspid int) error {
668
-	data := uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid))
669
-	return networkSetNsAction(iface, data)
670
-}
671
-
672
-// Move a particular network interface to a particular mounted
673
-// network namespace specified by file descriptor.
674
-// This is idential to running: ip link set dev $name netns $fd
675
-func NetworkSetNsFd(iface *net.Interface, fd int) error {
676
-	data := uint32Attr(IFLA_NET_NS_FD, uint32(fd))
677
-	return networkSetNsAction(iface, data)
678
-}
679
-
680
-// Rename a particular interface to a different name
681
-// !!! Note that you can't rename an active interface. You need to bring it down before renaming it.
682
-// This is identical to running: ip link set dev ${oldName} name ${newName}
683
-func NetworkChangeName(iface *net.Interface, newName string) error {
684
-	if len(newName) >= IFNAMSIZ {
685
-		return fmt.Errorf("Interface name %s too long", newName)
686
-	}
687
-
688
-	s, err := getNetlinkSocket()
689
-	if err != nil {
690
-		return err
691
-	}
692
-	defer s.Close()
693
-
694
-	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
695
-
696
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
697
-	msg.Index = int32(iface.Index)
698
-	msg.Change = DEFAULT_CHANGE
699
-	wb.AddData(msg)
700
-
701
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(newName))
702
-	wb.AddData(nameData)
703
-
704
-	if err := s.Send(wb); err != nil {
705
-		return err
706
-	}
707
-
708
-	return s.HandleAck(wb.Seq)
709
-}
710
-
711
-// Add a new VETH pair link on the host
712
-// This is identical to running: ip link add name $name type veth peer name $peername
713
-func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
714
-	s, err := getNetlinkSocket()
715
-	if err != nil {
716
-		return err
717
-	}
718
-	defer s.Close()
719
-
720
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
721
-
722
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
723
-	wb.AddData(msg)
724
-
725
-	nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1))
726
-	wb.AddData(nameData)
727
-
728
-	txqLen := make([]byte, 4)
729
-	native.PutUint32(txqLen, uint32(txQueueLen))
730
-	txqData := newRtAttr(syscall.IFLA_TXQLEN, txqLen)
731
-	wb.AddData(txqData)
732
-
733
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
734
-	newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth"))
735
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
736
-	nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil)
737
-
738
-	newIfInfomsgChild(nest3, syscall.AF_UNSPEC)
739
-	newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2))
740
-
741
-	txqLen2 := make([]byte, 4)
742
-	native.PutUint32(txqLen2, uint32(txQueueLen))
743
-	newRtAttrChild(nest3, syscall.IFLA_TXQLEN, txqLen2)
744
-
745
-	wb.AddData(nest1)
746
-
747
-	if err := s.Send(wb); err != nil {
748
-		return err
749
-	}
750
-
751
-	if err := s.HandleAck(wb.Seq); err != nil {
752
-		if os.IsExist(err) {
753
-			return ErrInterfaceExists
754
-		}
755
-
756
-		return err
757
-	}
758
-
759
-	return nil
760
-}
761
-
762
-// Add a new VLAN interface with masterDev as its upper device
763
-// This is identical to running:
764
-// ip link add name $name link $masterdev type vlan id $id
765
-func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
766
-	s, err := getNetlinkSocket()
767
-	if err != nil {
768
-		return err
769
-	}
770
-	defer s.Close()
771
-
772
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
773
-
774
-	masterDevIfc, err := net.InterfaceByName(masterDev)
775
-	if err != nil {
776
-		return err
777
-	}
778
-
779
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
780
-	wb.AddData(msg)
781
-
782
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
783
-	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("vlan"))
784
-
785
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
786
-	vlanData := make([]byte, 2)
787
-	native.PutUint16(vlanData, vlanId)
788
-	newRtAttrChild(nest2, IFLA_VLAN_ID, vlanData)
789
-	wb.AddData(nest1)
790
-
791
-	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
792
-	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(vlanDev)))
793
-
794
-	if err := s.Send(wb); err != nil {
795
-		return err
796
-	}
797
-	return s.HandleAck(wb.Seq)
798
-}
799
-
800
-// MacVlan link has LowerDev, UpperDev and operates in Mode mode
801
-// This simplifies the code when creating MacVlan or MacVtap interface
802
-type MacVlanLink struct {
803
-	MasterDev string
804
-	SlaveDev  string
805
-	mode      string
806
-}
807
-
808
-func (m MacVlanLink) Mode() uint32 {
809
-	modeMap := map[string]uint32{
810
-		"private":  MACVLAN_MODE_PRIVATE,
811
-		"vepa":     MACVLAN_MODE_VEPA,
812
-		"bridge":   MACVLAN_MODE_BRIDGE,
813
-		"passthru": MACVLAN_MODE_PASSTHRU,
814
-	}
815
-
816
-	return modeMap[m.mode]
817
-}
818
-
819
-// Add MAC VLAN network interface with masterDev as its upper device
820
-// This is identical to running:
821
-// ip link add name $name link $masterdev type macvlan mode $mode
822
-func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error {
823
-	s, err := getNetlinkSocket()
824
-	if err != nil {
825
-		return err
826
-	}
827
-	defer s.Close()
828
-
829
-	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
830
-
831
-	masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev)
832
-	if err != nil {
833
-		return err
834
-	}
835
-
836
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
837
-	wb.AddData(msg)
838
-
839
-	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
840
-	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type))
841
-
842
-	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
843
-	macVlanData := make([]byte, 4)
844
-	native.PutUint32(macVlanData, mcvln.Mode())
845
-	newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
846
-	wb.AddData(nest1)
847
-
848
-	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
849
-	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev)))
850
-
851
-	if err := s.Send(wb); err != nil {
852
-		return err
853
-	}
854
-	return s.HandleAck(wb.Seq)
855
-}
856
-
857
-func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
858
-	return networkLinkMacVlan("macvlan", &MacVlanLink{
859
-		MasterDev: masterDev,
860
-		SlaveDev:  macVlanDev,
861
-		mode:      mode,
862
-	})
863
-}
864
-
865
-func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error {
866
-	return networkLinkMacVlan("macvtap", &MacVlanLink{
867
-		MasterDev: masterDev,
868
-		SlaveDev:  macVlanDev,
869
-		mode:      mode,
870
-	})
871
-}
872
-
873
-func networkLinkIpAction(action, flags int, ifa IfAddr) error {
874
-	s, err := getNetlinkSocket()
875
-	if err != nil {
876
-		return err
877
-	}
878
-	defer s.Close()
879
-
880
-	family := getIpFamily(ifa.IP)
881
-
882
-	wb := newNetlinkRequest(action, flags)
883
-
884
-	msg := newIfAddrmsg(family)
885
-	msg.Index = uint32(ifa.Iface.Index)
886
-	prefixLen, _ := ifa.IPNet.Mask.Size()
887
-	msg.Prefixlen = uint8(prefixLen)
888
-	wb.AddData(msg)
889
-
890
-	var ipData []byte
891
-	if family == syscall.AF_INET {
892
-		ipData = ifa.IP.To4()
893
-	} else {
894
-		ipData = ifa.IP.To16()
895
-	}
896
-
897
-	localData := newRtAttr(syscall.IFA_LOCAL, ipData)
898
-	wb.AddData(localData)
899
-
900
-	addrData := newRtAttr(syscall.IFA_ADDRESS, ipData)
901
-	wb.AddData(addrData)
902
-
903
-	if err := s.Send(wb); err != nil {
904
-		return err
905
-	}
906
-
907
-	return s.HandleAck(wb.Seq)
908
-}
909
-
910
-// Delete an IP address from an interface. This is identical to:
911
-// ip addr del $ip/$ipNet dev $iface
912
-func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
913
-	return networkLinkIpAction(
914
-		syscall.RTM_DELADDR,
915
-		syscall.NLM_F_ACK,
916
-		IfAddr{iface, ip, ipNet},
917
-	)
918
-}
919
-
920
-// Add an Ip address to an interface. This is identical to:
921
-// ip addr add $ip/$ipNet dev $iface
922
-func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
923
-	return networkLinkIpAction(
924
-		syscall.RTM_NEWADDR,
925
-		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK,
926
-		IfAddr{iface, ip, ipNet},
927
-	)
928
-}
929
-
930
-// Returns an array of IPNet for all the currently routed subnets on ipv4
931
-// This is similar to the first column of "ip route" output
932
-func NetworkGetRoutes() ([]Route, error) {
933
-	s, err := getNetlinkSocket()
934
-	if err != nil {
935
-		return nil, err
936
-	}
937
-	defer s.Close()
938
-
939
-	wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
940
-
941
-	msg := newIfInfomsg(syscall.AF_UNSPEC)
942
-	wb.AddData(msg)
943
-
944
-	if err := s.Send(wb); err != nil {
945
-		return nil, err
946
-	}
947
-
948
-	pid, err := s.GetPid()
949
-	if err != nil {
950
-		return nil, err
951
-	}
952
-
953
-	res := make([]Route, 0)
954
-
955
-outer:
956
-	for {
957
-		msgs, err := s.Receive()
958
-		if err != nil {
959
-			return nil, err
960
-		}
961
-		for _, m := range msgs {
962
-			if err := s.CheckMessage(m, wb.Seq, pid); err != nil {
963
-				if err == io.EOF {
964
-					break outer
965
-				}
966
-				return nil, err
967
-			}
968
-			if m.Header.Type != syscall.RTM_NEWROUTE {
969
-				continue
970
-			}
971
-
972
-			var r Route
973
-
974
-			msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0]))
975
-
976
-			if msg.Flags&syscall.RTM_F_CLONED != 0 {
977
-				// Ignore cloned routes
978
-				continue
979
-			}
980
-
981
-			if msg.Table != syscall.RT_TABLE_MAIN {
982
-				// Ignore non-main tables
983
-				continue
984
-			}
985
-
986
-			if msg.Family != syscall.AF_INET {
987
-				// Ignore non-ipv4 routes
988
-				continue
989
-			}
990
-
991
-			if msg.Dst_len == 0 {
992
-				// Default routes
993
-				r.Default = true
994
-			}
995
-
996
-			attrs, err := syscall.ParseNetlinkRouteAttr(&m)
997
-			if err != nil {
998
-				return nil, err
999
-			}
1000
-			for _, attr := range attrs {
1001
-				switch attr.Attr.Type {
1002
-				case syscall.RTA_DST:
1003
-					ip := attr.Value
1004
-					r.IPNet = &net.IPNet{
1005
-						IP:   ip,
1006
-						Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)),
1007
-					}
1008
-				case syscall.RTA_OIF:
1009
-					index := int(native.Uint32(attr.Value[0:4]))
1010
-					r.Iface, _ = net.InterfaceByIndex(index)
1011
-				}
1012
-			}
1013
-			if r.Default || r.IPNet != nil {
1014
-				res = append(res, r)
1015
-			}
1016
-		}
1017
-	}
1018
-
1019
-	return res, nil
1020
-}
1021
-
1022
-// Add a new route table entry.
1023
-func AddRoute(destination, source, gateway, device string) error {
1024
-	if destination == "" && source == "" && gateway == "" {
1025
-		return fmt.Errorf("one of destination, source or gateway must not be blank")
1026
-	}
1027
-
1028
-	s, err := getNetlinkSocket()
1029
-	if err != nil {
1030
-		return err
1031
-	}
1032
-	defer s.Close()
1033
-
1034
-	wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
1035
-	msg := newRtMsg()
1036
-	currentFamily := -1
1037
-	var rtAttrs []*RtAttr
1038
-
1039
-	if destination != "" {
1040
-		destIP, destNet, err := net.ParseCIDR(destination)
1041
-		if err != nil {
1042
-			return fmt.Errorf("destination CIDR %s couldn't be parsed", destination)
1043
-		}
1044
-		destFamily := getIpFamily(destIP)
1045
-		currentFamily = destFamily
1046
-		destLen, bits := destNet.Mask.Size()
1047
-		if destLen == 0 && bits == 0 {
1048
-			return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination)
1049
-		}
1050
-		msg.Family = uint8(destFamily)
1051
-		msg.Dst_len = uint8(destLen)
1052
-		var destData []byte
1053
-		if destFamily == syscall.AF_INET {
1054
-			destData = destIP.To4()
1055
-		} else {
1056
-			destData = destIP.To16()
1057
-		}
1058
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData))
1059
-	}
1060
-
1061
-	if source != "" {
1062
-		srcIP := net.ParseIP(source)
1063
-		if srcIP == nil {
1064
-			return fmt.Errorf("source IP %s couldn't be parsed", source)
1065
-		}
1066
-		srcFamily := getIpFamily(srcIP)
1067
-		if currentFamily != -1 && currentFamily != srcFamily {
1068
-			return fmt.Errorf("source and destination ip were not the same IP family")
1069
-		}
1070
-		currentFamily = srcFamily
1071
-		msg.Family = uint8(srcFamily)
1072
-		var srcData []byte
1073
-		if srcFamily == syscall.AF_INET {
1074
-			srcData = srcIP.To4()
1075
-		} else {
1076
-			srcData = srcIP.To16()
1077
-		}
1078
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData))
1079
-	}
1080
-
1081
-	if gateway != "" {
1082
-		gwIP := net.ParseIP(gateway)
1083
-		if gwIP == nil {
1084
-			return fmt.Errorf("gateway IP %s couldn't be parsed", gateway)
1085
-		}
1086
-		gwFamily := getIpFamily(gwIP)
1087
-		if currentFamily != -1 && currentFamily != gwFamily {
1088
-			return fmt.Errorf("gateway, source, and destination ip were not the same IP family")
1089
-		}
1090
-		msg.Family = uint8(gwFamily)
1091
-		var gwData []byte
1092
-		if gwFamily == syscall.AF_INET {
1093
-			gwData = gwIP.To4()
1094
-		} else {
1095
-			gwData = gwIP.To16()
1096
-		}
1097
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData))
1098
-	}
1099
-
1100
-	wb.AddData(msg)
1101
-	for _, attr := range rtAttrs {
1102
-		wb.AddData(attr)
1103
-	}
1104
-
1105
-	iface, err := net.InterfaceByName(device)
1106
-	if err != nil {
1107
-		return err
1108
-	}
1109
-	wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index)))
1110
-
1111
-	if err := s.Send(wb); err != nil {
1112
-		return err
1113
-	}
1114
-	return s.HandleAck(wb.Seq)
1115
-}
1116
-
1117
-// Add a new default gateway. Identical to:
1118
-// ip route add default via $ip
1119
-func AddDefaultGw(ip, device string) error {
1120
-	return AddRoute("", "", ip, device)
1121
-}
1122
-
1123
-// THIS CODE DOES NOT COMMUNICATE WITH KERNEL VIA RTNETLINK INTERFACE
1124
-// IT IS HERE FOR BACKWARDS COMPATIBILITY WITH OLDER LINUX KERNELS
1125
-// WHICH SHIP WITH OLDER NOT ENTIRELY FUNCTIONAL VERSION OF NETLINK
1126
-func getIfSocket() (fd int, err error) {
1127
-	for _, socket := range []int{
1128
-		syscall.AF_INET,
1129
-		syscall.AF_PACKET,
1130
-		syscall.AF_INET6,
1131
-	} {
1132
-		if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil {
1133
-			break
1134
-		}
1135
-	}
1136
-	if err == nil {
1137
-		return fd, nil
1138
-	}
1139
-	return -1, err
1140
-}
1141
-
1142
-// Create the actual bridge device.  This is more backward-compatible than
1143
-// netlink.NetworkLinkAdd and works on RHEL 6.
1144
-func CreateBridge(name string, setMacAddr bool) error {
1145
-	if len(name) >= IFNAMSIZ {
1146
-		return fmt.Errorf("Interface name %s too long", name)
1147
-	}
1148
-
1149
-	s, err := getIfSocket()
1150
-	if err != nil {
1151
-		return err
1152
-	}
1153
-	defer syscall.Close(s)
1154
-
1155
-	nameBytePtr, err := syscall.BytePtrFromString(name)
1156
-	if err != nil {
1157
-		return err
1158
-	}
1159
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
1160
-		return err
1161
-	}
1162
-	if setMacAddr {
1163
-		return SetMacAddress(name, randMacAddr())
1164
-	}
1165
-	return nil
1166
-}
1167
-
1168
-// Delete the actual bridge device.
1169
-func DeleteBridge(name string) error {
1170
-	s, err := getIfSocket()
1171
-	if err != nil {
1172
-		return err
1173
-	}
1174
-	defer syscall.Close(s)
1175
-
1176
-	nameBytePtr, err := syscall.BytePtrFromString(name)
1177
-	if err != nil {
1178
-		return err
1179
-	}
1180
-
1181
-	var ifr ifreqFlags
1182
-	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], []byte(name))
1183
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s),
1184
-		syscall.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifr))); err != 0 {
1185
-		return err
1186
-	}
1187
-
1188
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s),
1189
-		SIOC_BRDELBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
1190
-		return err
1191
-	}
1192
-	return nil
1193
-}
1194
-
1195
-func ifIoctBridge(iface, master *net.Interface, op uintptr) error {
1196
-	if len(master.Name) >= IFNAMSIZ {
1197
-		return fmt.Errorf("Interface name %s too long", master.Name)
1198
-	}
1199
-
1200
-	s, err := getIfSocket()
1201
-	if err != nil {
1202
-		return err
1203
-	}
1204
-	defer syscall.Close(s)
1205
-
1206
-	ifr := ifreqIndex{}
1207
-	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], master.Name)
1208
-	ifr.IfruIndex = int32(iface.Index)
1209
-
1210
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), op, uintptr(unsafe.Pointer(&ifr))); err != 0 {
1211
-		return err
1212
-	}
1213
-
1214
-	return nil
1215
-}
1216
-
1217
-// Add a slave to a bridge device.  This is more backward-compatible than
1218
-// netlink.NetworkSetMaster and works on RHEL 6.
1219
-func AddToBridge(iface, master *net.Interface) error {
1220
-	return ifIoctBridge(iface, master, SIOC_BRADDIF)
1221
-}
1222
-
1223
-// Detach a slave from a bridge device.  This is more backward-compatible than
1224
-// netlink.NetworkSetMaster and works on RHEL 6.
1225
-func DelFromBridge(iface, master *net.Interface) error {
1226
-	return ifIoctBridge(iface, master, SIOC_BRDELIF)
1227
-}
1228
-
1229
-func randMacAddr() string {
1230
-	hw := make(net.HardwareAddr, 6)
1231
-	for i := 0; i < 6; i++ {
1232
-		hw[i] = byte(rnd.Intn(255))
1233
-	}
1234
-	hw[0] &^= 0x1 // clear multicast bit
1235
-	hw[0] |= 0x2  // set local assignment bit (IEEE802)
1236
-	return hw.String()
1237
-}
1238
-
1239
-func SetMacAddress(name, addr string) error {
1240
-	if len(name) >= IFNAMSIZ {
1241
-		return fmt.Errorf("Interface name %s too long", name)
1242
-	}
1243
-
1244
-	hw, err := net.ParseMAC(addr)
1245
-	if err != nil {
1246
-		return err
1247
-	}
1248
-
1249
-	s, err := getIfSocket()
1250
-	if err != nil {
1251
-		return err
1252
-	}
1253
-	defer syscall.Close(s)
1254
-
1255
-	ifr := ifreqHwaddr{}
1256
-	ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER
1257
-	copy(ifr.IfrnName[:len(ifr.IfrnName)-1], name)
1258
-
1259
-	for i := 0; i < 6; i++ {
1260
-		ifr.IfruHwaddr.Data[i] = ifrDataByte(hw[i])
1261
-	}
1262
-
1263
-	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 {
1264
-		return err
1265
-	}
1266
-	return nil
1267
-}
1268
-
1269
-func SetHairpinMode(iface *net.Interface, enabled bool) error {
1270
-	s, err := getNetlinkSocket()
1271
-	if err != nil {
1272
-		return err
1273
-	}
1274
-	defer s.Close()
1275
-	req := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
1276
-
1277
-	msg := newIfInfomsg(syscall.AF_BRIDGE)
1278
-	msg.Type = syscall.RTM_SETLINK
1279
-	msg.Flags = syscall.NLM_F_REQUEST
1280
-	msg.Index = int32(iface.Index)
1281
-	msg.Change = DEFAULT_CHANGE
1282
-	req.AddData(msg)
1283
-
1284
-	mode := []byte{0}
1285
-	if enabled {
1286
-		mode[0] = byte(1)
1287
-	}
1288
-
1289
-	br := newRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil)
1290
-	newRtAttrChild(br, IFLA_BRPORT_MODE, mode)
1291
-	req.AddData(br)
1292
-	if err := s.Send(req); err != nil {
1293
-		return err
1294
-	}
1295
-
1296
-	return s.HandleAck(req.Seq)
1297
-}
1298
-
1299
-func ChangeName(iface *net.Interface, newName string) error {
1300
-	if len(newName) >= IFNAMSIZ {
1301
-		return fmt.Errorf("Interface name %s too long", newName)
1302
-	}
1303
-
1304
-	fd, err := getIfSocket()
1305
-	if err != nil {
1306
-		return err
1307
-	}
1308
-	defer syscall.Close(fd)
1309
-
1310
-	data := [IFNAMSIZ * 2]byte{}
1311
-	// the "-1"s here are very important for ensuring we get proper null
1312
-	// termination of our new C strings
1313
-	copy(data[:IFNAMSIZ-1], iface.Name)
1314
-	copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName)
1315
-
1316
-	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
1317
-		return errno
1318
-	}
1319
-
1320
-	return nil
1321
-}
1322 1
deleted file mode 100644
... ...
@@ -1,7 +0,0 @@
1
-// +build arm ppc64 ppc64le
2
-
3
-package netlink
4
-
5
-func ifrDataByte(b byte) uint8 {
6
-	return uint8(b)
7
-}
8 1
deleted file mode 100644
... ...
@@ -1,7 +0,0 @@
1
-// +build !arm,!ppc64,!ppc64le
2
-
3
-package netlink
4
-
5
-func ifrDataByte(b byte) int8 {
6
-	return int8(b)
7
-}
8 1
deleted file mode 100644
... ...
@@ -1,88 +0,0 @@
1
-// +build !linux
2
-
3
-package netlink
4
-
5
-import (
6
-	"errors"
7
-	"net"
8
-)
9
-
10
-var (
11
-	ErrNotImplemented = errors.New("not implemented")
12
-)
13
-
14
-func NetworkGetRoutes() ([]Route, error) {
15
-	return nil, ErrNotImplemented
16
-}
17
-
18
-func NetworkLinkAdd(name string, linkType string) error {
19
-	return ErrNotImplemented
20
-}
21
-
22
-func NetworkLinkDel(name string) error {
23
-	return ErrNotImplemented
24
-}
25
-
26
-func NetworkLinkUp(iface *net.Interface) error {
27
-	return ErrNotImplemented
28
-}
29
-
30
-func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
31
-	return ErrNotImplemented
32
-}
33
-
34
-func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
35
-	return ErrNotImplemented
36
-}
37
-
38
-func AddRoute(destination, source, gateway, device string) error {
39
-	return ErrNotImplemented
40
-}
41
-
42
-func AddDefaultGw(ip, device string) error {
43
-	return ErrNotImplemented
44
-}
45
-
46
-func NetworkSetMTU(iface *net.Interface, mtu int) error {
47
-	return ErrNotImplemented
48
-}
49
-
50
-func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
51
-	return ErrNotImplemented
52
-}
53
-
54
-func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
55
-	return ErrNotImplemented
56
-}
57
-
58
-func NetworkChangeName(iface *net.Interface, newName string) error {
59
-	return ErrNotImplemented
60
-}
61
-
62
-func NetworkSetNsFd(iface *net.Interface, fd int) error {
63
-	return ErrNotImplemented
64
-}
65
-
66
-func NetworkSetNsPid(iface *net.Interface, nspid int) error {
67
-	return ErrNotImplemented
68
-}
69
-
70
-func NetworkSetMaster(iface, master *net.Interface) error {
71
-	return ErrNotImplemented
72
-}
73
-
74
-func NetworkLinkDown(iface *net.Interface) error {
75
-	return ErrNotImplemented
76
-}
77
-
78
-func CreateBridge(name string, setMacAddr bool) error {
79
-	return ErrNotImplemented
80
-}
81
-
82
-func DeleteBridge(name string) error {
83
-	return ErrNotImplemented
84
-}
85
-
86
-func AddToBridge(iface, master *net.Interface) error {
87
-	return ErrNotImplemented
88
-}
... ...
@@ -11,8 +11,8 @@ import (
11 11
 	"strings"
12 12
 
13 13
 	"github.com/opencontainers/runc/libcontainer/configs"
14
-	"github.com/opencontainers/runc/libcontainer/netlink"
15 14
 	"github.com/opencontainers/runc/libcontainer/utils"
15
+	"github.com/vishvananda/netlink"
16 16
 )
17 17
 
18 18
 var strategies = map[string]networkStrategy{
... ...
@@ -93,11 +93,7 @@ func (l *loopback) create(n *network, nspid int) error {
93 93
 }
94 94
 
95 95
 func (l *loopback) initialize(config *network) error {
96
-	iface, err := net.InterfaceByName("lo")
97
-	if err != nil {
98
-		return err
99
-	}
100
-	return netlink.NetworkLinkUp(iface)
96
+	return netlink.LinkSetUp(&netlink.Device{netlink.LinkAttrs{Name: "lo"}})
101 97
 }
102 98
 
103 99
 func (l *loopback) attach(n *configs.Network) (err error) {
... ...
@@ -115,42 +111,36 @@ type veth struct {
115 115
 }
116 116
 
117 117
 func (v *veth) detach(n *configs.Network) (err error) {
118
-	bridge, err := net.InterfaceByName(n.Bridge)
119
-	if err != nil {
120
-		return err
121
-	}
122
-	host, err := net.InterfaceByName(n.HostInterfaceName)
123
-	if err != nil {
124
-		return err
125
-	}
126
-	if err := netlink.DelFromBridge(host, bridge); err != nil {
127
-		return err
128
-	}
129
-	return nil
118
+	return netlink.LinkSetMaster(&netlink.Device{netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
130 119
 }
131 120
 
132 121
 // attach a container network interface to an external network
133 122
 func (v *veth) attach(n *configs.Network) (err error) {
134
-	bridge, err := net.InterfaceByName(n.Bridge)
123
+	brl, err := netlink.LinkByName(n.Bridge)
135 124
 	if err != nil {
136 125
 		return err
137 126
 	}
138
-	host, err := net.InterfaceByName(n.HostInterfaceName)
127
+	br, ok := brl.(*netlink.Bridge)
128
+	if !ok {
129
+		return fmt.Errorf("Wrong device type %T", brl)
130
+	}
131
+	host, err := netlink.LinkByName(n.HostInterfaceName)
139 132
 	if err != nil {
140 133
 		return err
141 134
 	}
142
-	if err := netlink.AddToBridge(host, bridge); err != nil {
135
+
136
+	if err := netlink.LinkSetMaster(host, br); err != nil {
143 137
 		return err
144 138
 	}
145
-	if err := netlink.NetworkSetMTU(host, n.Mtu); err != nil {
139
+	if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
146 140
 		return err
147 141
 	}
148 142
 	if n.HairpinMode {
149
-		if err := netlink.SetHairpinMode(host, true); err != nil {
143
+		if err := netlink.LinkSetHairpin(host, true); err != nil {
150 144
 			return err
151 145
 		}
152 146
 	}
153
-	if err := netlink.NetworkLinkUp(host); err != nil {
147
+	if err := netlink.LinkSetUp(host); err != nil {
154 148
 		return err
155 149
 	}
156 150
 
... ...
@@ -163,26 +153,32 @@ func (v *veth) create(n *network, nspid int) (err error) {
163 163
 		return err
164 164
 	}
165 165
 	n.TempVethPeerName = tmpName
166
-	defer func() {
167
-		if err != nil {
168
-			netlink.NetworkLinkDel(n.HostInterfaceName)
169
-			netlink.NetworkLinkDel(n.TempVethPeerName)
170
-		}
171
-	}()
172 166
 	if n.Bridge == "" {
173 167
 		return fmt.Errorf("bridge is not specified")
174 168
 	}
175
-	if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
169
+	veth := &netlink.Veth{
170
+		LinkAttrs: netlink.LinkAttrs{
171
+			Name:   n.HostInterfaceName,
172
+			TxQLen: n.TxQueueLen,
173
+		},
174
+		PeerName: n.TempVethPeerName,
175
+	}
176
+	if err := netlink.LinkAdd(veth); err != nil {
176 177
 		return err
177 178
 	}
179
+	defer func() {
180
+		if err != nil {
181
+			netlink.LinkDel(veth)
182
+		}
183
+	}()
178 184
 	if err := v.attach(&n.Network); err != nil {
179 185
 		return err
180 186
 	}
181
-	child, err := net.InterfaceByName(n.TempVethPeerName)
187
+	child, err := netlink.LinkByName(n.TempVethPeerName)
182 188
 	if err != nil {
183 189
 		return err
184 190
 	}
185
-	return netlink.NetworkSetNsPid(child, nspid)
191
+	return netlink.LinkSetNsPid(child, nspid)
186 192
 }
187 193
 
188 194
 func (v *veth) generateTempPeerName() (string, error) {
... ...
@@ -194,53 +190,68 @@ func (v *veth) initialize(config *network) error {
194 194
 	if peer == "" {
195 195
 		return fmt.Errorf("peer is not specified")
196 196
 	}
197
-	child, err := net.InterfaceByName(peer)
197
+	child, err := netlink.LinkByName(peer)
198 198
 	if err != nil {
199 199
 		return err
200 200
 	}
201
-	if err := netlink.NetworkLinkDown(child); err != nil {
201
+	if err := netlink.LinkSetDown(child); err != nil {
202 202
 		return err
203 203
 	}
204
-	if err := netlink.NetworkChangeName(child, config.Name); err != nil {
204
+	if err := netlink.LinkSetName(child, config.Name); err != nil {
205 205
 		return err
206 206
 	}
207 207
 	// get the interface again after we changed the name as the index also changes.
208
-	if child, err = net.InterfaceByName(config.Name); err != nil {
208
+	if child, err = netlink.LinkByName(config.Name); err != nil {
209 209
 		return err
210 210
 	}
211 211
 	if config.MacAddress != "" {
212
-		if err := netlink.NetworkSetMacAddress(child, config.MacAddress); err != nil {
212
+		mac, err := net.ParseMAC(config.MacAddress)
213
+		if err != nil {
214
+			return err
215
+		}
216
+		if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
213 217
 			return err
214 218
 		}
215 219
 	}
216
-	ip, ipNet, err := net.ParseCIDR(config.Address)
220
+	ip, err := netlink.ParseAddr(config.Address)
217 221
 	if err != nil {
218 222
 		return err
219 223
 	}
220
-	if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
224
+	if err := netlink.AddrAdd(child, ip); err != nil {
221 225
 		return err
222 226
 	}
223 227
 	if config.IPv6Address != "" {
224
-		if ip, ipNet, err = net.ParseCIDR(config.IPv6Address); err != nil {
228
+		ip6, err := netlink.ParseAddr(config.IPv6Address)
229
+		if err != nil {
225 230
 			return err
226 231
 		}
227
-		if err := netlink.NetworkLinkAddIp(child, ip, ipNet); err != nil {
232
+		if err := netlink.AddrAdd(child, ip6); err != nil {
228 233
 			return err
229 234
 		}
230 235
 	}
231
-	if err := netlink.NetworkSetMTU(child, config.Mtu); err != nil {
236
+	if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
232 237
 		return err
233 238
 	}
234
-	if err := netlink.NetworkLinkUp(child); err != nil {
239
+	if err := netlink.LinkSetUp(child); err != nil {
235 240
 		return err
236 241
 	}
237 242
 	if config.Gateway != "" {
238
-		if err := netlink.AddDefaultGw(config.Gateway, config.Name); err != nil {
243
+		gw := net.ParseIP(config.Gateway)
244
+		if err := netlink.RouteAdd(&netlink.Route{
245
+			Scope:     netlink.SCOPE_UNIVERSE,
246
+			LinkIndex: child.Attrs().Index,
247
+			Gw:        gw,
248
+		}); err != nil {
239 249
 			return err
240 250
 		}
241 251
 	}
242 252
 	if config.IPv6Gateway != "" {
243
-		if err := netlink.AddDefaultGw(config.IPv6Gateway, config.Name); err != nil {
253
+		gw := net.ParseIP(config.IPv6Gateway)
254
+		if err := netlink.RouteAdd(&netlink.Route{
255
+			Scope:     netlink.SCOPE_UNIVERSE,
256
+			LinkIndex: child.Attrs().Index,
257
+			Gw:        gw,
258
+		}); err != nil {
244 259
 			return err
245 260
 		}
246 261
 	}
... ...
@@ -96,7 +96,6 @@ func mountCmd(cmd configs.Command) error {
96 96
 func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
97 97
 	var (
98 98
 		dest = m.Destination
99
-		data = label.FormatMountLabel(m.Data, mountLabel)
100 99
 	)
101 100
 	if !strings.HasPrefix(dest, rootfs) {
102 101
 		dest = filepath.Join(rootfs, dest)
... ...
@@ -107,12 +106,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
107 107
 		if err := os.MkdirAll(dest, 0755); err != nil {
108 108
 			return err
109 109
 		}
110
-		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), "")
110
+		return m.MountPropagate(rootfs, mountLabel)
111 111
 	case "mqueue":
112 112
 		if err := os.MkdirAll(dest, 0755); err != nil {
113 113
 			return err
114 114
 		}
115
-		if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), ""); err != nil {
115
+		if err := m.MountPropagate(rootfs, mountLabel); err != nil {
116 116
 			return err
117 117
 		}
118 118
 		return label.SetFileLabel(dest, mountLabel)
... ...
@@ -123,7 +122,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
123 123
 				return err
124 124
 			}
125 125
 		}
126
-		if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
126
+		if err := m.MountPropagate(rootfs, mountLabel); err != nil {
127 127
 			return err
128 128
 		}
129 129
 		if stat != nil {
... ...
@@ -136,12 +135,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
136 136
 		if err := os.MkdirAll(dest, 0755); err != nil {
137 137
 			return err
138 138
 		}
139
-		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
139
+		return m.MountPropagate(rootfs, mountLabel)
140 140
 	case "securityfs":
141 141
 		if err := os.MkdirAll(dest, 0755); err != nil {
142 142
 			return err
143 143
 		}
144
-		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
144
+		return m.MountPropagate(rootfs, mountLabel)
145 145
 	case "bind":
146 146
 		stat, err := os.Stat(m.Source)
147 147
 		if err != nil {
... ...
@@ -162,13 +161,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
162 162
 		if err := createIfNotExists(dest, stat.IsDir()); err != nil {
163 163
 			return err
164 164
 		}
165
-		if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data); err != nil {
165
+		if err := m.MountPropagate(rootfs, mountLabel); err != nil {
166 166
 			return err
167 167
 		}
168
-		if m.Flags&syscall.MS_RDONLY != 0 {
169
-			if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
170
-				return err
171
-			}
168
+		// bind mount won't change mount options, we need remount to make mount options effective.
169
+		if err := m.Remount(rootfs); err != nil {
170
+			return err
172 171
 		}
173 172
 		if m.Relabel != "" {
174 173
 			if err := label.Validate(m.Relabel); err != nil {
... ...
@@ -179,11 +177,6 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
179 179
 				return err
180 180
 			}
181 181
 		}
182
-		if m.Flags&syscall.MS_PRIVATE != 0 {
183
-			if err := syscall.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
184
-				return err
185
-			}
186
-		}
187 182
 	case "cgroup":
188 183
 		binds, err := getCgroupMounts(m)
189 184
 		if err != nil {
... ...
@@ -197,11 +190,12 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
197 197
 			}
198 198
 		}
199 199
 		tmpfs := &configs.Mount{
200
-			Source:      "tmpfs",
201
-			Device:      "tmpfs",
202
-			Destination: m.Destination,
203
-			Flags:       defaultMountFlags,
204
-			Data:        "mode=755",
200
+			Source:           "tmpfs",
201
+			Device:           "tmpfs",
202
+			Destination:      m.Destination,
203
+			Flags:            defaultMountFlags,
204
+			Data:             "mode=755",
205
+			PropagationFlags: m.PropagationFlags,
205 206
 		}
206 207
 		if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
207 208
 			return err
... ...
@@ -236,8 +230,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
236 236
 		}
237 237
 		if m.Flags&syscall.MS_RDONLY != 0 {
238 238
 			// remount cgroup root as readonly
239
-			rootfsCgroup := filepath.Join(rootfs, m.Destination)
240
-			if err := syscall.Mount("", rootfsCgroup, "", defaultMountFlags|syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
239
+			mcgrouproot := &configs.Mount{
240
+				Destination: m.Destination,
241
+				Flags:       defaultMountFlags | syscall.MS_RDONLY,
242
+			}
243
+			if err := mcgrouproot.Remount(rootfs); err != nil {
241 244
 				return err
242 245
 			}
243 246
 		}
... ...
@@ -253,10 +250,15 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
253 253
 		return nil, err
254 254
 	}
255 255
 
256
+	cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
257
+	if err != nil {
258
+		return nil, err
259
+	}
260
+
256 261
 	var binds []*configs.Mount
257 262
 
258 263
 	for _, mm := range mounts {
259
-		dir, err := mm.GetThisCgroupDir()
264
+		dir, err := mm.GetThisCgroupDir(cgroupPaths)
260 265
 		if err != nil {
261 266
 			return nil, err
262 267
 		}
... ...
@@ -265,10 +267,11 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
265 265
 			return nil, err
266 266
 		}
267 267
 		binds = append(binds, &configs.Mount{
268
-			Device:      "bind",
269
-			Source:      filepath.Join(mm.Mountpoint, relDir),
270
-			Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
271
-			Flags:       syscall.MS_BIND | syscall.MS_REC | m.Flags,
268
+			Device:           "bind",
269
+			Source:           filepath.Join(mm.Mountpoint, relDir),
270
+			Destination:      filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
271
+			Flags:            syscall.MS_BIND | syscall.MS_REC | m.Flags,
272
+			PropagationFlags: m.PropagationFlags,
272 273
 		})
273 274
 	}
274 275