Browse code

Update libcontainer to 1597c68f7b941fd97881155d7f077852e2914e7b

This commit contains changes for docker:
* user.GetGroupFile to user.GetGroupPath docker/libcontainer#301
* Add systemd support for OOM docker/libcontainer#307
* Support for custom namespaces docker/libcontainer#279, docker/libcontainer#312
* Fixes #9699 docker/libcontainer#308

Signed-off-by: Alexander Morozov <lk4d4@docker.com>

Alexander Morozov authored on 2014/12/24 05:10:03
Showing 45 changed files
... ...
@@ -1399,7 +1399,7 @@ func serveFd(addr string, job *engine.Job) error {
1399 1399
 }
1400 1400
 
1401 1401
 func lookupGidByName(nameOrGid string) (int, error) {
1402
-	groupFile, err := user.GetGroupFile()
1402
+	groupFile, err := user.GetGroupPath()
1403 1403
 	if err != nil {
1404 1404
 		return -1, err
1405 1405
 	}
... ...
@@ -82,7 +82,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
82 82
 
83 83
 func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Command) error {
84 84
 	if c.Network.HostNetworking {
85
-		container.Namespaces["NEWNET"] = false
85
+		container.Namespaces.Remove(libcontainer.NEWNET)
86 86
 		return nil
87 87
 	}
88 88
 
... ...
@@ -119,10 +119,7 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
119 119
 		cmd := active.cmd
120 120
 
121 121
 		nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net")
122
-		container.Networks = append(container.Networks, &libcontainer.Network{
123
-			Type:   "netns",
124
-			NsPath: nspath,
125
-		})
122
+		container.Namespaces.Add(libcontainer.NEWNET, nspath)
126 123
 	}
127 124
 
128 125
 	return nil
... ...
@@ -130,7 +127,7 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
130 130
 
131 131
 func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
132 132
 	if c.Ipc.HostIpc {
133
-		container.Namespaces["NEWIPC"] = false
133
+		container.Namespaces.Remove(libcontainer.NEWIPC)
134 134
 		return nil
135 135
 	}
136 136
 
... ...
@@ -144,7 +141,7 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
144 144
 		}
145 145
 		cmd := active.cmd
146 146
 
147
-		container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")
147
+		container.Namespaces.Add(libcontainer.NEWIPC, filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc"))
148 148
 	}
149 149
 
150 150
 	return nil
... ...
@@ -61,10 +61,6 @@ func NewDriver(root, initPath string) (*driver, error) {
61 61
 	}, nil
62 62
 }
63 63
 
64
-func (d *driver) notifyOnOOM(config *libcontainer.Config) (<-chan struct{}, error) {
65
-	return fs.NotifyOnOOM(config.Cgroups)
66
-}
67
-
68 64
 type execOutput struct {
69 65
 	exitCode int
70 66
 	err      error
... ...
@@ -152,11 +148,16 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
152 152
 	}
153 153
 
154 154
 	oomKill := false
155
-	oomKillNotification, err := d.notifyOnOOM(container)
155
+	state, err := libcontainer.GetState(filepath.Join(d.root, c.ID))
156 156
 	if err == nil {
157
-		_, oomKill = <-oomKillNotification
157
+		oomKillNotification, err := libcontainer.NotifyOnOOM(state)
158
+		if err == nil {
159
+			_, oomKill = <-oomKillNotification
160
+		} else {
161
+			log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
162
+		}
158 163
 	} else {
159
-		log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
164
+		log.Warnf("Failed to get container state, oom notify will not work: %s", err)
160 165
 	}
161 166
 	// wait for the container to exit.
162 167
 	execOutput := <-execOutputChan
... ...
@@ -25,12 +25,12 @@ func New() *libcontainer.Config {
25 25
 			"KILL",
26 26
 			"AUDIT_WRITE",
27 27
 		},
28
-		Namespaces: map[string]bool{
29
-			"NEWNS":  true,
30
-			"NEWUTS": true,
31
-			"NEWIPC": true,
32
-			"NEWPID": true,
33
-			"NEWNET": true,
28
+		Namespaces: libcontainer.Namespaces{
29
+			{Type: "NEWNS"},
30
+			{Type: "NEWUTS"},
31
+			{Type: "NEWIPC"},
32
+			{Type: "NEWPID"},
33
+			{Type: "NEWNET"},
34 34
 		},
35 35
 		Cgroups: &cgroups.Cgroup{
36 36
 			Parent:          "docker",
... ...
@@ -66,7 +66,7 @@ if [ "$1" = '--go' ]; then
66 66
 	mv tmp-tar src/code.google.com/p/go/src/pkg/archive/tar
67 67
 fi
68 68
 
69
-clone git github.com/docker/libcontainer 53eca435e63db58b06cf796d3a9326db5fd42253
69
+clone git github.com/docker/libcontainer 1597c68f7b941fd97881155d7f077852e2914e7b
70 70
 # see src/github.com/docker/libcontainer/update-vendor.sh which is the "source of truth" for libcontainer deps (just like this file)
71 71
 rm -rf src/github.com/docker/libcontainer/vendor
72 72
 eval "$(grep '^clone ' src/github.com/docker/libcontainer/update-vendor.sh | grep -v 'github.com/codegangsta/cli')"
... ...
@@ -1,13 +1,13 @@
1 1
 
2 2
 all:
3
-	docker build -t docker/libcontainer .
3
+	docker build -t dockercore/libcontainer .
4 4
 
5 5
 test: 
6 6
 	# we need NET_ADMIN for the netlink tests and SYS_ADMIN for mounting
7
-	docker run --rm -it --privileged docker/libcontainer
7
+	docker run --rm -it --privileged dockercore/libcontainer
8 8
 
9 9
 sh:
10
-	docker run --rm -it --privileged -w /busybox docker/libcontainer nsinit exec sh
10
+	docker run --rm -it --privileged -w /busybox dockercore/libcontainer nsinit exec sh
11 11
 
12 12
 GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u)
13 13
 
... ...
@@ -318,4 +318,29 @@ a container.
318 318
 | Resume         | Resume all processes inside the container if paused                |
319 319
 | Exec           | Execute a new process inside of the container  ( requires setns )  |
320 320
 
321
+### Execute a new process inside of a running container.
321 322
 
323
+Users can execute a new process inside of a running container. Any binaries to be
324
+executed must be accessible within the container's rootfs.
325
+
326
+The started process will run inside the container's rootfs. Any changes
327
+made by the process to the container's filesystem will persist after the
328
+process has finished executing.
329
+
330
+The started process will join all the container's existing namespaces. When the
331
+container is paused, the process will also be paused and will resume when
332
+the container is unpaused.  The started process will only run when the container's
333
+primary process (PID 1) is running, and will not be restarted when the container
334
+is restarted.
335
+
336
+#### Planned additions
337
+
338
+The started process will have its own cgroups nested inside the container's
339
+cgroups. This is used for process tracking and optionally resource allocation
340
+handling for the new process. The freezer cgroup is required; the rest of the cgroups
341
+are optional. The process executor must place its pid inside the correct
342
+cgroups before starting the process. This is done so that no child processes or
343
+threads can escape the cgroups.
344
+
345
+When the process is stopped, the process executor will try (in a best-effort way)
346
+to stop all its children and remove the sub-cgroups.
... ...
@@ -50,6 +50,7 @@ type Cgroup struct {
50 50
 	CpuQuota          int64             `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
51 51
 	CpuPeriod         int64             `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default.
52 52
 	CpusetCpus        string            `json:"cpuset_cpus,omitempty"`        // CPU to use
53
+	CpusetMems        string            `json:"cpuset_mems,omitempty"`        // MEM to use
53 54
 	Freezer           FreezerState      `json:"freezer,omitempty"`            // set the freeze value for the process
54 55
 	Slice             string            `json:"slice,omitempty"`              // Parent slice to use for systemd
55 56
 }
... ...
@@ -18,7 +18,7 @@ func (s *CpusetGroup) Set(d *data) error {
18 18
 	if err != nil {
19 19
 		return err
20 20
 	}
21
-	return s.SetDir(dir, d.c.CpusetCpus, d.pid)
21
+	return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid)
22 22
 }
23 23
 
24 24
 func (s *CpusetGroup) Remove(d *data) error {
... ...
@@ -29,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
29 29
 	return nil
30 30
 }
31 31
 
32
-func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
32
+func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
33 33
 	if err := s.ensureParent(dir); err != nil {
34 34
 		return err
35 35
 	}
... ...
@@ -40,10 +40,15 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
40 40
 		return err
41 41
 	}
42 42
 
43
-	// If we don't use --cpuset, the default cpuset.cpus is set in
44
-	// s.ensureParent, otherwise, use the value we set
45
-	if value != "" {
46
-		if err := writeFile(dir, "cpuset.cpus", value); err != nil {
43
+	// If we don't use --cpuset-xxx, the default value inherited from the parent cgroup
44
+	// is set in s.ensureParent, otherwise, use the value we set
45
+	if cpus != "" {
46
+		if err := writeFile(dir, "cpuset.cpus", cpus); err != nil {
47
+			return err
48
+		}
49
+	}
50
+	if mems != "" {
51
+		if err := writeFile(dir, "cpuset.mems", mems); err != nil {
47 52
 			return err
48 53
 		}
49 54
 	}
... ...
@@ -38,12 +38,17 @@ func (s *MemoryGroup) Set(d *data) error {
38 38
 			}
39 39
 		}
40 40
 		// By default, MemorySwap is set to twice the size of RAM.
41
-		// If you want to omit MemorySwap, set it to `-1'.
42
-		if d.c.MemorySwap != -1 {
41
+		// If you want to omit MemorySwap, set it to '-1'.
42
+		if d.c.MemorySwap == 0 {
43 43
 			if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.Memory*2, 10)); err != nil {
44 44
 				return err
45 45
 			}
46 46
 		}
47
+		if d.c.MemorySwap > 0 {
48
+			if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(d.c.MemorySwap, 10)); err != nil {
49
+				return err
50
+			}
51
+		}
47 52
 	}
48 53
 	return nil
49 54
 }
50 55
deleted file mode 100644
... ...
@@ -1,82 +0,0 @@
1
-// +build linux
2
-
3
-package fs
4
-
5
-import (
6
-	"fmt"
7
-	"os"
8
-	"path/filepath"
9
-	"syscall"
10
-
11
-	"github.com/docker/libcontainer/cgroups"
12
-)
13
-
14
-// NotifyOnOOM sends signals on the returned channel when the cgroup reaches
15
-// its memory limit. The channel is closed when the cgroup is removed.
16
-func NotifyOnOOM(c *cgroups.Cgroup) (<-chan struct{}, error) {
17
-	d, err := getCgroupData(c, 0)
18
-	if err != nil {
19
-		return nil, err
20
-	}
21
-
22
-	return notifyOnOOM(d)
23
-}
24
-
25
-func notifyOnOOM(d *data) (<-chan struct{}, error) {
26
-	dir, err := d.path("memory")
27
-	if err != nil {
28
-		return nil, err
29
-	}
30
-
31
-	fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
32
-	if syserr != 0 {
33
-		return nil, syserr
34
-	}
35
-
36
-	eventfd := os.NewFile(fd, "eventfd")
37
-
38
-	oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
39
-	if err != nil {
40
-		eventfd.Close()
41
-		return nil, err
42
-	}
43
-
44
-	var (
45
-		eventControlPath = filepath.Join(dir, "cgroup.event_control")
46
-		data             = fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
47
-	)
48
-
49
-	if err := writeFile(dir, "cgroup.event_control", data); err != nil {
50
-		eventfd.Close()
51
-		oomControl.Close()
52
-		return nil, err
53
-	}
54
-
55
-	ch := make(chan struct{})
56
-
57
-	go func() {
58
-		defer func() {
59
-			close(ch)
60
-			eventfd.Close()
61
-			oomControl.Close()
62
-		}()
63
-
64
-		buf := make([]byte, 8)
65
-
66
-		for {
67
-			if _, err := eventfd.Read(buf); err != nil {
68
-				return
69
-			}
70
-
71
-			// When a cgroup is destroyed, an event is sent to eventfd.
72
-			// So if the control path is gone, return instead of notifying.
73
-			if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
74
-				return
75
-			}
76
-
77
-			ch <- struct{}{}
78
-		}
79
-	}()
80
-
81
-	return ch, nil
82
-}
83 1
deleted file mode 100644
... ...
@@ -1,86 +0,0 @@
1
-// +build linux
2
-
3
-package fs
4
-
5
-import (
6
-	"encoding/binary"
7
-	"fmt"
8
-	"syscall"
9
-	"testing"
10
-	"time"
11
-)
12
-
13
-func TestNotifyOnOOM(t *testing.T) {
14
-	helper := NewCgroupTestUtil("memory", t)
15
-	defer helper.cleanup()
16
-
17
-	helper.writeFileContents(map[string]string{
18
-		"memory.oom_control":   "",
19
-		"cgroup.event_control": "",
20
-	})
21
-
22
-	var eventFd, oomControlFd int
23
-
24
-	ooms, err := notifyOnOOM(helper.CgroupData)
25
-	if err != nil {
26
-		t.Fatal("expected no error, got:", err)
27
-	}
28
-
29
-	memoryPath, _ := helper.CgroupData.path("memory")
30
-	data, err := readFile(memoryPath, "cgroup.event_control")
31
-	if err != nil {
32
-		t.Fatal("couldn't read event control file:", err)
33
-	}
34
-
35
-	if _, err := fmt.Sscanf(data, "%d %d", &eventFd, &oomControlFd); err != nil {
36
-		t.Fatalf("invalid control data %q: %s", data, err)
37
-	}
38
-
39
-	// re-open the eventfd
40
-	efd, err := syscall.Dup(eventFd)
41
-	if err != nil {
42
-		t.Fatal("unable to reopen eventfd:", err)
43
-	}
44
-	defer syscall.Close(efd)
45
-
46
-	if err != nil {
47
-		t.Fatal("unable to dup event fd:", err)
48
-	}
49
-
50
-	buf := make([]byte, 8)
51
-	binary.LittleEndian.PutUint64(buf, 1)
52
-
53
-	if _, err := syscall.Write(efd, buf); err != nil {
54
-		t.Fatal("unable to write to eventfd:", err)
55
-	}
56
-
57
-	select {
58
-	case <-ooms:
59
-	case <-time.After(100 * time.Millisecond):
60
-		t.Fatal("no notification on oom channel after 100ms")
61
-	}
62
-
63
-	// simulate what happens when a cgroup is destroyed by cleaning up and then
64
-	// writing to the eventfd.
65
-	helper.cleanup()
66
-	if _, err := syscall.Write(efd, buf); err != nil {
67
-		t.Fatal("unable to write to eventfd:", err)
68
-	}
69
-
70
-	// give things a moment to shut down
71
-	select {
72
-	case _, ok := <-ooms:
73
-		if ok {
74
-			t.Fatal("expected no oom to be triggered")
75
-		}
76
-	case <-time.After(100 * time.Millisecond):
77
-	}
78
-
79
-	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(oomControlFd), syscall.F_GETFD, 0); err != syscall.EBADF {
80
-		t.Error("expected oom control to be closed")
81
-	}
82
-
83
-	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF {
84
-		t.Error("expected event fd to be closed")
85
-	}
86
-}
... ...
@@ -90,4 +90,8 @@ func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats)
90 90
 			t.Fail()
91 91
 		}
92 92
 	}
93
+	if expected.Failcnt != actual.Failcnt {
94
+		log.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
95
+		t.Fail()
96
+	}
93 97
 }
... ...
@@ -313,5 +313,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {
313 313
 
314 314
 	s := &fs.CpusetGroup{}
315 315
 
316
-	return s.SetDir(path, c.CpusetCpus, pid)
316
+	return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid)
317 317
 }
... ...
@@ -9,6 +9,7 @@ import (
9 9
 	"path/filepath"
10 10
 	"strconv"
11 11
 	"strings"
12
+	"time"
12 13
 
13 14
 	"github.com/docker/docker/pkg/mount"
14 15
 )
... ...
@@ -193,13 +194,30 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
193 193
 }
194 194
 
195 195
 // RemovePaths iterates over the provided paths removing them.
196
-// If an error is encountered the removal proceeds and the first error is
197
-// returned to ensure a partial removal is not possible.
196
+// We try to remove all paths five times, with an increasing delay between tries.
197
+// If any cgroups still remain after that, an appropriate error will be
198
+// returned.
198 199
 func RemovePaths(paths map[string]string) (err error) {
199
-	for _, path := range paths {
200
-		if rerr := os.RemoveAll(path); err == nil {
201
-			err = rerr
200
+	delay := 10 * time.Millisecond
201
+	for i := 0; i < 5; i++ {
202
+		if i != 0 {
203
+			time.Sleep(delay)
204
+			delay *= 2
205
+		}
206
+		for s, p := range paths {
207
+			os.RemoveAll(p)
208
+			// TODO: here probably should be logging
209
+			_, err := os.Stat(p)
210
+			// We need this strange way of checking cgroups existence because
211
+			// RemoveAll almost always returns error, even on already removed
212
+			// cgroups
213
+			if os.IsNotExist(err) {
214
+				delete(paths, s)
215
+			}
216
+		}
217
+		if len(paths) == 0 {
218
+			return nil
202 219
 		}
203 220
 	}
204
-	return err
221
+	return fmt.Errorf("Failed to remove paths: %s", paths)
205 222
 }
... ...
@@ -10,6 +10,57 @@ type MountConfig mount.MountConfig
10 10
 
11 11
 type Network network.Network
12 12
 
13
+type NamespaceType string
14
+
15
+const (
16
+	NEWNET  NamespaceType = "NEWNET"
17
+	NEWPID  NamespaceType = "NEWPID"
18
+	NEWNS   NamespaceType = "NEWNS"
19
+	NEWUTS  NamespaceType = "NEWUTS"
20
+	NEWIPC  NamespaceType = "NEWIPC"
21
+	NEWUSER NamespaceType = "NEWUSER"
22
+)
23
+
24
+// Namespace defines configuration for each namespace.  It specifies an
25
+// alternate path that is able to be joined via setns.
26
+type Namespace struct {
27
+	Type NamespaceType `json:"type"`
28
+	Path string        `json:"path,omitempty"`
29
+}
30
+
31
+type Namespaces []Namespace
32
+
33
+func (n Namespaces) Remove(t NamespaceType) bool {
34
+	i := n.index(t)
35
+	if i == -1 {
36
+		return false
37
+	}
38
+	n = append(n[:i], n[i+1:]...)
39
+	return true
40
+}
41
+
42
+func (n Namespaces) Add(t NamespaceType, path string) {
43
+	i := n.index(t)
44
+	if i == -1 {
45
+		n = append(n, Namespace{Type: t, Path: path})
46
+		return
47
+	}
48
+	n[i].Path = path
49
+}
50
+
51
+func (n Namespaces) index(t NamespaceType) int {
52
+	for i, ns := range n {
53
+		if ns.Type == t {
54
+			return i
55
+		}
56
+	}
57
+	return -1
58
+}
59
+
60
+func (n Namespaces) Contains(t NamespaceType) bool {
61
+	return n.index(t) != -1
62
+}
63
+
13 64
 // Config defines configuration options for executing a process inside a contained environment.
14 65
 type Config struct {
15 66
 	// Mount specific options.
... ...
@@ -38,7 +89,7 @@ type Config struct {
38 38
 
39 39
 	// Namespaces specifies the container's namespaces that it should setup when cloning the init process
40 40
 	// If a namespace is not provided that namespace is shared from the container's parent process
41
-	Namespaces map[string]bool `json:"namespaces,omitempty"`
41
+	Namespaces Namespaces `json:"namespaces,omitempty"`
42 42
 
43 43
 	// Capabilities specify the capabilities to keep when executing the process inside the container
44 44
 	// All capabilities not specified will be dropped from the process's capability mask
... ...
@@ -47,9 +98,6 @@ type Config struct {
47 47
 	// Networks specifies the container's network setup to be created
48 48
 	Networks []*Network `json:"networks,omitempty"`
49 49
 
50
-	// Ipc specifies the container's ipc setup to be created
51
-	IpcNsPath string `json:"ipc,omitempty"`
52
-
53 50
 	// Routes can be specified to create entries in the route table as the container is started
54 51
 	Routes []*Route `json:"routes,omitempty"`
55 52
 
... ...
@@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) {
64 64
 		t.Fail()
65 65
 	}
66 66
 
67
-	if !container.Namespaces["NEWNET"] {
67
+	if !container.Namespaces.Contains(NEWNET) {
68 68
 		t.Log("namespaces should contain NEWNET")
69 69
 		t.Fail()
70 70
 	}
71 71
 
72
-	if container.Namespaces["NEWUSER"] {
72
+	if container.Namespaces.Contains(NEWUSER) {
73 73
 		t.Log("namespaces should not contain NEWUSER")
74 74
 		t.Fail()
75 75
 	}
... ...
@@ -4,6 +4,8 @@ import (
4 4
 	"os"
5 5
 	"strings"
6 6
 	"testing"
7
+
8
+	"github.com/docker/libcontainer"
7 9
 )
8 10
 
9 11
 func TestExecPS(t *testing.T) {
... ...
@@ -55,7 +57,6 @@ func TestIPCPrivate(t *testing.T) {
55 55
 	}
56 56
 
57 57
 	config := newTemplateConfig(rootfs)
58
-	config.Namespaces["NEWIPC"] = true
59 58
 	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
60 59
 	if err != nil {
61 60
 		t.Fatal(err)
... ...
@@ -87,7 +88,7 @@ func TestIPCHost(t *testing.T) {
87 87
 	}
88 88
 
89 89
 	config := newTemplateConfig(rootfs)
90
-	config.Namespaces["NEWIPC"] = false
90
+	config.Namespaces.Remove(libcontainer.NEWIPC)
91 91
 	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
92 92
 	if err != nil {
93 93
 		t.Fatal(err)
... ...
@@ -119,8 +120,7 @@ func TestIPCJoinPath(t *testing.T) {
119 119
 	}
120 120
 
121 121
 	config := newTemplateConfig(rootfs)
122
-	config.Namespaces["NEWIPC"] = false
123
-	config.IpcNsPath = "/proc/1/ns/ipc"
122
+	config.Namespaces.Add(libcontainer.NEWIPC, "/proc/1/ns/ipc")
124 123
 
125 124
 	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
126 125
 	if err != nil {
... ...
@@ -148,8 +148,7 @@ func TestIPCBadPath(t *testing.T) {
148 148
 	defer remove(rootfs)
149 149
 
150 150
 	config := newTemplateConfig(rootfs)
151
-	config.Namespaces["NEWIPC"] = false
152
-	config.IpcNsPath = "/proc/1/ns/ipcc"
151
+	config.Namespaces.Add(libcontainer.NEWIPC, "/proc/1/ns/ipcc")
153 152
 
154 153
 	_, _, err = runContainer(config, "", "true")
155 154
 	if err == nil {
156 155
new file mode 100644
... ...
@@ -0,0 +1,140 @@
0
+package integration
1
+
2
+import (
3
+	"os"
4
+	"os/exec"
5
+	"strings"
6
+	"sync"
7
+	"testing"
8
+
9
+	"github.com/docker/libcontainer"
10
+	"github.com/docker/libcontainer/namespaces"
11
+)
12
+
13
+func TestExecIn(t *testing.T) {
14
+	if testing.Short() {
15
+		return
16
+	}
17
+
18
+	rootfs, err := newRootFs()
19
+	if err != nil {
20
+		t.Fatal(err)
21
+	}
22
+	defer remove(rootfs)
23
+
24
+	config := newTemplateConfig(rootfs)
25
+	if err := writeConfig(config); err != nil {
26
+		t.Fatalf("failed to write config %s", err)
27
+	}
28
+
29
+	containerCmd, statePath, containerErr := startLongRunningContainer(config)
30
+	defer func() {
31
+		// kill the container
32
+		if containerCmd.Process != nil {
33
+			containerCmd.Process.Kill()
34
+		}
35
+		if err := <-containerErr; err != nil {
36
+			t.Fatal(err)
37
+		}
38
+	}()
39
+
40
+	// start the exec process
41
+	state, err := libcontainer.GetState(statePath)
42
+	if err != nil {
43
+		t.Fatalf("failed to get state %s", err)
44
+	}
45
+	buffers := newStdBuffers()
46
+	execErr := make(chan error, 1)
47
+	go func() {
48
+		_, err := namespaces.ExecIn(config, state, []string{"ps"},
49
+			os.Args[0], "exec", buffers.Stdin, buffers.Stdout, buffers.Stderr,
50
+			"", nil)
51
+		execErr <- err
52
+	}()
53
+	if err := <-execErr; err != nil {
54
+		t.Fatalf("exec finished with error %s", err)
55
+	}
56
+
57
+	out := buffers.Stdout.String()
58
+	if !strings.Contains(out, "sleep 10") || !strings.Contains(out, "ps") {
59
+		t.Fatalf("unexpected running process, output %q", out)
60
+	}
61
+}
62
+
63
+func TestExecInRlimit(t *testing.T) {
64
+	if testing.Short() {
65
+		return
66
+	}
67
+
68
+	rootfs, err := newRootFs()
69
+	if err != nil {
70
+		t.Fatal(err)
71
+	}
72
+	defer remove(rootfs)
73
+
74
+	config := newTemplateConfig(rootfs)
75
+	if err := writeConfig(config); err != nil {
76
+		t.Fatalf("failed to write config %s", err)
77
+	}
78
+
79
+	containerCmd, statePath, containerErr := startLongRunningContainer(config)
80
+	defer func() {
81
+		// kill the container
82
+		if containerCmd.Process != nil {
83
+			containerCmd.Process.Kill()
84
+		}
85
+		if err := <-containerErr; err != nil {
86
+			t.Fatal(err)
87
+		}
88
+	}()
89
+
90
+	// start the exec process
91
+	state, err := libcontainer.GetState(statePath)
92
+	if err != nil {
93
+		t.Fatalf("failed to get state %s", err)
94
+	}
95
+	buffers := newStdBuffers()
96
+	execErr := make(chan error, 1)
97
+	go func() {
98
+		_, err := namespaces.ExecIn(config, state, []string{"/bin/sh", "-c", "ulimit -n"},
99
+			os.Args[0], "exec", buffers.Stdin, buffers.Stdout, buffers.Stderr,
100
+			"", nil)
101
+		execErr <- err
102
+	}()
103
+	if err := <-execErr; err != nil {
104
+		t.Fatalf("exec finished with error %s", err)
105
+	}
106
+
107
+	out := buffers.Stdout.String()
108
+	if limit := strings.TrimSpace(out); limit != "1024" {
109
+		t.Fatalf("expected rlimit to be 1024, got %s", limit)
110
+	}
111
+}
112
+
113
+// start a long-running container so we have time to inspect execin processes
114
+func startLongRunningContainer(config *libcontainer.Config) (*exec.Cmd, string, chan error) {
115
+	containerErr := make(chan error, 1)
116
+	containerCmd := &exec.Cmd{}
117
+	var statePath string
118
+
119
+	createCmd := func(container *libcontainer.Config, console, dataPath, init string,
120
+		pipe *os.File, args []string) *exec.Cmd {
121
+		containerCmd = namespaces.DefaultCreateCommand(container, console, dataPath, init, pipe, args)
122
+		statePath = dataPath
123
+		return containerCmd
124
+	}
125
+
126
+	var containerStart sync.WaitGroup
127
+	containerStart.Add(1)
128
+	go func() {
129
+		buffers := newStdBuffers()
130
+		_, err := namespaces.Exec(config,
131
+			buffers.Stdin, buffers.Stdout, buffers.Stderr,
132
+			"", config.RootFs, []string{"sleep", "10"},
133
+			createCmd, containerStart.Done)
134
+		containerErr <- err
135
+	}()
136
+	containerStart.Wait()
137
+
138
+	return containerCmd, statePath, containerErr
139
+}
... ...
@@ -1,33 +1,76 @@
1 1
 package integration
2 2
 
3 3
 import (
4
+	"encoding/json"
4 5
 	"log"
5 6
 	"os"
6 7
 	"runtime"
7 8
 
9
+	"github.com/docker/libcontainer"
8 10
 	"github.com/docker/libcontainer/namespaces"
11
+	_ "github.com/docker/libcontainer/namespaces/nsenter"
9 12
 )
10 13
 
11 14
 // init runs the libcontainer initialization code because of the busybox style needs
12 15
 // to work around the go runtime and the issues with forking
13 16
 func init() {
14
-	if len(os.Args) < 2 || os.Args[1] != "init" {
17
+	if len(os.Args) < 2 {
15 18
 		return
16 19
 	}
17
-	runtime.LockOSThread()
20
+	// handle init
21
+	if len(os.Args) >= 2 && os.Args[1] == "init" {
22
+		runtime.LockOSThread()
18 23
 
19
-	container, err := loadConfig()
20
-	if err != nil {
21
-		log.Fatal(err)
24
+		container, err := loadConfig()
25
+		if err != nil {
26
+			log.Fatal(err)
27
+		}
28
+
29
+		rootfs, err := os.Getwd()
30
+		if err != nil {
31
+			log.Fatal(err)
32
+		}
33
+
34
+		if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
35
+			log.Fatalf("unable to initialize for container: %s", err)
36
+		}
37
+		os.Exit(1)
22 38
 	}
23 39
 
24
-	rootfs, err := os.Getwd()
25
-	if err != nil {
26
-		log.Fatal(err)
40
+	// handle execin
41
+	if len(os.Args) >= 2 && os.Args[0] == "nsenter-exec" {
42
+		runtime.LockOSThread()
43
+
44
+		// User args are passed after '--' in the command line.
45
+		userArgs := findUserArgs()
46
+
47
+		config, err := loadConfigFromFd()
48
+		if err != nil {
49
+			log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err)
50
+		}
51
+
52
+		if err := namespaces.FinalizeSetns(config, userArgs); err != nil {
53
+			log.Fatalf("docker-exec: failed to exec: %s", err)
54
+		}
55
+		os.Exit(1)
27 56
 	}
57
+}
58
+
59
+func findUserArgs() []string {
60
+	for i, a := range os.Args {
61
+		if a == "--" {
62
+			return os.Args[i+1:]
63
+		}
64
+	}
65
+	return []string{}
66
+}
28 67
 
29
-	if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
30
-		log.Fatalf("unable to initialize for container: %s", err)
68
+// loadConfigFromFd loads a container's config from the sync pipe that is provided by
69
+// fd 3 when running a process
70
+func loadConfigFromFd() (*libcontainer.Config, error) {
71
+	var config *libcontainer.Config
72
+	if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
73
+		return nil, err
31 74
 	}
32
-	os.Exit(1)
75
+	return config, nil
33 76
 }
... ...
@@ -32,12 +32,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
32 32
 			"KILL",
33 33
 			"AUDIT_WRITE",
34 34
 		},
35
-		Namespaces: map[string]bool{
36
-			"NEWNS":  true,
37
-			"NEWUTS": true,
38
-			"NEWIPC": true,
39
-			"NEWPID": true,
40
-			"NEWNET": true,
35
+		Namespaces: libcontainer.Namespaces{
36
+			{Type: libcontainer.NEWNS},
37
+			{Type: libcontainer.NEWUTS},
38
+			{Type: libcontainer.NEWIPC},
39
+			{Type: libcontainer.NEWPID},
40
+			{Type: libcontainer.NEWNET},
41 41
 		},
42 42
 		Cgroups: &cgroups.Cgroup{
43 43
 			Parent:          "integration",
44 44
deleted file mode 100644
... ...
@@ -1,29 +0,0 @@
1
-package ipc
2
-
3
-import (
4
-	"fmt"
5
-	"os"
6
-	"syscall"
7
-
8
-	"github.com/docker/libcontainer/system"
9
-)
10
-
11
-// Join the IPC Namespace of specified ipc path if it exists.
12
-// If the path does not exist then you are not joining a container.
13
-func Initialize(nsPath string) error {
14
-	if nsPath == "" {
15
-		return nil
16
-	}
17
-	f, err := os.OpenFile(nsPath, os.O_RDONLY, 0)
18
-	if err != nil {
19
-		return fmt.Errorf("failed get IPC namespace fd: %v", err)
20
-	}
21
-
22
-	err = system.Setns(f.Fd(), syscall.CLONE_NEWIPC)
23
-	f.Close()
24
-
25
-	if err != nil {
26
-		return fmt.Errorf("failed to setns current IPC namespace: %v", err)
27
-	}
28
-	return nil
29
-}
... ...
@@ -97,6 +97,10 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error {
97 97
 		return err
98 98
 	}
99 99
 
100
+	if err := setupRlimits(container); err != nil {
101
+		return fmt.Errorf("setup rlimits %s", err)
102
+	}
103
+
100 104
 	if err := FinalizeNamespace(container); err != nil {
101 105
 		return err
102 106
 	}
... ...
@@ -13,7 +13,6 @@ import (
13 13
 	"github.com/docker/libcontainer"
14 14
 	"github.com/docker/libcontainer/apparmor"
15 15
 	"github.com/docker/libcontainer/console"
16
-	"github.com/docker/libcontainer/ipc"
17 16
 	"github.com/docker/libcontainer/label"
18 17
 	"github.com/docker/libcontainer/mount"
19 18
 	"github.com/docker/libcontainer/netlink"
... ...
@@ -65,7 +64,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
65 65
 	if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
66 66
 		return err
67 67
 	}
68
-
68
+	// join any namespaces via a path to the namespace fd if provided
69
+	if err := joinExistingNamespaces(container.Namespaces); err != nil {
70
+		return err
71
+	}
69 72
 	if consolePath != "" {
70 73
 		if err := console.OpenAndDup(consolePath); err != nil {
71 74
 			return err
... ...
@@ -79,9 +81,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pip
79 79
 			return fmt.Errorf("setctty %s", err)
80 80
 		}
81 81
 	}
82
-	if err := ipc.Initialize(container.IpcNsPath); err != nil {
83
-		return fmt.Errorf("setup IPC %s", err)
84
-	}
82
+
85 83
 	if err := setupNetwork(container, networkState); err != nil {
86 84
 		return fmt.Errorf("setup networking %s", err)
87 85
 	}
... ...
@@ -178,17 +178,17 @@ func SetupUser(u string) error {
178 178
 		Home: "/",
179 179
 	}
180 180
 
181
-	passwdFile, err := user.GetPasswdFile()
181
+	passwdPath, err := user.GetPasswdPath()
182 182
 	if err != nil {
183 183
 		return err
184 184
 	}
185 185
 
186
-	groupFile, err := user.GetGroupFile()
186
+	groupPath, err := user.GetGroupPath()
187 187
 	if err != nil {
188 188
 		return err
189 189
 	}
190 190
 
191
-	execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile)
191
+	execUser, err := user.GetExecUserPath(u, &defaultExecUser, passwdPath, groupPath)
192 192
 	if err != nil {
193 193
 		return fmt.Errorf("get supplementary groups %s", err)
194 194
 	}
... ...
@@ -308,3 +308,22 @@ func LoadContainerEnvironment(container *libcontainer.Config) error {
308 308
 	}
309 309
 	return nil
310 310
 }
311
+
312
+// joinExistingNamespaces gets all the namespace paths specified for the container and
313
+// does a setns on the namespace fd so that the current process joins the namespace.
314
+func joinExistingNamespaces(namespaces []libcontainer.Namespace) error {
315
+	for _, ns := range namespaces {
316
+		if ns.Path != "" {
317
+			f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
318
+			if err != nil {
319
+				return err
320
+			}
321
+			err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Type]))
322
+			f.Close()
323
+			if err != nil {
324
+				return err
325
+			}
326
+		}
327
+	}
328
+	return nil
329
+}
... ...
@@ -15,6 +15,10 @@
15 15
 #include <unistd.h>
16 16
 #include <getopt.h>
17 17
 
18
+#ifndef PR_SET_CHILD_SUBREAPER
19
+#define PR_SET_CHILD_SUBREAPER 36
20
+#endif
21
+
18 22
 static const kBufSize = 256;
19 23
 static const char *kNsEnter = "nsenter";
20 24
 
... ...
@@ -32,8 +36,8 @@ void get_args(int *argc, char ***argv)
32 32
 		contents_size += kBufSize;
33 33
 		contents = (char *)realloc(contents, contents_size);
34 34
 		bytes_read =
35
-			read(fd, contents + contents_offset,
36
-			     contents_size - contents_offset);
35
+		    read(fd, contents + contents_offset,
36
+			 contents_size - contents_offset);
37 37
 		contents_offset += bytes_read;
38 38
 	}
39 39
 	while (bytes_read > 0);
... ...
@@ -90,16 +94,17 @@ void nsenter()
90 90
 	}
91 91
 
92 92
 	if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
93
-                fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno));
94
-                exit(1);
95
-        }
93
+		fprintf(stderr, "nsenter: failed to set child subreaper: %s",
94
+			strerror(errno));
95
+		exit(1);
96
+	}
96 97
 
97 98
 	static const struct option longopts[] = {
98 99
 		{"nspid", required_argument, NULL, 'n'},
99 100
 		{"console", required_argument, NULL, 't'},
100 101
 		{NULL, 0, NULL, 0}
101 102
 	};
102
-    
103
+
103 104
 	pid_t init_pid = -1;
104 105
 	char *init_pid_str = NULL;
105 106
 	char *console = NULL;
106 107
deleted file mode 100644
... ...
@@ -1,50 +0,0 @@
1
-package namespaces
2
-
3
-import "errors"
4
-
5
-type (
6
-	Namespace struct {
7
-		Key   string `json:"key,omitempty"`
8
-		Value int    `json:"value,omitempty"`
9
-		File  string `json:"file,omitempty"`
10
-	}
11
-	Namespaces []*Namespace
12
-)
13
-
14
-// namespaceList is used to convert the libcontainer types
15
-// into the names of the files located in /proc/<pid>/ns/* for
16
-// each namespace
17
-var (
18
-	namespaceList      = Namespaces{}
19
-	ErrUnkownNamespace = errors.New("Unknown namespace")
20
-	ErrUnsupported     = errors.New("Unsupported method")
21
-)
22
-
23
-func (ns *Namespace) String() string {
24
-	return ns.Key
25
-}
26
-
27
-func GetNamespace(key string) *Namespace {
28
-	for _, ns := range namespaceList {
29
-		if ns.Key == key {
30
-			cpy := *ns
31
-			return &cpy
32
-		}
33
-	}
34
-	return nil
35
-}
36
-
37
-// Contains returns true if the specified Namespace is
38
-// in the slice
39
-func (n Namespaces) Contains(ns string) bool {
40
-	return n.Get(ns) != nil
41
-}
42
-
43
-func (n Namespaces) Get(ns string) *Namespace {
44
-	for _, nsp := range n {
45
-		if nsp != nil && nsp.Key == ns {
46
-			return nsp
47
-		}
48
-	}
49
-	return nil
50
-}
51 1
deleted file mode 100644
... ...
@@ -1,16 +0,0 @@
1
-package namespaces
2
-
3
-import (
4
-	"syscall"
5
-)
6
-
7
-func init() {
8
-	namespaceList = Namespaces{
9
-		{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"},
10
-		{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"},
11
-		{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"},
12
-		{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"},
13
-		{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"},
14
-		{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"},
15
-	}
16
-}
17 1
deleted file mode 100644
... ...
@@ -1,30 +0,0 @@
1
-package namespaces
2
-
3
-import (
4
-	"testing"
5
-)
6
-
7
-func TestNamespacesContains(t *testing.T) {
8
-	ns := Namespaces{
9
-		GetNamespace("NEWPID"),
10
-		GetNamespace("NEWNS"),
11
-		GetNamespace("NEWUTS"),
12
-	}
13
-
14
-	if ns.Contains("NEWNET") {
15
-		t.Fatal("namespaces should not contain NEWNET")
16
-	}
17
-
18
-	if !ns.Contains("NEWPID") {
19
-		t.Fatal("namespaces should contain NEWPID but does not")
20
-	}
21
-
22
-	withNil := Namespaces{
23
-		GetNamespace("UNDEFINED"), // this element will be nil
24
-		GetNamespace("NEWPID"),
25
-	}
26
-
27
-	if !withNil.Contains("NEWPID") {
28
-		t.Fatal("namespaces should contain NEWPID but does not")
29
-	}
30
-}
... ...
@@ -5,6 +5,8 @@ package namespaces
5 5
 import (
6 6
 	"os"
7 7
 	"syscall"
8
+
9
+	"github.com/docker/libcontainer"
8 10
 )
9 11
 
10 12
 type initError struct {
... ...
@@ -15,6 +17,15 @@ func (i initError) Error() string {
15 15
 	return i.Message
16 16
 }
17 17
 
18
+var namespaceInfo = map[libcontainer.NamespaceType]int{
19
+	libcontainer.NEWNET:  syscall.CLONE_NEWNET,
20
+	libcontainer.NEWNS:   syscall.CLONE_NEWNS,
21
+	libcontainer.NEWUSER: syscall.CLONE_NEWUSER,
22
+	libcontainer.NEWIPC:  syscall.CLONE_NEWIPC,
23
+	libcontainer.NEWUTS:  syscall.CLONE_NEWUTS,
24
+	libcontainer.NEWPID:  syscall.CLONE_NEWPID,
25
+}
26
+
18 27
 // New returns a newly initialized Pipe for communication between processes
19 28
 func newInitPipe() (parent *os.File, child *os.File, err error) {
20 29
 	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
... ...
@@ -26,13 +37,9 @@ func newInitPipe() (parent *os.File, child *os.File, err error) {
26 26
 
27 27
 // GetNamespaceFlags parses the container's Namespaces options to set the correct
28 28
 // flags on clone, unshare, and setns
29
-func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
30
-	for key, enabled := range namespaces {
31
-		if enabled {
32
-			if ns := GetNamespace(key); ns != nil {
33
-				flag |= ns.Value
34
-			}
35
-		}
29
+func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
30
+	for _, v := range namespaces {
31
+		flag |= namespaceInfo[v.Type]
36 32
 	}
37 33
 	return flag
38 34
 }
... ...
@@ -522,11 +522,10 @@ func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
522 522
 
523 523
 	var (
524 524
 		MULTICAST byte = 0x1
525
-		LOCALOUI  byte = 0x2
526 525
 	)
527 526
 
528
-	if hwaddr[0]&0x1 == MULTICAST || hwaddr[0]&0x2 != LOCALOUI {
529
-		return fmt.Errorf("Incorrect Local MAC Address specified: %s", macaddr)
527
+	if hwaddr[0]&0x1 == MULTICAST {
528
+		return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr)
530 529
 	}
531 530
 
532 531
 	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
533 532
deleted file mode 100644
... ...
@@ -1,39 +0,0 @@
1
-// +build linux
2
-
3
-package network
4
-
5
-import (
6
-	"fmt"
7
-	"os"
8
-	"syscall"
9
-
10
-	"github.com/docker/libcontainer/system"
11
-)
12
-
13
-//  crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace
14
-type NetNS struct {
15
-}
16
-
17
-func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error {
18
-	networkState.NsPath = n.NsPath
19
-	return nil
20
-}
21
-
22
-func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
23
-	if networkState.NsPath == "" {
24
-		return fmt.Errorf("nspath does is not specified in NetworkState")
25
-	}
26
-
27
-	f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0)
28
-	if err != nil {
29
-		return fmt.Errorf("failed get network namespace fd: %v", err)
30
-	}
31
-
32
-	if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil {
33
-		f.Close()
34
-		return fmt.Errorf("failed to setns current network namespace: %v", err)
35
-	}
36
-
37
-	f.Close()
38
-	return nil
39
-}
... ...
@@ -88,6 +88,18 @@ func SetInterfaceIp(name string, rawIp string) error {
88 88
 	return netlink.NetworkLinkAddIp(iface, ip, ipNet)
89 89
 }
90 90
 
91
+func DeleteInterfaceIp(name string, rawIp string) error {
92
+	iface, err := net.InterfaceByName(name)
93
+	if err != nil {
94
+		return err
95
+	}
96
+	ip, ipNet, err := net.ParseCIDR(rawIp)
97
+	if err != nil {
98
+		return err
99
+	}
100
+	return netlink.NetworkLinkDelIp(iface, ip, ipNet)
101
+}
102
+
91 103
 func SetMtu(name string, mtu int) error {
92 104
 	iface, err := net.InterfaceByName(name)
93 105
 	if err != nil {
... ...
@@ -13,7 +13,6 @@ var (
13 13
 var strategies = map[string]NetworkStrategy{
14 14
 	"veth":     &Veth{},
15 15
 	"loopback": &Loopback{},
16
-	"netns":    &NetNS{},
17 16
 }
18 17
 
19 18
 // NetworkStrategy represents a specific network configuration for
... ...
@@ -8,9 +8,6 @@ type Network struct {
8 8
 	// Type sets the networks type, commonly veth and loopback
9 9
 	Type string `json:"type,omitempty"`
10 10
 
11
-	// Path to network namespace
12
-	NsPath string `json:"ns_path,omitempty"`
13
-
14 11
 	// The bridge to use.
15 12
 	Bridge string `json:"bridge,omitempty"`
16 13
 
... ...
@@ -50,6 +47,4 @@ type NetworkState struct {
50 50
 	VethHost string `json:"veth_host,omitempty"`
51 51
 	// The name of the veth interface created inside the container for the child.
52 52
 	VethChild string `json:"veth_child,omitempty"`
53
-	// Net namespace path.
54
-	NsPath string `json:"ns_path,omitempty"`
55 53
 }
56 54
new file mode 100644
... ...
@@ -0,0 +1,62 @@
0
+// +build linux
1
+
2
+package libcontainer
3
+
4
+import (
5
+	"fmt"
6
+	"io/ioutil"
7
+	"os"
8
+	"path/filepath"
9
+	"syscall"
10
+)
11
+
12
+const oomCgroupName = "memory"
13
+
14
+// NotifyOnOOM returns channel on which you can expect event about OOM,
15
+// if process died without OOM this channel will be closed.
16
+// s is current *libcontainer.State for container.
17
+func NotifyOnOOM(s *State) (<-chan struct{}, error) {
18
+	dir := s.CgroupPaths[oomCgroupName]
19
+	if dir == "" {
20
+		return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
21
+	}
22
+	oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
23
+	if err != nil {
24
+		return nil, err
25
+	}
26
+	fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
27
+	if syserr != 0 {
28
+		return nil, syserr
29
+	}
30
+
31
+	eventfd := os.NewFile(fd, "eventfd")
32
+
33
+	eventControlPath := filepath.Join(dir, "cgroup.event_control")
34
+	data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
35
+	if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
36
+		eventfd.Close()
37
+		oomControl.Close()
38
+		return nil, err
39
+	}
40
+	ch := make(chan struct{})
41
+	go func() {
42
+		defer func() {
43
+			close(ch)
44
+			eventfd.Close()
45
+			oomControl.Close()
46
+		}()
47
+		buf := make([]byte, 8)
48
+		for {
49
+			if _, err := eventfd.Read(buf); err != nil {
50
+				return
51
+			}
52
+			// When a cgroup is destroyed, an event is sent to eventfd.
53
+			// So if the control path is gone, return instead of notifying.
54
+			if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
55
+				return
56
+			}
57
+			ch <- struct{}{}
58
+		}
59
+	}()
60
+	return ch, nil
61
+}
0 62
new file mode 100644
... ...
@@ -0,0 +1,98 @@
0
+// +build linux
1
+
2
+package libcontainer
3
+
4
+import (
5
+	"encoding/binary"
6
+	"fmt"
7
+	"io/ioutil"
8
+	"os"
9
+	"path/filepath"
10
+	"syscall"
11
+	"testing"
12
+	"time"
13
+)
14
+
15
+func TestNotifyOnOOM(t *testing.T) {
16
+	memoryPath, err := ioutil.TempDir("", "testnotifyoom-")
17
+	if err != nil {
18
+		t.Fatal(err)
19
+	}
20
+	oomPath := filepath.Join(memoryPath, "memory.oom_control")
21
+	eventPath := filepath.Join(memoryPath, "cgroup.event_control")
22
+	if err := ioutil.WriteFile(oomPath, []byte{}, 0700); err != nil {
23
+		t.Fatal(err)
24
+	}
25
+	if err := ioutil.WriteFile(eventPath, []byte{}, 0700); err != nil {
26
+		t.Fatal(err)
27
+	}
28
+	var eventFd, oomControlFd int
29
+	st := &State{
30
+		CgroupPaths: map[string]string{
31
+			"memory": memoryPath,
32
+		},
33
+	}
34
+	ooms, err := NotifyOnOOM(st)
35
+	if err != nil {
36
+		t.Fatal("expected no error, got:", err)
37
+	}
38
+
39
+	data, err := ioutil.ReadFile(eventPath)
40
+	if err != nil {
41
+		t.Fatal("couldn't read event control file:", err)
42
+	}
43
+
44
+	if _, err := fmt.Sscanf(string(data), "%d %d", &eventFd, &oomControlFd); err != nil {
45
+		t.Fatalf("invalid control data %q: %s", data, err)
46
+	}
47
+
48
+	// re-open the eventfd
49
+	efd, err := syscall.Dup(eventFd)
50
+	if err != nil {
51
+		t.Fatal("unable to reopen eventfd:", err)
52
+	}
53
+	defer syscall.Close(efd)
54
+
55
+	if err != nil {
56
+		t.Fatal("unable to dup event fd:", err)
57
+	}
58
+
59
+	buf := make([]byte, 8)
60
+	binary.LittleEndian.PutUint64(buf, 1)
61
+
62
+	if _, err := syscall.Write(efd, buf); err != nil {
63
+		t.Fatal("unable to write to eventfd:", err)
64
+	}
65
+
66
+	select {
67
+	case <-ooms:
68
+	case <-time.After(100 * time.Millisecond):
69
+		t.Fatal("no notification on oom channel after 100ms")
70
+	}
71
+
72
+	// simulate what happens when a cgroup is destroyed by cleaning up and then
73
+	// writing to the eventfd.
74
+	if err := os.RemoveAll(memoryPath); err != nil {
75
+		t.Fatal(err)
76
+	}
77
+	if _, err := syscall.Write(efd, buf); err != nil {
78
+		t.Fatal("unable to write to eventfd:", err)
79
+	}
80
+
81
+	// give things a moment to shut down
82
+	select {
83
+	case _, ok := <-ooms:
84
+		if ok {
85
+			t.Fatal("expected no oom to be triggered")
86
+		}
87
+	case <-time.After(100 * time.Millisecond):
88
+	}
89
+
90
+	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(oomControlFd), syscall.F_GETFD, 0); err != syscall.EBADF {
91
+		t.Error("expected oom control to be closed")
92
+	}
93
+
94
+	if _, _, err := syscall.Syscall(syscall.SYS_FCNTL, uintptr(eventFd), syscall.F_GETFD, 0); err != syscall.EBADF {
95
+		t.Error("expected event fd to be closed")
96
+	}
97
+}
... ...
@@ -176,13 +176,13 @@
176 176
         "TERM=xterm"
177 177
     ],
178 178
     "hostname": "koye",
179
-    "namespaces": {
180
-        "NEWIPC": true,
181
-        "NEWNET": true,
182
-        "NEWNS": true,
183
-        "NEWPID": true,
184
-        "NEWUTS": true
185
-    },
179
+    "namespaces": [
180
+        {"type":"NEWIPC"},
181
+        {"type": "NEWNET"},
182
+        {"type": "NEWNS"},
183
+        {"type": "NEWPID"},
184
+        {"type": "NEWUTS"}
185
+    ],
186 186
     "networks": [
187 187
         {
188 188
             "address": "127.0.0.1/0",
... ...
@@ -175,13 +175,13 @@
175 175
         "TERM=xterm"
176 176
     ],
177 177
     "hostname": "koye",
178
-    "namespaces": {
179
-        "NEWIPC": true,
180
-        "NEWNET": true,
181
-        "NEWNS": true,
182
-        "NEWPID": true,
183
-        "NEWUTS": true
184
-    },
178
+    "namespaces": [
179
+        {"type": "NEWIPC"},
180
+        {"type": "NEWNET"},
181
+        {"type": "NEWNS"},
182
+        {"type": "NEWPID"},
183
+        {"type": "NEWUTS"}
184
+    ],
185 185
     "networks": [
186 186
         {
187 187
             "address": "127.0.0.1/0",
... ...
@@ -181,13 +181,13 @@
181 181
         "TERM=xterm"
182 182
     ],
183 183
     "hostname": "koye",
184
-    "namespaces": {
185
-        "NEWIPC": true,
186
-        "NEWNET": true,
187
-        "NEWNS": true,
188
-        "NEWPID": true,
189
-        "NEWUTS": true
190
-    },
184
+    "namespaces": [
185
+        {"type": "NEWIPC"},
186
+        {"type": "NEWNET"},
187
+        {"type": "NEWNS"},
188
+        {"type": "NEWPID"},
189
+        {"type": "NEWUTS"}
190
+    ],
191 191
     "networks": [
192 192
         {
193 193
             "address": "127.0.0.1/0",
... ...
@@ -175,13 +175,13 @@
175 175
         "TERM=xterm"
176 176
     ],
177 177
     "hostname": "koye",
178
-    "namespaces": {
179
-        "NEWIPC": true,
180
-        "NEWNET": true,
181
-        "NEWNS": true,
182
-        "NEWPID": true,
183
-        "NEWUTS": true
184
-    },
178
+    "namespaces": [
179
+        {"type": "NEWIPC"},
180
+        {"type": "NEWNET"},
181
+        {"type": "NEWNS"},
182
+        {"type": "NEWPID"},
183
+        {"type": "NEWUTS"}
184
+    ],
185 185
     "networks": [
186 186
         {
187 187
             "address": "127.0.0.1/0",
... ...
@@ -177,13 +177,13 @@
177 177
         "TERM=xterm"
178 178
     ],
179 179
     "hostname": "koye",
180
-    "namespaces": {
181
-        "NEWIPC": true,
182
-        "NEWNET": true,
183
-        "NEWNS": true,
184
-        "NEWPID": true,
185
-        "NEWUTS": true
186
-    },
180
+    "namespaces": [
181
+        {"type": "NEWIPC"},
182
+        {"type": "NEWNET"},
183
+        {"type": "NEWNS"},
184
+        {"type": "NEWPID"},
185
+        {"type": "NEWUTS"}
186
+    ],
187 187
     "networks": [
188 188
         {
189 189
             "address": "127.0.0.1/0",
... ...
@@ -1 +1,2 @@
1 1
 Tianon Gravi <admwiggin@gmail.com> (@tianon)
2
+Aleksa Sarai <cyphar@cyphar.com> (@cyphar)
... ...
@@ -9,22 +9,22 @@ import (
9 9
 
10 10
 // Unix-specific path to the passwd and group formatted files.
11 11
 const (
12
-	unixPasswdFile = "/etc/passwd"
13
-	unixGroupFile  = "/etc/group"
12
+	unixPasswdPath = "/etc/passwd"
13
+	unixGroupPath  = "/etc/group"
14 14
 )
15 15
 
16
-func GetPasswdFile() (string, error) {
17
-	return unixPasswdFile, nil
16
+func GetPasswdPath() (string, error) {
17
+	return unixPasswdPath, nil
18 18
 }
19 19
 
20 20
 func GetPasswd() (io.ReadCloser, error) {
21
-	return os.Open(unixPasswdFile)
21
+	return os.Open(unixPasswdPath)
22 22
 }
23 23
 
24
-func GetGroupFile() (string, error) {
25
-	return unixGroupFile, nil
24
+func GetGroupPath() (string, error) {
25
+	return unixGroupPath, nil
26 26
 }
27 27
 
28 28
 func GetGroup() (io.ReadCloser, error) {
29
-	return os.Open(unixGroupFile)
29
+	return os.Open(unixGroupPath)
30 30
 }
... ...
@@ -4,7 +4,7 @@ package user
4 4
 
5 5
 import "io"
6 6
 
7
-func GetPasswdFile() (string, error) {
7
+func GetPasswdPath() (string, error) {
8 8
 	return "", ErrUnsupported
9 9
 }
10 10
 
... ...
@@ -12,7 +12,7 @@ func GetPasswd() (io.ReadCloser, error) {
12 12
 	return nil, ErrUnsupported
13 13
 }
14 14
 
15
-func GetGroupFile() (string, error) {
15
+func GetGroupPath() (string, error) {
16 16
 	return "", ErrUnsupported
17 17
 }
18 18
 
... ...
@@ -197,11 +197,11 @@ type ExecUser struct {
197 197
 	Home     string
198 198
 }
199 199
 
200
-// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the
200
+// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
201 201
 // given file paths and uses that data as the arguments to GetExecUser. If the
202 202
 // files cannot be opened for any reason, the error is ignored and a nil
203 203
 // io.Reader is passed instead.
204
-func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
204
+func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
205 205
 	passwd, err := os.Open(passwdPath)
206 206
 	if err != nil {
207 207
 		passwd = nil