Browse code

Refactor device handling code

We now have one place that keeps track of (most) devices that are allowed and created within the container. That place is pkg/libcontainer/devices/devices.go

This fixes several inconsistencies between which devices were created in the lxc backend and the native backend. It also fixes inconsistencies between which devices were created and which were allowed. For example, /dev/full was being created but it was not allowed within the cgroup. It also declares the file modes and permissions of the default devices, rather than copying them from the host. This is in line with docker's philosophy of not being host dependent.

Docker-DCO-1.1-Signed-off-by: Timothy Hobbs <timothyhobbs@seznam.cz> (github: https://github.com/timthelion)

Timothy Hobbs authored on 2014/02/18 08:14:30
Showing 19 changed files
... ...
@@ -23,6 +23,7 @@ import (
23 23
 	"github.com/dotcloud/docker/links"
24 24
 	"github.com/dotcloud/docker/nat"
25 25
 	"github.com/dotcloud/docker/pkg/label"
26
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
26 27
 	"github.com/dotcloud/docker/pkg/networkfs/etchosts"
27 28
 	"github.com/dotcloud/docker/pkg/networkfs/resolvconf"
28 29
 	"github.com/dotcloud/docker/pkg/symlink"
... ...
@@ -230,18 +231,20 @@ func populateCommand(c *Container, env []string) error {
230 230
 		Cpuset:     c.Config.Cpuset,
231 231
 	}
232 232
 	c.command = &execdriver.Command{
233
-		ID:         c.ID,
234
-		Privileged: c.hostConfig.Privileged,
235
-		Rootfs:     c.RootfsPath(),
236
-		InitPath:   "/.dockerinit",
237
-		Entrypoint: c.Path,
238
-		Arguments:  c.Args,
239
-		WorkingDir: c.Config.WorkingDir,
240
-		Network:    en,
241
-		Tty:        c.Config.Tty,
242
-		User:       c.Config.User,
243
-		Config:     context,
244
-		Resources:  resources,
233
+		ID:                 c.ID,
234
+		Privileged:         c.hostConfig.Privileged,
235
+		Rootfs:             c.RootfsPath(),
236
+		InitPath:           "/.dockerinit",
237
+		Entrypoint:         c.Path,
238
+		Arguments:          c.Args,
239
+		WorkingDir:         c.Config.WorkingDir,
240
+		Network:            en,
241
+		Tty:                c.Config.Tty,
242
+		User:               c.Config.User,
243
+		Config:             context,
244
+		Resources:          resources,
245
+		AllowedDevices:     devices.DefaultAllowedDevices,
246
+		AutoCreatedDevices: devices.DefaultAutoCreatedDevices,
245 247
 	}
246 248
 	c.command.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
247 249
 	c.command.Env = env
... ...
@@ -5,6 +5,8 @@ import (
5 5
 	"io"
6 6
 	"os"
7 7
 	"os/exec"
8
+
9
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
8 10
 )
9 11
 
10 12
 // Context is a generic key value pair that allows
... ...
@@ -120,20 +122,22 @@ type Mount struct {
120 120
 type Command struct {
121 121
 	exec.Cmd `json:"-"`
122 122
 
123
-	ID         string              `json:"id"`
124
-	Privileged bool                `json:"privileged"`
125
-	User       string              `json:"user"`
126
-	Rootfs     string              `json:"rootfs"`   // root fs of the container
127
-	InitPath   string              `json:"initpath"` // dockerinit
128
-	Entrypoint string              `json:"entrypoint"`
129
-	Arguments  []string            `json:"arguments"`
130
-	WorkingDir string              `json:"working_dir"`
131
-	ConfigPath string              `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
132
-	Tty        bool                `json:"tty"`
133
-	Network    *Network            `json:"network"`
134
-	Config     map[string][]string `json:"config"` //  generic values that specific drivers can consume
135
-	Resources  *Resources          `json:"resources"`
136
-	Mounts     []Mount             `json:"mounts"`
123
+	ID                 string              `json:"id"`
124
+	Privileged         bool                `json:"privileged"`
125
+	User               string              `json:"user"`
126
+	Rootfs             string              `json:"rootfs"`   // root fs of the container
127
+	InitPath           string              `json:"initpath"` // dockerinit
128
+	Entrypoint         string              `json:"entrypoint"`
129
+	Arguments          []string            `json:"arguments"`
130
+	WorkingDir         string              `json:"working_dir"`
131
+	ConfigPath         string              `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
132
+	Tty                bool                `json:"tty"`
133
+	Network            *Network            `json:"network"`
134
+	Config             map[string][]string `json:"config"` //  generic values that specific drivers can consume
135
+	Resources          *Resources          `json:"resources"`
136
+	Mounts             []Mount             `json:"mounts"`
137
+	AllowedDevices     []devices.Device    `json:"allowed_devices"`
138
+	AutoCreatedDevices []devices.Device    `json:"autocreated_devices"`
137 139
 
138 140
 	Terminal     Terminal `json:"-"`             // standard or tty terminal
139 141
 	Console      string   `json:"-"`             // dev/console path
... ...
@@ -17,6 +17,7 @@ import (
17 17
 	"github.com/dotcloud/docker/daemon/execdriver"
18 18
 	"github.com/dotcloud/docker/pkg/label"
19 19
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
20
+	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
20 21
 	"github.com/dotcloud/docker/pkg/system"
21 22
 	"github.com/dotcloud/docker/utils"
22 23
 )
... ...
@@ -159,6 +160,10 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
159 159
 	c.Path = aname
160 160
 	c.Args = append([]string{name}, arg...)
161 161
 
162
+	if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
163
+		return -1, err
164
+	}
165
+
162 166
 	if err := c.Start(); err != nil {
163 167
 		return -1, err
164 168
 	}
... ...
@@ -47,37 +47,10 @@ lxc.cgroup.devices.allow = a
47 47
 {{else}}
48 48
 # no implicit access to devices
49 49
 lxc.cgroup.devices.deny = a
50
-
51
-# but allow mknod for any device
52
-lxc.cgroup.devices.allow = c *:* m
53
-lxc.cgroup.devices.allow = b *:* m
54
-
55
-# /dev/null and zero
56
-lxc.cgroup.devices.allow = c 1:3 rwm
57
-lxc.cgroup.devices.allow = c 1:5 rwm
58
-
59
-# consoles
60
-lxc.cgroup.devices.allow = c 5:1 rwm
61
-lxc.cgroup.devices.allow = c 5:0 rwm
62
-lxc.cgroup.devices.allow = c 4:0 rwm
63
-lxc.cgroup.devices.allow = c 4:1 rwm
64
-
65
-# /dev/urandom,/dev/random
66
-lxc.cgroup.devices.allow = c 1:9 rwm
67
-lxc.cgroup.devices.allow = c 1:8 rwm
68
-
69
-# /dev/pts/ - pts namespaces are "coming soon"
70
-lxc.cgroup.devices.allow = c 136:* rwm
71
-lxc.cgroup.devices.allow = c 5:2 rwm
72
-
73
-# tuntap
74
-lxc.cgroup.devices.allow = c 10:200 rwm
75
-
76
-# fuse
77
-#lxc.cgroup.devices.allow = c 10:229 rwm
78
-
79
-# rtc
80
-#lxc.cgroup.devices.allow = c 254:0 rwm
50
+#Allow the devices passed to us in the AllowedDevices list.
51
+{{range $allowedDevice := .AllowedDevices}}
52
+lxc.cgroup.devices.allow = {{$allowedDevice.GetCgroupAllowString}}
53
+{{end}}
81 54
 {{end}}
82 55
 
83 56
 # standard mount point
... ...
@@ -11,6 +11,8 @@ import (
11 11
 	"strings"
12 12
 	"testing"
13 13
 	"time"
14
+
15
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
14 16
 )
15 17
 
16 18
 func TestLXCConfig(t *testing.T) {
... ...
@@ -47,6 +49,7 @@ func TestLXCConfig(t *testing.T) {
47 47
 			Mtu:       1500,
48 48
 			Interface: nil,
49 49
 		},
50
+		AllowedDevices: make([]devices.Device, 0),
50 51
 	}
51 52
 	p, err := driver.generateLXCConfig(command)
52 53
 	if err != nil {
... ...
@@ -11,7 +11,6 @@ import (
11 11
 	"github.com/dotcloud/docker/daemon/execdriver/native/template"
12 12
 	"github.com/dotcloud/docker/pkg/apparmor"
13 13
 	"github.com/dotcloud/docker/pkg/libcontainer"
14
-	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
15 14
 )
16 15
 
17 16
 // createContainer populates and configures the container type with the
... ...
@@ -25,6 +24,8 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container
25 25
 	container.WorkingDir = c.WorkingDir
26 26
 	container.Env = c.Env
27 27
 	container.Cgroups.Name = c.ID
28
+	container.Cgroups.AllowedDevices = c.AllowedDevices
29
+	container.DeviceNodes = c.AutoCreatedDevices
28 30
 	// check to see if we are running in ramdisk to disable pivot root
29 31
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
30 32
 	container.Context["restrictions"] = "true"
... ...
@@ -105,15 +106,10 @@ func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver.
105 105
 
106 106
 func (d *driver) setPrivileged(container *libcontainer.Container) (err error) {
107 107
 	container.Capabilities = libcontainer.GetAllCapabilities()
108
-	container.Cgroups.DeviceAccess = true
108
+	container.Cgroups.AllowAllDevices = true
109 109
 
110 110
 	delete(container.Context, "restrictions")
111 111
 
112
-	container.OptionalDeviceNodes = nil
113
-	if container.RequiredDeviceNodes, err = nodes.GetHostDeviceNodes(); err != nil {
114
-		return err
115
-	}
116
-
117 112
 	if apparmor.IsEnabled() {
118 113
 		container.Context["apparmor_profile"] = "unconfined"
119 114
 	}
... ...
@@ -4,7 +4,6 @@ import (
4 4
 	"github.com/dotcloud/docker/pkg/apparmor"
5 5
 	"github.com/dotcloud/docker/pkg/libcontainer"
6 6
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
7
-	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
8 7
 )
9 8
 
10 9
 // New returns the docker default configuration for libcontainer
... ...
@@ -30,12 +29,10 @@ func New() *libcontainer.Container {
30 30
 			"NEWNET": true,
31 31
 		},
32 32
 		Cgroups: &cgroups.Cgroup{
33
-			Parent:       "docker",
34
-			DeviceAccess: false,
33
+			Parent:          "docker",
34
+			AllowAllDevices: false,
35 35
 		},
36
-		Context:             libcontainer.Context{},
37
-		RequiredDeviceNodes: nodes.DefaultNodes,
38
-		OptionalDeviceNodes: []string{"/dev/fuse"},
36
+		Context: libcontainer.Context{},
39 37
 	}
40 38
 	if apparmor.IsEnabled() {
41 39
 		container.Context["apparmor_profile"] = "docker-default"
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"os"
6 6
 	"os/exec"
7 7
 	"path/filepath"
8
+	"reflect"
8 9
 	"regexp"
9 10
 	"sort"
10 11
 	"strings"
... ...
@@ -835,3 +836,40 @@ func TestRunWithCpuset(t *testing.T) {
835 835
 
836 836
 	logDone("run - cpuset 0")
837 837
 }
838
+
839
+func TestDeviceNumbers(t *testing.T) {
840
+	cmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "ls -l /dev/null")
841
+
842
+	out, _, err := runCommandWithOutput(cmd)
843
+	if err != nil {
844
+		t.Fatal(err, out)
845
+	}
846
+	deviceLineFields := strings.Fields(out)
847
+	deviceLineFields[6] = ""
848
+	deviceLineFields[7] = ""
849
+	deviceLineFields[8] = ""
850
+	expected := []string{"crw-rw-rw-", "1", "root", "root", "1,", "3", "", "", "", "/dev/null"}
851
+
852
+	if !(reflect.DeepEqual(deviceLineFields, expected)) {
853
+		t.Fatalf("expected output\ncrw-rw-rw- 1 root root 1, 3 May 24 13:29 /dev/null\n received\n %s\n", out)
854
+	}
855
+	deleteAllContainers()
856
+
857
+	logDone("run - test device numbers")
858
+}
859
+
860
+func TestThatCharacterDevicesActLikeCharacterDevices(t *testing.T) {
861
+	cmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", "dd if=/dev/zero of=/zero bs=1k count=5 2> /dev/null ; du -h /zero")
862
+
863
+	out, _, err := runCommandWithOutput(cmd)
864
+	if err != nil {
865
+		t.Fatal(err, out)
866
+	}
867
+
868
+	if actual := strings.Trim(out, "\r\n"); actual[0] == '0' {
869
+		t.Fatalf("expected a new file called /zero to be create that is greater than 0 bytes long, but du says: %s", actual)
870
+	}
871
+	deleteAllContainers()
872
+
873
+	logDone("run - test that character devices work.")
874
+}
... ...
@@ -2,6 +2,8 @@ package cgroups
2 2
 
3 3
 import (
4 4
 	"errors"
5
+
6
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
5 7
 )
6 8
 
7 9
 var (
... ...
@@ -10,17 +12,18 @@ var (
10 10
 
11 11
 type Cgroup struct {
12 12
 	Name   string `json:"name,omitempty"`
13
-	Parent string `json:"parent,omitempty"`
13
+	Parent string `json:"parent,omitempty"` // name of parent cgroup or slice
14 14
 
15
-	DeviceAccess      bool   `json:"device_access,omitempty"`      // name of parent cgroup or slice
16
-	Memory            int64  `json:"memory,omitempty"`             // Memory limit (in bytes)
17
-	MemoryReservation int64  `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes)
18
-	MemorySwap        int64  `json:"memory_swap,omitempty"`        // Total memory usage (memory + swap); set `-1' to disable swap
19
-	CpuShares         int64  `json:"cpu_shares,omitempty"`         // CPU shares (relative weight vs. other containers)
20
-	CpuQuota          int64  `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
21
-	CpuPeriod         int64  `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default.
22
-	CpusetCpus        string `json:"cpuset_cpus,omitempty"`        // CPU to use
23
-	Freezer           string `json:"freezer,omitempty"`            // set the freeze value for the process
15
+	AllowAllDevices   bool             `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container.  If false, allow access only to devices explicitly listed in the allowed_devices list.
16
+	AllowedDevices    []devices.Device `json:"allowed_devices,omitempty"`
17
+	Memory            int64            `json:"memory,omitempty"`             // Memory limit (in bytes)
18
+	MemoryReservation int64            `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes)
19
+	MemorySwap        int64            `json:"memory_swap,omitempty"`        // Total memory usage (memory + swap); set `-1' to disable swap
20
+	CpuShares         int64            `json:"cpu_shares,omitempty"`         // CPU shares (relative weight vs. other containers)
21
+	CpuQuota          int64            `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
22
+	CpuPeriod         int64            `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default.
23
+	CpusetCpus        string           `json:"cpuset_cpus,omitempty"`        // CPU to use
24
+	Freezer           string           `json:"freezer,omitempty"`            // set the freeze value for the process
24 25
 
25 26
 	Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
26 27
 }
... ...
@@ -9,41 +9,13 @@ func (s *devicesGroup) Set(d *data) error {
9 9
 		return err
10 10
 	}
11 11
 
12
-	if !d.c.DeviceAccess {
12
+	if !d.c.AllowAllDevices {
13 13
 		if err := writeFile(dir, "devices.deny", "a"); err != nil {
14 14
 			return err
15 15
 		}
16 16
 
17
-		allow := []string{
18
-			// allow mknod for any device
19
-			"c *:* m",
20
-			"b *:* m",
21
-
22
-			// /dev/null, zero, full
23
-			"c 1:3 rwm",
24
-			"c 1:5 rwm",
25
-			"c 1:7 rwm",
26
-
27
-			// consoles
28
-			"c 5:1 rwm",
29
-			"c 5:0 rwm",
30
-			"c 4:0 rwm",
31
-			"c 4:1 rwm",
32
-
33
-			// /dev/urandom,/dev/random
34
-			"c 1:9 rwm",
35
-			"c 1:8 rwm",
36
-
37
-			// /dev/pts/ - pts namespaces are "coming soon"
38
-			"c 136:* rwm",
39
-			"c 5:2 rwm",
40
-
41
-			// tuntap
42
-			"c 10:200 rwm",
43
-		}
44
-
45
-		for _, val := range allow {
46
-			if err := writeFile(dir, "devices.allow", val); err != nil {
17
+		for _, dev := range d.c.AllowedDevices {
18
+			if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
47 19
 				return err
48 20
 			}
49 21
 		}
... ...
@@ -21,11 +21,6 @@ type systemdCgroup struct {
21 21
 	cleanupDirs []string
22 22
 }
23 23
 
24
-type DeviceAllow struct {
25
-	Node        string
26
-	Permissions string
27
-}
28
-
29 24
 var (
30 25
 	connLock              sync.Mutex
31 26
 	theConn               *systemd1.Conn
... ...
@@ -116,24 +111,9 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
116 116
 		systemd1.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
117 117
 	)
118 118
 
119
-	if !c.DeviceAccess {
119
+	if !c.AllowAllDevices {
120 120
 		properties = append(properties,
121
-			systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")},
122
-			systemd1.Property{"DeviceAllow", dbus.MakeVariant([]DeviceAllow{
123
-				{"/dev/null", "rwm"},
124
-				{"/dev/zero", "rwm"},
125
-				{"/dev/full", "rwm"},
126
-				{"/dev/random", "rwm"},
127
-				{"/dev/urandom", "rwm"},
128
-				{"/dev/tty", "rwm"},
129
-				{"/dev/console", "rwm"},
130
-				{"/dev/tty0", "rwm"},
131
-				{"/dev/tty1", "rwm"},
132
-				{"/dev/pts/ptmx", "rwm"},
133
-				// There is no way to add /dev/pts/* here atm, so we hack this manually below
134
-				// /dev/pts/* (how to add this?)
135
-				// Same with tuntap, which doesn't exist as a node most of the time
136
-			})})
121
+			systemd1.Property{"DevicePolicy", dbus.MakeVariant("strict")})
137 122
 	}
138 123
 
139 124
 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
... ...
@@ -167,28 +147,16 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
167 167
 
168 168
 	cgroup := props["ControlGroup"].(string)
169 169
 
170
-	if !c.DeviceAccess {
170
+	if !c.AllowAllDevices {
171 171
 		mountpoint, err := cgroups.FindCgroupMountpoint("devices")
172 172
 		if err != nil {
173 173
 			return nil, err
174 174
 		}
175 175
 
176
-		path := filepath.Join(mountpoint, cgroup)
177
-
178
-		allow := []string{
179
-			// allow mknod for any device
180
-			"c *:* m",
181
-			"b *:* m",
182
-
183
-			// /dev/pts/ - pts namespaces are "coming soon"
184
-			"c 136:* rwm",
185
-
186
-			// tuntap
187
-			"c 10:200 rwm",
188
-		}
189
-
190
-		for _, val := range allow {
191
-			if err := ioutil.WriteFile(filepath.Join(path, "devices.allow"), []byte(val), 0700); err != nil {
176
+		dir := filepath.Join(mountpoint, cgroup)
177
+		// We use the same method of allowing devices as in the fs backend.  This needs to be changed to use DBUS as soon as possible.  However, that change has to wait until http://cgit.freedesktop.org/systemd/systemd/commit/?id=90060676c442604780634c0a993e3f9c3733f8e6 has been applied in most commonly used systemd versions.
178
+		for _, dev := range c.AllowedDevices {
179
+			if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil {
192 180
 				return nil, err
193 181
 			}
194 182
 		}
... ...
@@ -295,6 +263,10 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
295 295
 	return &res, nil
296 296
 }
297 297
 
298
+func writeFile(dir, file, data string) error {
299
+	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
300
+}
301
+
298 302
 func (c *systemdCgroup) Cleanup() error {
299 303
 	// systemd cleans up, we don't need to do much
300 304
 
... ...
@@ -2,6 +2,7 @@ package libcontainer
2 2
 
3 3
 import (
4 4
 	"github.com/dotcloud/docker/pkg/libcontainer/cgroups"
5
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
5 6
 )
6 7
 
7 8
 // Context is a generic key value pair that allows arbitrary data to be sent
... ...
@@ -60,13 +61,8 @@ type Container struct {
60 60
 	// rootfs and mount namespace if specified
61 61
 	Mounts Mounts `json:"mounts,omitempty"`
62 62
 
63
-	// RequiredDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
64
-	// If the host system does not support the device that the container requests an error is returned
65
-	RequiredDeviceNodes []string `json:"required_device_nodes,omitempty"`
66
-
67
-	// OptionalDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev
68
-	// If the host system does not support the device that the container requests the error is ignored
69
-	OptionalDeviceNodes []string `json:"optional_device_nodes,omitempty"`
63
+	// The device nodes that should be automatically created within the container upon container start.  Note, make sure that the node is marked as allowed in the cgroup as well!
64
+	DeviceNodes []devices.Device `json:"device_nodes,omitempty"`
70 65
 }
71 66
 
72 67
 // Network defines configuration for a container's networking stack
... ...
@@ -44,12 +44,54 @@
44 44
       "type": "devtmpfs"
45 45
     }
46 46
   ],
47
-  "required_device_nodes": [
48
-      "/dev/null",
49
-      "/dev/zero",
50
-      "/dev/full",
51
-      "/dev/random",
52
-      "/dev/urandom",
53
-      "/dev/tty"
47
+  "device_nodes": [
48
+		{
49
+			"path":  "/dev/null",
50
+			"type":        99,
51
+			"major_number": 1,
52
+			"minor_number": 3,
53
+			"cgroup_permissions": "rwm",
54
+			"file_mode": 438
55
+		},
56
+		{
57
+			"path":  "/dev/zero",
58
+			"type":        99,
59
+			"major_number": 1,
60
+			"minor_number": 5,
61
+			"cgroup_permissions": "rwm",
62
+			"file_mode": 438
63
+		},
64
+		{
65
+			"path":  "/dev/full",
66
+			"type":        99,
67
+			"major_number": 1,
68
+			"minor_number": 7,
69
+			"cgroup_permissions": "rwm",
70
+			"file_mode": 438
71
+		},
72
+		{
73
+			"path":  "/dev/tty",
74
+			"type":        99,
75
+			"major_number": 5,
76
+			"minor_number": 0,
77
+			"cgroup_permissions": "rwm",
78
+			"file_mode": 438
79
+		},
80
+		{
81
+			"path":  "/dev/urandom",
82
+			"type":        99,
83
+			"major_number": 1,
84
+			"minor_number": 9,
85
+			"cgroup_permissions": "rwm",
86
+			"file_mode": 438
87
+		},
88
+		{
89
+			"path":  "/dev/random",
90
+			"type":        99,
91
+			"major_number": 1,
92
+			"minor_number": 8,
93
+			"cgroup_permissions": "rwm",
94
+			"file_mode": 438
95
+		}
54 96
   ]
55 97
 }
... ...
@@ -4,8 +4,6 @@ import (
4 4
 	"encoding/json"
5 5
 	"os"
6 6
 	"testing"
7
-
8
-	"github.com/dotcloud/docker/pkg/libcontainer/mount/nodes"
9 7
 )
10 8
 
11 9
 // Checks whether the expected capability is specified in the capabilities.
... ...
@@ -63,11 +61,4 @@ func TestContainerJsonFormat(t *testing.T) {
63 63
 		t.Log("capabilities mask should not contain SYS_CHROOT")
64 64
 		t.Fail()
65 65
 	}
66
-
67
-	for _, n := range nodes.DefaultNodes {
68
-		if !contains(n, container.RequiredDeviceNodes) {
69
-			t.Logf("devices should contain %s", n)
70
-			t.Fail()
71
-		}
72
-	}
73 66
 }
74 67
new file mode 100644
... ...
@@ -0,0 +1,239 @@
0
+package devices
1
+
2
+import (
3
+	"fmt"
4
+	"os"
5
+	"syscall"
6
+)
7
+
8
+const (
9
+	Wildcard = -1
10
+)
11
+
12
+type Device struct {
13
+	Type              rune        `json:"type,omitempty"`
14
+	Path              string      `json:"path,omitempty"`               // It is fine if this is an empty string in the case that you are using Wildcards
15
+	MajorNumber       int64       `json:"major_number,omitempty"`       // Use the wildcard constant for wildcards.
16
+	MinorNumber       int64       `json:"minor_number,omitempty"`       // Use the wildcard constant for wildcards.
17
+	CgroupPermissions string      `json:"cgroup_permissions,omitempty"` // Typically just "rwm"
18
+	FileMode          os.FileMode `json:"file_mode,omitempty"`          // The permission bits of the file's mode
19
+}
20
+
21
+func GetDeviceNumberString(deviceNumber int64) string {
22
+	if deviceNumber == Wildcard {
23
+		return "*"
24
+	} else {
25
+		return fmt.Sprintf("%d", deviceNumber)
26
+	}
27
+}
28
+
29
+func (device Device) GetCgroupAllowString() string {
30
+	return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions)
31
+}
32
+
33
+// Given the path to a device and its cgroup_permissions (which cannot be easily queried), look up the information about a linux device and return that information as a Device struct.
34
+func GetDevice(path string, cgroupPermissions string) (Device, error) {
35
+	var (
36
+		err                    error
37
+		fileInfo               os.FileInfo
38
+		mode                   os.FileMode
39
+		fileModePermissionBits os.FileMode
40
+		devType                rune
41
+		devNumber              int
42
+		stat_t                 *syscall.Stat_t
43
+		ok                     bool
44
+		device                 Device
45
+	)
46
+
47
+	fileInfo, err = os.Stat(path)
48
+	if err != nil {
49
+		return Device{}, err
50
+	}
51
+
52
+	mode = fileInfo.Mode()
53
+	fileModePermissionBits = os.FileMode.Perm(mode)
54
+	switch {
55
+	case (mode & os.ModeDevice) == 0:
56
+		return Device{}, fmt.Errorf("%s is not a device", path)
57
+	case (mode & os.ModeCharDevice) != 0:
58
+		fileModePermissionBits |= syscall.S_IFCHR
59
+		devType = 'c'
60
+	default:
61
+		fileModePermissionBits |= syscall.S_IFBLK
62
+		devType = 'b'
63
+	}
64
+
65
+	stat_t, ok = fileInfo.Sys().(*syscall.Stat_t)
66
+	if !ok {
67
+		return Device{}, fmt.Errorf("cannot determine the device number for device %s", path)
68
+	}
69
+	devNumber = int(stat_t.Rdev)
70
+
71
+	device = Device{
72
+		Type:              devType,
73
+		Path:              path,
74
+		MajorNumber:       Major(devNumber),
75
+		MinorNumber:       Minor(devNumber),
76
+		CgroupPermissions: cgroupPermissions,
77
+		FileMode:          fileModePermissionBits,
78
+	}
79
+	return device, nil
80
+}
81
+
82
+var (
83
+	// These are devices that are to be both allowed and created.
84
+
85
+	DefaultSimpleDevices = []Device{
86
+		// /dev/null and zero
87
+		{
88
+			Path:              "/dev/null",
89
+			Type:              'c',
90
+			MajorNumber:       1,
91
+			MinorNumber:       3,
92
+			CgroupPermissions: "rwm",
93
+			FileMode:          0666,
94
+		},
95
+		{
96
+			Path:              "/dev/zero",
97
+			Type:              'c',
98
+			MajorNumber:       1,
99
+			MinorNumber:       5,
100
+			CgroupPermissions: "rwm",
101
+			FileMode:          0666,
102
+		},
103
+
104
+		{
105
+			Path:              "/dev/full",
106
+			Type:              'c',
107
+			MajorNumber:       1,
108
+			MinorNumber:       7,
109
+			CgroupPermissions: "rwm",
110
+			FileMode:          0666,
111
+		},
112
+
113
+		// consoles and ttys
114
+		{
115
+			Path:              "/dev/tty",
116
+			Type:              'c',
117
+			MajorNumber:       5,
118
+			MinorNumber:       0,
119
+			CgroupPermissions: "rwm",
120
+			FileMode:          0666,
121
+		},
122
+
123
+		// /dev/urandom,/dev/random
124
+		{
125
+			Path:              "/dev/urandom",
126
+			Type:              'c',
127
+			MajorNumber:       1,
128
+			MinorNumber:       9,
129
+			CgroupPermissions: "rwm",
130
+			FileMode:          0666,
131
+		},
132
+		{
133
+			Path:              "/dev/random",
134
+			Type:              'c',
135
+			MajorNumber:       1,
136
+			MinorNumber:       8,
137
+			CgroupPermissions: "rwm",
138
+			FileMode:          0666,
139
+		},
140
+	}
141
+
142
+	DefaultAllowedDevices = append([]Device{
143
+		// allow mknod for any device
144
+		{
145
+			Type:              'c',
146
+			MajorNumber:       Wildcard,
147
+			MinorNumber:       Wildcard,
148
+			CgroupPermissions: "m",
149
+		},
150
+		{
151
+			Type:              'b',
152
+			MajorNumber:       Wildcard,
153
+			MinorNumber:       Wildcard,
154
+			CgroupPermissions: "m",
155
+		},
156
+
157
+		{
158
+			Path:              "/dev/console",
159
+			Type:              'c',
160
+			MajorNumber:       5,
161
+			MinorNumber:       1,
162
+			CgroupPermissions: "rwm",
163
+		},
164
+		{
165
+			Path:              "/dev/tty0",
166
+			Type:              'c',
167
+			MajorNumber:       4,
168
+			MinorNumber:       0,
169
+			CgroupPermissions: "rwm",
170
+		},
171
+		{
172
+			Path:              "/dev/tty1",
173
+			Type:              'c',
174
+			MajorNumber:       4,
175
+			MinorNumber:       1,
176
+			CgroupPermissions: "rwm",
177
+		},
178
+		// /dev/pts/ - pts namespaces are "coming soon"
179
+		{
180
+			Path:              "",
181
+			Type:              'c',
182
+			MajorNumber:       136,
183
+			MinorNumber:       Wildcard,
184
+			CgroupPermissions: "rwm",
185
+		},
186
+		{
187
+			Path:              "",
188
+			Type:              'c',
189
+			MajorNumber:       5,
190
+			MinorNumber:       2,
191
+			CgroupPermissions: "rwm",
192
+		},
193
+
194
+		// tuntap
195
+		{
196
+			Path:              "",
197
+			Type:              'c',
198
+			MajorNumber:       10,
199
+			MinorNumber:       200,
200
+			CgroupPermissions: "rwm",
201
+		},
202
+
203
+		/*// fuse
204
+		   {
205
+		    Path: "",
206
+		    Type: 'c',
207
+		    MajorNumber: 10,
208
+		    MinorNumber: 229,
209
+		    CgroupPermissions: "rwm",
210
+		   },
211
+
212
+		// rtc
213
+		   {
214
+		    Path: "",
215
+		    Type: 'c',
216
+		    MajorNumber: 254,
217
+		    MinorNumber: 0,
218
+		    CgroupPermissions: "rwm",
219
+		   },
220
+		*/
221
+	}, DefaultSimpleDevices...)
222
+
223
+	DefaultAutoCreatedDevices = append([]Device{
224
+		{
225
+			// /dev/fuse is created but not allowed.
226
+			// This is to allow java to work.  Because java
227
+			// Insists on there being a /dev/fuse
228
+			// https://github.com/dotcloud/docker/issues/514
229
+			// https://github.com/dotcloud/docker/issues/2393
230
+			//
231
+			Path:              "/dev/fuse",
232
+			Type:              'c',
233
+			MajorNumber:       10,
234
+			MinorNumber:       229,
235
+			CgroupPermissions: "rwm",
236
+		},
237
+	}, DefaultSimpleDevices...)
238
+)
0 239
new file mode 100644
... ...
@@ -0,0 +1,26 @@
0
+package devices
1
+
2
+/*
3
+
4
+This code provides support for manipulating linux device numbers.  It should be replaced by normal syscall functions once http://code.google.com/p/go/issues/detail?id=8106 is solved.
5
+
6
+You can read what they are here:
7
+
8
+ - http://www.makelinux.net/ldd3/chp-3-sect-2
9
+ - http://www.linux-tutorial.info/modules.php?name=MContent&pageid=94
10
+
11
+Note! These are NOT the same as the MAJOR(dev_t device);, MINOR(dev_t device); and MKDEV(int major, int minor); functions as defined in <linux/kdev_t.h> as the representation of device numbers used by go is different than the one used internally to the kernel! - https://github.com/torvalds/linux/blob/master/include/linux/kdev_t.h#L9
12
+
13
+*/
14
+
15
+func Major(devNumber int) int64 {
16
+	return int64((devNumber >> 8) & 0xfff)
17
+}
18
+
19
+func Minor(devNumber int) int64 {
20
+	return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00))
21
+}
22
+
23
+func Mkdev(majorNumber int64, minorNumber int64) int {
24
+	return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12))
25
+}
... ...
@@ -48,11 +48,8 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co
48 48
 	if err := setupBindmounts(rootfs, container.Mounts); err != nil {
49 49
 		return fmt.Errorf("bind mounts %s", err)
50 50
 	}
51
-	if err := nodes.CopyN(rootfs, container.RequiredDeviceNodes, true); err != nil {
52
-		return fmt.Errorf("copy required dev nodes %s", err)
53
-	}
54
-	if err := nodes.CopyN(rootfs, container.OptionalDeviceNodes, false); err != nil {
55
-		return fmt.Errorf("copy optional dev nodes %s", err)
51
+	if err := nodes.CreateDeviceNodes(rootfs, container.DeviceNodes); err != nil {
52
+		return fmt.Errorf("create device nodes %s", err)
56 53
 	}
57 54
 	if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
58 55
 		return err
... ...
@@ -9,47 +9,27 @@ import (
9 9
 	"path/filepath"
10 10
 	"syscall"
11 11
 
12
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
12 13
 	"github.com/dotcloud/docker/pkg/system"
13 14
 )
14 15
 
15
-// Default list of device nodes to copy
16
-var DefaultNodes = []string{
17
-	"/dev/null",
18
-	"/dev/zero",
19
-	"/dev/full",
20
-	"/dev/random",
21
-	"/dev/urandom",
22
-	"/dev/tty",
23
-}
24
-
25
-// CopyN copies the device node from the host into the rootfs
26
-func CopyN(rootfs string, nodesToCopy []string, shouldExist bool) error {
16
+// Create the device nodes in the container.
17
+func CreateDeviceNodes(rootfs string, nodesToCreate []devices.Device) error {
27 18
 	oldMask := system.Umask(0000)
28 19
 	defer system.Umask(oldMask)
29 20
 
30
-	for _, node := range nodesToCopy {
31
-		if err := Copy(rootfs, node, shouldExist); err != nil {
21
+	for _, node := range nodesToCreate {
22
+		if err := CreateDeviceNode(rootfs, node); err != nil {
32 23
 			return err
33 24
 		}
34 25
 	}
35 26
 	return nil
36 27
 }
37 28
 
38
-// Copy copies the device node into the rootfs.  If the node
39
-// on the host system does not exist and the boolean flag is passed
40
-// an error will be returned
41
-func Copy(rootfs, node string, shouldExist bool) error {
42
-	stat, err := os.Stat(node)
43
-	if err != nil {
44
-		if os.IsNotExist(err) && !shouldExist {
45
-			return nil
46
-		}
47
-		return err
48
-	}
49
-
29
+// Creates the device node in the rootfs of the container.
30
+func CreateDeviceNode(rootfs string, node devices.Device) error {
50 31
 	var (
51
-		dest   = filepath.Join(rootfs, node)
52
-		st     = stat.Sys().(*syscall.Stat_t)
32
+		dest   = filepath.Join(rootfs, node.Path)
53 33
 		parent = filepath.Dir(dest)
54 34
 	)
55 35
 
... ...
@@ -57,13 +37,23 @@ func Copy(rootfs, node string, shouldExist bool) error {
57 57
 		return err
58 58
 	}
59 59
 
60
-	if err := system.Mknod(dest, st.Mode, int(st.Rdev)); err != nil && !os.IsExist(err) {
61
-		return fmt.Errorf("mknod %s %s", node, err)
60
+	fileMode := node.FileMode
61
+	switch node.Type {
62
+	case 'c':
63
+		fileMode |= syscall.S_IFCHR
64
+	case 'b':
65
+		fileMode |= syscall.S_IFBLK
66
+	default:
67
+		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
68
+	}
69
+
70
+	if err := system.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) {
71
+		return fmt.Errorf("mknod %s %s", node.Path, err)
62 72
 	}
63 73
 	return nil
64 74
 }
65 75
 
66
-func getNodes(path string) ([]string, error) {
76
+func getDeviceNodes(path string) ([]string, error) {
67 77
 	out := []string{}
68 78
 	files, err := ioutil.ReadDir(path)
69 79
 	if err != nil {
... ...
@@ -71,7 +61,7 @@ func getNodes(path string) ([]string, error) {
71 71
 	}
72 72
 	for _, f := range files {
73 73
 		if f.IsDir() && f.Name() != "pts" && f.Name() != "shm" {
74
-			sub, err := getNodes(filepath.Join(path, f.Name()))
74
+			sub, err := getDeviceNodes(filepath.Join(path, f.Name()))
75 75
 			if err != nil {
76 76
 				return nil, err
77 77
 			}
... ...
@@ -84,5 +74,5 @@ func getNodes(path string) ([]string, error) {
84 84
 }
85 85
 
86 86
 func GetHostDeviceNodes() ([]string, error) {
87
-	return getNodes("/dev")
87
+	return getDeviceNodes("/dev")
88 88
 }
... ...
@@ -2,10 +2,15 @@
2 2
 
3 3
 package nodes
4 4
 
5
-import "github.com/dotcloud/docker/pkg/libcontainer"
6
-
7
-var DefaultNodes = []string{}
5
+import (
6
+	"github.com/dotcloud/docker/pkg/libcontainer"
7
+	"github.com/dotcloud/docker/pkg/libcontainer/devices"
8
+)
8 9
 
9 10
 func GetHostDeviceNodes() ([]string, error) {
10 11
 	return nil, libcontainer.ErrUnsupported
11 12
 }
13
+
14
+func CreateDeviceNodes(rootfs string, nodesToCreate []devices.Device) error {
15
+	return libcontainer.ErrUnsupported
16
+}