Browse code

Merge pull request #22563 from mlaventure/cgroup-devices

Allow adding rules to cgroup devices.allow on container create/run

Vincent Demeester authored on 2017/02/02 00:29:34
Showing 15 changed files
... ...
@@ -251,6 +251,7 @@ type Resources struct {
251 251
 	CpusetCpus           string          // CpusetCpus 0-2, 0,1
252 252
 	CpusetMems           string          // CpusetMems 0-2, 0,1
253 253
 	Devices              []DeviceMapping // List of devices to map inside the container
254
+	DeviceCgroupRules    []string        // List of rules to be added to the device cgroup
254 255
 	DiskQuota            int64           // Disk limit (in bytes)
255 256
 	KernelMemory         int64           // Kernel memory limit (in bytes)
256 257
 	MemoryReservation    int64           // Memory soft limit (in bytes)
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"fmt"
7 7
 	"io/ioutil"
8 8
 	"path"
9
+	"regexp"
9 10
 	"strconv"
10 11
 	"strings"
11 12
 	"time"
... ...
@@ -21,6 +22,10 @@ import (
21 21
 	"github.com/spf13/pflag"
22 22
 )
23 23
 
24
+var (
25
+	deviceCgroupRuleRegexp = regexp.MustCompile("^[acb] ([0-9]+|\\*):([0-9]+|\\*) [rwm]{1,3}$")
26
+)
27
+
24 28
 // containerOptions is a data object with all the options for creating a container
25 29
 type containerOptions struct {
26 30
 	attach             opts.ListOpts
... ...
@@ -36,6 +41,7 @@ type containerOptions struct {
36 36
 	deviceWriteIOps    opts.ThrottledeviceOpt
37 37
 	env                opts.ListOpts
38 38
 	labels             opts.ListOpts
39
+	deviceCgroupRules  opts.ListOpts
39 40
 	devices            opts.ListOpts
40 41
 	ulimits            *opts.UlimitOpt
41 42
 	sysctls            *opts.MapOpts
... ...
@@ -127,6 +133,7 @@ func addFlags(flags *pflag.FlagSet) *containerOptions {
127 127
 		dns:               opts.NewListOpts(opts.ValidateIPAddress),
128 128
 		dnsOptions:        opts.NewListOpts(nil),
129 129
 		dnsSearch:         opts.NewListOpts(opts.ValidateDNSSearch),
130
+		deviceCgroupRules: opts.NewListOpts(validateDeviceCgroupRule),
130 131
 		deviceReadBps:     opts.NewThrottledeviceOpt(opts.ValidateThrottleBpsDevice),
131 132
 		deviceReadIOps:    opts.NewThrottledeviceOpt(opts.ValidateThrottleIOpsDevice),
132 133
 		deviceWriteBps:    opts.NewThrottledeviceOpt(opts.ValidateThrottleBpsDevice),
... ...
@@ -154,6 +161,7 @@ func addFlags(flags *pflag.FlagSet) *containerOptions {
154 154
 
155 155
 	// General purpose flags
156 156
 	flags.VarP(&copts.attach, "attach", "a", "Attach to STDIN, STDOUT or STDERR")
157
+	flags.Var(&copts.deviceCgroupRules, "device-cgroup-rule", "Add a rule to the cgroup allowed devices list")
157 158
 	flags.Var(&copts.devices, "device", "Add a host device to the container")
158 159
 	flags.VarP(&copts.env, "env", "e", "Set environment variables")
159 160
 	flags.Var(&copts.envFile, "env-file", "Read in a file of environment variables")
... ...
@@ -553,6 +561,7 @@ func parse(flags *pflag.FlagSet, copts *containerOptions) (*container.Config, *c
553 553
 		IOMaximumIOps:        copts.ioMaxIOps,
554 554
 		IOMaximumBandwidth:   uint64(maxIOBandwidth),
555 555
 		Ulimits:              copts.ulimits.GetList(),
556
+		DeviceCgroupRules:    copts.deviceCgroupRules.GetAll(),
556 557
 		Devices:              deviceMappings,
557 558
 	}
558 559
 
... ...
@@ -767,6 +776,17 @@ func parseDevice(device string) (container.DeviceMapping, error) {
767 767
 	return deviceMapping, nil
768 768
 }
769 769
 
770
+// validateDeviceCgroupRule checks that 'val' is a well-formed device cgroup rule.
771
+// 'val' is accepted only if it matches deviceCgroupRuleRegexp, i.e. has the form:
772
+//    'type major:minor mode'
773
+func validateDeviceCgroupRule(val string) (string, error) {
774
+	if deviceCgroupRuleRegexp.MatchString(val) {
775
+		return val, nil
776
+	}
777
+
778
+	return val, fmt.Errorf("invalid device cgroup format '%s'", val)
779
+}
780
+
770 781
 // validDeviceMode checks if the mode for device is valid or not.
771 782
 // Valid mode is a composition of r (read), w (write), and m (mknod).
772 783
 func validDeviceMode(mode string) bool {
... ...
@@ -1358,6 +1358,7 @@ _docker_container_run() {
1358 1358
 		--cpuset-mems
1359 1359
 		--cpu-shares -c
1360 1360
 		--device
1361
+		--device-cgroup-rule
1361 1362
 		--device-read-bps
1362 1363
 		--device-read-iops
1363 1364
 		--device-write-bps
... ...
@@ -121,6 +121,7 @@ complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l cap-drop -d
121 121
 complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l cidfile -d 'Write the container ID to the file'
122 122
 complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l cpuset -d 'CPUs in which to allow execution (0-3, 0,1)'
123 123
 complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l device -d 'Add a host device to the container (e.g. --device=/dev/sdc:/dev/xvdc:rwm)'
124
+complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l device-cgroup-rule -d 'Add a rule to the cgroup allowed devices list (e.g. --device-cgroup-rule="c 13:37 rwm")'
124 125
 complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l dns -d 'Set custom DNS servers'
125 126
 complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l dns-opt -d "Set custom DNS options (Use --dns-opt='' if you don't wish to set options)"
126 127
 complete -c docker -A -f -n '__fish_seen_subcommand_from create' -l dns-search -d "Set custom DNS search domains (Use --dns-search=. if you don't wish to set the search domain)"
... ...
@@ -312,6 +313,7 @@ complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l cidfile -d 'Wri
312 312
 complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l cpuset -d 'CPUs in which to allow execution (0-3, 0,1)'
313 313
 complete -c docker -A -f -n '__fish_seen_subcommand_from run' -s d -l detach -d 'Detached mode: run the container in the background and print the new container ID'
314 314
 complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l device -d 'Add a host device to the container (e.g. --device=/dev/sdc:/dev/xvdc:rwm)'
315
+complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l device-cgroup-rule -d 'Add a rule to the cgroup allowed devices list (e.g. --device-cgroup-rule="c 13:37 rwm")'
315 316
 complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l dns -d 'Set custom DNS servers'
316 317
 complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l dns-opt -d "Set custom DNS options (Use --dns-opt='' if you don't wish to set options)"
317 318
 complete -c docker -A -f -n '__fish_seen_subcommand_from run' -l dns-search -d "Set custom DNS search domains (Use --dns-search=. if you don't wish to set the search domain)"
... ...
@@ -546,6 +546,7 @@ __docker_container_subcommand() {
546 546
         "($help)--cidfile=[Write the container ID to the file]:CID file:_files"
547 547
         "($help)--cpus=[Number of CPUs (default 0.000)]:cpus: "
548 548
         "($help)*--device=[Add a host device to the container]:device:_files"
549
+        "($help)*--device-cgroup-rule=[Add a rule to the cgroup allowed devices list]:device:cgroup: "
549 550
         "($help)*--device-read-bps=[Limit the read rate (bytes per second) from a device]:device:IO rate: "
550 551
         "($help)*--device-read-iops=[Limit the read rate (IO per second) from a device]:device:IO rate: "
551 552
         "($help)*--device-write-bps=[Limit the write rate (bytes per second) to a device]:device:IO rate: "
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"os"
7 7
 	"os/exec"
8 8
 	"path/filepath"
9
+	"regexp"
9 10
 	"sort"
10 11
 	"strconv"
11 12
 	"strings"
... ...
@@ -27,6 +28,10 @@ import (
27 27
 	specs "github.com/opencontainers/runtime-spec/specs-go"
28 28
 )
29 29
 
30
+var (
31
+	deviceCgroupRuleRegex = regexp.MustCompile("^([acb]) ([0-9]+|\\*):([0-9]+|\\*) ([rwm]{1,3})$")
32
+)
33
+
30 34
 func setResources(s *specs.Spec, r containertypes.Resources) error {
31 35
 	weightDevices, err := getBlkioWeightDevices(r)
32 36
 	if err != nil {
... ...
@@ -106,6 +111,41 @@ func setDevices(s *specs.Spec, c *container.Container) error {
106 106
 			devs = append(devs, d...)
107 107
 			devPermissions = append(devPermissions, dPermissions...)
108 108
 		}
109
+
110
+		for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules {
111
+			ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1)
112
+			if len(ss) == 0 || len(ss[0]) != 5 { // no match yields a nil slice: guard before indexing ss[0]
113
+				return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule)
114
+			}
115
+			matches := ss[0]
116
+
117
+			dPermissions := specs.DeviceCgroup{
118
+				Allow:  true,
119
+				Type:   &matches[1],
120
+				Access: &matches[4],
121
+			}
122
+			if matches[2] == "*" {
123
+				major := int64(-1)
124
+				dPermissions.Major = &major
125
+			} else {
126
+				major, err := strconv.ParseInt(matches[2], 10, 64)
127
+				if err != nil {
128
+					return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule)
129
+				}
130
+				dPermissions.Major = &major
131
+			}
132
+			if matches[3] == "*" {
133
+				minor := int64(-1)
134
+				dPermissions.Minor = &minor
135
+			} else {
136
+				minor, err := strconv.ParseInt(matches[3], 10, 64)
137
+				if err != nil {
138
+					return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule)
139
+				}
140
+				dPermissions.Minor = &minor
141
+			}
142
+			devPermissions = append(devPermissions, dPermissions)
143
+		}
109 144
 	}
110 145
 
111 146
 	s.Linux.Devices = append(s.Linux.Devices, devs...)
... ...
@@ -44,6 +44,7 @@ Options:
44 44
       --cpuset-cpus string          CPUs in which to allow execution (0-3, 0,1)
45 45
       --cpuset-mems string          MEMs in which to allow execution (0-3, 0,1)
46 46
       --device value                Add a host device to the container (default [])
47
+      --device-cgroup-rule value    Add a rule to the cgroup allowed devices list
47 48
       --device-read-bps value       Limit read rate (bytes per second) from a device (default [])
48 49
       --device-read-iops value      Limit read rate (IO per second) from a device (default [])
49 50
       --device-write-bps value      Limit write rate (bytes per second) to a device (default [])
... ...
@@ -48,6 +48,7 @@ Options:
48 48
   -d, --detach                      Run container in background and print container ID
49 49
       --detach-keys string          Override the key sequence for detaching a container
50 50
       --device value                Add a host device to the container (default [])
51
+      --device-cgroup-rule value    Add a rule to the cgroup allowed devices list
51 52
       --device-read-bps value       Limit read rate (bytes per second) from a device (default [])
52 53
       --device-read-iops value      Limit read rate (IO per second) from a device (default [])
53 54
       --device-write-bps value      Limit write rate (bytes per second) to a device (default [])
... ...
@@ -4415,3 +4415,17 @@ func (s *DockerSuite) TestRunHostnameInHostMode(c *check.C) {
4415 4415
 	out, _ := dockerCmd(c, "run", "--net=host", "--hostname=foobar", "busybox", "sh", "-c", `echo $HOSTNAME && hostname`)
4416 4416
 	c.Assert(strings.TrimSpace(out), checker.Equals, expectedOutput)
4417 4417
 }
4418
+
4419
+func (s *DockerSuite) TestRunAddDeviceCgroupRule(c *check.C) {
4420
+	testRequires(c, DaemonIsLinux)
4421
+
4422
+	deviceRule := "c 7:128 rwm"
4423
+
4424
+	out, _ := dockerCmd(c, "run", "--rm", "busybox", "cat", "/sys/fs/cgroup/devices/devices.list")
4425
+	if strings.Contains(out, deviceRule) {
4426
+		c.Fatalf("%s shouldn't been in the device.list", deviceRule)
4427
+	}
4428
+
4429
+	out, _ = dockerCmd(c, "run", "--rm", fmt.Sprintf("--device-cgroup-rule=%s", deviceRule), "busybox", "grep", deviceRule, "/sys/fs/cgroup/devices/devices.list")
4430
+	c.Assert(strings.TrimSpace(out), checker.Equals, deviceRule)
4431
+}
... ...
@@ -27,6 +27,7 @@ docker-run - Run a command in a new container
27 27
 [**-d**|**--detach**]
28 28
 [**--detach-keys**[=*[]*]]
29 29
 [**--device**[=*[]*]]
30
+[**--device-cgroup-rule**[=*[]*]]
30 31
 [**--device-read-bps**[=*[]*]]
31 32
 [**--device-read-iops**[=*[]*]]
32 33
 [**--device-write-bps**[=*[]*]]
... ...
@@ -246,6 +247,16 @@ See **config-json(5)** for documentation on using a configuration file.
246 246
 **--device**=[]
247 247
    Add a host device to the container (e.g. --device=/dev/sdc:/dev/xvdc:rwm)
248 248
 
249
+**--device-cgroup-rule**=[]
250
+   Add a rule to the cgroup allowed devices list.
251
+   
252
+   The rule is expected to be in the format specified in the Linux kernel documentation (Documentation/cgroup-v1/devices.txt):
253
+     - type: `a` (all), `c` (char) or `b` (block)
254
+     - major and minor: either a number or `*` for all
255
+     - permission: a composition of `r` (read), `w` (write) and `m` (mknod)
256
+
257
+   Example: `c 1:3 mr`: allow for character device with major `1` and minor `3` to be created (`m`) and read (`r`)
258
+
249 259
 **--device-read-bps**=[]
250 260
    Limit read rate from a device (e.g. --device-read-bps=/dev/sda:1mb)
251 261
 
... ...
@@ -62,6 +62,18 @@ func loadLongDescription(cmd *cobra.Command, path string) error {
62 62
 			return err
63 63
 		}
64 64
 		cmd.Long = string(content)
65
+
66
+		fullpath = filepath.Join(path, cmd.Name()+"-example.md")
67
+		if _, err := os.Stat(fullpath); err != nil {
68
+			continue
69
+		}
70
+
71
+		content, err = ioutil.ReadFile(fullpath)
72
+		if err != nil {
73
+			return err
74
+		}
75
+		cmd.Example = string(content)
76
+
65 77
 	}
66 78
 	return nil
67 79
 }
68 80
new file mode 100644
... ...
@@ -0,0 +1,35 @@
0
+### Specify isolation technology for container (--isolation)
1
+
2
+This option is useful in situations where you are running Docker containers on
3
+Windows. The `--isolation=<value>` option sets a container's isolation
4
+technology. On Linux, the only supported value is `default`, which uses
5
+Linux namespaces. On Microsoft Windows, you can specify these values:
6
+
7
+* `default`: Use the value specified by the Docker daemon's `--exec-opt` . If the `daemon` does not specify an isolation technology, Microsoft Windows uses `process` as its default value.
8
+* `process`: Namespace isolation only.
9
+* `hyperv`: Hyper-V hypervisor partition-based isolation.
10
+
11
+Specifying the `--isolation` flag without a value is the same as setting `--isolation="default"`.
12
+
13
+### Dealing with dynamically created devices (--device-cgroup-rule)
14
+
15
+Devices available to a container are assigned at creation time. The
16
+assigned devices will both be added to the cgroup `devices.allow` file and
17
+created into the container once it is run. This poses a problem when
18
+a new device needs to be added to a running container.
19
+
20
+One of the solutions is to add a more permissive rule to a container
21
+allowing it access to a wider range of devices. For example, supposing
22
+our container needs access to a character device with major `42` and
23
+any minor number (added as new devices appear), the
24
+following rule would be added:
25
+
26
+```
27
+docker create --device-cgroup-rule='c 42:* rmw' --name my-container my-image
28
+```
29
+
30
+Then, a user could ask `udev` to execute a script that would run `docker exec my-container mknod newDevX c 42 <minor>`
31
+to create the required device when it is added.
32
+
33
+NOTE: initially present devices still need to be explicitly added to
34
+the create/run command.
... ...
@@ -6,7 +6,7 @@ any point.
6 6
 
7 7
 The initial status of the container created with **docker create** is 'created'.
8 8
 
9
-# OPTIONS 
9
+### OPTIONS 
10 10
 
11 11
 The `CONTAINER-DIR` must be an absolute path such as `/src/docs`. The `HOST-DIR`
12 12
 can be an absolute path or a `name` value. A `name` value must start with an
... ...
@@ -82,18 +82,3 @@ change propagation properties of source mount. Say `/` is source mount for
82 82
 
83 83
 To disable automatic copying of data from the container path to the volume, use
84 84
 the `nocopy` flag. The `nocopy` flag can be set on bind mounts and named volumes.
85
-
86
-# EXAMPLES
87
-
88
-## Specify isolation technology for container (--isolation)
89
-
90
-This option is useful in situations where you are running Docker containers on
91
-Windows. The `--isolation=<value>` option sets a container's isolation
92
-technology. On Linux, the only supported is the `default` option which uses
93
-Linux namespaces. On Microsoft Windows, you can specify these values:
94
-
95
-* `default`: Use the value specified by the Docker daemon's `--exec-opt` . If the `daemon` does not specify an isolation technology, Microsoft Windows uses `process` as its default value.
96
-* `process`: Namespace isolation only.
97
-* `hyperv`: Hyper-V hypervisor partition-based isolation.
98
-
99
-Specifying the `--isolation` flag without a value is the same as setting `--isolation="default"`.
... ...
@@ -125,7 +125,7 @@ github.com/matttproud/golang_protobuf_extensions v1.0.0
125 125
 github.com/pkg/errors 839d9e913e063e28dfd0e6c7b7512793e0a48be9
126 126
 
127 127
 # cli
128
-github.com/spf13/cobra v1.5 https://github.com/dnephin/cobra.git
128
+github.com/spf13/cobra v1.5.1 https://github.com/dnephin/cobra.git
129 129
 github.com/spf13/pflag dabebe21bf790f782ea4c7bbd2efc430de182afd
130 130
 github.com/inconshreveable/mousetrap 76626ae9c91c4f2a10f34cad8ce83ea42c93bb75
131 131
 github.com/flynn-archive/go-shlex 3f9db97f856818214da2e1057f8ad84803971cff
... ...
@@ -66,7 +66,7 @@ func GenManTreeFromOpts(cmd *cobra.Command, opts GenManTreeOptions) error {
66 66
 		separator = opts.CommandSeparator
67 67
 	}
68 68
 	basename := strings.Replace(cmd.CommandPath(), " ", separator, -1)
69
-	filename := filepath.Join(opts.Path, basename + "." + section)
69
+	filename := filepath.Join(opts.Path, basename+"."+section)
70 70
 	f, err := os.Create(filename)
71 71
 	if err != nil {
72 72
 		return err
... ...
@@ -197,7 +197,7 @@ func genMan(cmd *cobra.Command, header *GenManHeader) []byte {
197 197
 	manPrintOptions(buf, cmd)
198 198
 	if len(cmd.Example) > 0 {
199 199
 		fmt.Fprintf(buf, "# EXAMPLE\n")
200
-		fmt.Fprintf(buf, "```\n%s\n```\n", cmd.Example)
200
+		fmt.Fprintf(buf, "\n%s\n\n", cmd.Example)
201 201
 	}
202 202
 	if hasSeeAlso(cmd) {
203 203
 		fmt.Fprintf(buf, "# SEE ALSO\n")