Browse code

add -privileged flag and relevant tests

Jérôme Petazzoni authored on 2013/08/10 07:53:02
Showing 4 changed files
... ...
@@ -78,6 +78,7 @@ type Config struct {
78 78
 	VolumesFrom     string
79 79
 	Entrypoint      []string
80 80
 	NetworkDisabled bool
81
+	Privileged      bool
81 82
 }
82 83
 
83 84
 type HostConfig struct {
... ...
@@ -108,6 +109,7 @@ func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig,
108 108
 	flMemory := cmd.Int64("m", 0, "Memory limit (in bytes)")
109 109
 	flContainerIDFile := cmd.String("cidfile", "", "Write the container ID to the file")
110 110
 	flNetwork := cmd.Bool("n", true, "Enable networking for this container")
111
+	flPrivileged := cmd.Bool("privileged", false, "Give extended privileges to this container")
111 112
 
112 113
 	if capabilities != nil && *flMemory > 0 && !capabilities.MemoryLimit {
113 114
 		//fmt.Fprintf(stdout, "WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n")
... ...
@@ -194,6 +196,7 @@ func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig,
194 194
 		Volumes:         flVolumes,
195 195
 		VolumesFrom:     *flVolumesFrom,
196 196
 		Entrypoint:      entrypoint,
197
+		Privileged:      *flPrivileged,
197 198
 	}
198 199
 	hostConfig := &HostConfig{
199 200
 		Binds:           binds,
... ...
@@ -1313,3 +1313,35 @@ func TestOnlyLoopbackExistsWhenUsingDisableNetworkOption(t *testing.T) {
1313 1313
 	}
1314 1314
 
1315 1315
 }
1316
+
1317
+func TestPrivilegedCanMknod(t *testing.T) {
1318
+	runtime := mkRuntime(t)
1319
+	defer nuke(runtime)
1320
+	if output, _ := runContainer(runtime, []string{"-privileged", "_", "sh", "-c", "mknod /tmp/sda b 8 0 && echo ok"}, t); output != "ok\n" {
1321
+		t.Fatal("Could not mknod into privileged container")
1322
+	}
1323
+}
1324
+
1325
+func TestPrivilegedCanMount(t *testing.T) {
1326
+	runtime := mkRuntime(t)
1327
+	defer nuke(runtime)
1328
+	if output, _ := runContainer(runtime, []string{"-privileged", "_", "sh", "-c", "mount -t tmpfs none /tmp && echo ok"}, t); output != "ok\n" {
1329
+		t.Fatal("Could not mount into privileged container")
1330
+	}
1331
+}
1332
+
1333
+func TestPrivilegedCannotMknod(t *testing.T) {
1334
+	runtime := mkRuntime(t)
1335
+	defer nuke(runtime)
1336
+	if output, _ := runContainer(runtime, []string{"_", "sh", "-c", "mknod /tmp/sda b 8 0 || echo ok"}, t); output != "ok\n" {
1337
+		t.Fatal("Could mknod into secure container")
1338
+	}
1339
+}
1340
+
1341
+func TestPrivilegedCannotMount(t *testing.T) {
1342
+	runtime := mkRuntime(t)
1343
+	defer nuke(runtime)
1344
+	if output, _ := runContainer(runtime, []string{"_", "sh", "-c", "mount -t tmpfs none /tmp || echo ok"}, t); output != "ok\n" {
1345
+		t.Fatal("Could mount into secure container")
1346
+	}
1347
+}
... ...
@@ -19,6 +19,7 @@
19 19
       -e=[]: Set environment variables
20 20
       -h="": Container host name
21 21
       -i=false: Keep stdin open even if not attached
22
+      -privileged=false: Give extended privileges to this container
22 23
       -m=0: Memory limit (in bytes)
23 24
       -n=true: Enable networking for this container
24 25
       -p=[]: Map a network port to the container
... ...
@@ -38,3 +39,15 @@ Examples
38 38
     docker run -cidfile /tmp/docker_test.cid ubuntu echo "test"
39 39
 
40 40
 | This will create a container and print "test" to the console. The cidfile flag makes docker attempt to create a new file and write the container ID to it. If the file exists already, docker will return an error. Docker will close this file when docker run exits.
41
+
42
+.. code-block:: bash
43
+
44
+   docker run ubuntu mount -t tmpfs none /var/spool/squid
45
+
46
+| This will *not* work, because by default, most potentially dangerous kernel capabilities are dropped, including ``cap_sys_admin`` (which is required to mount filesystems). However, the ``-privileged`` flag will allow it to run:
47
+
48
+.. code-block:: bash
49
+
50
+   docker run -privileged ubuntu mount -t tmpfs none /var/spool/squid
51
+
52
+| The ``-privileged`` flag gives *all* capabilities to the container, and it also lifts all the limitations enforced by the ``device`` cgroup controller. In other words, the container can then do almost everything that the host can do. This flag exists to allow special use-cases, like running Docker within Docker.
... ...
@@ -40,6 +40,9 @@ lxc.console = none
40 40
 # no controlling tty at all
41 41
 lxc.tty = 1
42 42
 
43
+{{if .Config.Privileged}}
44
+lxc.cgroup.devices.allow = a 
45
+{{else}}
43 46
 # no implicit access to devices
44 47
 lxc.cgroup.devices.deny = a
45 48
 
... ...
@@ -69,7 +72,7 @@ lxc.cgroup.devices.allow = c 10:200 rwm
69 69
 
70 70
 # rtc
71 71
 #lxc.cgroup.devices.allow = c 254:0 rwm
72
-
72
+{{end}}
73 73
 
74 74
 # standard mount point
75 75
 #  WARNING: procfs is a known attack vector and should probably be disabled
... ...
@@ -95,11 +98,15 @@ lxc.mount.entry = {{$realPath}} {{$ROOTFS}}/{{$virtualPath}} none bind,{{ if ind
95 95
 {{end}}
96 96
 {{end}}
97 97
 
98
+{{if .Config.Privileged}}
99
+# retain all capabilities; no lxc.cap.drop line
100
+{{else}}
98 101
 # drop linux capabilities (apply mainly to the user root in the container)
99 102
 #  (Note: 'lxc.cap.keep' is coming soon and should replace this under the
100 103
 #         security principle 'deny all unless explicitly permitted', see
101 104
 #         http://sourceforge.net/mailarchive/message.php?msg_id=31054627 )
102 105
 lxc.cap.drop = audit_control audit_write mac_admin mac_override mknod setfcap setpcap sys_admin sys_boot sys_module sys_nice sys_pacct sys_rawio sys_resource sys_time sys_tty_config
106
+{{end}}
103 107
 
104 108
 # limits
105 109
 {{if .Config.Memory}}