Browse code

set default seccomp profile

Signed-off-by: Jessica Frazelle <acidburn@docker.com>

Jessica Frazelle authored on 2015/12/19 03:01:58
Showing 5 changed files
... ...
@@ -69,6 +69,10 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
69 69
 		if err := d.setCapabilities(container, c); err != nil {
70 70
 			return nil, err
71 71
 		}
72
+
73
+		if c.SeccompProfile == "" {
74
+			container.Seccomp = getDefaultSeccompProfile()
75
+		}
72 76
 	}
73 77
 	// add CAP_ prefix to all caps for new libcontainer update to match
74 78
 	// the spec format.
... ...
@@ -89,6 +93,7 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
89 89
 			return nil, err
90 90
 		}
91 91
 	}
92
+
92 93
 	if err := execdriver.SetupCgroups(container, c); err != nil {
93 94
 		return nil, err
94 95
 	}
... ...
@@ -12,6 +12,10 @@ import (
12 12
 	"github.com/opencontainers/specs"
13 13
 )
14 14
 
15
+func getDefaultSeccompProfile() *configs.Seccomp {
16
+	return defaultSeccompProfile
17
+}
18
+
15 19
 func loadSeccompProfile(path string) (*configs.Seccomp, error) {
16 20
 	f, err := ioutil.ReadFile(path)
17 21
 	if err != nil {
18 22
new file mode 100644
... ...
@@ -0,0 +1,319 @@
0
+// +build linux
1
+
2
+package native
3
+
4
+import "github.com/opencontainers/runc/libcontainer/configs"
5
+
6
+var defaultSeccompProfile = &configs.Seccomp{
7
+	DefaultAction: configs.Allow,
8
+	Syscalls: []*configs.Syscall{
9
+		{
10
+			// Quota and Accounting syscalls which could let containers
11
+			// disable their own resource limits or process accounting
12
+			Name:   "acct",
13
+			Action: configs.Errno,
14
+			Args:   []*configs.Arg{},
15
+		},
16
+		{
17
+			// Prevent containers from using the kernel keyring,
18
+			// which is not namespaced
19
+			Name:   "add_key",
20
+			Action: configs.Errno,
21
+			Args:   []*configs.Arg{},
22
+		},
23
+		{
24
+			// Similar to clock_settime and settimeofday
25
+			// Time/Date is not namespaced
26
+			Name:   "adjtimex",
27
+			Action: configs.Errno,
28
+			Args:   []*configs.Arg{},
29
+		},
30
+		{
31
+			// Time/Date is not namespaced
32
+			Name:   "clock_settime",
33
+			Action: configs.Errno,
34
+			Args:   []*configs.Arg{},
35
+		},
36
+		{
37
+			// Deny cloning new namespaces
38
+			Name:   "clone",
39
+			Action: configs.Errno,
40
+			Args: []*configs.Arg{
41
+				{
42
+					// flags from sched.h
43
+					// CLONE_NEWUTS		0x04000000
44
+					// CLONE_NEWIPC		0x08000000
45
+					// CLONE_NEWUSER	0x10000000
46
+					// CLONE_NEWPID		0x20000000
47
+					// CLONE_NEWNET		0x40000000
48
+					Index: 0,
49
+					Value: uint64(0x04000000),
50
+					Op:    configs.GreaterThanOrEqualTo,
51
+				},
52
+				{
53
+					// flags from sched.h
54
+					// CLONE_NEWNS		0x00020000
55
+					Index: 0,
56
+					Value: uint64(0x00020000),
57
+					Op:    configs.EqualTo,
58
+				},
59
+			},
60
+		},
61
+		{
62
+			// Deny manipulation and functions on kernel modules.
63
+			Name:   "create_module",
64
+			Action: configs.Errno,
65
+			Args:   []*configs.Arg{},
66
+		},
67
+		{
68
+			// Deny manipulation and functions on kernel modules.
69
+			Name:   "delete_module",
70
+			Action: configs.Errno,
71
+			Args:   []*configs.Arg{},
72
+		},
73
+		{
74
+			// Deny retrieval of exported kernel and module symbols
75
+			Name:   "get_kernel_syms",
76
+			Action: configs.Errno,
77
+			Args:   []*configs.Arg{},
78
+		},
79
+		{
80
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
81
+			// They're gated by CAP_SYS_NICE,
82
+			// which we do not retain by default in containers.
83
+			Name:   "get_mempolicy",
84
+			Action: configs.Errno,
85
+			Args:   []*configs.Arg{},
86
+		},
87
+		{
88
+			// Deny getting the list of robust futexes
89
+			Name:   "get_robust_list",
90
+			Action: configs.Errno,
91
+			Args:   []*configs.Arg{},
92
+		},
93
+		{
94
+			// Deny manipulation and functions on kernel modules.
95
+			Name:   "init_module",
96
+			Action: configs.Errno,
97
+			Args:   []*configs.Arg{},
98
+		},
99
+		{
100
+			// Prevent containers from modifying kernel I/O privilege levels.
101
+			// Already restricted as containers drop CAP_SYS_RAWIO by default.
102
+			Name:   "ioperm",
103
+			Action: configs.Errno,
104
+			Args:   []*configs.Arg{},
105
+		},
106
+		{
107
+			// Prevent containers from modifying kernel I/O privilege levels.
108
+			// Already restricted as containers drop CAP_SYS_RAWIO by default.
109
+			Name:   "iopl",
110
+			Action: configs.Errno,
111
+			Args:   []*configs.Arg{},
112
+		},
113
+		{
114
+			// Sister syscall of kexec_load that does the same thing,
115
+			// slightly different arguments
116
+			Name:   "kexec_file_load",
117
+			Action: configs.Errno,
118
+			Args:   []*configs.Arg{},
119
+		},
120
+		{
121
+			// Deny loading a new kernel for later execution
122
+			Name:   "kexec_load",
123
+			Action: configs.Errno,
124
+			Args:   []*configs.Arg{},
125
+		},
126
+		{
127
+			// Prevent containers from using the kernel keyring,
128
+			// which is not namespaced
129
+			Name:   "keyctl",
130
+			Action: configs.Errno,
131
+			Args:   []*configs.Arg{},
132
+		},
133
+		{
134
+			// Tracing/profiling syscalls,
135
+			// which could leak a lot of information on the host
136
+			Name:   "lookup_dcookie",
137
+			Action: configs.Errno,
138
+			Args:   []*configs.Arg{},
139
+		},
140
+		{
141
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
142
+			// They're gated by CAP_SYS_NICE,
143
+			// which we do not retain by default in containers.
144
+			Name:   "mbind",
145
+			Action: configs.Errno,
146
+			Args:   []*configs.Arg{},
147
+		},
148
+		{
149
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
150
+			// They're gated by CAP_SYS_NICE,
151
+			// which we do not retain by default in containers.
152
+			Name:   "migrate_pages",
153
+			Action: configs.Errno,
154
+			Args:   []*configs.Arg{},
155
+		},
156
+		{
157
+			// Old syscall only used in 16-bit code,
158
+			// and a potential information leak
159
+			Name:   "modify_ldt",
160
+			Action: configs.Errno,
161
+			Args:   []*configs.Arg{},
162
+		},
163
+		{
164
+			// Deny mount
165
+			Name:   "mount",
166
+			Action: configs.Errno,
167
+			Args:   []*configs.Arg{},
168
+		},
169
+		{
170
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
171
+			// They're gated by CAP_SYS_NICE,
172
+			// which we do not retain by default in containers.
173
+			Name:   "move_pages",
174
+			Action: configs.Errno,
175
+			Args:   []*configs.Arg{},
176
+		},
177
+		{
178
+			// Deny interaction with the kernel nfs daemon
179
+			Name:   "nfsservctl",
180
+			Action: configs.Errno,
181
+			Args:   []*configs.Arg{},
182
+		},
183
+		{
184
+			// Cause of an old container breakout,
185
+			// might as well restrict it to be on the safe side
186
+			Name:   "open_by_handle_at",
187
+			Action: configs.Errno,
188
+			Args:   []*configs.Arg{},
189
+		},
190
+		{
191
+			// Tracing/profiling syscalls,
192
+			// which could leak a lot of information on the host
193
+			Name:   "perf_event_open",
194
+			Action: configs.Errno,
195
+			Args:   []*configs.Arg{},
196
+		},
197
+		{
198
+			// Prevent container from enabling BSD emulation.
199
+			// Not inherently dangerous, but poorly tested,
200
+			// potential for a lot of kernel vulns in this.
201
+			Name:   "personality",
202
+			Action: configs.Errno,
203
+			Args:   []*configs.Arg{},
204
+		},
205
+		{
206
+			// Deny pivot_root
207
+			Name:   "pivot_root",
208
+			Action: configs.Errno,
209
+			Args:   []*configs.Arg{},
210
+		},
211
+		{
212
+			// Already blocked by dropping CAP_SYS_PTRACE
213
+			Name:   "ptrace",
214
+			Action: configs.Errno,
215
+			Args:   []*configs.Arg{},
216
+		},
217
+		{
218
+			// Deny manipulation and functions on kernel modules.
219
+			Name:   "query_module",
220
+			Action: configs.Errno,
221
+			Args:   []*configs.Arg{},
222
+		},
223
+		{
224
+			// Quota and Accounting syscalls which could let containers
225
+			// disable their own resource limits or process accounting
226
+			Name:   "quotactl",
227
+			Action: configs.Errno,
228
+			Args:   []*configs.Arg{},
229
+		},
230
+		{
231
+			// Probably a bad idea to let containers reboot the host
232
+			Name:   "reboot",
233
+			Action: configs.Errno,
234
+			Args:   []*configs.Arg{},
235
+		},
236
+		{
237
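+			// Probably a bad idea to let containers restart (NOTE(review): restart_syscall(2) resumes an interrupted system call rather than restarting anything; confirm blocking it is intended)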
238
+			Name:   "restart_syscall",
239
+			Action: configs.Errno,
240
+			Args:   []*configs.Arg{},
241
+		},
242
+		{
243
+			// Prevent containers from using the kernel keyring,
244
+			// which is not namespaced
245
+			Name:   "request_key",
246
+			Action: configs.Errno,
247
+			Args:   []*configs.Arg{},
248
+		},
249
+		{
250
+			// meta, deny seccomp
251
+			Name:   "seccomp",
252
+			Action: configs.Errno,
253
+			Args:   []*configs.Arg{},
254
+		},
255
+		{
256
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
257
+			// They're gated by CAP_SYS_NICE,
258
+			// which we do not retain by default in containers.
259
+			Name:   "set_mempolicy",
260
+			Action: configs.Errno,
261
+			Args:   []*configs.Arg{},
262
+		},
263
+		{
264
+			// deny associating a thread with a namespace
265
+			Name:   "setns",
266
+			Action: configs.Errno,
267
+			Args:   []*configs.Arg{},
268
+		},
269
+		{
270
+			// Deny setting the list of robust futexes
271
+			Name:   "set_robust_list",
272
+			Action: configs.Errno,
273
+			Args:   []*configs.Arg{},
274
+		},
275
+		{
276
+			// Time/Date is not namespaced
277
+			Name:   "settimeofday",
278
+			Action: configs.Errno,
279
+			Args:   []*configs.Arg{},
280
+		},
281
+		{
282
+			// Deny start/stop swapping to file/device
283
+			Name:   "swapon",
284
+			Action: configs.Errno,
285
+			Args:   []*configs.Arg{},
286
+		},
287
+		{
288
+			// Deny start/stop swapping to file/device
289
+			Name:   "swapoff",
290
+			Action: configs.Errno,
291
+			Args:   []*configs.Arg{},
292
+		},
293
+		{
294
+			// Deny read/write system parameters
295
+			Name:   "_sysctl",
296
+			Action: configs.Errno,
297
+			Args:   []*configs.Arg{},
298
+		},
299
+		{
300
+			// Deny umount
301
+			Name:   "umount2",
302
+			Action: configs.Errno,
303
+			Args:   []*configs.Arg{},
304
+		},
305
+		{
306
+			// Same as clone
307
+			Name:   "unshare",
308
+			Action: configs.Errno,
309
+			Args:   []*configs.Arg{},
310
+		},
311
+		{
312
+			// Older syscall related to shared libraries, unused for a long time
313
+			Name:   "uselib",
314
+			Action: configs.Errno,
315
+			Args:   []*configs.Arg{},
316
+		},
317
+	},
318
+}
... ...
@@ -2858,18 +2858,25 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
2858 2858
 	testRequires(c, Apparmor, DaemonIsLinux, NotUserNamespace)
2859 2859
 
2860 2860
 	name := "acidburn"
2861
-	if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount"); err == nil || !strings.Contains(out, "Permission denied") {
2861
+	out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount")
2862
+	if err == nil ||
2863
+		!(strings.Contains(strings.ToLower(out), "permission denied") ||
2864
+			strings.Contains(strings.ToLower(out), "operation not permitted")) {
2862 2865
 		c.Fatalf("unshare with --mount-proc should have failed with permission denied, got: %s, %v", out, err)
2863 2866
 	}
2864 2867
 
2865 2868
 	name = "cereal"
2866
-	if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !strings.Contains(out, "Permission denied") {
2869
+	out, _, err = dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
2870
+	if err == nil ||
2871
+		!(strings.Contains(strings.ToLower(out), "permission denied") ||
2872
+			strings.Contains(strings.ToLower(out), "operation not permitted")) {
2867 2873
 		c.Fatalf("unshare and mount of /proc should have failed with permission denied, got: %s, %v", out, err)
2868 2874
 	}
2869 2875
 
2870 2876
 	/* Ensure still fails if running privileged with the default policy */
2871 2877
 	name = "crashoverride"
2872
-	if out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) {
2878
+	out, _, err = dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
2879
+	if err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) {
2873 2880
 		c.Fatalf("privileged unshare with apparmor should have failed with permission denied, got: %s, %v", out, err)
2874 2881
 	}
2875 2882
 }
... ...
@@ -548,3 +548,39 @@ func (s *DockerSuite) TestRunSeccompProfileDenyChmod(c *check.C) {
548 548
 		c.Fatalf("expected chmod with seccomp profile denied to fail, got %s", out)
549 549
 	}
550 550
 }
551
+
552
+// TestRunSeccompProfileDenyUserns checks that 'docker run jess/unshare unshare --map-root-user --user sh -c whoami' exits with operation not permitted.
553
+func (s *DockerSuite) TestRunSeccompProfileDenyUserns(c *check.C) {
554
+	testRequires(c, SameHostDaemon, seccompEnabled)
555
+	// from sched.h
556
+	jsonData := fmt.Sprintf(`{
557
+	"defaultAction": "SCMP_ACT_ALLOW",
558
+	"syscalls": [
559
+		{
560
+			"name": "unshare",
561
+			"action": "SCMP_ACT_ERRNO",
562
+			"args": [
563
+				{
564
+					"index": 0,
565
+					"value": %d,
566
+					"op": "SCMP_CMP_EQ"
567
+				}
568
+			]
569
+		}
570
+	]
571
+}`, uint64(0x10000000))
572
+	tmpFile, err := ioutil.TempFile("", "profile.json")
573
+	defer tmpFile.Close()
574
+	if err != nil {
575
+		c.Fatal(err)
576
+	}
577
+
578
+	if _, err := tmpFile.Write([]byte(jsonData)); err != nil {
579
+		c.Fatal(err)
580
+	}
581
+	runCmd := exec.Command(dockerBinary, "run", "--security-opt", "seccomp:"+tmpFile.Name(), "jess/unshare", "unshare", "--map-root-user", "--user", "sh", "-c", "whoami")
582
+	out, _, _ := runCommandWithOutput(runCmd)
583
+	if !strings.Contains(out, "Operation not permitted") {
584
+		c.Fatalf("expected unshare userns with seccomp profile denied to fail, got %s", out)
585
+	}
586
+}