Browse code

Merge pull request #9074 from rhatdan/shm

Allow IPC namespace to be shared between containers or with the host

Michael Crosby authored on 2014/11/15 03:34:00
Showing 9 changed files
... ...
@@ -233,6 +233,18 @@ func populateCommand(c *Container, env []string) error {
233 233
 		return fmt.Errorf("invalid network mode: %s", c.hostConfig.NetworkMode)
234 234
 	}
235 235
 
236
+	ipc := &execdriver.Ipc{}
237
+
238
+	if c.hostConfig.IpcMode.IsContainer() {
239
+		ic, err := c.getIpcContainer()
240
+		if err != nil {
241
+			return err
242
+		}
243
+		ipc.ContainerID = ic.ID
244
+	} else {
245
+		ipc.HostIpc = c.hostConfig.IpcMode.IsHost()
246
+	}
247
+
236 248
 	// Build lists of devices allowed and created within the container.
237 249
 	userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices))
238 250
 	for i, deviceMapping := range c.hostConfig.Devices {
... ...
@@ -274,6 +286,7 @@ func populateCommand(c *Container, env []string) error {
274 274
 		InitPath:           "/.dockerinit",
275 275
 		WorkingDir:         c.Config.WorkingDir,
276 276
 		Network:            en,
277
+		Ipc:                ipc,
277 278
 		Resources:          resources,
278 279
 		AllowedDevices:     allowedDevices,
279 280
 		AutoCreatedDevices: autoCreatedDevices,
... ...
@@ -1250,10 +1263,25 @@ func (container *Container) GetMountLabel() string {
1250 1250
 	return container.MountLabel
1251 1251
 }
1252 1252
 
1253
+func (container *Container) getIpcContainer() (*Container, error) {
1254
+	containerID := container.hostConfig.IpcMode.Container()
1255
+	c := container.daemon.Get(containerID)
1256
+	if c == nil {
1257
+		return nil, fmt.Errorf("no such container to join IPC: %s", containerID)
1258
+	}
1259
+	if !c.IsRunning() {
1260
+		return nil, fmt.Errorf("cannot join IPC of a non running container: %s", containerID)
1261
+	}
1262
+	return c, nil
1263
+}
1264
+
1253 1265
 func (container *Container) getNetworkedContainer() (*Container, error) {
1254 1266
 	parts := strings.SplitN(string(container.hostConfig.NetworkMode), ":", 2)
1255 1267
 	switch parts[0] {
1256 1268
 	case "container":
1269
+		if len(parts) != 2 {
1270
+			return nil, fmt.Errorf("no container specified to join network")
1271
+		}
1257 1272
 		nc := container.daemon.Get(parts[1])
1258 1273
 		if nc == nil {
1259 1274
 			return nil, fmt.Errorf("no such container to join network: %s", parts[1])
... ...
@@ -1,10 +1,13 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"fmt"
5
+
4 6
 	"github.com/docker/docker/engine"
5 7
 	"github.com/docker/docker/graph"
6 8
 	"github.com/docker/docker/pkg/parsers"
7 9
 	"github.com/docker/docker/runconfig"
10
+	"github.com/docker/libcontainer/label"
8 11
 )
9 12
 
10 13
 func (daemon *Daemon) ContainerCreate(job *engine.Job) engine.Status {
... ...
@@ -80,6 +83,12 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
80 80
 	if warnings, err = daemon.mergeAndVerifyConfig(config, img); err != nil {
81 81
 		return nil, nil, err
82 82
 	}
83
+	if hostConfig != nil && config.SecurityOpt == nil {
84
+		config.SecurityOpt, err = daemon.GenerateSecurityOpt(hostConfig.IpcMode)
85
+		if err != nil {
86
+			return nil, nil, err
87
+		}
88
+	}
83 89
 	if container, err = daemon.newContainer(name, config, img); err != nil {
84 90
 		return nil, nil, err
85 91
 	}
... ...
@@ -99,3 +108,20 @@ func (daemon *Daemon) Create(config *runconfig.Config, hostConfig *runconfig.Hos
99 99
 	}
100 100
 	return container, warnings, nil
101 101
 }
102
+func (daemon *Daemon) GenerateSecurityOpt(ipcMode runconfig.IpcMode) ([]string, error) {
103
+	if ipcMode.IsHost() {
104
+		return label.DisableSecOpt(), nil
105
+	}
106
+	if ipcContainer := ipcMode.Container(); ipcContainer != "" {
107
+		c := daemon.Get(ipcContainer)
108
+		if c == nil {
109
+			return nil, fmt.Errorf("no such container to join IPC: %s", ipcContainer)
110
+		}
111
+		if !c.IsRunning() {
112
+			return nil, fmt.Errorf("cannot join IPC of a non running container: %s", ipcContainer)
113
+		}
114
+
115
+		return label.DupSecOpt(c.ProcessLabel), nil
116
+	}
117
+	return nil, nil
118
+}
... ...
@@ -71,6 +71,12 @@ type Network struct {
71 71
 	HostNetworking bool              `json:"host_networking"`
72 72
 }
73 73
 
74
+// IPC settings of the container
75
+type Ipc struct {
76
+	ContainerID string `json:"container_id"` // id of the container to join ipc.
77
+	HostIpc     bool   `json:"host_ipc"`
78
+}
79
+
74 80
 type NetworkInterface struct {
75 81
 	Gateway     string `json:"gateway"`
76 82
 	IPAddress   string `json:"ip"`
... ...
@@ -115,6 +121,7 @@ type Command struct {
115 115
 	WorkingDir         string            `json:"working_dir"`
116 116
 	ConfigPath         string            `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver
117 117
 	Network            *Network          `json:"network"`
118
+	Ipc                *Ipc              `json:"ipc"`
118 119
 	Resources          *Resources        `json:"resources"`
119 120
 	Mounts             []Mount           `json:"mounts"`
120 121
 	AllowedDevices     []*devices.Device `json:"allowed_devices"`
... ...
@@ -36,6 +36,10 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
36 36
 	container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
37 37
 	container.RestrictSys = true
38 38
 
39
+	if err := d.createIpc(container, c); err != nil {
40
+		return nil, err
41
+	}
42
+
39 43
 	if err := d.createNetwork(container, c); err != nil {
40 44
 		return nil, err
41 45
 	}
... ...
@@ -124,6 +128,28 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
124 124
 	return nil
125 125
 }
126 126
 
127
+func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error {
128
+	if c.Ipc.HostIpc {
129
+		container.Namespaces["NEWIPC"] = false
130
+		return nil
131
+	}
132
+
133
+	if c.Ipc.ContainerID != "" {
134
+		d.Lock()
135
+		active := d.activeContainers[c.Ipc.ContainerID]
136
+		d.Unlock()
137
+
138
+		if active == nil || active.cmd.Process == nil {
139
+			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
140
+		}
141
+		cmd := active.cmd
142
+
143
+		container.IpcNsPath = filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")
144
+	}
145
+
146
+	return nil
147
+}
148
+
127 149
 func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
128 150
 	container.Capabilities = capabilities.GetAllCapabilities()
129 151
 	container.Cgroups.AllowAllDevices = true
... ...
@@ -23,6 +23,7 @@ docker-run - Run a command in a new container
23 23
 [**--expose**[=*[]*]]
24 24
 [**-h**|**--hostname**[=*HOSTNAME*]]
25 25
 [**-i**|**--interactive**[=*false*]]
26
+[**--ipc**[=*[]*]]
26 27
 [**--security-opt**[=*[]*]]
27 28
 [**--link**[=*[]*]]
28 29
 [**--lxc-conf**[=*[]*]]
... ...
@@ -142,6 +143,12 @@ ENTRYPOINT.
142 142
 **-i**, **--interactive**=*true*|*false*
143 143
    When set to true, keep stdin open even if not attached. The default is false.
144 144
 
145
+**--ipc**=[]
146
+   Set the IPC mode for the container
147
+     **container**:<*name*|*id*>: reuses another container's IPC stack
148
+     **host**: use the host's IPC stack inside the container.  
149
+     Note: the host mode gives the container full access to local IPC and is therefore considered insecure.
150
+
145 151
 **--security-opt**=*secdriver*:*name*:*value*
146 152
     "label:user:USER"   : Set the label user for the container
147 153
     "label:role:ROLE"   : Set the label role for the container
... ...
@@ -183,10 +190,11 @@ and foreground Docker containers.
183 183
 
184 184
 **--net**="bridge"
185 185
    Set the Network mode for the container
186
-                               'bridge': creates a new network stack for the container on the docker bridge
187
-                               'none': no networking for this container
188
-                               'container:<name|id>': reuses another container network stack
189
-                               'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
186
+   **bridge**: creates a new network stack for the container on the docker bridge
187
+   **none**: no networking for this container
188
+   **container**:<*name*|*id*>: reuses another container's network stack
189
+   **host**: use the host network stack inside the container.  
190
+   Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.
190 191
 
191 192
 **--mac-address**=*macaddress*
192 193
    Set the MAC address for the container's Ethernet device:
... ...
@@ -310,6 +318,71 @@ you’d like to connect instead, as in:
310 310
 
311 311
     # docker run -a stdin -a stdout -i -t fedora /bin/bash
312 312
 
313
+## Sharing IPC between containers
314
+
315
+The examples below use the shm_server.c program, available here: http://www.cs.cf.ac.uk/Dave/C/node27.html
316
+
317
+Testing `--ipc=host` mode:
318
+
319
+The host shows a shared memory segment with 7 pids attached, which happens to be from httpd:
320
+
321
+```
322
+ $ sudo ipcs -m
323
+
324
+ ------ Shared Memory Segments --------
325
+ key        shmid      owner      perms      bytes      nattch     status      
326
+ 0x01128e25 0          root       600        1000       7                       
327
+```
328
+
329
+Now run a regular container, and it correctly does NOT see the shared memory segment from the host:
330
+
331
+```
332
+ $ sudo docker run -it shm ipcs -m
333
+
334
+ ------ Shared Memory Segments --------	
335
+ key        shmid      owner      perms      bytes      nattch     status      
336
+```
337
+
338
+Run a container with the new `--ipc=host` option, and it now sees the shared memory segment from the host httpd:
339
+
340
+```
341
+ $ sudo docker run -it --ipc=host shm ipcs -m
342
+
343
+ ------ Shared Memory Segments --------
344
+ key        shmid      owner      perms      bytes      nattch     status      
345
+ 0x01128e25 0          root       600        1000       7                   
346
+```
347
+Testing `--ipc=container:CONTAINERID` mode:
348
+
349
+Start a container with a program to create a shared memory segment:
350
+```
351
+ sudo docker run -it shm bash
352
+ $ sudo shm/shm_server &
353
+ $ sudo ipcs -m
354
+
355
+ ------ Shared Memory Segments --------
356
+ key        shmid      owner      perms      bytes      nattch     status      
357
+ 0x0000162e 0          root       666        27         1                       
358
+```
359
+Create a 2nd container; it correctly shows no shared memory segment from the 1st container:
360
+```
361
+ $ sudo docker run shm ipcs -m
362
+
363
+ ------ Shared Memory Segments --------
364
+ key        shmid      owner      perms      bytes      nattch     status      
365
+```
366
+
367
+Create a 3rd container using the new `--ipc=container:CONTAINERID` option; it now shows the shared memory segment from the first:
368
+
369
+```
370
+ $ sudo docker run -it --ipc=container:ed735b2264ac shm ipcs -m
371
+ $ sudo ipcs -m
372
+
373
+ ------ Shared Memory Segments --------
374
+ key        shmid      owner      perms      bytes      nattch     status      
375
+ 0x0000162e 0          root       666        27         1
376
+```
377
+
313 378
 ## Linking Containers
314 379
 
315 380
 The link feature allows multiple containers to communicate with each other. For
... ...
@@ -50,6 +50,7 @@ following options.
50 50
  - [Container Identification](#container-identification)
51 51
      - [Name (--name)](#name-name)
52 52
      - [PID Equivalent](#pid-equivalent)
53
+ - [IPC Settings](#ipc-settings)
53 54
  - [Network Settings](#network-settings)
54 55
  - [Clean Up (--rm)](#clean-up-rm)
55 56
  - [Runtime Constraints on CPU and Memory](#runtime-constraints-on-cpu-and-memory)
... ...
@@ -131,6 +132,22 @@ While not strictly a means of identifying a container, you can specify a version
131 131
 image you'd like to run the container with by adding `image[:tag]` to the command. For
132 132
 example, `docker run ubuntu:14.04`.
133 133
 
134
+## IPC Settings
135
+    --ipc=""  : Set the IPC mode for the container,
136
+                                 'container:<name|id>': reuses another container's IPC namespace
137
+                                 'host': use the host's IPC namespace inside the container
138
+By default, all containers have the IPC namespace enabled.
139
+
140
+The IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores, and message queues.
141
+
142
+Shared memory segments are used to accelerate inter-process communication at
143
+memory speed, rather than through pipes or through the network stack. Shared
144
+memory is commonly used by databases and custom-built (typically C/OpenMPI, 
145
+C++/using boost libraries) high performance applications for scientific
146
+computing and financial services industries. If these types of applications
147
+are broken into multiple containers, you might need to share the IPC mechanisms
148
+of the containers.
149
+
134 150
 ## Network settings
135 151
 
136 152
     --dns=[]         : Set custom dns servers for the container
... ...
@@ -2568,3 +2568,73 @@ func TestRunUnknownCommand(t *testing.T) {
2568 2568
 
2569 2569
 	logDone("run - Unknown Command")
2570 2570
 }
2571
+
2572
+func TestRunModeIpcHost(t *testing.T) {
2573
+	hostIpc, err := os.Readlink("/proc/1/ns/ipc")
2574
+	if err != nil {
2575
+		t.Fatal(err)
2576
+	}
2577
+
2578
+	cmd := exec.Command(dockerBinary, "run", "--ipc=host", "busybox", "readlink", "/proc/self/ns/ipc")
2579
+	out2, _, err := runCommandWithOutput(cmd)
2580
+	if err != nil {
2581
+		t.Fatal(err, out2)
2582
+	}
2583
+
2584
+	out2 = strings.Trim(out2, "\n")
2585
+	if hostIpc != out2 {
2586
+		t.Fatalf("IPC different with --ipc=host %s != %s\n", hostIpc, out2)
2587
+	}
2588
+
2589
+	cmd = exec.Command(dockerBinary, "run", "busybox", "readlink", "/proc/self/ns/ipc")
2590
+	out2, _, err = runCommandWithOutput(cmd)
2591
+	if err != nil {
2592
+		t.Fatal(err, out2)
2593
+	}
2594
+
2595
+	out2 = strings.Trim(out2, "\n")
2596
+	if hostIpc == out2 {
2597
+		t.Fatalf("IPC should be different without --ipc=host %s != %s\n", hostIpc, out2)
2598
+	}
2599
+	deleteAllContainers()
2600
+
2601
+	logDone("run - hostname and several network modes")
2602
+}
2603
+
2604
+func TestRunModeIpcContainer(t *testing.T) {
2605
+	cmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top")
2606
+	out, _, err := runCommandWithOutput(cmd)
2607
+	if err != nil {
2608
+		t.Fatal(err, out)
2609
+	}
2610
+	id := strings.TrimSpace(out)
2611
+	state, err := inspectField(id, "State.Running")
2612
+	if err != nil {
2613
+		t.Fatal(err)
2614
+	}
2615
+	if state != "true" {
2616
+		t.Fatal("Container state is 'not running'")
2617
+	}
2618
+	pid1, err := inspectField(id, "State.Pid")
2619
+	if err != nil {
2620
+		t.Fatal(err)
2621
+	}
2622
+
2623
+	parentContainerIpc, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/ipc", pid1))
2624
+	if err != nil {
2625
+		t.Fatal(err)
2626
+	}
2627
+	cmd = exec.Command(dockerBinary, "run", fmt.Sprintf("--ipc=container:%s", id), "busybox", "readlink", "/proc/self/ns/ipc")
2628
+	out2, _, err := runCommandWithOutput(cmd)
2629
+	if err != nil {
2630
+		t.Fatal(err, out2)
2631
+	}
2632
+
2633
+	out2 = strings.Trim(out2, "\n")
2634
+	if parentContainerIpc != out2 {
2635
+		t.Fatalf("IPC different with --ipc=container:%s %s != %s\n", id, parentContainerIpc, out2)
2636
+	}
2637
+	deleteAllContainers()
2638
+
2639
+	logDone("run - hostname and several network modes")
2640
+}
... ...
@@ -28,6 +28,44 @@ func (n NetworkMode) IsNone() bool {
28 28
 	return n == "none"
29 29
 }
30 30
 
31
+type IpcMode string
32
+
33
+// IsPrivate indicates whether the container uses its own private IPC stack
34
+func (n IpcMode) IsPrivate() bool {
35
+	return !(n.IsHost() || n.IsContainer())
36
+}
37
+
38
+func (n IpcMode) IsHost() bool {
39
+	return n == "host"
40
+}
41
+
42
+func (n IpcMode) IsContainer() bool {
43
+	parts := strings.SplitN(string(n), ":", 2)
44
+	return len(parts) > 1 && parts[0] == "container"
45
+}
46
+
47
+func (n IpcMode) Valid() bool {
48
+	parts := strings.Split(string(n), ":")
49
+	switch mode := parts[0]; mode {
50
+	case "", "host":
51
+	case "container":
52
+		if len(parts) != 2 || parts[1] == "" {
53
+			return false
54
+		}
55
+	default:
56
+		return false
57
+	}
58
+	return true
59
+}
60
+
61
+func (n IpcMode) Container() string {
62
+	parts := strings.SplitN(string(n), ":", 2)
63
+	if len(parts) > 1 {
64
+		return parts[1]
65
+	}
66
+	return ""
67
+}
68
+
31 69
 type DeviceMapping struct {
32 70
 	PathOnHost        string
33 71
 	PathInContainer   string
... ...
@@ -53,6 +91,7 @@ type HostConfig struct {
53 53
 	VolumesFrom     []string
54 54
 	Devices         []DeviceMapping
55 55
 	NetworkMode     NetworkMode
56
+	IpcMode         IpcMode
56 57
 	CapAdd          []string
57 58
 	CapDrop         []string
58 59
 	RestartPolicy   RestartPolicy
... ...
@@ -84,6 +123,7 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
84 84
 		Privileged:      job.GetenvBool("Privileged"),
85 85
 		PublishAllPorts: job.GetenvBool("PublishAllPorts"),
86 86
 		NetworkMode:     NetworkMode(job.Getenv("NetworkMode")),
87
+		IpcMode:         IpcMode(job.Getenv("IpcMode")),
87 88
 	}
88 89
 
89 90
 	job.GetenvJson("LxcConf", &hostConfig.LxcConf)
... ...
@@ -60,6 +60,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
60 60
 		flCpuset          = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
61 61
 		flNetMode         = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container.  Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
62 62
 		flMacAddress      = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
63
+		flIpcMode         = cmd.String([]string{"-ipc"}, "", "Default is to create a private IPC namespace (POSIX SysV IPC) for the container\n'container:<name|id>': reuses another container shared memory, semaphores and message queues\n'host': use the host shared memory,semaphores and message queues inside the container.  Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.")
63 64
 		flRestartPolicy   = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure[:max-retry], always)")
64 65
 	)
65 66
 
... ...
@@ -241,6 +242,11 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
241 241
 	// parse the '-e' and '--env' after, to allow override
242 242
 	envVariables = append(envVariables, flEnv.GetAll()...)
243 243
 
244
+	ipcMode := IpcMode(*flIpcMode)
245
+	if !ipcMode.Valid() {
246
+		return nil, nil, cmd, fmt.Errorf("--ipc: invalid IPC mode: %v", err)
247
+	}
248
+
244 249
 	netMode, err := parseNetMode(*flNetMode)
245 250
 	if err != nil {
246 251
 		return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err)
... ...
@@ -289,6 +295,7 @@ func Parse(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config,
289 289
 		ExtraHosts:      flExtraHosts.GetAll(),
290 290
 		VolumesFrom:     flVolumesFrom.GetAll(),
291 291
 		NetworkMode:     netMode,
292
+		IpcMode:         ipcMode,
292 293
 		Devices:         deviceMappings,
293 294
 		CapAdd:          flCapAdd.GetAll(),
294 295
 		CapDrop:         flCapDrop.GetAll(),