This moves the bind mounts like /.dockerinit, /etc/hostname, volumes,
etc. into the container namespace by setting them up using lxc.
This is useful to avoid littering the global namespace with a lot of
mounts that are internal to each container and are not generally
needed on the outside. In particular, it seems that having a lot of
mounts is problematic with respect to scaling to a lot of containers on systems
where the root filesystem is mounted --rshared.
Note that the "private" option is only supported by the native driver, as
lxc doesn't support setting this. This is not a huge problem, but it does
mean that some mounts are unnecessarily shared inside the container if you're
using the lxc driver.
Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)
| ... | ... |
@@ -97,6 +97,13 @@ type Resources struct {
|
| 97 | 97 |
CpuShares int64 `json:"cpu_shares"` |
| 98 | 98 |
} |
| 99 | 99 |
|
| 100 |
+type Mount struct {
|
|
| 101 |
+ Source string `json:"source"` |
|
| 102 |
+ Destination string `json:"destination"` |
|
| 103 |
+ Writable bool `json:"writable"` |
|
| 104 |
+ Private bool `json:"private"` |
|
| 105 |
+} |
|
| 106 |
+ |
|
| 100 | 107 |
// Command wraps an os/exec.Cmd to add more metadata |
| 101 | 108 |
type Command struct {
|
| 102 | 109 |
exec.Cmd `json:"-"` |
| ... | ... |
@@ -114,6 +121,7 @@ type Command struct {
|
| 114 | 114 |
Network *Network `json:"network"` // if network is nil then networking is disabled |
| 115 | 115 |
Config []string `json:"config"` // generic values that specific drivers can consume |
| 116 | 116 |
Resources *Resources `json:"resources"` |
| 117 |
+ Mounts []Mount `json:"mounts"` |
|
| 117 | 118 |
|
| 118 | 119 |
Terminal Terminal `json:"-"` // standard or tty terminal |
| 119 | 120 |
Console string `json:"-"` // dev/console path |
| ... | ... |
@@ -9,7 +9,7 @@ import ( |
| 9 | 9 |
"path" |
| 10 | 10 |
) |
| 11 | 11 |
|
| 12 |
-func NewDriver(name, root string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
|
|
| 12 |
+func NewDriver(name, root, initPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
|
|
| 13 | 13 |
switch name {
|
| 14 | 14 |
case "lxc": |
| 15 | 15 |
// we want to give the lxc driver the full docker root because it needs |
| ... | ... |
@@ -17,7 +17,7 @@ func NewDriver(name, root string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, |
| 17 | 17 |
// to be backwards compatible |
| 18 | 18 |
return lxc.NewDriver(root, sysInfo.AppArmor) |
| 19 | 19 |
case "native": |
| 20 |
- return native.NewDriver(path.Join(root, "execdriver", "native")) |
|
| 20 |
+ return native.NewDriver(path.Join(root, "execdriver", "native"), initPath) |
|
| 21 | 21 |
} |
| 22 | 22 |
return nil, fmt.Errorf("unknown exec driver %s", name)
|
| 23 | 23 |
} |
| ... | ... |
@@ -88,6 +88,14 @@ lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bi
|
| 88 | 88 |
lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts newinstance,ptmxmode=0666,nosuid,noexec 0 0
|
| 89 | 89 |
lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs size=65536k,nosuid,nodev,noexec 0 0
|
| 90 | 90 |
|
| 91 |
+{{range $value := .Mounts}}
|
|
| 92 |
+{{if $value.Writable}}
|
|
| 93 |
+lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,rw 0 0
|
|
| 94 |
+{{else}}
|
|
| 95 |
+lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabSpaces $value.Destination}} none bind,ro 0 0
|
|
| 96 |
+{{end}}
|
|
| 97 |
+{{end}}
|
|
| 98 |
+ |
|
| 91 | 99 |
{{if .Privileged}}
|
| 92 | 100 |
{{if .AppArmor}}
|
| 93 | 101 |
lxc.aa_profile = unconfined |
| ... | ... |
@@ -48,6 +48,10 @@ func createContainer(c *execdriver.Command) *libcontainer.Container {
|
| 48 | 48 |
// check to see if we are running in ramdisk to disable pivot root |
| 49 | 49 |
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
| 50 | 50 |
|
| 51 |
+ for _, m := range c.Mounts {
|
|
| 52 |
+ container.Mounts = append(container.Mounts, libcontainer.Mount{m.Source, m.Destination, m.Writable, m.Private})
|
|
| 53 |
+ } |
|
| 54 |
+ |
|
| 51 | 55 |
return container |
| 52 | 56 |
} |
| 53 | 57 |
|
| ... | ... |
@@ -55,10 +55,11 @@ func init() {
|
| 55 | 55 |
} |
| 56 | 56 |
|
| 57 | 57 |
type driver struct {
|
| 58 |
- root string |
|
| 58 |
+ root string |
|
| 59 |
+ initPath string |
|
| 59 | 60 |
} |
| 60 | 61 |
|
| 61 |
-func NewDriver(root string) (*driver, error) {
|
|
| 62 |
+func NewDriver(root, initPath string) (*driver, error) {
|
|
| 62 | 63 |
if err := os.MkdirAll(root, 0700); err != nil {
|
| 63 | 64 |
return nil, err |
| 64 | 65 |
} |
| ... | ... |
@@ -66,7 +67,8 @@ func NewDriver(root string) (*driver, error) {
|
| 66 | 66 |
return nil, err |
| 67 | 67 |
} |
| 68 | 68 |
return &driver{
|
| 69 |
- root: root, |
|
| 69 |
+ root: root, |
|
| 70 |
+ initPath: initPath, |
|
| 70 | 71 |
}, nil |
| 71 | 72 |
} |
| 72 | 73 |
|
| ... | ... |
@@ -210,7 +212,7 @@ func (d *dockerCommandFactory) Create(container *libcontainer.Container, console |
| 210 | 210 |
// we need to join the rootfs because nsinit will setup the rootfs and chroot |
| 211 | 211 |
initPath := filepath.Join(d.c.Rootfs, d.c.InitPath) |
| 212 | 212 |
|
| 213 |
- d.c.Path = initPath |
|
| 213 |
+ d.c.Path = d.driver.initPath |
|
| 214 | 214 |
d.c.Args = append([]string{
|
| 215 | 215 |
initPath, |
| 216 | 216 |
"-driver", DriverName, |
| ... | ... |
@@ -23,6 +23,7 @@ type Container struct {
|
| 23 | 23 |
Networks []*Network `json:"networks,omitempty"` // nil for host's network stack |
| 24 | 24 |
Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups |
| 25 | 25 |
Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) |
| 26 |
+ Mounts []Mount `json:"mounts,omitempty"` |
|
| 26 | 27 |
} |
| 27 | 28 |
|
| 28 | 29 |
// Network defines configuration for a container's networking stack |
| ... | ... |
@@ -36,3 +37,12 @@ type Network struct {
|
| 36 | 36 |
Gateway string `json:"gateway,omitempty"` |
| 37 | 37 |
Mtu int `json:"mtu,omitempty"` |
| 38 | 38 |
} |
| 39 |
+ |
|
| 40 |
+// Bind mounts from the host system to the container |
|
| 41 |
+// |
|
| 42 |
+type Mount struct {
|
|
| 43 |
+ Source string `json:"source"` // Source path, in the host namespace |
|
| 44 |
+ Destination string `json:"destination"` // Destination path, in the container |
|
| 45 |
+ Writable bool `json:"writable"` |
|
| 46 |
+ Private bool `json:"private"` |
|
| 47 |
+} |
| ... | ... |
@@ -51,7 +51,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol |
| 51 | 51 |
if err := system.ParentDeathSignal(); err != nil {
|
| 52 | 52 |
return fmt.Errorf("parent death signal %s", err)
|
| 53 | 53 |
} |
| 54 |
- if err := setupNewMountNamespace(rootfs, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
|
|
| 54 |
+ if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot); err != nil {
|
|
| 55 | 55 |
return fmt.Errorf("setup mount namespace %s", err)
|
| 56 | 56 |
} |
| 57 | 57 |
if err := setupNetwork(container, context); err != nil {
|
| ... | ... |
@@ -4,6 +4,7 @@ package nsinit |
| 4 | 4 |
|
| 5 | 5 |
import ( |
| 6 | 6 |
"fmt" |
| 7 |
+ "github.com/dotcloud/docker/pkg/libcontainer" |
|
| 7 | 8 |
"github.com/dotcloud/docker/pkg/system" |
| 8 | 9 |
"io/ioutil" |
| 9 | 10 |
"os" |
| ... | ... |
@@ -19,7 +20,7 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD |
| 19 | 19 |
// |
| 20 | 20 |
// There is no need to unmount the new mounts because as soon as the mount namespace |
| 21 | 21 |
// is no longer in use, the mounts will be removed automatically |
| 22 |
-func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool) error {
|
|
| 22 |
+func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool) error {
|
|
| 23 | 23 |
flag := syscall.MS_PRIVATE |
| 24 | 24 |
if noPivotRoot {
|
| 25 | 25 |
flag = syscall.MS_SLAVE |
| ... | ... |
@@ -38,6 +39,23 @@ func setupNewMountNamespace(rootfs, console string, readonly, noPivotRoot bool) |
| 38 | 38 |
if err := mountSystem(rootfs); err != nil {
|
| 39 | 39 |
return fmt.Errorf("mount system %s", err)
|
| 40 | 40 |
} |
| 41 |
+ |
|
| 42 |
+ for _, m := range bindMounts {
|
|
| 43 |
+ flags := syscall.MS_BIND | syscall.MS_REC |
|
| 44 |
+ if !m.Writable {
|
|
| 45 |
+ flags = flags | syscall.MS_RDONLY |
|
| 46 |
+ } |
|
| 47 |
+ dest := filepath.Join(rootfs, m.Destination) |
|
| 48 |
+ if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
|
| 49 |
+ return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
|
| 50 |
+ } |
|
| 51 |
+ if m.Private {
|
|
| 52 |
+ if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
|
| 53 |
+ return fmt.Errorf("mounting %s private %s", dest, err)
|
|
| 54 |
+ } |
|
| 55 |
+ } |
|
| 56 |
+ } |
|
| 57 |
+ |
|
| 41 | 58 |
if err := copyDevNodes(rootfs); err != nil {
|
| 42 | 59 |
return fmt.Errorf("copy dev nodes %s", err)
|
| 43 | 60 |
} |
| ... | ... |
@@ -529,13 +529,13 @@ func (container *Container) Start() (err error) {
|
| 529 | 529 |
return err |
| 530 | 530 |
} |
| 531 | 531 |
|
| 532 |
+ populateCommand(container) |
|
| 533 |
+ container.command.Env = env |
|
| 534 |
+ |
|
| 532 | 535 |
if err := mountVolumesForContainer(container, envPath); err != nil {
|
| 533 | 536 |
return err |
| 534 | 537 |
} |
| 535 | 538 |
|
| 536 |
- populateCommand(container) |
|
| 537 |
- container.command.Env = env |
|
| 538 |
- |
|
| 539 | 539 |
// Setup logging of stdout and stderr to disk |
| 540 | 540 |
if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
|
| 541 | 541 |
return err |
| ... | ... |
@@ -733,7 +733,7 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (* |
| 733 | 733 |
} |
| 734 | 734 |
|
| 735 | 735 |
sysInfo := sysinfo.New(false) |
| 736 |
- ed, err := execdrivers.NewDriver(config.ExecDriver, config.Root, sysInfo) |
|
| 736 |
+ ed, err := execdrivers.NewDriver(config.ExecDriver, config.Root, sysInitPath, sysInfo) |
|
| 737 | 737 |
if err != nil {
|
| 738 | 738 |
return nil, err |
| 739 | 739 |
} |
| ... | ... |
@@ -3,6 +3,7 @@ package runtime |
| 3 | 3 |
import ( |
| 4 | 4 |
"fmt" |
| 5 | 5 |
"github.com/dotcloud/docker/archive" |
| 6 |
+ "github.com/dotcloud/docker/execdriver" |
|
| 6 | 7 |
"github.com/dotcloud/docker/pkg/mount" |
| 7 | 8 |
"github.com/dotcloud/docker/utils" |
| 8 | 9 |
"io/ioutil" |
| ... | ... |
@@ -55,70 +56,33 @@ func mountVolumesForContainer(container *Container, envPath string) error {
|
| 55 | 55 |
return err |
| 56 | 56 |
} |
| 57 | 57 |
|
| 58 |
- // Mount docker specific files into the containers root fs |
|
| 59 |
- if err := mount.Mount(runtime.sysInitPath, filepath.Join(root, "/.dockerinit"), "none", "bind,ro"); err != nil {
|
|
| 60 |
- return err |
|
| 61 |
- } |
|
| 62 |
- if err := mount.Mount(envPath, filepath.Join(root, "/.dockerenv"), "none", "bind,ro"); err != nil {
|
|
| 63 |
- return err |
|
| 64 |
- } |
|
| 65 |
- if err := mount.Mount(container.ResolvConfPath, filepath.Join(root, "/etc/resolv.conf"), "none", "bind,ro"); err != nil {
|
|
| 66 |
- return err |
|
| 58 |
+ mounts := []execdriver.Mount{
|
|
| 59 |
+ {runtime.sysInitPath, "/.dockerinit", false, true},
|
|
| 60 |
+ {envPath, "/.dockerenv", false, true},
|
|
| 61 |
+ {container.ResolvConfPath, "/etc/resolv.conf", false, true},
|
|
| 67 | 62 |
} |
| 68 | 63 |
|
| 69 | 64 |
if container.HostnamePath != "" && container.HostsPath != "" {
|
| 70 |
- if err := mount.Mount(container.HostnamePath, filepath.Join(root, "/etc/hostname"), "none", "bind,ro"); err != nil {
|
|
| 71 |
- return err |
|
| 72 |
- } |
|
| 73 |
- if err := mount.Mount(container.HostsPath, filepath.Join(root, "/etc/hosts"), "none", "bind,ro"); err != nil {
|
|
| 74 |
- return err |
|
| 75 |
- } |
|
| 65 |
+ mounts = append(mounts, execdriver.Mount{container.HostnamePath, "/etc/hostname", false, true})
|
|
| 66 |
+ mounts = append(mounts, execdriver.Mount{container.HostsPath, "/etc/hosts", false, true})
|
|
| 76 | 67 |
} |
| 77 | 68 |
|
| 78 | 69 |
// Mount user specified volumes |
| 70 |
+ // Note, these are not private because you may want propagation of (un)mounts from host |
|
| 71 |
+ // volumes. For instance if you use -v /usr:/usr and the host later mounts /usr/share you |
|
| 72 |
+ // want this new mount in the container |
|
| 79 | 73 |
for r, v := range container.Volumes {
|
| 80 |
- mountAs := "ro" |
|
| 81 |
- if container.VolumesRW[r] {
|
|
| 82 |
- mountAs = "rw" |
|
| 83 |
- } |
|
| 74 |
+ mounts = append(mounts, execdriver.Mount{v, r, container.VolumesRW[r], false})
|
|
| 75 |
+ } |
|
| 84 | 76 |
|
| 85 |
- r = filepath.Join(root, r) |
|
| 86 |
- if p, err := utils.FollowSymlinkInScope(r, root); err != nil {
|
|
| 87 |
- return err |
|
| 88 |
- } else {
|
|
| 89 |
- r = p |
|
| 90 |
- } |
|
| 77 |
+ container.command.Mounts = mounts |
|
| 91 | 78 |
|
| 92 |
- if err := mount.Mount(v, r, "none", fmt.Sprintf("bind,%s", mountAs)); err != nil {
|
|
| 93 |
- return err |
|
| 94 |
- } |
|
| 95 |
- } |
|
| 96 | 79 |
return nil |
| 97 | 80 |
} |
| 98 | 81 |
|
| 99 | 82 |
func unmountVolumesForContainer(container *Container) {
|
| 100 |
- var ( |
|
| 101 |
- root = container.RootfsPath() |
|
| 102 |
- mounts = []string{
|
|
| 103 |
- root, |
|
| 104 |
- filepath.Join(root, "/.dockerinit"), |
|
| 105 |
- filepath.Join(root, "/.dockerenv"), |
|
| 106 |
- filepath.Join(root, "/etc/resolv.conf"), |
|
| 107 |
- } |
|
| 108 |
- ) |
|
| 109 |
- |
|
| 110 |
- if container.HostnamePath != "" && container.HostsPath != "" {
|
|
| 111 |
- mounts = append(mounts, filepath.Join(root, "/etc/hostname"), filepath.Join(root, "/etc/hosts")) |
|
| 112 |
- } |
|
| 113 |
- |
|
| 114 |
- for r := range container.Volumes {
|
|
| 115 |
- mounts = append(mounts, filepath.Join(root, r)) |
|
| 116 |
- } |
|
| 117 |
- |
|
| 118 |
- for i := len(mounts) - 1; i >= 0; i-- {
|
|
| 119 |
- if lastError := mount.Unmount(mounts[i]); lastError != nil {
|
|
| 120 |
- log.Printf("Failed to umount %v: %v", mounts[i], lastError)
|
|
| 121 |
- } |
|
| 83 |
+ if err := mount.Unmount(container.RootfsPath()); err != nil {
|
|
| 84 |
+ log.Printf("Failed to umount container: %v", err)
|
|
| 122 | 85 |
} |
| 123 | 86 |
} |
| 124 | 87 |
|