This also cleans up some of the leftover restriction-path code from
before.
Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)
| ... | ... |
@@ -2,12 +2,6 @@ package lxc |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"fmt" |
| 5 |
- "github.com/dotcloud/docker/daemon/execdriver" |
|
| 6 |
- "github.com/dotcloud/docker/pkg/cgroups" |
|
| 7 |
- "github.com/dotcloud/docker/pkg/label" |
|
| 8 |
- "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" |
|
| 9 |
- "github.com/dotcloud/docker/pkg/system" |
|
| 10 |
- "github.com/dotcloud/docker/utils" |
|
| 11 | 5 |
"io/ioutil" |
| 12 | 6 |
"log" |
| 13 | 7 |
"os" |
| ... | ... |
@@ -18,6 +12,13 @@ import ( |
| 18 | 18 |
"strings" |
| 19 | 19 |
"syscall" |
| 20 | 20 |
"time" |
| 21 |
+ |
|
| 22 |
+ "github.com/dotcloud/docker/daemon/execdriver" |
|
| 23 |
+ "github.com/dotcloud/docker/pkg/cgroups" |
|
| 24 |
+ "github.com/dotcloud/docker/pkg/label" |
|
| 25 |
+ "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" |
|
| 26 |
+ "github.com/dotcloud/docker/pkg/system" |
|
| 27 |
+ "github.com/dotcloud/docker/utils" |
|
| 21 | 28 |
) |
| 22 | 29 |
|
| 23 | 30 |
const DriverName = "lxc" |
| ... | ... |
@@ -27,31 +28,26 @@ func init() {
|
| 27 | 27 |
if err := setupEnv(args); err != nil {
|
| 28 | 28 |
return err |
| 29 | 29 |
} |
| 30 |
- |
|
| 31 | 30 |
if err := setupHostname(args); err != nil {
|
| 32 | 31 |
return err |
| 33 | 32 |
} |
| 34 |
- |
|
| 35 | 33 |
if err := setupNetworking(args); err != nil {
|
| 36 | 34 |
return err |
| 37 | 35 |
} |
| 38 |
- |
|
| 39 |
- if err := restrict.Restrict("/", "/empty"); err != nil {
|
|
| 40 |
- return err |
|
| 36 |
+ if !args.Privileged {
|
|
| 37 |
+ if err := restrict.Restrict(); err != nil {
|
|
| 38 |
+ return err |
|
| 39 |
+ } |
|
| 41 | 40 |
} |
| 42 |
- |
|
| 43 | 41 |
if err := setupCapabilities(args); err != nil {
|
| 44 | 42 |
return err |
| 45 | 43 |
} |
| 46 |
- |
|
| 47 | 44 |
if err := setupWorkingDirectory(args); err != nil {
|
| 48 | 45 |
return err |
| 49 | 46 |
} |
| 50 |
- |
|
| 51 | 47 |
if err := system.CloseFdsFrom(3); err != nil {
|
| 52 | 48 |
return err |
| 53 | 49 |
} |
| 54 |
- |
|
| 55 | 50 |
if err := changeUser(args); err != nil {
|
| 56 | 51 |
return err |
| 57 | 52 |
} |
| ... | ... |
@@ -69,10 +65,9 @@ func init() {
|
| 69 | 69 |
} |
| 70 | 70 |
|
| 71 | 71 |
type driver struct {
|
| 72 |
- root string // root path for the driver to use |
|
| 73 |
- apparmor bool |
|
| 74 |
- sharedRoot bool |
|
| 75 |
- restrictionPath string |
|
| 72 |
+ root string // root path for the driver to use |
|
| 73 |
+ apparmor bool |
|
| 74 |
+ sharedRoot bool |
|
| 76 | 75 |
} |
| 77 | 76 |
|
| 78 | 77 |
func NewDriver(root string, apparmor bool) (*driver, error) {
|
| ... | ... |
@@ -80,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) {
|
| 80 | 80 |
if err := linkLxcStart(root); err != nil {
|
| 81 | 81 |
return nil, err |
| 82 | 82 |
} |
| 83 |
- restrictionPath := filepath.Join(root, "empty") |
|
| 84 |
- if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
|
| 85 |
- return nil, err |
|
| 86 |
- } |
|
| 87 | 83 |
return &driver{
|
| 88 |
- apparmor: apparmor, |
|
| 89 |
- root: root, |
|
| 90 |
- sharedRoot: rootIsShared(), |
|
| 91 |
- restrictionPath: restrictionPath, |
|
| 84 |
+ apparmor: apparmor, |
|
| 85 |
+ root: root, |
|
| 86 |
+ sharedRoot: rootIsShared(), |
|
| 92 | 87 |
}, nil |
| 93 | 88 |
} |
| 94 | 89 |
|
| ... | ... |
@@ -419,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) {
|
| 419 | 419 |
|
| 420 | 420 |
if err := LxcTemplateCompiled.Execute(fo, struct {
|
| 421 | 421 |
*execdriver.Command |
| 422 |
- AppArmor bool |
|
| 423 |
- ProcessLabel string |
|
| 424 |
- MountLabel string |
|
| 425 |
- RestrictionSource string |
|
| 422 |
+ AppArmor bool |
|
| 423 |
+ ProcessLabel string |
|
| 424 |
+ MountLabel string |
|
| 426 | 425 |
}{
|
| 427 |
- Command: c, |
|
| 428 |
- AppArmor: d.apparmor, |
|
| 429 |
- ProcessLabel: process, |
|
| 430 |
- MountLabel: mount, |
|
| 431 |
- RestrictionSource: d.restrictionPath, |
|
| 426 |
+ Command: c, |
|
| 427 |
+ AppArmor: d.apparmor, |
|
| 428 |
+ ProcessLabel: process, |
|
| 429 |
+ MountLabel: mount, |
|
| 432 | 430 |
}); err != nil {
|
| 433 | 431 |
return "", err |
| 434 | 432 |
} |
| ... | ... |
@@ -1,10 +1,11 @@ |
| 1 | 1 |
package lxc |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 |
- "github.com/dotcloud/docker/daemon/execdriver" |
|
| 5 |
- "github.com/dotcloud/docker/pkg/label" |
|
| 6 | 4 |
"strings" |
| 7 | 5 |
"text/template" |
| 6 |
+ |
|
| 7 |
+ "github.com/dotcloud/docker/daemon/execdriver" |
|
| 8 |
+ "github.com/dotcloud/docker/pkg/label" |
|
| 8 | 9 |
) |
| 9 | 10 |
|
| 10 | 11 |
const LxcTemplate = ` |
| ... | ... |
@@ -110,13 +111,6 @@ lxc.aa_profile = unconfined |
| 110 | 110 |
{{else}}
|
| 111 | 111 |
# Let AppArmor normal confinement take place (i.e., not unconfined) |
| 112 | 112 |
{{end}}
|
| 113 |
-{{else}}
|
|
| 114 |
-# Restrict access to some stuff in /proc. Note that /proc is already mounted |
|
| 115 |
-# read-only, so we don't need to bother about things that are just dangerous |
|
| 116 |
-# to write to (like sysrq-trigger). Also, recent kernels won't let a container |
|
| 117 |
-# peek into /proc/kcore, but let's cater for people who might run Docker on |
|
| 118 |
-# older kernels. Just in case. |
|
| 119 |
-lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0
|
|
| 120 | 113 |
{{end}}
|
| 121 | 114 |
|
| 122 | 115 |
# limits |
| ... | ... |
@@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container |
| 24 | 24 |
container.Cgroups.Name = c.ID |
| 25 | 25 |
// check to see if we are running in ramdisk to disable pivot root |
| 26 | 26 |
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
| 27 |
- container.Context["restriction_path"] = d.restrictionPath |
|
| 27 |
+ container.Context["restrictions"] = "true" |
|
| 28 | 28 |
|
| 29 | 29 |
if err := d.createNetwork(container, c); err != nil {
|
| 30 | 30 |
return nil, err |
| ... | ... |
@@ -84,7 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
|
| 84 | 84 |
} |
| 85 | 85 |
container.Cgroups.DeviceAccess = true |
| 86 | 86 |
|
| 87 |
- delete(container.Context, "restriction_path") |
|
| 87 |
+ delete(container.Context, "restrictions") |
|
| 88 | 88 |
|
| 89 | 89 |
if apparmor.IsEnabled() {
|
| 90 | 90 |
container.Context["apparmor_profile"] = "unconfined" |
| ... | ... |
@@ -57,7 +57,6 @@ type driver struct {
|
| 57 | 57 |
root string |
| 58 | 58 |
initPath string |
| 59 | 59 |
activeContainers map[string]*exec.Cmd |
| 60 |
- restrictionPath string |
|
| 61 | 60 |
} |
| 62 | 61 |
|
| 63 | 62 |
func NewDriver(root, initPath string) (*driver, error) {
|
| ... | ... |
@@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) {
|
| 68 | 68 |
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
|
| 69 | 69 |
return nil, err |
| 70 | 70 |
} |
| 71 |
- restrictionPath := filepath.Join(root, "empty") |
|
| 72 |
- if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
|
| 73 |
- return nil, err |
|
| 74 |
- } |
|
| 75 |
- |
|
| 76 | 71 |
return &driver{
|
| 77 | 72 |
root: root, |
| 78 |
- restrictionPath: restrictionPath, |
|
| 79 | 73 |
initPath: initPath, |
| 80 | 74 |
activeContainers: make(map[string]*exec.Cmd), |
| 81 | 75 |
}, nil |
| ... | ... |
@@ -123,15 +123,12 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo |
| 123 | 123 |
systemMounts := []mount{
|
| 124 | 124 |
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
|
| 125 | 125 |
{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags},
|
| 126 |
+ {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
|
| 127 |
+ {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
|
| 126 | 128 |
} |
| 127 | 129 |
|
| 128 | 130 |
if len(mounts.OfType("devtmpfs")) == 1 {
|
| 129 | 131 |
systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)})
|
| 130 | 132 |
} |
| 131 |
- systemMounts = append(systemMounts, |
|
| 132 |
- mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
|
|
| 133 |
- mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
|
|
| 134 |
- ) |
|
| 135 |
- |
|
| 136 | 133 |
return systemMounts |
| 137 | 134 |
} |
| ... | ... |
@@ -72,8 +72,8 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, |
| 72 | 72 |
|
| 73 | 73 |
runtime.LockOSThread() |
| 74 | 74 |
|
| 75 |
- if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
|
|
| 76 |
- if err := restrict.Restrict("/", restrictionPath); err != nil {
|
|
| 75 |
+ if container.Context["restrictions"] != "" {
|
|
| 76 |
+ if err := restrict.Restrict(); err != nil {
|
|
| 77 | 77 |
return err |
| 78 | 78 |
} |
| 79 | 79 |
} |
| ... | ... |
@@ -11,67 +11,42 @@ import ( |
| 11 | 11 |
"github.com/dotcloud/docker/pkg/system" |
| 12 | 12 |
) |
| 13 | 13 |
|
| 14 |
-// "restrictions" are container paths (files, directories, whatever) that have to be masked. |
|
| 15 |
-// maskPath is a "safe" path to be mounted over maskedPath. It can take two special values: |
|
| 16 |
-// - if it is "", then nothing is mounted; |
|
| 17 |
-// - if it is "EMPTY", then an empty directory is mounted instead. |
|
| 18 |
-// If remountRO is true then the maskedPath is remounted read-only (regardless of whether a maskPath was used). |
|
| 19 |
-type restriction struct {
|
|
| 20 |
- maskedPath string |
|
| 21 |
- maskPath string |
|
| 22 |
- remountRO bool |
|
| 23 |
-} |
|
| 24 |
- |
|
| 25 |
-var restrictions = []restriction{
|
|
| 26 |
- {"/proc", "", true},
|
|
| 27 |
- {"/sys", "", true},
|
|
| 28 |
- {"/proc/kcore", "/dev/null", false},
|
|
| 29 |
-} |
|
| 30 |
- |
|
| 31 | 14 |
// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts). |
| 32 | 15 |
// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes). |
| 33 |
-// "empty" should be the path to an empty directory. |
|
| 34 |
-func Restrict(rootfs, empty string) error {
|
|
| 35 |
- for _, restriction := range restrictions {
|
|
| 36 |
- dest := filepath.Join(rootfs, restriction.maskedPath) |
|
| 37 |
- if restriction.maskPath != "" {
|
|
| 38 |
- var source string |
|
| 39 |
- if restriction.maskPath == "EMPTY" {
|
|
| 40 |
- source = empty |
|
| 41 |
- } else {
|
|
| 42 |
- source = filepath.Join(rootfs, restriction.maskPath) |
|
| 43 |
- } |
|
| 44 |
- if err := system.Mount(source, dest, "", syscall.MS_BIND, ""); err != nil {
|
|
| 45 |
- return fmt.Errorf("unable to bind-mount %s over %s: %s", source, dest, err)
|
|
| 46 |
- } |
|
| 47 |
- } |
|
| 48 |
- if restriction.remountRO {
|
|
| 49 |
- if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
|
|
| 50 |
- return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
|
|
| 51 |
- } |
|
| 16 |
+func Restrict() error {
|
|
| 17 |
+ // remount proc and sys as readonly |
|
| 18 |
+ for _, dest := range []string{"proc", "sys"} {
|
|
| 19 |
+ if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil {
|
|
| 20 |
+ return fmt.Errorf("unable to remount %s readonly: %s", dest, err)
|
|
| 52 | 21 |
} |
| 53 | 22 |
} |
| 54 | 23 |
|
| 24 |
+ if err := system.Mount("/proc/kcore", "/dev/null", "", syscall.MS_BIND, ""); err != nil {
|
|
| 25 |
+ return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore")
|
|
| 26 |
+ } |
|
| 27 |
+ |
|
| 55 | 28 |
// This weird trick will allow us to mount /proc read-only, while being able to use AppArmor. |
| 56 | 29 |
// This is because apparently, loading an AppArmor profile requires write access to /proc/1/attr. |
| 57 | 30 |
// So we do another mount of procfs, ensure it's write-able, and bind-mount a subset of it. |
| 58 |
- tmpProcPath := filepath.Join(rootfs, ".proc") |
|
| 59 |
- if err := os.Mkdir(tmpProcPath, 0700); err != nil {
|
|
| 60 |
- return fmt.Errorf("unable to create temporary proc mountpoint %s: %s", tmpProcPath, err)
|
|
| 31 |
+ var ( |
|
| 32 |
+ rwAttrPath = filepath.Join(".proc", "1", "attr")
|
|
| 33 |
+ roAttrPath = filepath.Join("proc", "1", "attr")
|
|
| 34 |
+ ) |
|
| 35 |
+ |
|
| 36 |
+ if err := os.Mkdir(".proc", 0700); err != nil {
|
|
| 37 |
+ return fmt.Errorf("unable to create temporary proc mountpoint .proc: %s", err)
|
|
| 61 | 38 |
} |
| 62 |
- if err := system.Mount("proc", tmpProcPath, "proc", 0, ""); err != nil {
|
|
| 39 |
+ if err := system.Mount("proc", ".proc", "proc", 0, ""); err != nil {
|
|
| 63 | 40 |
return fmt.Errorf("unable to mount proc on temporary proc mountpoint: %s", err)
|
| 64 | 41 |
} |
| 65 |
- if err := system.Mount("proc", tmpProcPath, "", syscall.MS_REMOUNT, ""); err != nil {
|
|
| 42 |
+ if err := system.Mount("proc", ".proc", "", syscall.MS_REMOUNT, ""); err != nil {
|
|
| 66 | 43 |
return fmt.Errorf("unable to remount proc read-write: %s", err)
|
| 67 | 44 |
} |
| 68 |
- rwAttrPath := filepath.Join(rootfs, ".proc", "1", "attr") |
|
| 69 |
- roAttrPath := filepath.Join(rootfs, "proc", "1", "attr") |
|
| 70 | 45 |
if err := system.Mount(rwAttrPath, roAttrPath, "", syscall.MS_BIND, ""); err != nil {
|
| 71 | 46 |
return fmt.Errorf("unable to bind-mount %s on %s: %s", rwAttrPath, roAttrPath, err)
|
| 72 | 47 |
} |
| 73 |
- if err := system.Unmount(tmpProcPath, 0); err != nil {
|
|
| 48 |
+ if err := system.Unmount(".proc", 0); err != nil {
|
|
| 74 | 49 |
return fmt.Errorf("unable to unmount temporary proc filesystem: %s", err)
|
| 75 | 50 |
} |
| 76 |
- return nil |
|
| 51 |
+ return os.RemoveAll(".proc")
|
|
| 77 | 52 |
} |