| ... | ... |
@@ -25,6 +25,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container |
| 25 | 25 |
container.Cgroups.Name = c.ID |
| 26 | 26 |
// check to see if we are running in ramdisk to disable pivot root |
| 27 | 27 |
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
| 28 |
+ container.Context["restriction_path"] = d.restrictionPath |
|
| 28 | 29 |
|
| 29 | 30 |
if err := d.createNetwork(container, c); err != nil {
|
| 30 | 31 |
return nil, err |
| ... | ... |
@@ -81,6 +82,8 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error {
|
| 81 | 81 |
c.Enabled = true |
| 82 | 82 |
} |
| 83 | 83 |
container.Cgroups.DeviceAccess = true |
| 84 |
+ delete(container.Context, "restriction_path") |
|
| 85 |
+ |
|
| 84 | 86 |
if apparmor.IsEnabled() {
|
| 85 | 87 |
container.Context["apparmor_profile"] = "unconfined" |
| 86 | 88 |
} |
| ... | ... |
@@ -62,6 +62,7 @@ type driver struct {
|
| 62 | 62 |
root string |
| 63 | 63 |
initPath string |
| 64 | 64 |
activeContainers map[string]*exec.Cmd |
| 65 |
+ restrictionPath string |
|
| 65 | 66 |
} |
| 66 | 67 |
|
| 67 | 68 |
func NewDriver(root, initPath string) (*driver, error) {
|
| ... | ... |
@@ -72,8 +73,14 @@ func NewDriver(root, initPath string) (*driver, error) {
|
| 72 | 72 |
if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil {
|
| 73 | 73 |
return nil, err |
| 74 | 74 |
} |
| 75 |
+ restrictionPath := filepath.Join(root, "empty") |
|
| 76 |
+ if err := os.MkdirAll(restrictionPath, 0700); err != nil {
|
|
| 77 |
+ return nil, err |
|
| 78 |
+ } |
|
| 79 |
+ |
|
| 75 | 80 |
return &driver{
|
| 76 | 81 |
root: root, |
| 82 |
+ restrictionPath: restrictionPath, |
|
| 77 | 83 |
initPath: initPath, |
| 78 | 84 |
activeContainers: make(map[string]*exec.Cmd), |
| 79 | 85 |
}, nil |
| ... | ... |
@@ -61,7 +61,7 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol |
| 61 | 61 |
|
| 62 | 62 |
label.Init() |
| 63 | 63 |
ns.logger.Println("setup mount namespace")
|
| 64 |
- if err := setupNewMountNamespace(rootfs, container.Mounts, console, container.ReadonlyFs, container.NoPivotRoot, container.Context["mount_label"]); err != nil {
|
|
| 64 |
+ if err := setupNewMountNamespace(rootfs, console, container); err != nil {
|
|
| 65 | 65 |
return fmt.Errorf("setup mount namespace %s", err)
|
| 66 | 66 |
} |
| 67 | 67 |
if err := system.Sethostname(container.Hostname); err != nil {
|
| ... | ... |
@@ -6,6 +6,7 @@ import ( |
| 6 | 6 |
"fmt" |
| 7 | 7 |
"github.com/dotcloud/docker/pkg/label" |
| 8 | 8 |
"github.com/dotcloud/docker/pkg/libcontainer" |
| 9 |
+ "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" |
|
| 9 | 10 |
"github.com/dotcloud/docker/pkg/system" |
| 10 | 11 |
"io/ioutil" |
| 11 | 12 |
"os" |
| ... | ... |
@@ -21,9 +22,9 @@ const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NOD |
| 21 | 21 |
// |
| 22 | 22 |
// There is no need to unmount the new mounts because as soon as the mount namespace |
| 23 | 23 |
// is no longer in use, the mounts will be removed automatically |
| 24 |
-func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, console string, readonly, noPivotRoot bool, mountLabel string) error {
|
|
| 24 |
+func setupNewMountNamespace(rootfs, console string, container *libcontainer.Container) error {
|
|
| 25 | 25 |
flag := syscall.MS_PRIVATE |
| 26 |
- if noPivotRoot {
|
|
| 26 |
+ if container.NoPivotRoot {
|
|
| 27 | 27 |
flag = syscall.MS_SLAVE |
| 28 | 28 |
} |
| 29 | 29 |
if err := system.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil {
|
| ... | ... |
@@ -32,44 +33,28 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons |
| 32 | 32 |
if err := system.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil {
|
| 33 | 33 |
return fmt.Errorf("mouting %s as bind %s", rootfs, err)
|
| 34 | 34 |
} |
| 35 |
- if err := mountSystem(rootfs, mountLabel); err != nil {
|
|
| 35 |
+ if err := mountSystem(rootfs, container.Context["mount_label"]); err != nil {
|
|
| 36 | 36 |
return fmt.Errorf("mount system %s", err)
|
| 37 | 37 |
} |
| 38 |
- |
|
| 39 |
- for _, m := range bindMounts {
|
|
| 40 |
- var ( |
|
| 41 |
- flags = syscall.MS_BIND | syscall.MS_REC |
|
| 42 |
- dest = filepath.Join(rootfs, m.Destination) |
|
| 43 |
- ) |
|
| 44 |
- if !m.Writable {
|
|
| 45 |
- flags = flags | syscall.MS_RDONLY |
|
| 46 |
- } |
|
| 47 |
- if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
|
| 48 |
- return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
|
| 49 |
- } |
|
| 50 |
- if !m.Writable {
|
|
| 51 |
- if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
|
|
| 52 |
- return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
|
|
| 53 |
- } |
|
| 54 |
- } |
|
| 55 |
- if m.Private {
|
|
| 56 |
- if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
|
| 57 |
- return fmt.Errorf("mounting %s private %s", dest, err)
|
|
| 58 |
- } |
|
| 38 |
+ if err := setupBindmounts(rootfs, container.Mounts); err != nil {
|
|
| 39 |
+ return fmt.Errorf("bind mounts %s", err)
|
|
| 40 |
+ } |
|
| 41 |
+ if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" {
|
|
| 42 |
+ if err := restrict.Restrict(rootfs, restrictionPath); err != nil {
|
|
| 43 |
+ return fmt.Errorf("restrict %s", err)
|
|
| 59 | 44 |
} |
| 60 | 45 |
} |
| 61 |
- |
|
| 62 | 46 |
if err := copyDevNodes(rootfs); err != nil {
|
| 63 | 47 |
return fmt.Errorf("copy dev nodes %s", err)
|
| 64 | 48 |
} |
| 65 |
- if err := setupPtmx(rootfs, console, mountLabel); err != nil {
|
|
| 49 |
+ if err := setupPtmx(rootfs, console, container.Context["mount_label"]); err != nil {
|
|
| 66 | 50 |
return err |
| 67 | 51 |
} |
| 68 | 52 |
if err := system.Chdir(rootfs); err != nil {
|
| 69 | 53 |
return fmt.Errorf("chdir into %s %s", rootfs, err)
|
| 70 | 54 |
} |
| 71 | 55 |
|
| 72 |
- if noPivotRoot {
|
|
| 56 |
+ if container.NoPivotRoot {
|
|
| 73 | 57 |
if err := rootMsMove(rootfs); err != nil {
|
| 74 | 58 |
return err |
| 75 | 59 |
} |
| ... | ... |
@@ -79,7 +64,7 @@ func setupNewMountNamespace(rootfs string, bindMounts []libcontainer.Mount, cons |
| 79 | 79 |
} |
| 80 | 80 |
} |
| 81 | 81 |
|
| 82 |
- if readonly {
|
|
| 82 |
+ if container.ReadonlyFs {
|
|
| 83 | 83 |
if err := system.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, ""); err != nil {
|
| 84 | 84 |
return fmt.Errorf("mounting %s as readonly %s", rootfs, err)
|
| 85 | 85 |
} |
| ... | ... |
@@ -263,3 +248,29 @@ func remountSys() error {
|
| 263 | 263 |
} |
| 264 | 264 |
return nil |
| 265 | 265 |
} |
| 266 |
+ |
|
| 267 |
+func setupBindmounts(rootfs string, bindMounts []libcontainer.Mount) error {
|
|
| 268 |
+ for _, m := range bindMounts {
|
|
| 269 |
+ var ( |
|
| 270 |
+ flags = syscall.MS_BIND | syscall.MS_REC |
|
| 271 |
+ dest = filepath.Join(rootfs, m.Destination) |
|
| 272 |
+ ) |
|
| 273 |
+ if !m.Writable {
|
|
| 274 |
+ flags = flags | syscall.MS_RDONLY |
|
| 275 |
+ } |
|
| 276 |
+ if err := system.Mount(m.Source, dest, "bind", uintptr(flags), ""); err != nil {
|
|
| 277 |
+ return fmt.Errorf("mounting %s into %s %s", m.Source, dest, err)
|
|
| 278 |
+ } |
|
| 279 |
+ if !m.Writable {
|
|
| 280 |
+ if err := system.Mount(m.Source, dest, "bind", uintptr(flags|syscall.MS_REMOUNT), ""); err != nil {
|
|
| 281 |
+ return fmt.Errorf("remounting %s into %s %s", m.Source, dest, err)
|
|
| 282 |
+ } |
|
| 283 |
+ } |
|
| 284 |
+ if m.Private {
|
|
| 285 |
+ if err := system.Mount("", dest, "none", uintptr(syscall.MS_PRIVATE), ""); err != nil {
|
|
| 286 |
+ return fmt.Errorf("mounting %s private %s", dest, err)
|
|
| 287 |
+ } |
|
| 288 |
+ } |
|
| 289 |
+ } |
|
| 290 |
+ return nil |
|
| 291 |
+} |
| 266 | 292 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,46 @@ |
| 0 |
+package restrict |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "github.com/dotcloud/docker/pkg/system" |
|
| 5 |
+ "path/filepath" |
|
| 6 |
+ "syscall" |
|
| 7 |
+) |
|
| 8 |
+ |
|
| 9 |
// flags is the mount flag set used for every restriction mount: a recursive,
// read-only bind mount.
const flags = syscall.MS_RDONLY | syscall.MS_REC | syscall.MS_BIND

// restrictions maps a path inside the container to the source that is bind
// mounted over it. A non-empty value is a path resolved against the container
// rootfs; an empty value marks a directory, for which the caller of Restrict
// supplies the source directory.
var restrictions = map[string]string{
	// files
	"/proc/kcore":         "/dev/null",
	"/proc/sysrq-trigger": "/dev/null",

	// dirs
	"/proc/acpi": "",
	"/proc/irq":  "",
	"/proc/sys":  "",
}
|
| 21 |
+ |
|
| 22 |
+// Restrict locks down access to many areas of proc |
|
| 23 |
+// by using the asumption that the user does not have mount caps to |
|
| 24 |
+// revert the changes made here |
|
| 25 |
+func Restrict(rootfs, empty string) error {
|
|
| 26 |
+ for dest, source := range restrictions {
|
|
| 27 |
+ dest = filepath.Join(rootfs, dest) |
|
| 28 |
+ |
|
| 29 |
+ // we don't have a "/dev/null" for dirs so have the requester pass a dir |
|
| 30 |
+ // for us to bind mount |
|
| 31 |
+ switch source {
|
|
| 32 |
+ case "": |
|
| 33 |
+ source = empty |
|
| 34 |
+ default: |
|
| 35 |
+ source = filepath.Join(rootfs, source) |
|
| 36 |
+ } |
|
| 37 |
+ if err := system.Mount(source, dest, "bind", flags, ""); err != nil {
|
|
| 38 |
+ return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
|
|
| 39 |
+ } |
|
| 40 |
+ if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil {
|
|
| 41 |
+ return fmt.Errorf("unable to mount %s over %s %s", source, dest, err)
|
|
| 42 |
+ } |
|
| 43 |
+ } |
|
| 44 |
+ return nil |
|
| 45 |
+} |