Cgroup namespaces are enabled for all containers that are not run with --privileged,
if the kernel supports them.
Fixes #38332
Signed-off-by: Rob Gulewich <rgulewich@netflix.com>
| ... | ... |
@@ -81,26 +81,27 @@ var ( |
| 81 | 81 |
|
| 82 | 82 |
// Daemon holds information about the Docker daemon. |
| 83 | 83 |
type Daemon struct {
|
| 84 |
- ID string |
|
| 85 |
- repository string |
|
| 86 |
- containers container.Store |
|
| 87 |
- containersReplica container.ViewDB |
|
| 88 |
- execCommands *exec.Store |
|
| 89 |
- imageService *images.ImageService |
|
| 90 |
- idIndex *truncindex.TruncIndex |
|
| 91 |
- configStore *config.Config |
|
| 92 |
- statsCollector *stats.Collector |
|
| 93 |
- defaultLogConfig containertypes.LogConfig |
|
| 94 |
- RegistryService registry.Service |
|
| 95 |
- EventsService *events.Events |
|
| 96 |
- netController libnetwork.NetworkController |
|
| 97 |
- volumes *volumesservice.VolumesService |
|
| 98 |
- discoveryWatcher discovery.Reloader |
|
| 99 |
- root string |
|
| 100 |
- seccompEnabled bool |
|
| 101 |
- apparmorEnabled bool |
|
| 102 |
- shutdown bool |
|
| 103 |
- idMapping *idtools.IdentityMapping |
|
| 84 |
+ ID string |
|
| 85 |
+ repository string |
|
| 86 |
+ containers container.Store |
|
| 87 |
+ containersReplica container.ViewDB |
|
| 88 |
+ execCommands *exec.Store |
|
| 89 |
+ imageService *images.ImageService |
|
| 90 |
+ idIndex *truncindex.TruncIndex |
|
| 91 |
+ configStore *config.Config |
|
| 92 |
+ statsCollector *stats.Collector |
|
| 93 |
+ defaultLogConfig containertypes.LogConfig |
|
| 94 |
+ RegistryService registry.Service |
|
| 95 |
+ EventsService *events.Events |
|
| 96 |
+ netController libnetwork.NetworkController |
|
| 97 |
+ volumes *volumesservice.VolumesService |
|
| 98 |
+ discoveryWatcher discovery.Reloader |
|
| 99 |
+ root string |
|
| 100 |
+ seccompEnabled bool |
|
| 101 |
+ apparmorEnabled bool |
|
| 102 |
+ cgroupNamespacesEnabled bool |
|
| 103 |
+ shutdown bool |
|
| 104 |
+ idMapping *idtools.IdentityMapping |
|
| 104 | 105 |
// TODO: move graphDrivers field to an InfoService |
| 105 | 106 |
graphDrivers map[string]string // By operating system |
| 106 | 107 |
|
| ... | ... |
@@ -1020,6 +1021,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S |
| 1020 | 1020 |
d.idMapping = idMapping |
| 1021 | 1021 |
d.seccompEnabled = sysInfo.Seccomp |
| 1022 | 1022 |
d.apparmorEnabled = sysInfo.AppArmor |
| 1023 |
+ d.cgroupNamespacesEnabled = sysInfo.CgroupNamespaces |
|
| 1023 | 1024 |
|
| 1024 | 1025 |
d.linkIndex = newLinkIndex() |
| 1025 | 1026 |
|
| ... | ... |
@@ -307,8 +307,13 @@ func WithNamespaces(daemon *Daemon, c *container.Container) coci.SpecOpts {
|
| 307 | 307 |
s.Hostname = "" |
| 308 | 308 |
} |
| 309 | 309 |
|
| 310 |
- return nil |
|
| 310 |
+ // cgroup |
|
| 311 |
+ if daemon.cgroupNamespacesEnabled && !c.HostConfig.Privileged {
|
|
| 312 |
+ nsCgroup := specs.LinuxNamespace{Type: "cgroup"}
|
|
| 313 |
+ setNamespace(s, nsCgroup) |
|
| 311 | 314 |
} |
| 315 |
+ |
|
| 316 |
+ return nil |
|
| 312 | 317 |
} |
| 313 | 318 |
|
| 314 | 319 |
func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
|
| ... | ... |
@@ -3984,18 +3984,40 @@ func (s *DockerSuite) TestBuildContainerWithCgroupParent(c *check.C) {
|
| 3984 | 3984 |
if !found {
|
| 3985 | 3985 |
c.Fatalf("unable to find self memory cgroup path. CgroupsPath: %v", selfCgroupPaths)
|
| 3986 | 3986 |
} |
| 3987 |
- result := buildImage("buildcgroupparent",
|
|
| 3988 |
- cli.WithFlags("--cgroup-parent", cgroupParent),
|
|
| 3989 |
- build.WithDockerfile(` |
|
| 3987 |
+ |
|
| 3988 |
+ doneCh := make(chan string) |
|
| 3989 |
+ |
|
| 3990 |
+ // If cgroup namespaces are enabled, then processes running inside the container won't |
|
| 3991 |
+ // be able to see the parent namespace. Check that they have the correct parents from |
|
| 3992 |
+ // the host, which has the non-namespaced view of the hierarchy. |
|
| 3993 |
+ |
|
| 3994 |
+ go func() {
|
|
| 3995 |
+ result := buildImage("buildcgroupparent",
|
|
| 3996 |
+ cli.WithFlags("--cgroup-parent", cgroupParent),
|
|
| 3997 |
+ build.WithDockerfile(` |
|
| 3990 | 3998 |
FROM busybox |
| 3991 |
-RUN cat /proc/self/cgroup |
|
| 3992 |
-`)) |
|
| 3993 |
- result.Assert(c, icmd.Success) |
|
| 3994 |
- m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), result.Combined())
|
|
| 3995 |
- assert.NilError(c, err) |
|
| 3999 |
+RUN sleep 10 |
|
| 4000 |
+ `)) |
|
| 4001 |
+ result.Assert(c, icmd.Success) |
|
| 4002 |
+ doneCh <- "done" |
|
| 4003 |
+ }() |
|
| 4004 |
+ |
|
| 4005 |
+ // Wait until the build is well into the sleep |
|
| 4006 |
+ time.Sleep(3 * time.Second) |
|
| 4007 |
+ out, _, err := dockerCmdWithError("ps", "-q", "-l")
|
|
| 4008 |
+ c.Assert(err, check.IsNil) |
|
| 4009 |
+ cID := strings.TrimSpace(out) |
|
| 4010 |
+ |
|
| 4011 |
+ pid := inspectField(c, cID, "State.Pid") |
|
| 4012 |
+ paths := ReadCgroupPathsForPid(c, pid) |
|
| 4013 |
+ m, err := regexp.MatchString(fmt.Sprintf("memory:.*/%s/.*", cgroupParent), paths)
|
|
| 4014 |
+ c.Assert(err, check.IsNil) |
|
| 3996 | 4015 |
if !m {
|
| 3997 |
- c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, result.Combined())
|
|
| 4016 |
+ c.Fatalf("There is no expected memory cgroup with parent /%s/: %s", cgroupParent, paths)
|
|
| 3998 | 4017 |
} |
| 4018 |
+ |
|
| 4019 |
+ // Wait for the build to complete, otherwise it will exit with an error |
|
| 4020 |
+ <-doneCh |
|
| 3999 | 4021 |
} |
| 4000 | 4022 |
|
| 4001 | 4023 |
// FIXME(vdemeester) could be a unit test |
| ... | ... |
@@ -1787,7 +1787,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
|
| 1787 | 1787 |
} |
| 1788 | 1788 |
|
| 1789 | 1789 |
func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
| 1790 |
- testRequires(c, DaemonIsLinux) |
|
| 1790 |
+ // Test requires local filesystem access on a Linux host |
|
| 1791 |
+ testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon) |
|
| 1791 | 1792 |
|
| 1792 | 1793 |
cgroupParent := "test" |
| 1793 | 1794 |
name := "cgroup-test" |
| ... | ... |
@@ -1795,10 +1796,20 @@ func (s *DockerDaemonSuite) TestDaemonCgroupParent(c *check.C) {
|
| 1795 | 1795 |
s.d.StartWithBusybox(c, "--cgroup-parent", cgroupParent) |
| 1796 | 1796 |
defer s.d.Restart(c) |
| 1797 | 1797 |
|
| 1798 |
- out, err := s.d.Cmd("run", "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
|
| 1799 |
- assert.NilError(c, err) |
|
| 1800 |
- cgroupPaths := ParseCgroupPaths(string(out)) |
|
| 1801 |
- c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", string(out)))
|
|
| 1798 |
+ out, err := s.d.Cmd("run", "--name", name, "-d", "busybox", "top")
|
|
| 1799 |
+ c.Assert(err, checker.IsNil) |
|
| 1800 |
+ |
|
| 1801 |
+ // If cgroup namespaces are enabled, then processes running inside the container won't |
|
| 1802 |
+ // be able to see the parent namespace. Check that they have the correct parents from |
|
| 1803 |
+ // the host, which has the non-namespaced view of the hierarchy. |
|
| 1804 |
+ |
|
| 1805 |
+ pid, err := s.d.Cmd("inspect", "-f", "{{.State.Pid}}", name)
|
|
| 1806 |
+ c.Assert(err, checker.IsNil) |
|
| 1807 |
+ pid = strings.TrimSpace(string(pid)) |
|
| 1808 |
+ paths := ReadCgroupPathsForPid(c, pid) |
|
| 1809 |
+ cgroupPaths := ParseCgroupPaths(paths) |
|
| 1810 |
+ c.Assert(len(cgroupPaths), checker.Not(checker.Equals), 0, check.Commentf("unexpected output - %q", paths))
|
|
| 1811 |
+ |
|
| 1802 | 1812 |
out, err = s.d.Cmd("inspect", "-f", "{{.Id}}", name)
|
| 1803 | 1813 |
assert.NilError(c, err) |
| 1804 | 1814 |
id := strings.TrimSpace(string(out)) |
| ... | ... |
@@ -3241,8 +3241,8 @@ func (s *DockerSuite) TestRunWithUlimits(c *check.C) {
|
| 3241 | 3241 |
} |
| 3242 | 3242 |
|
| 3243 | 3243 |
func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
| 3244 |
- // Not applicable on Windows as uses Unix specific functionality |
|
| 3245 |
- testRequires(c, DaemonIsLinux) |
|
| 3244 |
+ // Test requires local filesystem access on a Linux host |
|
| 3245 |
+ testRequires(c, DaemonIsLinux, testEnv.IsLocalDaemon) |
|
| 3246 | 3246 |
|
| 3247 | 3247 |
// cgroup-parent relative path |
| 3248 | 3248 |
testRunContainerWithCgroupParent(c, "test", "cgroup-test") |
| ... | ... |
@@ -3252,14 +3252,23 @@ func (s *DockerSuite) TestRunContainerWithCgroupParent(c *check.C) {
|
| 3252 | 3252 |
} |
| 3253 | 3253 |
|
| 3254 | 3254 |
func testRunContainerWithCgroupParent(c *check.C, cgroupParent, name string) {
|
| 3255 |
- out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
|
| 3255 |
+ out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
|
|
| 3256 | 3256 |
if err != nil {
|
| 3257 | 3257 |
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
| 3258 | 3258 |
} |
| 3259 |
- cgroupPaths := ParseCgroupPaths(string(out)) |
|
| 3259 |
+ cID := strings.TrimSpace(out) |
|
| 3260 |
+ |
|
| 3261 |
+ // If cgroup namespaces are enabled, then processes running inside the container won't |
|
| 3262 |
+ // be able to see the parent namespace. Check that they have the correct parents from |
|
| 3263 |
+ // the host, which has the non-namespaced view of the hierarchy. |
|
| 3264 |
+ |
|
| 3265 |
+ pid := inspectField(c, cID, "State.Pid") |
|
| 3266 |
+ paths := ReadCgroupPathsForPid(c, pid) |
|
| 3267 |
+ cgroupPaths := ParseCgroupPaths(paths) |
|
| 3260 | 3268 |
if len(cgroupPaths) == 0 {
|
| 3261 |
- c.Fatalf("unexpected output - %q", string(out))
|
|
| 3269 |
+ c.Fatalf("unexpected output - %q", string(paths))
|
|
| 3262 | 3270 |
} |
| 3271 |
+ |
|
| 3263 | 3272 |
id := getIDByName(c, name) |
| 3264 | 3273 |
expectedCgroup := path.Join(cgroupParent, id) |
| 3265 | 3274 |
found := false |
| ... | ... |
@@ -3285,21 +3294,29 @@ func (s *DockerSuite) TestRunInvalidCgroupParent(c *check.C) {
|
| 3285 | 3285 |
} |
| 3286 | 3286 |
|
| 3287 | 3287 |
func testRunInvalidCgroupParent(c *check.C, cgroupParent, cleanCgroupParent, name string) {
|
| 3288 |
- out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "busybox", "cat", "/proc/self/cgroup")
|
|
| 3288 |
+ out, _, err := dockerCmdWithError("run", "--cgroup-parent", cgroupParent, "--name", name, "-d", "busybox", "top")
|
|
| 3289 | 3289 |
if err != nil {
|
| 3290 | 3290 |
// XXX: This may include a daemon crash. |
| 3291 | 3291 |
c.Fatalf("unexpected failure when running container with --cgroup-parent option - %s\n%v", string(out), err)
|
| 3292 | 3292 |
} |
| 3293 |
+ cID := strings.TrimSpace(out) |
|
| 3293 | 3294 |
|
| 3294 | 3295 |
// We expect "/SHOULD_NOT_EXIST" to not exist. If not, we have a security issue. |
| 3295 | 3296 |
if _, err := os.Stat("/SHOULD_NOT_EXIST"); err == nil || !os.IsNotExist(err) {
|
| 3296 | 3297 |
c.Fatalf("SECURITY: --cgroup-parent with ../../ relative paths cause files to be created in the host (this is bad) !!")
|
| 3297 | 3298 |
} |
| 3298 | 3299 |
|
| 3299 |
- cgroupPaths := ParseCgroupPaths(string(out)) |
|
| 3300 |
+ // If cgroup namespaces are enabled, then processes running inside the container won't |
|
| 3301 |
+ // be able to see the parent namespace. Check that they have the correct parents from |
|
| 3302 |
+ // the host, which has the non-namespaced view of the hierarchy. |
|
| 3303 |
+ |
|
| 3304 |
+ pid := inspectField(c, cID, "State.Pid") |
|
| 3305 |
+ paths := ReadCgroupPathsForPid(c, pid) |
|
| 3306 |
+ cgroupPaths := ParseCgroupPaths(paths) |
|
| 3300 | 3307 |
if len(cgroupPaths) == 0 {
|
| 3301 |
- c.Fatalf("unexpected output - %q", string(out))
|
|
| 3308 |
+ c.Fatalf("unexpected output - %q", string(paths))
|
|
| 3302 | 3309 |
} |
| 3310 |
+ |
|
| 3303 | 3311 |
id := getIDByName(c, name) |
| 3304 | 3312 |
expectedCgroup := path.Join(cleanCgroupParent, id) |
| 3305 | 3313 |
found := false |
| ... | ... |
@@ -2,6 +2,7 @@ package main |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"fmt" |
| 5 |
+ "io/ioutil" |
|
| 5 | 6 |
"os" |
| 6 | 7 |
"os/exec" |
| 7 | 8 |
"path/filepath" |
| ... | ... |
@@ -38,6 +39,17 @@ func transformCmd(execCmd *exec.Cmd) icmd.Cmd {
|
| 38 | 38 |
} |
| 39 | 39 |
} |
| 40 | 40 |
|
| 41 |
+// ReadCgroupPathsForPid reads the cgroup path file for a pid in '/proc/<pid>/cgroup' |
|
| 42 |
+func ReadCgroupPathsForPid(c *check.C, pid string) string {
|
|
| 43 |
+ cgroupFile := fmt.Sprintf("/proc/%s/cgroup", pid)
|
|
| 44 |
+ out, err := ioutil.ReadFile(cgroupFile) |
|
| 45 |
+ if err != nil {
|
|
| 46 |
+ c.Fatalf("unexpected failure when reading cgroup file %s\n%v", cgroupFile, err)
|
|
| 47 |
+ } |
|
| 48 |
+ |
|
| 49 |
+ return string(out) |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 41 | 52 |
// ParseCgroupPaths parses 'procCgroupData', which is output of '/proc/<pid>/cgroup', and returns |
| 42 | 53 |
// a map with the cgroup name as key and the path as value. |
| 43 | 54 |
func ParseCgroupPaths(procCgroupData string) map[string]string {
|
| ... | ... |
@@ -2,6 +2,10 @@ package container // import "github.com/docker/docker/integration/container" |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"context" |
| 5 |
+ "fmt" |
|
| 6 |
+ "io/ioutil" |
|
| 7 |
+ "os" |
|
| 8 |
+ "path/filepath" |
|
| 5 | 9 |
"strconv" |
| 6 | 10 |
"strings" |
| 7 | 11 |
"testing" |
| ... | ... |
@@ -93,3 +97,32 @@ func TestNISDomainname(t *testing.T) {
|
| 93 | 93 |
assert.Equal(t, 0, res.ExitCode) |
| 94 | 94 |
assert.Check(t, is.Equal(domainname, strings.TrimSpace(res.Stdout()))) |
| 95 | 95 |
} |
| 96 |
+ |
|
| 97 |
+func TestCgroupNamespaces(t *testing.T) {
|
|
| 98 |
+ skip.If(t, testEnv.DaemonInfo.OSType != "linux") |
|
| 99 |
+ skip.If(t, testEnv.IsRemoteDaemon()) |
|
| 100 |
+ |
|
| 101 |
+ if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
|
| 102 |
+ t.Skip("cgroup namespaces are unsupported")
|
|
| 103 |
+ } |
|
| 104 |
+ |
|
| 105 |
+ defer setupTest(t)() |
|
| 106 |
+ client := testEnv.APIClient() |
|
| 107 |
+ ctx := context.Background() |
|
| 108 |
+ |
|
| 109 |
+ cID := container.Run(t, ctx, client) |
|
| 110 |
+ poll.WaitOn(t, container.IsInState(ctx, client, cID, "running"), poll.WithDelay(100*time.Millisecond)) |
|
| 111 |
+ |
|
| 112 |
+ path := filepath.Join(os.Getenv("DEST"), "docker.pid")
|
|
| 113 |
+ b, err := ioutil.ReadFile(path) |
|
| 114 |
+ assert.NilError(t, err) |
|
| 115 |
+ link, err := os.Readlink(fmt.Sprintf("/proc/%s/ns/cgroup", string(b)))
|
|
| 116 |
+ assert.NilError(t, err) |
|
| 117 |
+ |
|
| 118 |
+ // Check that the container's cgroup doesn't match the docker daemon's |
|
| 119 |
+ res, err := container.Exec(ctx, client, cID, []string{"readlink", "/proc/1/ns/cgroup"})
|
|
| 120 |
+ assert.NilError(t, err) |
|
| 121 |
+ assert.Assert(t, is.Len(res.Stderr(), 0)) |
|
| 122 |
+ assert.Equal(t, 0, res.ExitCode) |
|
| 123 |
+ assert.Assert(t, link != strings.TrimSpace(res.Stdout())) |
|
| 124 |
+} |
| ... | ... |
@@ -16,6 +16,9 @@ type SysInfo struct {
|
| 16 | 16 |
cgroupCpusetInfo |
| 17 | 17 |
cgroupPids |
| 18 | 18 |
|
| 19 |
+ // Whether the kernel supports cgroup namespaces or not |
|
| 20 |
+ CgroupNamespaces bool |
|
| 21 |
+ |
|
| 19 | 22 |
// Whether IPv4 forwarding is supported or not, if this was disabled, networking will not work |
| 20 | 23 |
IPv4ForwardingDisabled bool |
| 21 | 24 |
|
| ... | ... |
@@ -53,6 +53,7 @@ func New(quiet bool) *SysInfo {
|
| 53 | 53 |
applyNetworkingInfo, |
| 54 | 54 |
applyAppArmorInfo, |
| 55 | 55 |
applySeccompInfo, |
| 56 |
+ applyCgroupNsInfo, |
|
| 56 | 57 |
}...) |
| 57 | 58 |
|
| 58 | 59 |
for _, o := range ops {
|
| ... | ... |
@@ -250,6 +251,15 @@ func applyAppArmorInfo(info *SysInfo, _ map[string]string) []string {
|
| 250 | 250 |
return warnings |
| 251 | 251 |
} |
| 252 | 252 |
|
| 253 |
+// applyCgroupNsInfo adds cgroup namespace information to the info. |
|
| 254 |
+func applyCgroupNsInfo(info *SysInfo, _ map[string]string) []string {
|
|
| 255 |
+ var warnings []string |
|
| 256 |
+ if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
|
|
| 257 |
+ info.CgroupNamespaces = true |
|
| 258 |
+ } |
|
| 259 |
+ return warnings |
|
| 260 |
+} |
|
| 261 |
+ |
|
| 253 | 262 |
// applySeccompInfo checks if Seccomp is supported, via CONFIG_SECCOMP. |
| 254 | 263 |
func applySeccompInfo(info *SysInfo, _ map[string]string) []string {
|
| 255 | 264 |
var warnings []string |
| ... | ... |
@@ -96,6 +96,26 @@ func TestNewAppArmorDisabled(t *testing.T) {
|
| 96 | 96 |
assert.Assert(t, !sysInfo.AppArmor) |
| 97 | 97 |
} |
| 98 | 98 |
|
| 99 |
+func TestNewCgroupNamespacesEnabled(t *testing.T) {
|
|
| 100 |
+ // If cgroup namespaces are supported in the kernel, then sysInfo.CgroupNamespaces should be TRUE |
|
| 101 |
+ if _, err := os.Stat("/proc/self/ns/cgroup"); err != nil {
|
|
| 102 |
+ t.Skip("cgroup namespaces must be enabled")
|
|
| 103 |
+ } |
|
| 104 |
+ |
|
| 105 |
+ sysInfo := New(true) |
|
| 106 |
+ assert.Assert(t, sysInfo.CgroupNamespaces) |
|
| 107 |
+} |
|
| 108 |
+ |
|
| 109 |
+func TestNewCgroupNamespacesDisabled(t *testing.T) {
|
|
| 110 |
+ // If cgroup namespaces are *not* supported in the kernel, then sysInfo.CgroupNamespaces should be FALSE |
|
| 111 |
+ if _, err := os.Stat("/proc/self/ns/cgroup"); !os.IsNotExist(err) {
|
|
| 112 |
+ t.Skip("cgroup namespaces must be disabled")
|
|
| 113 |
+ } |
|
| 114 |
+ |
|
| 115 |
+ sysInfo := New(true) |
|
| 116 |
+ assert.Assert(t, !sysInfo.CgroupNamespaces) |
|
| 117 |
+} |
|
| 118 |
+ |
|
| 99 | 119 |
func TestNumCPU(t *testing.T) {
|
| 100 | 120 |
cpuNumbers := NumCPU() |
| 101 | 121 |
if cpuNumbers <= 0 {
|