
libcontainer: Initial version of cgroups support

This is a minimal version of raw cgroup support for libcontainer.
It has only enough for what docker needs, and it has no support
for systemd yet.

Docker-DCO-1.1-Signed-off-by: Alexander Larsson <alexl@redhat.com> (github: alexlarsson)

Alexander Larsson authored on 2014/02/21 07:12:08
Showing 6 changed files
@@ -40,6 +40,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
 	return parseCgroupFile(subsystem, f)
 }
 
+func GetInitCgroupDir(subsystem string) (string, error) {
+	f, err := os.Open("/proc/1/cgroup")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	return parseCgroupFile(subsystem, f)
+}
+
 func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
 	s := bufio.NewScanner(r)
 
@@ -49,8 +59,10 @@ func parseCgroupFile(subsystem string, r io.Reader) (string, error) {
 		}
 		text := s.Text()
 		parts := strings.Split(text, ":")
-		if parts[1] == subsystem {
-			return parts[2], nil
+		for _, subs := range strings.Split(parts[1], ",") {
+			if subs == subsystem {
+				return parts[2], nil
+			}
 		}
 	}
 	return "", fmt.Errorf("cgroup '%s' not found in /proc/self/cgroup", subsystem)
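For context, each line of /proc/self/cgroup (and /proc/1/cgroup) has the form hierarchy-id:subsystem-list:cgroup-path, e.g. 3:cpu,cpuacct:/docker/abc, so a single entry can name several comma-separated subsystems; the new loop matches any of them. A minimal standalone sketch of the same matching logic (the sample input and function name are illustrative, not part of the commit):

package main

import (
	"bufio"
	"fmt"
	"strings"
)

// findCgroupDir mirrors the parseCgroupFile logic above: split each line on
// ":" and match the requested subsystem against the comma-separated list in
// the second field.
func findCgroupDir(subsystem, contents string) (string, error) {
	s := bufio.NewScanner(strings.NewReader(contents))
	for s.Scan() {
		parts := strings.Split(s.Text(), ":")
		if len(parts) < 3 {
			continue
		}
		for _, subs := range strings.Split(parts[1], ",") {
			if subs == subsystem {
				return parts[2], nil
			}
		}
	}
	return "", fmt.Errorf("cgroup '%s' not found", subsystem)
}

func main() {
	// Hypothetical /proc/self/cgroup contents.
	sample := "4:devices:/docker\n3:cpu,cpuacct:/docker\n2:memory:/docker\n"
	dir, err := findCgroupDir("cpuacct", sample)
	fmt.Println(dir, err) // prints: /docker <nil>
}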
new file mode 100644
@@ -0,0 +1,177 @@
+package cgroup
+
+import (
+	"fmt"
+	"github.com/dotcloud/docker/pkg/cgroups"
+	"github.com/dotcloud/docker/pkg/libcontainer"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strconv"
+)
+
+// We have two implementations of cgroups support, one is based on
+// systemd and the dbus api, and one is based on raw cgroup fs operations
+// following the pre-single-writer model docs at:
+// http://www.freedesktop.org/wiki/Software/systemd/PaxControlGroups/
+const (
+	cgroupRoot = "/sys/fs/cgroup"
+)
+
+func useSystemd() bool {
+	return false
+}
+
+func applyCgroupSystemd(container *libcontainer.Container, pid int) error {
+	return fmt.Errorf("not supported yet")
+}
+
+func writeFile(dir, file, data string) error {
+	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
+}
+
+func getCgroup(subsystem string, container *libcontainer.Container) (string, error) {
+	cgroup := container.CgroupName
+	if container.CgroupParent != "" {
+		cgroup = filepath.Join(container.CgroupParent, cgroup)
+	}
+
+	initPath, err := cgroups.GetInitCgroupDir(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	path := filepath.Join(cgroupRoot, subsystem, initPath, cgroup)
+
+	return path, nil
+}
+
+func joinCgroup(subsystem string, container *libcontainer.Container, pid int) (string, error) {
+	path, err := getCgroup(subsystem, container)
+	if err != nil {
+		return "", err
+	}
+
+	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
+		return "", err
+	}
+
+	if err := writeFile(path, "tasks", strconv.Itoa(pid)); err != nil {
+		return "", err
+	}
+
+	return path, nil
+}
+
+func applyCgroupRaw(container *libcontainer.Container, pid int) (retErr error) {
+	if _, err := os.Stat(cgroupRoot); err != nil {
+		return fmt.Errorf("cgroups fs not found")
+	}
+
+	if !container.DeviceAccess {
+		dir, err := joinCgroup("devices", container, pid)
+		if err != nil {
+			return err
+		}
+		defer func() {
+			if retErr != nil {
+				os.RemoveAll(dir)
+			}
+		}()
+
+		if err := writeFile(dir, "devices.deny", "a"); err != nil {
+			return err
+		}
+
+		allow := []string{
+			// /dev/null, zero, full
+			"c 1:3 rwm",
+			"c 1:5 rwm",
+			"c 1:7 rwm",
+
+			// consoles
+			"c 5:1 rwm",
+			"c 5:0 rwm",
+			"c 4:0 rwm",
+			"c 4:1 rwm",
+
+			// /dev/urandom,/dev/random
+			"c 1:9 rwm",
+			"c 1:8 rwm",
+
+			// /dev/pts/ - pts namespaces are "coming soon"
+			"c 136:* rwm",
+			"c 5:2 rwm",
+
+			// tuntap
+			"c 10:200 rwm",
+		}
+
+		for _, val := range allow {
+			if err := writeFile(dir, "devices.allow", val); err != nil {
+				return err
+			}
+		}
+	}
+
+	if container.Memory != 0 || container.MemorySwap != 0 {
+		dir, err := joinCgroup("memory", container, pid)
+		if err != nil {
+			return err
+		}
+		defer func() {
+			if retErr != nil {
+				os.RemoveAll(dir)
+			}
+		}()
+
+		if container.Memory != 0 {
+			if err := writeFile(dir, "memory.limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
+				return err
+			}
+			if err := writeFile(dir, "memory.soft_limit_in_bytes", strconv.FormatInt(container.Memory, 10)); err != nil {
+				return err
+			}
+		}
+		if container.MemorySwap != 0 {
+			if err := writeFile(dir, "memory.memsw.limit_in_bytes", strconv.FormatInt(container.MemorySwap, 10)); err != nil {
+				return err
+			}
+		}
+	}
+
+	// We always want to join the cpu group, to allow fair cpu scheduling
+	// on a container basis
+	dir, err := joinCgroup("cpu", container, pid)
+	if err != nil {
+		return err
+	}
+	if container.CpuShares != 0 {
+		if err := writeFile(dir, "cpu.shares", strconv.FormatInt(container.CpuShares, 10)); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func CleanupCgroup(container *libcontainer.Container) error {
+	path, _ := getCgroup("memory", container)
+	os.RemoveAll(path)
+	path, _ = getCgroup("devices", container)
+	os.RemoveAll(path)
+	path, _ = getCgroup("cpu", container)
+	os.RemoveAll(path)
+	return nil
+}
+
+func ApplyCgroup(container *libcontainer.Container, pid int) error {
+	if container.CgroupName == "" {
+		return nil
+	}
+
+	if useSystemd() {
+		return applyCgroupSystemd(container, pid)
+	} else {
+		return applyCgroupRaw(container, pid)
+	}
+}
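A rough usage sketch of the new package, assuming a libcontainer.Container with the cgroup fields added in the Container struct hunk below; the real call site is the execCommand hunk further down, and the pid and cgroup values here are placeholders:

package main

import (
	"log"

	"github.com/dotcloud/docker/pkg/libcontainer"
	"github.com/dotcloud/docker/pkg/libcontainer/cgroup"
)

// placeInCgroups is a hypothetical helper showing the intended call pattern:
// set the new cgroup fields on the container config, apply them to the child
// pid right after forking it, and remove the groups again on cleanup.
func placeInCgroups(pid int) error {
	container := &libcontainer.Container{
		CgroupName:   "docker-koye",     // placeholder name, as in the sample config
		CgroupParent: "docker",
		Memory:       512 * 1024 * 1024, // illustrative 512 MB limit
		CpuShares:    512,               // illustrative relative weight
		DeviceAccess: false,             // deny all devices except the built-in whitelist
	}

	if err := cgroup.ApplyCgroup(container, pid); err != nil {
		return err
	}
	defer cgroup.CleanupCgroup(container)

	// ... wait for the container process here ...
	return nil
}

func main() {
	if err := placeInCgroups(1234); err != nil { // 1234 is a placeholder pid
		log.Fatal(err)
	}
}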
@@ -11,6 +11,13 @@ type Container struct {
 	Namespaces   Namespaces   `json:"namespaces,omitempty"`   // namespaces to apply
 	Capabilities Capabilities `json:"capabilities,omitempty"` // capabilities to drop
 	Network      *Network     `json:"network,omitempty"`      // nil for host's network stack
+
+	CgroupName   string `json:"cgroup_name,omitempty"`   // name of cgroup
+	CgroupParent string `json:"cgroup_parent,omitempty"` // name of parent cgroup or slice
+	DeviceAccess bool   `json:"device_access,omitempty"` // allow the container full access to devices
+	Memory       int64  `json:"memory,omitempty"`        // memory limit (in bytes)
+	MemorySwap   int64  `json:"memory_swap,omitempty"`   // total memory limit (memory + swap); set to -1 to disable swap
+	CpuShares    int64  `json:"cpu_shares,omitempty"`    // CPU shares (relative weight vs. other containers)
 }
 
 // Network defines configuration for a container's networking stack
@@ -34,5 +34,8 @@
         "gateway": "172.17.42.1",
         "bridge": "docker0",
         "mtu": 1500
-    }
+    },
+    "cgroup_name": "docker-koye",
+    "cgroup_parent": "docker",
+    "memory": 524800
 }
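The new keys in this sample map directly onto the Container fields above via their JSON tags. A small sketch of loading them (the file path and error handling are illustrative, not part of the commit):

package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"

	"github.com/dotcloud/docker/pkg/libcontainer"
)

func main() {
	// Hypothetical path to the sample configuration shown above.
	data, err := ioutil.ReadFile("container.json")
	if err != nil {
		panic(err)
	}

	var container libcontainer.Container
	if err := json.Unmarshal(data, &container); err != nil {
		panic(err)
	}

	// "cgroup_name", "cgroup_parent" and "memory" end up in these fields:
	fmt.Println(container.CgroupName, container.CgroupParent, container.Memory)
}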
@@ -5,6 +5,7 @@ package main
 import (
 	"fmt"
 	"github.com/dotcloud/docker/pkg/libcontainer"
+	"github.com/dotcloud/docker/pkg/libcontainer/cgroup"
 	"github.com/dotcloud/docker/pkg/libcontainer/network"
 	"github.com/dotcloud/docker/pkg/libcontainer/utils"
 	"github.com/dotcloud/docker/pkg/system"
@@ -33,10 +34,18 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
 		return -1, err
 	}
 	if err := writePidFile(command); err != nil {
+		command.Process.Kill()
 		return -1, err
 	}
 	defer deletePidFile()
 
+	// Do this before syncing with child so that no children
+	// can escape the cgroup
+	if err := cgroup.ApplyCgroup(container, command.Process.Pid); err != nil {
+		command.Process.Kill()
+		return -1, err
+	}
+
 	if container.Network != nil {
 		vethPair, err := initializeContainerVeth(container.Network.Bridge, command.Process.Pid)
 		if err != nil {
@@ -45,6 +54,9 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
 		sendVethName(vethPair, inPipe)
 	}
 
+	// Sync with child
+	inPipe.Close()
+
 	go io.Copy(os.Stdout, master)
 	go io.Copy(master, os.Stdin)
 
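The ordering above is the point of the change: the parent applies the cgroups and writes the veth name (if any) before closing inPipe, and the child blocks reading that pipe until EOF, so it cannot spawn anything that would escape the cgroup. A minimal sketch of this pipe handshake (assumed shape, with a goroutine standing in for the child process; not the commit's actual plumbing):

package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

// The child side blocks reading the pipe until the parent has finished its
// setup (cgroups, veth name) and closed the write end.
func main() {
	r, w, err := os.Pipe()
	if err != nil {
		panic(err)
	}

	done := make(chan struct{})
	go func() { // stands in for the container's init process
		payload, _ := ioutil.ReadAll(r) // blocks until the parent closes w
		fmt.Printf("child unblocked, got %q\n", payload)
		close(done)
	}()

	// Parent side: finish all setup first (cgroups, network, ...).
	fmt.Fprint(w, "veth0abc") // optional payload, e.g. the temporary veth name
	w.Close()                 // ... then close the pipe to release the child
	<-done
}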
@@ -67,7 +79,6 @@ func execCommand(container *libcontainer.Container, args []string) (int, error)
 // pipe so that the child stops waiting for more data
 func sendVethName(name string, pipe io.WriteCloser) {
 	fmt.Fprint(pipe, name)
-	pipe.Close()
 }
 
 // initializeContainerVeth will create a veth pair and setup the host's
@@ -20,12 +20,10 @@ func initCommand(container *libcontainer.Container, console string, args []strin
 		return err
 	}
 
-	var tempVethName string
-	if container.Network != nil {
-		tempVethName, err = getVethName()
-		if err != nil {
-			return err
-		}
+	// We always read this as it is a way to sync with the parent as well
+	tempVethName, err := getVethName()
+	if err != nil {
+		return err
 	}
 
 	// close pipes so that we can replace it with the pty