Signed-off-by: Antonio Murdaca <amurdaca@redhat.com>
| ... | ... |
@@ -150,7 +150,7 @@ type Resources struct {
|
| 150 | 150 |
CpusetMems string `json:"cpuset_mems"` |
| 151 | 151 |
CPUPeriod int64 `json:"cpu_period"` |
| 152 | 152 |
CPUQuota int64 `json:"cpu_quota"` |
| 153 |
- BlkioWeight int64 `json:"blkio_weight"` |
|
| 153 |
+ BlkioWeight uint16 `json:"blkio_weight"` |
|
| 154 | 154 |
Rlimits []*ulimit.Rlimit `json:"rlimits"` |
| 155 | 155 |
OomKillDisable bool `json:"oom_kill_disable"` |
| 156 | 156 |
MemorySwappiness int64 `json:"memory_swappiness"` |
| ... | ... |
@@ -12,6 +12,7 @@ import ( |
| 12 | 12 |
"time" |
| 13 | 13 |
|
| 14 | 14 |
"github.com/docker/docker/daemon/execdriver/native/template" |
| 15 |
+ "github.com/docker/docker/pkg/mount" |
|
| 15 | 16 |
"github.com/opencontainers/runc/libcontainer" |
| 16 | 17 |
"github.com/opencontainers/runc/libcontainer/cgroups/fs" |
| 17 | 18 |
"github.com/opencontainers/runc/libcontainer/configs" |
| ... | ... |
@@ -37,7 +38,7 @@ func InitContainer(c *Command) *configs.Config {
|
| 37 | 37 |
container.Devices = c.AutoCreatedDevices |
| 38 | 38 |
container.Rootfs = c.Rootfs |
| 39 | 39 |
container.Readonlyfs = c.ReadonlyRootfs |
| 40 |
- container.Privatefs = true |
|
| 40 |
+ container.RootPropagation = mount.RPRIVATE |
|
| 41 | 41 |
|
| 42 | 42 |
// check to see if we are running in ramdisk to disable pivot root |
| 43 | 43 |
container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
|
| ... | ... |
@@ -44,8 +44,8 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce |
| 44 | 44 |
clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16 |
| 45 | 45 |
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c |
| 46 | 46 |
|
| 47 |
-clone git github.com/opencontainers/runc fba07bce72e72ce5b2dd618e4f67dd86ccb49c82 # libcontainer |
|
| 48 |
-# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) |
|
| 47 |
+clone git github.com/opencontainers/runc 902c012e85cdae6bb68d8c7a0df69a42f818ce96 # libcontainer |
|
| 48 |
+# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json) |
|
| 49 | 49 |
clone git github.com/coreos/go-systemd v3 |
| 50 | 50 |
clone git github.com/godbus/dbus v2 |
| 51 | 51 |
clone git github.com/syndtr/gocapability 66ef2aa7a23ba682594e2b6f74cf40c0692b49fb |
| ... | ... |
@@ -200,6 +200,24 @@ func (i *uint64Value) Get() interface{} { return uint64(*i) }
|
| 200 | 200 |
|
| 201 | 201 |
func (i *uint64Value) String() string { return fmt.Sprintf("%v", *i) }
|
| 202 | 202 |
|
| 203 |
// -- uint16 Value

// uint16Value adapts a uint16 variable so it can be used as a flag value:
// it can be set from a string, read back, and rendered as text.
type uint16Value uint16

// newUint16Value stores val in *p and returns p reinterpreted as a
// *uint16Value, so later Set calls write straight into the caller's variable.
func newUint16Value(val uint16, p *uint16) *uint16Value {
	target := (*uint16Value)(p)
	*target = uint16Value(val)
	return target
}

// Set parses s as an unsigned 16-bit integer (base prefixes such as 0x are
// honoured because base 0 is requested) and stores the result.
// Whatever strconv.ParseUint returns is stored even when it also reports an
// error; the error is handed back to the caller unchanged.
func (i *uint16Value) Set(s string) error {
	parsed, parseErr := strconv.ParseUint(s, 0, 16)
	*i = uint16Value(parsed)
	return parseErr
}

// Get returns the current value as a plain uint16 wrapped in an interface.
func (i *uint16Value) Get() interface{} {
	current := uint16(*i)
	return current
}

// String renders the current value using fmt's default formatting.
func (i *uint16Value) String() string {
	current := *i
	return fmt.Sprintf("%v", current)
}
|
|
| 220 |
+ |
|
| 203 | 221 |
// -- string Value |
| 204 | 222 |
type stringValue string |
| 205 | 223 |
|
| ... | ... |
@@ -757,6 +775,32 @@ func Uint64(names []string, value uint64, usage string) *uint64 {
|
| 757 | 757 |
return CommandLine.Uint64(names, value, usage) |
| 758 | 758 |
} |
| 759 | 759 |
|
| 760 |
+// Uint16Var defines a uint16 flag with specified name, default value, and usage string. |
|
| 761 |
+// The argument p points to a uint16 variable in which to store the value of the flag. |
|
| 762 |
+func (fs *FlagSet) Uint16Var(p *uint16, names []string, value uint16, usage string) {
|
|
| 763 |
+ fs.Var(newUint16Value(value, p), names, usage) |
|
| 764 |
+} |
|
| 765 |
+ |
|
| 766 |
+// Uint16Var defines a uint16 flag with specified name, default value, and usage string. |
|
| 767 |
+// The argument p points to a uint16 variable in which to store the value of the flag. |
|
| 768 |
+func Uint16Var(p *uint16, names []string, value uint16, usage string) {
|
|
| 769 |
+ CommandLine.Var(newUint16Value(value, p), names, usage) |
|
| 770 |
+} |
|
| 771 |
+ |
|
| 772 |
+// Uint16 defines a uint16 flag with specified name, default value, and usage string. |
|
| 773 |
+// The return value is the address of a uint16 variable that stores the value of the flag. |
|
| 774 |
+func (fs *FlagSet) Uint16(names []string, value uint16, usage string) *uint16 {
|
|
| 775 |
+ p := new(uint16) |
|
| 776 |
+ fs.Uint16Var(p, names, value, usage) |
|
| 777 |
+ return p |
|
| 778 |
+} |
|
| 779 |
+ |
|
| 780 |
+// Uint16 defines a uint16 flag with specified name, default value, and usage string. |
|
| 781 |
+// The return value is the address of a uint16 variable that stores the value of the flag. |
|
| 782 |
+func Uint16(names []string, value uint16, usage string) *uint16 {
|
|
| 783 |
+ return CommandLine.Uint16(names, value, usage) |
|
| 784 |
+} |
|
| 785 |
+ |
|
| 760 | 786 |
// StringVar defines a string flag with specified name, default value, and usage string. |
| 761 | 787 |
// The argument p points to a string variable in which to store the value of the flag. |
| 762 | 788 |
func (fs *FlagSet) StringVar(p *string, names []string, value string, usage string) {
|
| ... | ... |
@@ -226,7 +226,7 @@ type HostConfig struct {
|
| 226 | 226 |
CpusetCpus string // CpusetCpus 0-2, 0,1 |
| 227 | 227 |
CpusetMems string // CpusetMems 0-2, 0,1 |
| 228 | 228 |
CPUQuota int64 `json:"CpuQuota"` // CPU CFS (Completely Fair Scheduler) quota |
| 229 |
- BlkioWeight int64 // Block IO weight (relative weight vs. other containers) |
|
| 229 |
+ BlkioWeight uint16 // Block IO weight (relative weight vs. other containers) |
|
| 230 | 230 |
OomKillDisable bool // Whether to disable OOM Killer or not |
| 231 | 231 |
MemorySwappiness *int64 // Tuning container memory swappiness behaviour |
| 232 | 232 |
Privileged bool // Is the container in privileged mode |
| ... | ... |
@@ -86,7 +86,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe |
| 86 | 86 |
flCPUQuota = cmd.Int64([]string{"-cpu-quota"}, 0, "Limit CPU CFS (Completely Fair Scheduler) quota")
|
| 87 | 87 |
flCpusetCpus = cmd.String([]string{"#-cpuset", "-cpuset-cpus"}, "", "CPUs in which to allow execution (0-3, 0,1)")
|
| 88 | 88 |
flCpusetMems = cmd.String([]string{"-cpuset-mems"}, "", "MEMs in which to allow execution (0-3, 0,1)")
|
| 89 |
- flBlkioWeight = cmd.Int64([]string{"-blkio-weight"}, 0, "Block IO (relative weight), between 10 and 1000")
|
|
| 89 |
+ flBlkioWeight = cmd.Uint16([]string{"-blkio-weight"}, 0, "Block IO (relative weight), between 10 and 1000")
|
|
| 90 | 90 |
flSwappiness = cmd.Int64([]string{"-memory-swappiness"}, -1, "Tuning container memory swappiness (0 to 100)")
|
| 91 | 91 |
flNetMode = cmd.String([]string{"-net"}, "default", "Set the Network mode for the container")
|
| 92 | 92 |
flMacAddress = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
|
| ... | ... |
@@ -32,33 +32,41 @@ func (s *BlkioGroup) Apply(d *data) error {
|
| 32 | 32 |
|
| 33 | 33 |
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
| 34 | 34 |
if cgroup.BlkioWeight != 0 {
|
| 35 |
- if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil {
|
|
| 35 |
+ if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.BlkioWeight), 10)); err != nil {
|
|
| 36 | 36 |
return err |
| 37 | 37 |
} |
| 38 | 38 |
} |
| 39 | 39 |
|
| 40 |
- if cgroup.BlkioWeightDevice != "" {
|
|
| 41 |
- if err := writeFile(path, "blkio.weight_device", cgroup.BlkioWeightDevice); err != nil {
|
|
| 40 |
+ if cgroup.BlkioLeafWeight != 0 {
|
|
| 41 |
+ if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.BlkioLeafWeight), 10)); err != nil {
|
|
| 42 | 42 |
return err |
| 43 | 43 |
} |
| 44 | 44 |
} |
| 45 |
- if cgroup.BlkioThrottleReadBpsDevice != "" {
|
|
| 46 |
- if err := writeFile(path, "blkio.throttle.read_bps_device", cgroup.BlkioThrottleReadBpsDevice); err != nil {
|
|
| 45 |
+ for _, wd := range cgroup.BlkioWeightDevice {
|
|
| 46 |
+ if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
|
| 47 |
+ return err |
|
| 48 |
+ } |
|
| 49 |
+ if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
|
| 50 |
+ return err |
|
| 51 |
+ } |
|
| 52 |
+ } |
|
| 53 |
+ for _, td := range cgroup.BlkioThrottleReadBpsDevice {
|
|
| 54 |
+ if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
|
| 47 | 55 |
return err |
| 48 | 56 |
} |
| 49 | 57 |
} |
| 50 |
- if cgroup.BlkioThrottleWriteBpsDevice != "" {
|
|
| 51 |
- if err := writeFile(path, "blkio.throttle.write_bps_device", cgroup.BlkioThrottleWriteBpsDevice); err != nil {
|
|
| 58 |
+ for _, td := range cgroup.BlkioThrottleWriteBpsDevice {
|
|
| 59 |
+ if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
|
| 52 | 60 |
return err |
| 53 | 61 |
} |
| 54 | 62 |
} |
| 55 |
- if cgroup.BlkioThrottleReadIOpsDevice != "" {
|
|
| 56 |
- if err := writeFile(path, "blkio.throttle.read_iops_device", cgroup.BlkioThrottleReadIOpsDevice); err != nil {
|
|
| 63 |
+ for _, td := range cgroup.BlkioThrottleReadIOPSDevice {
|
|
| 64 |
+ if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
|
| 57 | 65 |
return err |
| 58 | 66 |
} |
| 59 | 67 |
} |
| 60 |
- if cgroup.BlkioThrottleWriteIOpsDevice != "" {
|
|
| 61 |
- if err := writeFile(path, "blkio.throttle.write_iops_device", cgroup.BlkioThrottleWriteIOpsDevice); err != nil {
|
|
| 68 |
+ for _, td := range cgroup.BlkioThrottleWriteIOPSDevice {
|
|
| 69 |
+ if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
|
| 62 | 70 |
return err |
| 63 | 71 |
} |
| 64 | 72 |
} |
| ... | ... |
@@ -29,7 +29,7 @@ func (s *HugetlbGroup) Apply(d *data) error {
|
| 29 | 29 |
|
| 30 | 30 |
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
| 31 | 31 |
for _, hugetlb := range cgroup.HugetlbLimit {
|
| 32 |
- if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil {
|
|
| 32 |
+ if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
|
| 33 | 33 |
return err |
| 34 | 34 |
} |
| 35 | 35 |
} |
| 36 | 36 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,25 @@ |
| 0 |
+package fs |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "github.com/opencontainers/runc/libcontainer/cgroups" |
|
| 4 |
+ "github.com/opencontainers/runc/libcontainer/configs" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
+type NameGroup struct {
|
|
| 8 |
+} |
|
| 9 |
+ |
|
| 10 |
+func (s *NameGroup) Apply(d *data) error {
|
|
| 11 |
+ return nil |
|
| 12 |
+} |
|
| 13 |
+ |
|
| 14 |
+func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|
| 15 |
+ return nil |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+func (s *NameGroup) Remove(d *data) error {
|
|
| 19 |
+ return nil |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|
| 23 |
+ return nil |
|
| 24 |
+} |
| ... | ... |
@@ -34,17 +34,18 @@ type subsystem interface {
|
| 34 | 34 |
} |
| 35 | 35 |
|
| 36 | 36 |
var subsystems = map[string]subsystem{
|
| 37 |
- "devices": &fs.DevicesGroup{},
|
|
| 38 |
- "memory": &fs.MemoryGroup{},
|
|
| 39 |
- "cpu": &fs.CpuGroup{},
|
|
| 40 |
- "cpuset": &fs.CpusetGroup{},
|
|
| 41 |
- "cpuacct": &fs.CpuacctGroup{},
|
|
| 42 |
- "blkio": &fs.BlkioGroup{},
|
|
| 43 |
- "hugetlb": &fs.HugetlbGroup{},
|
|
| 44 |
- "perf_event": &fs.PerfEventGroup{},
|
|
| 45 |
- "freezer": &fs.FreezerGroup{},
|
|
| 46 |
- "net_prio": &fs.NetPrioGroup{},
|
|
| 47 |
- "net_cls": &fs.NetClsGroup{},
|
|
| 37 |
+ "devices": &fs.DevicesGroup{},
|
|
| 38 |
+ "memory": &fs.MemoryGroup{},
|
|
| 39 |
+ "cpu": &fs.CpuGroup{},
|
|
| 40 |
+ "cpuset": &fs.CpusetGroup{},
|
|
| 41 |
+ "cpuacct": &fs.CpuacctGroup{},
|
|
| 42 |
+ "blkio": &fs.BlkioGroup{},
|
|
| 43 |
+ "hugetlb": &fs.HugetlbGroup{},
|
|
| 44 |
+ "perf_event": &fs.PerfEventGroup{},
|
|
| 45 |
+ "freezer": &fs.FreezerGroup{},
|
|
| 46 |
+ "net_prio": &fs.NetPrioGroup{},
|
|
| 47 |
+ "net_cls": &fs.NetClsGroup{},
|
|
| 48 |
+ "name=systemd": &fs.NameGroup{},
|
|
| 48 | 49 |
} |
| 49 | 50 |
|
| 50 | 51 |
const ( |
| ... | ... |
@@ -176,7 +177,6 @@ func (m *Manager) Apply(pid int) error {
|
| 176 | 176 |
properties = append(properties, |
| 177 | 177 |
newProp("MemoryLimit", uint64(c.Memory)))
|
| 178 | 178 |
} |
| 179 |
- // TODO: MemoryReservation and MemorySwap not available in systemd |
|
| 180 | 179 |
|
| 181 | 180 |
if c.CpuShares != 0 {
|
| 182 | 181 |
properties = append(properties, |
| ... | ... |
@@ -212,6 +212,7 @@ func (m *Manager) Apply(pid int) error {
|
| 212 | 212 |
return err |
| 213 | 213 |
} |
| 214 | 214 |
|
| 215 |
+ // TODO: MemoryReservation and MemorySwap not available in systemd |
|
| 215 | 216 |
if err := joinMemory(c, pid); err != nil {
|
| 216 | 217 |
return err |
| 217 | 218 |
} |
| ... | ... |
@@ -236,6 +237,10 @@ func (m *Manager) Apply(pid int) error {
|
| 236 | 236 |
if err := joinHugetlb(c, pid); err != nil {
|
| 237 | 237 |
return err |
| 238 | 238 |
} |
| 239 |
+ |
|
| 240 |
+ if err := joinPerfEvent(c, pid); err != nil {
|
|
| 241 |
+ return err |
|
| 242 |
+ } |
|
| 239 | 243 |
// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem |
| 240 | 244 |
// using that (at least on systemd 208, see https://github.com/docker/libcontainer/pull/354), |
| 241 | 245 |
// so use fs work around for now. |
| ... | ... |
@@ -505,6 +510,12 @@ func joinMemory(c *configs.Cgroup, pid int) error {
|
| 505 | 505 |
return err |
| 506 | 506 |
} |
| 507 | 507 |
} |
| 508 |
+ if c.MemoryReservation > 0 {
|
|
| 509 |
+ err = writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(c.MemoryReservation, 10)) |
|
| 510 |
+ if err != nil {
|
|
| 511 |
+ return err |
|
| 512 |
+ } |
|
| 513 |
+ } |
|
| 508 | 514 |
if c.OomKillDisable {
|
| 509 | 515 |
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
|
| 510 | 516 |
return err |
| ... | ... |
@@ -547,28 +558,37 @@ func joinBlkio(c *configs.Cgroup, pid int) error {
|
| 547 | 547 |
if err != nil {
|
| 548 | 548 |
return err |
| 549 | 549 |
} |
| 550 |
- if c.BlkioWeightDevice != "" {
|
|
| 551 |
- if err := writeFile(path, "blkio.weight_device", c.BlkioWeightDevice); err != nil {
|
|
| 550 |
+ // systemd doesn't directly support this in the dbus properties |
|
| 551 |
+ if c.BlkioLeafWeight != 0 {
|
|
| 552 |
+ if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(c.BlkioLeafWeight), 10)); err != nil {
|
|
| 553 |
+ return err |
|
| 554 |
+ } |
|
| 555 |
+ } |
|
| 556 |
+ for _, wd := range c.BlkioWeightDevice {
|
|
| 557 |
+ if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
|
| 558 |
+ return err |
|
| 559 |
+ } |
|
| 560 |
+ if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
|
| 552 | 561 |
return err |
| 553 | 562 |
} |
| 554 | 563 |
} |
| 555 |
- if c.BlkioThrottleReadBpsDevice != "" {
|
|
| 556 |
- if err := writeFile(path, "blkio.throttle.read_bps_device", c.BlkioThrottleReadBpsDevice); err != nil {
|
|
| 564 |
+ for _, td := range c.BlkioThrottleReadBpsDevice {
|
|
| 565 |
+ if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
|
| 557 | 566 |
return err |
| 558 | 567 |
} |
| 559 | 568 |
} |
| 560 |
- if c.BlkioThrottleWriteBpsDevice != "" {
|
|
| 561 |
- if err := writeFile(path, "blkio.throttle.write_bps_device", c.BlkioThrottleWriteBpsDevice); err != nil {
|
|
| 569 |
+ for _, td := range c.BlkioThrottleWriteBpsDevice {
|
|
| 570 |
+ if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
|
| 562 | 571 |
return err |
| 563 | 572 |
} |
| 564 | 573 |
} |
| 565 |
- if c.BlkioThrottleReadIOpsDevice != "" {
|
|
| 566 |
- if err := writeFile(path, "blkio.throttle.read_iops_device", c.BlkioThrottleReadIOpsDevice); err != nil {
|
|
| 574 |
+ for _, td := range c.BlkioThrottleReadIOPSDevice {
|
|
| 575 |
+ if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
|
| 567 | 576 |
return err |
| 568 | 577 |
} |
| 569 | 578 |
} |
| 570 |
- if c.BlkioThrottleWriteIOpsDevice != "" {
|
|
| 571 |
- if err := writeFile(path, "blkio.throttle.write_iops_device", c.BlkioThrottleWriteIOpsDevice); err != nil {
|
|
| 579 |
+ for _, td := range c.BlkioThrottleWriteIOPSDevice {
|
|
| 580 |
+ if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
|
| 572 | 581 |
return err |
| 573 | 582 |
} |
| 574 | 583 |
} |
| ... | ... |
@@ -585,3 +605,13 @@ func joinHugetlb(c *configs.Cgroup, pid int) error {
|
| 585 | 585 |
hugetlb := subsystems["hugetlb"] |
| 586 | 586 |
return hugetlb.Set(path, c) |
| 587 | 587 |
} |
| 588 |
+ |
|
| 589 |
+func joinPerfEvent(c *configs.Cgroup, pid int) error {
|
|
| 590 |
+ path, err := join(c, "perf_event", pid) |
|
| 591 |
+ if err != nil && !cgroups.IsNotFound(err) {
|
|
| 592 |
+ return err |
|
| 593 |
+ } |
|
| 594 |
+ |
|
| 595 |
+ perfEvent := subsystems["perf_event"] |
|
| 596 |
+ return perfEvent.Set(path, c) |
|
| 597 |
+} |
| 588 | 598 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,61 @@ |
| 0 |
+package configs |
|
| 1 |
+ |
|
| 2 |
+import "fmt" |
|
| 3 |
+ |
|
| 4 |
// blockIODevice identifies a block device by its major:minor pair, the key
// format used by the blkio cgroup files.
type blockIODevice struct {
	// Major is the device's major number
	Major int64 `json:"major"`
	// Minor is the device's minor number
	Minor int64 `json:"minor"`
}

// formatDeviceRule renders a "major:minor value" line for dev.
func formatDeviceRule(dev blockIODevice, value uint64) string {
	return fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, value)
}

// WeightDevice pairs a block device with its `major:minor weight` and
// `major:minor leaf_weight` settings.
type WeightDevice struct {
	blockIODevice
	// Weight is the bandwidth rate for the device, range is from 10 to 1000
	Weight uint16 `json:"weight"`
	// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
	LeafWeight uint16 `json:"leafWeight"`
}

// NewWeightDevice builds a WeightDevice for the device major:minor with the
// given weight and leaf weight. No range validation is performed.
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
	return &WeightDevice{
		blockIODevice: blockIODevice{Major: major, Minor: minor},
		Weight:        weight,
		LeafWeight:    leafWeight,
	}
}

// WeightString returns "major:minor weight", the line format written to the
// cgroup's per-device weight file.
func (wd *WeightDevice) WeightString() string {
	return formatDeviceRule(wd.blockIODevice, uint64(wd.Weight))
}

// LeafWeightString returns "major:minor leaf_weight", the line format written
// to the cgroup's per-device leaf weight file.
func (wd *WeightDevice) LeafWeightString() string {
	return formatDeviceRule(wd.blockIODevice, uint64(wd.LeafWeight))
}

// ThrottleDevice pairs a block device with a `major:minor rate_per_second`
// limit.
type ThrottleDevice struct {
	blockIODevice
	// Rate is the IO rate limit per cgroup per device
	Rate uint64 `json:"rate"`
}

// NewThrottleDevice builds a ThrottleDevice for the device major:minor with
// the given rate.
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
	return &ThrottleDevice{
		blockIODevice: blockIODevice{Major: major, Minor: minor},
		Rate:          rate,
	}
}

// String returns "major:minor rate", the line format written to the cgroup's
// throttle files.
func (td *ThrottleDevice) String() string {
	return formatDeviceRule(td.blockIODevice, td.Rate)
}
| ... | ... |
@@ -57,23 +57,26 @@ type Cgroup struct {
|
| 57 | 57 |
// MEM to use |
| 58 | 58 |
CpusetMems string `json:"cpuset_mems"` |
| 59 | 59 |
|
| 60 |
+ // Specifies per cgroup weight, range is from 10 to 1000. |
|
| 61 |
+ BlkioWeight uint16 `json:"blkio_weight"` |
|
| 62 |
+ |
|
| 63 |
+ // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only |
|
| 64 |
+ BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` |
|
| 65 |
+ |
|
| 66 |
+ // Weight per cgroup per device, can override BlkioWeight. |
|
| 67 |
+ BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` |
|
| 68 |
+ |
|
| 60 | 69 |
// IO read rate limit per cgroup per device, bytes per second. |
| 61 |
- BlkioThrottleReadBpsDevice string `json:"blkio_throttle_read_bps_device"` |
|
| 70 |
+ BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` |
|
| 62 | 71 |
|
| 63 | 72 |
// IO write rate limit per cgroup per device, bytes per second.
| 64 |
- BlkioThrottleWriteBpsDevice string `json:"blkio_throttle_write_bps_device"` |
|
| 73 |
+ BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` |
|
| 65 | 74 |
|
| 66 | 75 |
// IO read rate limit per cgroup per device, IO per second. |
| 67 |
- BlkioThrottleReadIOpsDevice string `json:"blkio_throttle_read_iops_device"` |
|
| 76 |
+ BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` |
|
| 68 | 77 |
|
| 69 | 78 |
// IO write rate limit per cgroup per device, IO per second. |
| 70 |
- BlkioThrottleWriteIOpsDevice string `json:"blkio_throttle_write_iops_device"` |
|
| 71 |
- |
|
| 72 |
- // Specifies per cgroup weight, range is from 10 to 1000. |
|
| 73 |
- BlkioWeight int64 `json:"blkio_weight"` |
|
| 74 |
- |
|
| 75 |
- // Weight per cgroup per device, can override BlkioWeight. |
|
| 76 |
- BlkioWeightDevice string `json:"blkio_weight_device"` |
|
| 79 |
+ BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` |
|
| 77 | 80 |
|
| 78 | 81 |
// set the freeze value for the process |
| 79 | 82 |
Freezer FreezerState `json:"freezer"` |
| ... | ... |
@@ -92,8 +92,8 @@ type Config struct {
|
| 92 | 92 |
// bind mounts are writable.
| 93 | 93 |
Readonlyfs bool `json:"readonlyfs"` |
| 94 | 94 |
|
| 95 |
- // Privatefs will mount the container's rootfs as private where mount points from the parent will not propogate |
|
| 96 |
- Privatefs bool `json:"privatefs"` |
|
| 95 |
+ // Specifies the mount propagation flags to be applied to /. |
|
| 96 |
+ RootPropagation int `json:"rootPropagation"` |
|
| 97 | 97 |
|
| 98 | 98 |
// Mounts specify additional source and destination paths that will be mounted inside the container's |
| 99 | 99 |
// rootfs and mount namespace if specified |
| ... | ... |
@@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) {
|
| 21 | 21 |
return 0, nil |
| 22 | 22 |
} |
| 23 | 23 |
|
| 24 |
-// Gets the root uid for the process on host which could be non-zero |
|
| 24 |
+// Gets the root gid for the process on host which could be non-zero |
|
| 25 | 25 |
// when user namespaces are enabled. |
| 26 | 26 |
func (c Config) HostGID() (int, error) {
|
| 27 | 27 |
if c.Namespaces.Contains(NEWUSER) {
|
| ... | ... |
@@ -30,11 +30,11 @@ func (c Config) HostGID() (int, error) {
|
| 30 | 30 |
} |
| 31 | 31 |
id, found := c.hostIDFromMapping(0, c.GidMappings) |
| 32 | 32 |
if !found {
|
| 33 |
- return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
|
|
| 33 |
+ return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
|
|
| 34 | 34 |
} |
| 35 | 35 |
return id, nil |
| 36 | 36 |
} |
| 37 |
- // Return default root uid 0 |
|
| 37 |
+ // Return default root gid 0 |
|
| 38 | 38 |
return 0, nil |
| 39 | 39 |
} |
| 40 | 40 |
|
| ... | ... |
@@ -23,7 +23,7 @@ struct clone_arg {
|
| 23 | 23 |
* Reserve some space for clone() to locate arguments |
| 24 | 24 |
* and retcode in this place |
| 25 | 25 |
*/ |
| 26 |
- char stack[4096] __attribute__ ((aligned(8))); |
|
| 26 |
+ char stack[4096] __attribute__ ((aligned(16))); |
|
| 27 | 27 |
char stack_ptr[0]; |
| 28 | 28 |
jmp_buf *env; |
| 29 | 29 |
}; |
| ... | ... |
@@ -13,6 +13,7 @@ import ( |
| 13 | 13 |
"syscall" |
| 14 | 14 |
"time" |
| 15 | 15 |
|
| 16 |
+ "github.com/docker/docker/pkg/mount" |
|
| 16 | 17 |
"github.com/docker/docker/pkg/symlink" |
| 17 | 18 |
"github.com/opencontainers/runc/libcontainer/cgroups" |
| 18 | 19 |
"github.com/opencontainers/runc/libcontainer/configs" |
| ... | ... |
@@ -341,7 +342,7 @@ func setupDevSymlinks(rootfs string) error {
|
| 341 | 341 |
// symlinks are resolved locally. |
| 342 | 342 |
func reOpenDevNull() error {
|
| 343 | 343 |
var stat, devNullStat syscall.Stat_t |
| 344 |
- file, err := os.Open("/dev/null")
|
|
| 344 |
+ file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
|
| 345 | 345 |
if err != nil {
|
| 346 | 346 |
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
| 347 | 347 |
} |
| ... | ... |
@@ -420,14 +421,89 @@ func mknodDevice(dest string, node *configs.Device) error {
|
| 420 | 420 |
return syscall.Chown(dest, int(node.Uid), int(node.Gid)) |
| 421 | 421 |
} |
| 422 | 422 |
|
| 423 |
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
|
|
| 424 |
+ for _, m := range mountinfo {
|
|
| 425 |
+ if m.Mountpoint == dir {
|
|
| 426 |
+ return m |
|
| 427 |
+ } |
|
| 428 |
+ } |
|
| 429 |
+ return nil |
|
| 430 |
+} |
|
| 431 |
+ |
|
| 432 |
+// Get the parent mount point of directory passed in as argument. Also return |
|
| 433 |
+// optional fields. |
|
| 434 |
+func getParentMount(rootfs string) (string, string, error) {
|
|
| 435 |
+ var path string |
|
| 436 |
+ |
|
| 437 |
+ mountinfos, err := mount.GetMounts() |
|
| 438 |
+ if err != nil {
|
|
| 439 |
+ return "", "", err |
|
| 440 |
+ } |
|
| 441 |
+ |
|
| 442 |
+ mountinfo := getMountInfo(mountinfos, rootfs) |
|
| 443 |
+ if mountinfo != nil {
|
|
| 444 |
+ return rootfs, mountinfo.Optional, nil |
|
| 445 |
+ } |
|
| 446 |
+ |
|
| 447 |
+ path = rootfs |
|
| 448 |
+ for {
|
|
| 449 |
+ path = filepath.Dir(path) |
|
| 450 |
+ |
|
| 451 |
+ mountinfo = getMountInfo(mountinfos, path) |
|
| 452 |
+ if mountinfo != nil {
|
|
| 453 |
+ return path, mountinfo.Optional, nil |
|
| 454 |
+ } |
|
| 455 |
+ |
|
| 456 |
+ if path == "/" {
|
|
| 457 |
+ break |
|
| 458 |
+ } |
|
| 459 |
+ } |
|
| 460 |
+ |
|
| 461 |
+ // If we are here, we did not find parent mount. Something is wrong. |
|
| 462 |
+ return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
|
|
| 463 |
+} |
|
| 464 |
+ |
|
| 465 |
+// Make parent mount private if it was shared |
|
| 466 |
+func rootfsParentMountPrivate(config *configs.Config) error {
|
|
| 467 |
+ sharedMount := false |
|
| 468 |
+ |
|
| 469 |
+ parentMount, optionalOpts, err := getParentMount(config.Rootfs) |
|
| 470 |
+ if err != nil {
|
|
| 471 |
+ return err |
|
| 472 |
+ } |
|
| 473 |
+ |
|
| 474 |
+ optsSplit := strings.Split(optionalOpts, " ") |
|
| 475 |
+ for _, opt := range optsSplit {
|
|
| 476 |
+ if strings.HasPrefix(opt, "shared:") {
|
|
| 477 |
+ sharedMount = true |
|
| 478 |
+ break |
|
| 479 |
+ } |
|
| 480 |
+ } |
|
| 481 |
+ |
|
| 482 |
+ // Make parent mount PRIVATE if it was shared. It is needed for two |
|
| 483 |
+ // reasons. First of all pivot_root() will fail if parent mount is |
|
| 484 |
+ // shared. Secondly when we bind mount rootfs it will propagate to |
|
| 485 |
+ // parent namespace and we don't want that to happen. |
|
| 486 |
+ if sharedMount {
|
|
| 487 |
+ return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
|
|
| 488 |
+ } |
|
| 489 |
+ |
|
| 490 |
+ return nil |
|
| 491 |
+} |
|
| 492 |
+ |
|
| 423 | 493 |
func prepareRoot(config *configs.Config) error {
|
| 424 | 494 |
flag := syscall.MS_SLAVE | syscall.MS_REC |
| 425 |
- if config.Privatefs {
|
|
| 426 |
- flag = syscall.MS_PRIVATE | syscall.MS_REC |
|
| 495 |
+ if config.RootPropagation != 0 {
|
|
| 496 |
+ flag = config.RootPropagation |
|
| 427 | 497 |
} |
| 428 | 498 |
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
| 429 | 499 |
return err |
| 430 | 500 |
} |
| 501 |
+ |
|
| 502 |
+ if err := rootfsParentMountPrivate(config); err != nil {
|
|
| 503 |
+ return err |
|
| 504 |
+ } |
|
| 505 |
+ |
|
| 431 | 506 |
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "") |
| 432 | 507 |
} |
| 433 | 508 |
|
| ... | ... |
@@ -469,6 +545,13 @@ func pivotRoot(rootfs, pivotBaseDir string) error {
|
| 469 | 469 |
} |
| 470 | 470 |
// path to pivot dir now changed, update |
| 471 | 471 |
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) |
| 472 |
+ |
|
| 473 |
+ // Make pivotDir rprivate to make sure any of the unmounts don't |
|
| 474 |
+ // propagate to parent. |
|
| 475 |
+ if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
|
|
| 476 |
+ return err |
|
| 477 |
+ } |
|
| 478 |
+ |
|
| 472 | 479 |
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
| 473 | 480 |
return fmt.Errorf("unmount pivot_root dir %s", err)
|
| 474 | 481 |
} |
| ... | ... |
@@ -349,21 +349,26 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( |
| 349 | 349 |
return user, nil |
| 350 | 350 |
} |
| 351 | 351 |
|
| 352 |
-// GetAdditionalGroups looks up a list of groups by name or group id against |
|
| 353 |
-// against the given /etc/group formatted data. If a group name cannot be found, |
|
| 354 |
-// an error will be returned. If a group id cannot be found, it will be returned |
|
| 355 |
-// as-is. |
|
| 352 |
+// GetAdditionalGroups looks up a list of groups by name or group id |
|
| 353 |
+// against the given /etc/group formatted data. If a group name cannot |
|
| 354 |
+// be found, an error will be returned. If a group id cannot be found, |
|
| 355 |
+// or the given group data is nil, the id will be returned as-is |
|
| 356 |
+// provided it is in the legal range. |
|
| 356 | 357 |
func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) {
|
| 357 |
- groups, err := ParseGroupFilter(group, func(g Group) bool {
|
|
| 358 |
- for _, ag := range additionalGroups {
|
|
| 359 |
- if g.Name == ag || strconv.Itoa(g.Gid) == ag {
|
|
| 360 |
- return true |
|
| 358 |
+ var groups = []Group{}
|
|
| 359 |
+ if group != nil {
|
|
| 360 |
+ var err error |
|
| 361 |
+ groups, err = ParseGroupFilter(group, func(g Group) bool {
|
|
| 362 |
+ for _, ag := range additionalGroups {
|
|
| 363 |
+ if g.Name == ag || strconv.Itoa(g.Gid) == ag {
|
|
| 364 |
+ return true |
|
| 365 |
+ } |
|
| 361 | 366 |
} |
| 367 |
+ return false |
|
| 368 |
+ }) |
|
| 369 |
+ if err != nil {
|
|
| 370 |
+ return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
|
|
| 362 | 371 |
} |
| 363 |
- return false |
|
| 364 |
- }) |
|
| 365 |
- if err != nil {
|
|
| 366 |
- return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
|
|
| 367 | 372 |
} |
| 368 | 373 |
|
| 369 | 374 |
gidMap := make(map[int]struct{})
|
| ... | ... |
@@ -401,13 +406,13 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err |
| 401 | 401 |
return gids, nil |
| 402 | 402 |
} |
| 403 | 403 |
|
| 404 |
-// Wrapper around GetAdditionalGroups that opens the groupPath given and gives |
|
| 405 |
-// it as an argument to GetAdditionalGroups. |
|
| 404 |
+// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups |
|
| 405 |
+// that opens the groupPath given and gives it as an argument to |
|
| 406 |
+// GetAdditionalGroups. |
|
| 406 | 407 |
func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
|
| 407 | 408 |
group, err := os.Open(groupPath) |
| 408 |
- if err != nil {
|
|
| 409 |
- return nil, fmt.Errorf("Failed to open group file: %v", err)
|
|
| 409 |
+ if err == nil {
|
|
| 410 |
+ defer group.Close() |
|
| 410 | 411 |
} |
| 411 |
- defer group.Close() |
|
| 412 | 412 |
return GetAdditionalGroups(additionalGroups, group) |
| 413 | 413 |
} |