Browse code

bump libcontainer to 902c012e85cdae6bb68d8c7a0df69a42f818ce96

Signed-off-by: Antonio Murdaca <amurdaca@redhat.com>

Antonio Murdaca authored on 2015/10/07 00:21:43
Showing 18 changed files
... ...
@@ -150,7 +150,7 @@ type Resources struct {
150 150
 	CpusetMems        string           `json:"cpuset_mems"`
151 151
 	CPUPeriod         int64            `json:"cpu_period"`
152 152
 	CPUQuota          int64            `json:"cpu_quota"`
153
-	BlkioWeight       int64            `json:"blkio_weight"`
153
+	BlkioWeight       uint16           `json:"blkio_weight"`
154 154
 	Rlimits           []*ulimit.Rlimit `json:"rlimits"`
155 155
 	OomKillDisable    bool             `json:"oom_kill_disable"`
156 156
 	MemorySwappiness  int64            `json:"memory_swappiness"`
... ...
@@ -12,6 +12,7 @@ import (
12 12
 	"time"
13 13
 
14 14
 	"github.com/docker/docker/daemon/execdriver/native/template"
15
+	"github.com/docker/docker/pkg/mount"
15 16
 	"github.com/opencontainers/runc/libcontainer"
16 17
 	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
17 18
 	"github.com/opencontainers/runc/libcontainer/configs"
... ...
@@ -37,7 +38,7 @@ func InitContainer(c *Command) *configs.Config {
37 37
 	container.Devices = c.AutoCreatedDevices
38 38
 	container.Rootfs = c.Rootfs
39 39
 	container.Readonlyfs = c.ReadonlyRootfs
40
-	container.Privatefs = true
40
+	container.RootPropagation = mount.RPRIVATE
41 41
 
42 42
 	// check to see if we are running in ramdisk to disable pivot root
43 43
 	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
... ...
@@ -44,8 +44,8 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
44 44
 clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
45 45
 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
46 46
 
47
-clone git github.com/opencontainers/runc fba07bce72e72ce5b2dd618e4f67dd86ccb49c82 # libcontainer
48
-# libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
47
+clone git github.com/opencontainers/runc 902c012e85cdae6bb68d8c7a0df69a42f818ce96 # libcontainer
48
+# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json)
49 49
 clone git github.com/coreos/go-systemd v3
50 50
 clone git github.com/godbus/dbus v2
51 51
 clone git github.com/syndtr/gocapability 66ef2aa7a23ba682594e2b6f74cf40c0692b49fb
... ...
@@ -200,6 +200,24 @@ func (i *uint64Value) Get() interface{} { return uint64(*i) }
200 200
 
201 201
 func (i *uint64Value) String() string { return fmt.Sprintf("%v", *i) }
202 202
 
203
+// -- uint16 Value
204
+type uint16Value uint16
205
+
206
+func newUint16Value(val uint16, p *uint16) *uint16Value {
207
+	*p = val
208
+	return (*uint16Value)(p)
209
+}
210
+
211
+func (i *uint16Value) Set(s string) error {
212
+	v, err := strconv.ParseUint(s, 0, 16)
213
+	*i = uint16Value(v)
214
+	return err
215
+}
216
+
217
+func (i *uint16Value) Get() interface{} { return uint16(*i) }
218
+
219
+func (i *uint16Value) String() string { return fmt.Sprintf("%v", *i) }
220
+
203 221
 // -- string Value
204 222
 type stringValue string
205 223
 
... ...
@@ -757,6 +775,32 @@ func Uint64(names []string, value uint64, usage string) *uint64 {
757 757
 	return CommandLine.Uint64(names, value, usage)
758 758
 }
759 759
 
760
+// Uint16Var defines a uint16 flag with specified name, default value, and usage string.
761
+// The argument p points to a uint16 variable in which to store the value of the flag.
762
+func (fs *FlagSet) Uint16Var(p *uint16, names []string, value uint16, usage string) {
763
+	fs.Var(newUint16Value(value, p), names, usage)
764
+}
765
+
766
+// Uint16Var defines a uint16 flag with specified name, default value, and usage string.
767
+// The argument p points to a uint16 variable in which to store the value of the flag.
768
+func Uint16Var(p *uint16, names []string, value uint16, usage string) {
769
+	CommandLine.Var(newUint16Value(value, p), names, usage)
770
+}
771
+
772
+// Uint16 defines a uint16 flag with specified name, default value, and usage string.
773
+// The return value is the address of a uint16 variable that stores the value of the flag.
774
+func (fs *FlagSet) Uint16(names []string, value uint16, usage string) *uint16 {
775
+	p := new(uint16)
776
+	fs.Uint16Var(p, names, value, usage)
777
+	return p
778
+}
779
+
780
+// Uint16 defines a uint16 flag with specified name, default value, and usage string.
781
+// The return value is the address of a uint16 variable that stores the value of the flag.
782
+func Uint16(names []string, value uint16, usage string) *uint16 {
783
+	return CommandLine.Uint16(names, value, usage)
784
+}
785
+
760 786
 // StringVar defines a string flag with specified name, default value, and usage string.
761 787
 // The argument p points to a string variable in which to store the value of the flag.
762 788
 func (fs *FlagSet) StringVar(p *string, names []string, value string, usage string) {
... ...
@@ -226,7 +226,7 @@ type HostConfig struct {
226 226
 	CpusetCpus        string                // CpusetCpus 0-2, 0,1
227 227
 	CpusetMems        string                // CpusetMems 0-2, 0,1
228 228
 	CPUQuota          int64                 `json:"CpuQuota"` // CPU CFS (Completely Fair Scheduler) quota
229
-	BlkioWeight       int64                 // Block IO weight (relative weight vs. other containers)
229
+	BlkioWeight       uint16                // Block IO weight (relative weight vs. other containers)
230 230
 	OomKillDisable    bool                  // Whether to disable OOM Killer or not
231 231
 	MemorySwappiness  *int64                // Tuning container memory swappiness behaviour
232 232
 	Privileged        bool                  // Is the container in privileged mode
... ...
@@ -86,7 +86,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
86 86
 		flCPUQuota          = cmd.Int64([]string{"-cpu-quota"}, 0, "Limit CPU CFS (Completely Fair Scheduler) quota")
87 87
 		flCpusetCpus        = cmd.String([]string{"#-cpuset", "-cpuset-cpus"}, "", "CPUs in which to allow execution (0-3, 0,1)")
88 88
 		flCpusetMems        = cmd.String([]string{"-cpuset-mems"}, "", "MEMs in which to allow execution (0-3, 0,1)")
89
-		flBlkioWeight       = cmd.Int64([]string{"-blkio-weight"}, 0, "Block IO (relative weight), between 10 and 1000")
89
+		flBlkioWeight       = cmd.Uint16([]string{"-blkio-weight"}, 0, "Block IO (relative weight), between 10 and 1000")
90 90
 		flSwappiness        = cmd.Int64([]string{"-memory-swappiness"}, -1, "Tuning container memory swappiness (0 to 100)")
91 91
 		flNetMode           = cmd.String([]string{"-net"}, "default", "Set the Network mode for the container")
92 92
 		flMacAddress        = cmd.String([]string{"-mac-address"}, "", "Container MAC address (e.g. 92:d0:c6:0a:29:33)")
... ...
@@ -32,33 +32,41 @@ func (s *BlkioGroup) Apply(d *data) error {
32 32
 
33 33
 func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
34 34
 	if cgroup.BlkioWeight != 0 {
35
-		if err := writeFile(path, "blkio.weight", strconv.FormatInt(cgroup.BlkioWeight, 10)); err != nil {
35
+		if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.BlkioWeight), 10)); err != nil {
36 36
 			return err
37 37
 		}
38 38
 	}
39 39
 
40
-	if cgroup.BlkioWeightDevice != "" {
41
-		if err := writeFile(path, "blkio.weight_device", cgroup.BlkioWeightDevice); err != nil {
40
+	if cgroup.BlkioLeafWeight != 0 {
41
+		if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.BlkioLeafWeight), 10)); err != nil {
42 42
 			return err
43 43
 		}
44 44
 	}
45
-	if cgroup.BlkioThrottleReadBpsDevice != "" {
46
-		if err := writeFile(path, "blkio.throttle.read_bps_device", cgroup.BlkioThrottleReadBpsDevice); err != nil {
45
+	for _, wd := range cgroup.BlkioWeightDevice {
46
+		if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
47
+			return err
48
+		}
49
+		if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
50
+			return err
51
+		}
52
+	}
53
+	for _, td := range cgroup.BlkioThrottleReadBpsDevice {
54
+		if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
47 55
 			return err
48 56
 		}
49 57
 	}
50
-	if cgroup.BlkioThrottleWriteBpsDevice != "" {
51
-		if err := writeFile(path, "blkio.throttle.write_bps_device", cgroup.BlkioThrottleWriteBpsDevice); err != nil {
58
+	for _, td := range cgroup.BlkioThrottleWriteBpsDevice {
59
+		if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
52 60
 			return err
53 61
 		}
54 62
 	}
55
-	if cgroup.BlkioThrottleReadIOpsDevice != "" {
56
-		if err := writeFile(path, "blkio.throttle.read_iops_device", cgroup.BlkioThrottleReadIOpsDevice); err != nil {
63
+	for _, td := range cgroup.BlkioThrottleReadIOPSDevice {
64
+		if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
57 65
 			return err
58 66
 		}
59 67
 	}
60
-	if cgroup.BlkioThrottleWriteIOpsDevice != "" {
61
-		if err := writeFile(path, "blkio.throttle.write_iops_device", cgroup.BlkioThrottleWriteIOpsDevice); err != nil {
68
+	for _, td := range cgroup.BlkioThrottleWriteIOPSDevice {
69
+		if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
62 70
 			return err
63 71
 		}
64 72
 	}
... ...
@@ -29,7 +29,7 @@ func (s *HugetlbGroup) Apply(d *data) error {
29 29
 
30 30
 func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
31 31
 	for _, hugetlb := range cgroup.HugetlbLimit {
32
-		if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil {
32
+		if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
33 33
 			return err
34 34
 		}
35 35
 	}
36 36
new file mode 100644
... ...
@@ -0,0 +1,25 @@
0
+package fs
1
+
2
+import (
3
+	"github.com/opencontainers/runc/libcontainer/cgroups"
4
+	"github.com/opencontainers/runc/libcontainer/configs"
5
+)
6
+
7
+type NameGroup struct {
8
+}
9
+
10
+func (s *NameGroup) Apply(d *data) error {
11
+	return nil
12
+}
13
+
14
+func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
15
+	return nil
16
+}
17
+
18
+func (s *NameGroup) Remove(d *data) error {
19
+	return nil
20
+}
21
+
22
+func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
23
+	return nil
24
+}
... ...
@@ -34,17 +34,18 @@ type subsystem interface {
34 34
 }
35 35
 
36 36
 var subsystems = map[string]subsystem{
37
-	"devices":    &fs.DevicesGroup{},
38
-	"memory":     &fs.MemoryGroup{},
39
-	"cpu":        &fs.CpuGroup{},
40
-	"cpuset":     &fs.CpusetGroup{},
41
-	"cpuacct":    &fs.CpuacctGroup{},
42
-	"blkio":      &fs.BlkioGroup{},
43
-	"hugetlb":    &fs.HugetlbGroup{},
44
-	"perf_event": &fs.PerfEventGroup{},
45
-	"freezer":    &fs.FreezerGroup{},
46
-	"net_prio":   &fs.NetPrioGroup{},
47
-	"net_cls":    &fs.NetClsGroup{},
37
+	"devices":      &fs.DevicesGroup{},
38
+	"memory":       &fs.MemoryGroup{},
39
+	"cpu":          &fs.CpuGroup{},
40
+	"cpuset":       &fs.CpusetGroup{},
41
+	"cpuacct":      &fs.CpuacctGroup{},
42
+	"blkio":        &fs.BlkioGroup{},
43
+	"hugetlb":      &fs.HugetlbGroup{},
44
+	"perf_event":   &fs.PerfEventGroup{},
45
+	"freezer":      &fs.FreezerGroup{},
46
+	"net_prio":     &fs.NetPrioGroup{},
47
+	"net_cls":      &fs.NetClsGroup{},
48
+	"name=systemd": &fs.NameGroup{},
48 49
 }
49 50
 
50 51
 const (
... ...
@@ -176,7 +177,6 @@ func (m *Manager) Apply(pid int) error {
176 176
 		properties = append(properties,
177 177
 			newProp("MemoryLimit", uint64(c.Memory)))
178 178
 	}
179
-	// TODO: MemoryReservation and MemorySwap not available in systemd
180 179
 
181 180
 	if c.CpuShares != 0 {
182 181
 		properties = append(properties,
... ...
@@ -212,6 +212,7 @@ func (m *Manager) Apply(pid int) error {
212 212
 		return err
213 213
 	}
214 214
 
215
+	// TODO: MemoryReservation and MemorySwap not available in systemd
215 216
 	if err := joinMemory(c, pid); err != nil {
216 217
 		return err
217 218
 	}
... ...
@@ -236,6 +237,10 @@ func (m *Manager) Apply(pid int) error {
236 236
 	if err := joinHugetlb(c, pid); err != nil {
237 237
 		return err
238 238
 	}
239
+
240
+	if err := joinPerfEvent(c, pid); err != nil {
241
+		return err
242
+	}
239 243
 	// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
240 244
 	// using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354),
241 245
 	// so use fs work around for now.
... ...
@@ -505,6 +510,12 @@ func joinMemory(c *configs.Cgroup, pid int) error {
505 505
 			return err
506 506
 		}
507 507
 	}
508
+	if c.MemoryReservation > 0 {
509
+		err = writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(c.MemoryReservation, 10))
510
+		if err != nil {
511
+			return err
512
+		}
513
+	}
508 514
 	if c.OomKillDisable {
509 515
 		if err := writeFile(path, "memory.oom_control", "1"); err != nil {
510 516
 			return err
... ...
@@ -547,28 +558,37 @@ func joinBlkio(c *configs.Cgroup, pid int) error {
547 547
 	if err != nil {
548 548
 		return err
549 549
 	}
550
-	if c.BlkioWeightDevice != "" {
551
-		if err := writeFile(path, "blkio.weight_device", c.BlkioWeightDevice); err != nil {
550
+	// systemd doesn't directly support this in the dbus properties
551
+	if c.BlkioLeafWeight != 0 {
552
+		if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(c.BlkioLeafWeight), 10)); err != nil {
553
+			return err
554
+		}
555
+	}
556
+	for _, wd := range c.BlkioWeightDevice {
557
+		if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
558
+			return err
559
+		}
560
+		if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
552 561
 			return err
553 562
 		}
554 563
 	}
555
-	if c.BlkioThrottleReadBpsDevice != "" {
556
-		if err := writeFile(path, "blkio.throttle.read_bps_device", c.BlkioThrottleReadBpsDevice); err != nil {
564
+	for _, td := range c.BlkioThrottleReadBpsDevice {
565
+		if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
557 566
 			return err
558 567
 		}
559 568
 	}
560
-	if c.BlkioThrottleWriteBpsDevice != "" {
561
-		if err := writeFile(path, "blkio.throttle.write_bps_device", c.BlkioThrottleWriteBpsDevice); err != nil {
569
+	for _, td := range c.BlkioThrottleWriteBpsDevice {
570
+		if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
562 571
 			return err
563 572
 		}
564 573
 	}
565
-	if c.BlkioThrottleReadIOpsDevice != "" {
566
-		if err := writeFile(path, "blkio.throttle.read_iops_device", c.BlkioThrottleReadIOpsDevice); err != nil {
574
+	for _, td := range c.BlkioThrottleReadIOPSDevice {
575
+		if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
567 576
 			return err
568 577
 		}
569 578
 	}
570
-	if c.BlkioThrottleWriteIOpsDevice != "" {
571
-		if err := writeFile(path, "blkio.throttle.write_iops_device", c.BlkioThrottleWriteIOpsDevice); err != nil {
579
+	for _, td := range c.BlkioThrottleWriteIOPSDevice {
580
+		if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
572 581
 			return err
573 582
 		}
574 583
 	}
... ...
@@ -585,3 +605,13 @@ func joinHugetlb(c *configs.Cgroup, pid int) error {
585 585
 	hugetlb := subsystems["hugetlb"]
586 586
 	return hugetlb.Set(path, c)
587 587
 }
588
+
589
+func joinPerfEvent(c *configs.Cgroup, pid int) error {
590
+	path, err := join(c, "perf_event", pid)
591
+	if err != nil && !cgroups.IsNotFound(err) {
592
+		return err
593
+	}
594
+
595
+	perfEvent := subsystems["perf_event"]
596
+	return perfEvent.Set(path, c)
597
+}
588 598
new file mode 100644
... ...
@@ -0,0 +1,61 @@
0
+package configs
1
+
2
+import "fmt"
3
+
4
+// blockIODevice holds major:minor format supported in blkio cgroup
5
+type blockIODevice struct {
6
+	// Major is the device's major number
7
+	Major int64 `json:"major"`
8
+	// Minor is the device's minor number
9
+	Minor int64 `json:"minor"`
10
+}
11
+
12
+// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
13
+type WeightDevice struct {
14
+	blockIODevice
15
+	// Weight is the bandwidth rate for the device, range is from 10 to 1000
16
+	Weight uint16 `json:"weight"`
17
+	// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
18
+	LeafWeight uint16 `json:"leafWeight"`
19
+}
20
+
21
+// NewWeightDevice returns a configured WeightDevice pointer
22
+func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
23
+	wd := &WeightDevice{}
24
+	wd.Major = major
25
+	wd.Minor = minor
26
+	wd.Weight = weight
27
+	wd.LeafWeight = leafWeight
28
+	return wd
29
+}
30
+
31
+// WeightString formats the struct to be writable to the cgroup specific file
32
+func (wd *WeightDevice) WeightString() string {
33
+	return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
34
+}
35
+
36
+// LeafWeightString formats the struct to be writable to the cgroup specific file
37
+func (wd *WeightDevice) LeafWeightString() string {
38
+	return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
39
+}
40
+
41
+// ThrottleDevice struct holds a `major:minor rate_per_second` pair
42
+type ThrottleDevice struct {
43
+	blockIODevice
44
+	// Rate is the IO rate limit per cgroup per device
45
+	Rate uint64 `json:"rate"`
46
+}
47
+
48
+// NewThrottleDevice returns a configured ThrottleDevice pointer
49
+func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
50
+	td := &ThrottleDevice{}
51
+	td.Major = major
52
+	td.Minor = minor
53
+	td.Rate = rate
54
+	return td
55
+}
56
+
57
+// String formats the struct to be writable to the cgroup specific file
58
+func (td *ThrottleDevice) String() string {
59
+	return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
60
+}
... ...
@@ -57,23 +57,26 @@ type Cgroup struct {
57 57
 	// MEM to use
58 58
 	CpusetMems string `json:"cpuset_mems"`
59 59
 
60
+	// Specifies per cgroup weight, range is from 10 to 1000.
61
+	BlkioWeight uint16 `json:"blkio_weight"`
62
+
63
+	// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
64
+	BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
65
+
66
+	// Weight per cgroup per device, can override BlkioWeight.
67
+	BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
68
+
60 69
 	// IO read rate limit per cgroup per device, bytes per second.
61
-	BlkioThrottleReadBpsDevice string `json:"blkio_throttle_read_bps_device"`
70
+	BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
62 71
 
63 72
 	// IO write rate limit per cgroup per device, bytes per second.
64
-	BlkioThrottleWriteBpsDevice string `json:"blkio_throttle_write_bps_device"`
73
+	BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
65 74
 
66 75
 	// IO read rate limit per cgroup per device, IO per second.
67
-	BlkioThrottleReadIOpsDevice string `json:"blkio_throttle_read_iops_device"`
76
+	BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
68 77
 
69 78
 	// IO write rate limit per cgroup per device, IO per second.
70
-	BlkioThrottleWriteIOpsDevice string `json:"blkio_throttle_write_iops_device"`
71
-
72
-	// Specifies per cgroup weight, range is from 10 to 1000.
73
-	BlkioWeight int64 `json:"blkio_weight"`
74
-
75
-	// Weight per cgroup per device, can override BlkioWeight.
76
-	BlkioWeightDevice string `json:"blkio_weight_device"`
79
+	BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
77 80
 
78 81
 	// set the freeze value for the process
79 82
 	Freezer FreezerState `json:"freezer"`
... ...
@@ -92,8 +92,8 @@ type Config struct {
92 92
 	// bind mounts are writable.
93 93
 	Readonlyfs bool `json:"readonlyfs"`
94 94
 
95
-	// Privatefs will mount the container's rootfs as private where mount points from the parent will not propogate
96
-	Privatefs bool `json:"privatefs"`
95
+	// Specifies the mount propagation flags to be applied to /.
96
+	RootPropagation int `json:"rootPropagation"`
97 97
 
98 98
 	// Mounts specify additional source and destination paths that will be mounted inside the container's
99 99
 	// rootfs and mount namespace if specified
... ...
@@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) {
21 21
 	return 0, nil
22 22
 }
23 23
 
24
-// Gets the root uid for the process on host which could be non-zero
24
+// Gets the root gid for the process on host which could be non-zero
25 25
 // when user namespaces are enabled.
26 26
 func (c Config) HostGID() (int, error) {
27 27
 	if c.Namespaces.Contains(NEWUSER) {
... ...
@@ -30,11 +30,11 @@ func (c Config) HostGID() (int, error) {
30 30
 		}
31 31
 		id, found := c.hostIDFromMapping(0, c.GidMappings)
32 32
 		if !found {
33
-			return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
33
+			return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
34 34
 		}
35 35
 		return id, nil
36 36
 	}
37
-	// Return default root uid 0
37
+	// Return default root gid 0
38 38
 	return 0, nil
39 39
 }
40 40
 
... ...
@@ -5,5 +5,5 @@ type HugepageLimit struct {
5 5
 	Pagesize string `json:"page_size"`
6 6
 
7 7
 	// usage limit for hugepage.
8
-	Limit int `json:"limit"`
8
+	Limit uint64 `json:"limit"`
9 9
 }
... ...
@@ -23,7 +23,7 @@ struct clone_arg {
23 23
 	 * Reserve some space for clone() to locate arguments
24 24
 	 * and retcode in this place
25 25
 	 */
26
-	char stack[4096] __attribute__ ((aligned(8)));
26
+	char stack[4096] __attribute__ ((aligned(16)));
27 27
 	char stack_ptr[0];
28 28
 	jmp_buf *env;
29 29
 };
... ...
@@ -13,6 +13,7 @@ import (
13 13
 	"syscall"
14 14
 	"time"
15 15
 
16
+	"github.com/docker/docker/pkg/mount"
16 17
 	"github.com/docker/docker/pkg/symlink"
17 18
 	"github.com/opencontainers/runc/libcontainer/cgroups"
18 19
 	"github.com/opencontainers/runc/libcontainer/configs"
... ...
@@ -341,7 +342,7 @@ func setupDevSymlinks(rootfs string) error {
341 341
 // symlinks are resolved locally.
342 342
 func reOpenDevNull() error {
343 343
 	var stat, devNullStat syscall.Stat_t
344
-	file, err := os.Open("/dev/null")
344
+	file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
345 345
 	if err != nil {
346 346
 		return fmt.Errorf("Failed to open /dev/null - %s", err)
347 347
 	}
... ...
@@ -420,14 +421,89 @@ func mknodDevice(dest string, node *configs.Device) error {
420 420
 	return syscall.Chown(dest, int(node.Uid), int(node.Gid))
421 421
 }
422 422
 
423
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
424
+	for _, m := range mountinfo {
425
+		if m.Mountpoint == dir {
426
+			return m
427
+		}
428
+	}
429
+	return nil
430
+}
431
+
432
+// Get the parent mount point of directory passed in as argument. Also return
433
+// optional fields.
434
+func getParentMount(rootfs string) (string, string, error) {
435
+	var path string
436
+
437
+	mountinfos, err := mount.GetMounts()
438
+	if err != nil {
439
+		return "", "", err
440
+	}
441
+
442
+	mountinfo := getMountInfo(mountinfos, rootfs)
443
+	if mountinfo != nil {
444
+		return rootfs, mountinfo.Optional, nil
445
+	}
446
+
447
+	path = rootfs
448
+	for {
449
+		path = filepath.Dir(path)
450
+
451
+		mountinfo = getMountInfo(mountinfos, path)
452
+		if mountinfo != nil {
453
+			return path, mountinfo.Optional, nil
454
+		}
455
+
456
+		if path == "/" {
457
+			break
458
+		}
459
+	}
460
+
461
+	// If we are here, we did not find parent mount. Something is wrong.
462
+	return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
463
+}
464
+
465
+// Make parent mount private if it was shared
466
+func rootfsParentMountPrivate(config *configs.Config) error {
467
+	sharedMount := false
468
+
469
+	parentMount, optionalOpts, err := getParentMount(config.Rootfs)
470
+	if err != nil {
471
+		return err
472
+	}
473
+
474
+	optsSplit := strings.Split(optionalOpts, " ")
475
+	for _, opt := range optsSplit {
476
+		if strings.HasPrefix(opt, "shared:") {
477
+			sharedMount = true
478
+			break
479
+		}
480
+	}
481
+
482
+	// Make parent mount PRIVATE if it was shared. It is needed for two
483
+	// reasons. First of all pivot_root() will fail if parent mount is
484
+	// shared. Secondly when we bind mount rootfs it will propagate to
485
+	// parent namespace and we don't want that to happen.
486
+	if sharedMount {
487
+		return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
488
+	}
489
+
490
+	return nil
491
+}
492
+
423 493
 func prepareRoot(config *configs.Config) error {
424 494
 	flag := syscall.MS_SLAVE | syscall.MS_REC
425
-	if config.Privatefs {
426
-		flag = syscall.MS_PRIVATE | syscall.MS_REC
495
+	if config.RootPropagation != 0 {
496
+		flag = config.RootPropagation
427 497
 	}
428 498
 	if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
429 499
 		return err
430 500
 	}
501
+
502
+	if err := rootfsParentMountPrivate(config); err != nil {
503
+		return err
504
+	}
505
+
431 506
 	return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
432 507
 }
433 508
 
... ...
@@ -469,6 +545,13 @@ func pivotRoot(rootfs, pivotBaseDir string) error {
469 469
 	}
470 470
 	// path to pivot dir now changed, update
471 471
 	pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
472
+
473
+	// Make pivotDir rprivate to make sure any of the unmounts don't
474
+	// propagate to parent.
475
+	if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
476
+		return err
477
+	}
478
+
472 479
 	if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
473 480
 		return fmt.Errorf("unmount pivot_root dir %s", err)
474 481
 	}
... ...
@@ -349,21 +349,26 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
349 349
 	return user, nil
350 350
 }
351 351
 
352
-// GetAdditionalGroups looks up a list of groups by name or group id against
353
-// against the given /etc/group formatted data. If a group name cannot be found,
354
-// an error will be returned. If a group id cannot be found, it will be returned
355
-// as-is.
352
+// GetAdditionalGroups looks up a list of groups by name or group id
353
+// against the given /etc/group formatted data. If a group name cannot
354
+// be found, an error will be returned. If a group id cannot be found,
355
+// or the given group data is nil, the id will be returned as-is
356
+// provided it is in the legal range.
356 357
 func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) {
357
-	groups, err := ParseGroupFilter(group, func(g Group) bool {
358
-		for _, ag := range additionalGroups {
359
-			if g.Name == ag || strconv.Itoa(g.Gid) == ag {
360
-				return true
358
+	var groups = []Group{}
359
+	if group != nil {
360
+		var err error
361
+		groups, err = ParseGroupFilter(group, func(g Group) bool {
362
+			for _, ag := range additionalGroups {
363
+				if g.Name == ag || strconv.Itoa(g.Gid) == ag {
364
+					return true
365
+				}
361 366
 			}
367
+			return false
368
+		})
369
+		if err != nil {
370
+			return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
362 371
 		}
363
-		return false
364
-	})
365
-	if err != nil {
366
-		return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
367 372
 	}
368 373
 
369 374
 	gidMap := make(map[int]struct{})
... ...
@@ -401,13 +406,13 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
401 401
 	return gids, nil
402 402
 }
403 403
 
404
-// Wrapper around GetAdditionalGroups that opens the groupPath given and gives
405
-// it as an argument to GetAdditionalGroups.
404
+// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups
405
+// that opens the groupPath given and gives it as an argument to
406
+// GetAdditionalGroups.
406 407
 func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
407 408
 	group, err := os.Open(groupPath)
408
-	if err != nil {
409
-		return nil, fmt.Errorf("Failed to open group file: %v", err)
409
+	if err == nil {
410
+		defer group.Close()
410 411
 	}
411
-	defer group.Close()
412 412
 	return GetAdditionalGroups(additionalGroups, group)
413 413
 }