Browse code

vendor runc 67169a9d43456ff0d5ae12b967acb8e366e2f181

v1.0.0-rc91-48-g67169a9d

Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>

Jintao Zhang authored on 2020/07/30 15:34:33
Showing 37 changed files
... ...
@@ -30,7 +30,7 @@ func deviceCgroup(d *configs.Device) specs.LinuxDeviceCgroup {
30 30
 		Type:   string(d.Type),
31 31
 		Major:  &d.Major,
32 32
 		Minor:  &d.Minor,
33
-		Access: d.Permissions,
33
+		Access: string(d.Permissions),
34 34
 	}
35 35
 }
36 36
 
... ...
@@ -13,7 +13,7 @@ github.com/konsorten/go-windows-terminal-sequences  edb144dfd453055e1e49a3d8b410
13 13
 github.com/sirupsen/logrus                          60c74ad9be0d874af0ab0daef6ab07c5c5911f0d # v1.6.0
14 14
 github.com/tchap/go-patricia                        a7f0089c6f496e8e70402f61733606daa326cac5 # v2.3.0
15 15
 golang.org/x/net                                    0de0cce0169b09b364e001f108dc0399ea8630b3
16
-golang.org/x/sys                                    85ca7c5b95cdf1e557abb38a283d1e61a5959c31
16
+golang.org/x/sys                                    9dae0f8f577553e0f21298e18926efc9644c281d
17 17
 github.com/docker/go-units                          519db1ee28dcc9fd2474ae59fca29a810482bfb1 # v0.4.0
18 18
 github.com/docker/go-connections                    7395e3f8aa162843a74ed6d48e79627d9792ac55 # v0.4.0
19 19
 github.com/moby/sys                                 6154f11e6840c0d6b0dbb23f4125a6134b3013c9 # mountinfo/v0.1.3
... ...
@@ -83,8 +83,8 @@ google.golang.org/grpc                              f495f5b15ae7ccda3b38c53a1bfc
83 83
 # the containerd project first, and update both after that is merged.
84 84
 # This commit does not need to match RUNC_COMMIT as it is used for helper
85 85
 # packages but should be newer or equal.
86
-github.com/opencontainers/runc                      dc9208a3303feef5b3839f4323d9beb36df0a9dd # v1.0.0-rc10
87
-github.com/opencontainers/runtime-spec              c4ee7d12c742ffe806cd9350b6af3b4b19faed6f # v1.0.2
86
+github.com/opencontainers/runc                      67169a9d43456ff0d5ae12b967acb8e366e2f181 # v1.0.0-rc91-48-g67169a9d
87
+github.com/opencontainers/runtime-spec              237cc4f519e2e8f9b235bacccfa8ef5a84df2875 # v1.0.3-0.20200520003142-237cc4f519e2
88 88
 github.com/opencontainers/image-spec                d60099175f88c47cd379c4738d158884749ed235 # v1.0.1
89 89
 github.com/seccomp/libseccomp-golang                689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1
90 90
 
... ...
@@ -3,6 +3,7 @@
3 3
 [![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
4 4
 [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
5 5
 [![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
6
+[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
6 7
 
7 8
 ## Introduction
8 9
 
... ...
@@ -18,22 +19,23 @@ You can find official releases of `runc` on the [release](https://github.com/ope
18 18
 
19 19
 Currently, the following features are not considered to be production-ready:
20 20
 
21
-* Support for cgroup v2
21
+* [Support for cgroup v2](./docs/cgroup-v2.md)
22 22
 
23 23
 ## Security
24 24
 
25
-The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
25
+The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
26
+
27
+### Security Audit
28
+A third-party security audit was performed by Cure53; you can see the full report [here](https://github.com/opencontainers/runc/blob/master/docs/Security-Audit.pdf).
26 29
 
27 30
 ## Building
28 31
 
29 32
 `runc` currently supports the Linux platform with various architecture support.
30
-It must be built with Go version 1.6 or higher in order for some features to function properly.
33
+It must be built with Go version 1.13 or higher.
31 34
 
32 35
 In order to enable seccomp support you will need to install `libseccomp` on your platform.
33 36
 > e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
34 37
 
35
-Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
36
-
37 38
 ```bash
38 39
 # create a 'github.com/opencontainers' in your GOPATH/src
39 40
 cd github.com/opencontainers
... ...
@@ -58,20 +60,22 @@ sudo make install
58 58
 
59 59
 #### Build Tags
60 60
 
61
-`runc` supports optional build tags for compiling support of various features.
62
-To add build tags to the make option the `BUILDTAGS` variable must be set.
61
+`runc` supports optional build tags for compiling support of various features,
62
+with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
63
+
64
+To change build tags from the default, set the `BUILDTAGS` variable for make,
65
+e.g.
63 66
 
64 67
 ```bash
65 68
 make BUILDTAGS='seccomp apparmor'
66 69
 ```
67 70
 
68
-| Build Tag | Feature                            | Dependency  |
69
-|-----------|------------------------------------|-------------|
70
-| seccomp   | Syscall filtering                  | libseccomp  |
71
-| selinux   | selinux process and mount labeling | <none>      |
72
-| apparmor  | apparmor profile support           | <none>      |
73
-| ambient   | ambient capability support         | kernel 4.3  |
74
-| nokmem    | disable kernel memory account      | <none>      |
71
+| Build Tag | Feature                            | Enabled by default | Dependency |
72
+|-----------|------------------------------------|--------------------|------------|
73
+| seccomp   | Syscall filtering                  | yes                | libseccomp |
74
+| selinux   | selinux process and mount labeling | yes                | <none>     |
75
+| apparmor  | apparmor profile support           | yes                | <none>     |
76
+| nokmem    | disable kernel memory accounting   | no                 | <none>     |
75 77
 
76 78
 
77 79
 ### Running the test suite
... ...
@@ -97,17 +101,30 @@ You can run a specific integration test by setting the `TESTPATH` variable.
97 97
 # make test TESTPATH="/checkpoint.bats"
98 98
 ```
99 99
 
100
-You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables.
100
+You can run a specific rootless integration test by setting the `ROOTLESS_TESTPATH` variable.
101 101
 
102 102
 ```bash
103
-# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/"
103
+# make test ROOTLESS_TESTPATH="/checkpoint.bats"
104
+```
105
+
106
+You can run a test using your container engine's flags by setting the `CONTAINER_ENGINE_BUILD_FLAGS` and `CONTAINER_ENGINE_RUN_FLAGS` variables.
107
+
108
+```bash
109
+# make test CONTAINER_ENGINE_BUILD_FLAGS="--build-arg http_proxy=http://yourproxy/" CONTAINER_ENGINE_RUN_FLAGS="-e http_proxy=http://yourproxy/"
104 110
 ```
105 111
 
106 112
 ### Dependencies Management
107 113
 
108
-`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
109
-Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
110
-new dependencies.
114
+`runc` uses [Go Modules](https://github.com/golang/go/wiki/Modules) for dependency management.
115
+Please refer to [Go Modules](https://github.com/golang/go/wiki/Modules) for how to add or update
116
+new dependencies. When updating dependencies, be sure that you are running Go `1.14` or newer.
117
+
118
+```
119
+# Update vendored dependencies
120
+make vendor
121
+# Verify all dependencies
122
+make verify-dependencies
123
+```
111 124
 
112 125
 ## Using runc
113 126
 
... ...
@@ -275,6 +292,9 @@ PIDFile=/run/mycontainerid.pid
275 275
 WantedBy=multi-user.target
276 276
 ```
277 277
 
278
+#### cgroup v2
279
+See [`./docs/cgroup-v2.md`](./docs/cgroup-v2.md).
280
+
278 281
 ## License
279 282
 
280 283
 The code and docs are released under the [Apache 2.0 license](LICENSE).
281 284
new file mode 100644
... ...
@@ -0,0 +1,26 @@
0
+module github.com/opencontainers/runc
1
+
2
+go 1.14
3
+
4
+require (
5
+	github.com/checkpoint-restore/go-criu/v4 v4.0.2
6
+	github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
7
+	github.com/containerd/console v1.0.0
8
+	github.com/coreos/go-systemd/v22 v22.0.0
9
+	github.com/cyphar/filepath-securejoin v0.2.2
10
+	github.com/docker/go-units v0.4.0
11
+	github.com/godbus/dbus/v5 v5.0.3
12
+	github.com/golang/protobuf v1.3.5
13
+	github.com/moby/sys/mountinfo v0.1.3
14
+	github.com/mrunalp/fileutils v0.0.0-20171103030105-7d4729fb3618
15
+	github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2
16
+	github.com/opencontainers/selinux v1.5.1
17
+	github.com/pkg/errors v0.9.1
18
+	github.com/seccomp/libseccomp-golang v0.9.1
19
+	github.com/sirupsen/logrus v1.6.0
20
+	github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
21
+	// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
22
+	github.com/urfave/cli v1.22.1
23
+	github.com/vishvananda/netlink v1.1.0
24
+	golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775
25
+)
... ...
@@ -155,8 +155,7 @@ config := &configs.Config{
155 155
 		Parent: "system",
156 156
 		Resources: &configs.Resources{
157 157
 			MemorySwappiness: nil,
158
-			AllowAllDevices:  nil,
159
-			AllowedDevices:   configs.DefaultAllowedDevices,
158
+			Devices:          specconv.AllowedDevices,
160 159
 		},
161 160
 	},
162 161
 	MaskPaths: []string{
... ...
@@ -166,7 +165,7 @@ config := &configs.Config{
166 166
 	ReadonlyPaths: []string{
167 167
 		"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
168 168
 	},
169
-	Devices:  configs.DefaultAutoCreatedDevices,
169
+	Devices:  specconv.AllowedDevices,
170 170
 	Hostname: "testing",
171 171
 	Mounts: []*configs.Mount{
172 172
 		{
... ...
@@ -3,8 +3,6 @@
3 3
 package cgroups
4 4
 
5 5
 import (
6
-	"fmt"
7
-
8 6
 	"github.com/opencontainers/runc/libcontainer/configs"
9 7
 )
10 8
 
... ...
@@ -27,48 +25,27 @@ type Manager interface {
27 27
 	// Destroys the cgroup set
28 28
 	Destroy() error
29 29
 
30
-	// The option func SystemdCgroups() and Cgroupfs() require following attributes:
31
-	// 	Paths   map[string]string
32
-	// 	Cgroups *configs.Cgroup
33
-	// Paths maps cgroup subsystem to path at which it is mounted.
34
-	// Cgroups specifies specific cgroup settings for the various subsystems
35
-
36
-	// Returns cgroup paths to save in a state file and to be able to
37
-	// restore the object later.
38
-	GetPaths() map[string]string
39
-
40
-	// GetUnifiedPath returns the unified path when running in unified mode.
41
-	// The value corresponds to the all values of GetPaths() map.
42
-	//
43
-	// GetUnifiedPath returns error when running in hybrid mode as well as
44
-	// in legacy mode.
45
-	GetUnifiedPath() (string, error)
30
+	// Path returns a cgroup path to the specified controller/subsystem.
31
+	// For cgroupv2, the argument is unused and can be empty.
32
+	Path(string) string
46 33
 
47 34
 	// Sets the cgroup as configured.
48 35
 	Set(container *configs.Config) error
49 36
 
50
-	// Gets the cgroup as configured.
51
-	GetCgroups() (*configs.Cgroup, error)
52
-}
53
-
54
-type NotFoundError struct {
55
-	Subsystem string
56
-}
37
+	// GetPaths returns cgroup path(s) to save in a state file in order to restore later.
38
+	//
39
+	// For cgroup v1, a key is cgroup subsystem name, and the value is the path
40
+	// to the cgroup for this subsystem.
41
+	//
42
+	// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
43
+	GetPaths() map[string]string
57 44
 
58
-func (e *NotFoundError) Error() string {
59
-	return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
60
-}
45
+	// GetCgroups returns the cgroup data as configured.
46
+	GetCgroups() (*configs.Cgroup, error)
61 47
 
62
-func NewNotFoundError(sub string) error {
63
-	return &NotFoundError{
64
-		Subsystem: sub,
65
-	}
66
-}
48
+	// GetFreezerState retrieves the current FreezerState of the cgroup.
49
+	GetFreezerState() (configs.FreezerState, error)
67 50
 
68
-func IsNotFound(err error) bool {
69
-	if err == nil {
70
-		return false
71
-	}
72
-	_, ok := err.(*NotFoundError)
73
-	return ok
51
+	// Whether the cgroup path exists or not
52
+	Exists() bool
74 53
 }
... ...
@@ -20,6 +20,12 @@ type CpuUsage struct {
20 20
 	// Total CPU time consumed per core.
21 21
 	// Units: nanoseconds.
22 22
 	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
23
+	// CPU time consumed per core in kernel mode
24
+	// Units: nanoseconds.
25
+	PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
26
+	// CPU time consumed per core in user mode
27
+	// Units: nanoseconds.
28
+	PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
23 29
 	// Time spent by tasks of the cgroup in kernel mode.
24 30
 	// Units: nanoseconds.
25 31
 	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
... ...
@@ -51,12 +57,33 @@ type MemoryStats struct {
51 51
 	KernelUsage MemoryData `json:"kernel_usage,omitempty"`
52 52
 	// usage of kernel TCP memory
53 53
 	KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
54
+	// usage of memory pages by NUMA node
55
+	// see chapter 5.6 of memory controller documentation
56
+	PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
54 57
 	// if true, memory usage is accounted for throughout a hierarchy of cgroups.
55 58
 	UseHierarchy bool `json:"use_hierarchy"`
56 59
 
57 60
 	Stats map[string]uint64 `json:"stats,omitempty"`
58 61
 }
59 62
 
63
+type PageUsageByNUMA struct {
64
+	// Embedding is used as types can't be recursive.
65
+	PageUsageByNUMAInner
66
+	Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
67
+}
68
+
69
+type PageUsageByNUMAInner struct {
70
+	Total       PageStats `json:"total,omitempty"`
71
+	File        PageStats `json:"file,omitempty"`
72
+	Anon        PageStats `json:"anon,omitempty"`
73
+	Unevictable PageStats `json:"unevictable,omitempty"`
74
+}
75
+
76
+type PageStats struct {
77
+	Total uint64           `json:"total,omitempty"`
78
+	Nodes map[uint8]uint64 `json:"nodes,omitempty"`
79
+}
80
+
60 81
 type PidsStats struct {
61 82
 	// number of pids in the cgroup
62 83
 	Current uint64 `json:"current,omitempty"`
... ...
@@ -4,6 +4,7 @@ package cgroups
4 4
 
5 5
 import (
6 6
 	"bufio"
7
+	"errors"
7 8
 	"fmt"
8 9
 	"io"
9 10
 	"io/ioutil"
... ...
@@ -12,7 +13,6 @@ import (
12 12
 	"strconv"
13 13
 	"strings"
14 14
 	"sync"
15
-	"syscall"
16 15
 	"time"
17 16
 
18 17
 	units "github.com/docker/go-units"
... ...
@@ -20,7 +20,6 @@ import (
20 20
 )
21 21
 
22 22
 const (
23
-	CgroupNamePrefix  = "name="
24 23
 	CgroupProcesses   = "cgroup.procs"
25 24
 	unifiedMountpoint = "/sys/fs/cgroup"
26 25
 )
... ...
@@ -40,8 +39,8 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
40 40
 // IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
41 41
 func IsCgroup2UnifiedMode() bool {
42 42
 	isUnifiedOnce.Do(func() {
43
-		var st syscall.Statfs_t
44
-		if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
43
+		var st unix.Statfs_t
44
+		if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
45 45
 			panic("cannot statfs cgroup root")
46 46
 		}
47 47
 		isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
... ...
@@ -49,191 +48,19 @@ func IsCgroup2UnifiedMode() bool {
49 49
 	return isUnified
50 50
 }
51 51
 
52
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
53
-func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
54
-	if IsCgroup2UnifiedMode() {
55
-		return unifiedMountpoint, nil
56
-	}
57
-	mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
58
-	return mnt, err
59
-}
60
-
61
-func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
62
-	// We are not using mount.GetMounts() because it's super-inefficient,
63
-	// parsing it directly sped up x10 times because of not using Sscanf.
64
-	// It was one of two major performance drawbacks in container start.
65
-	if !isSubsystemAvailable(subsystem) {
66
-		return "", "", NewNotFoundError(subsystem)
67
-	}
68
-
69
-	f, err := os.Open("/proc/self/mountinfo")
70
-	if err != nil {
71
-		return "", "", err
72
-	}
73
-	defer f.Close()
74
-
75
-	if IsCgroup2UnifiedMode() {
76
-		subsystem = ""
77
-	}
78
-
79
-	return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
80
-}
81
-
82
-func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
83
-	scanner := bufio.NewScanner(reader)
84
-	for scanner.Scan() {
85
-		txt := scanner.Text()
86
-		fields := strings.Fields(txt)
87
-		if len(fields) < 9 {
88
-			continue
89
-		}
90
-		if strings.HasPrefix(fields[4], cgroupPath) {
91
-			for _, opt := range strings.Split(fields[len(fields)-1], ",") {
92
-				if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
93
-					return fields[4], fields[3], nil
94
-				}
95
-			}
96
-		}
97
-	}
98
-	if err := scanner.Err(); err != nil {
99
-		return "", "", err
100
-	}
101
-
102
-	return "", "", NewNotFoundError(subsystem)
103
-}
104
-
105
-func isSubsystemAvailable(subsystem string) bool {
106
-	if IsCgroup2UnifiedMode() {
107
-		controllers, err := GetAllSubsystems()
108
-		if err != nil {
109
-			return false
110
-		}
111
-		for _, c := range controllers {
112
-			if c == subsystem {
113
-				return true
114
-			}
115
-		}
116
-		return false
117
-	}
118
-
119
-	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
120
-	if err != nil {
121
-		return false
122
-	}
123
-	_, avail := cgroups[subsystem]
124
-	return avail
125
-}
126
-
127
-func GetClosestMountpointAncestor(dir, mountinfo string) string {
128
-	deepestMountPoint := ""
129
-	for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
130
-		mountInfoParts := strings.Fields(mountInfoEntry)
131
-		if len(mountInfoParts) < 5 {
132
-			continue
133
-		}
134
-		mountPoint := mountInfoParts[4]
135
-		if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
136
-			deepestMountPoint = mountPoint
137
-		}
138
-	}
139
-	return deepestMountPoint
140
-}
141
-
142
-func FindCgroupMountpointDir() (string, error) {
143
-	f, err := os.Open("/proc/self/mountinfo")
144
-	if err != nil {
145
-		return "", err
146
-	}
147
-	defer f.Close()
148
-
149
-	scanner := bufio.NewScanner(f)
150
-	for scanner.Scan() {
151
-		text := scanner.Text()
152
-		fields := strings.Split(text, " ")
153
-		// Safe as mountinfo encodes mountpoints with spaces as \040.
154
-		index := strings.Index(text, " - ")
155
-		postSeparatorFields := strings.Fields(text[index+3:])
156
-		numPostFields := len(postSeparatorFields)
157
-
158
-		// This is an error as we can't detect if the mount is for "cgroup"
159
-		if numPostFields == 0 {
160
-			return "", fmt.Errorf("Found no fields post '-' in %q", text)
161
-		}
162
-
163
-		if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
164
-			// Check that the mount is properly formatted.
165
-			if numPostFields < 3 {
166
-				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
167
-			}
168
-
169
-			return filepath.Dir(fields[4]), nil
170
-		}
171
-	}
172
-	if err := scanner.Err(); err != nil {
173
-		return "", err
174
-	}
175
-
176
-	return "", NewNotFoundError("cgroup")
177
-}
178
-
179 52
 type Mount struct {
180 53
 	Mountpoint string
181 54
 	Root       string
182 55
 	Subsystems []string
183 56
 }
184 57
 
185
-func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
186
-	if len(m.Subsystems) == 0 {
187
-		return "", fmt.Errorf("no subsystem for mount")
188
-	}
189
-
190
-	return getControllerPath(m.Subsystems[0], cgroups)
191
-}
192
-
193
-func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
194
-	res := make([]Mount, 0, len(ss))
195
-	scanner := bufio.NewScanner(mi)
196
-	numFound := 0
197
-	for scanner.Scan() && numFound < len(ss) {
198
-		txt := scanner.Text()
199
-		sepIdx := strings.Index(txt, " - ")
200
-		if sepIdx == -1 {
201
-			return nil, fmt.Errorf("invalid mountinfo format")
202
-		}
203
-		if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
204
-			continue
205
-		}
206
-		fields := strings.Split(txt, " ")
207
-		m := Mount{
208
-			Mountpoint: fields[4],
209
-			Root:       fields[3],
210
-		}
211
-		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
212
-			seen, known := ss[opt]
213
-			if !known || (!all && seen) {
214
-				continue
215
-			}
216
-			ss[opt] = true
217
-			if strings.HasPrefix(opt, CgroupNamePrefix) {
218
-				opt = opt[len(CgroupNamePrefix):]
219
-			}
220
-			m.Subsystems = append(m.Subsystems, opt)
221
-			numFound++
222
-		}
223
-		if len(m.Subsystems) > 0 || all {
224
-			res = append(res, m)
225
-		}
226
-	}
227
-	if err := scanner.Err(); err != nil {
228
-		return nil, err
229
-	}
230
-	return res, nil
231
-}
232
-
233 58
 // GetCgroupMounts returns the mounts for the cgroup subsystems.
234 59
 // all indicates whether to return just the first instance or all the mounts.
60
+// This function should not be used from cgroupv2 code, as in this case
61
+// all the controllers are available under the constant unifiedMountpoint.
235 62
 func GetCgroupMounts(all bool) ([]Mount, error) {
236 63
 	if IsCgroup2UnifiedMode() {
64
+		// TODO: remove cgroupv2 case once all external users are converted
237 65
 		availableControllers, err := GetAllSubsystems()
238 66
 		if err != nil {
239 67
 			return nil, err
... ...
@@ -246,22 +73,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
246 246
 		return []Mount{m}, nil
247 247
 	}
248 248
 
249
-	f, err := os.Open("/proc/self/mountinfo")
250
-	if err != nil {
251
-		return nil, err
252
-	}
253
-	defer f.Close()
254
-
255
-	allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
256
-	if err != nil {
257
-		return nil, err
258
-	}
259
-
260
-	allMap := make(map[string]bool)
261
-	for s := range allSubsystems {
262
-		allMap[s] = false
263
-	}
264
-	return getCgroupMountsHelper(allMap, f, all)
249
+	return getCgroupMountsV1(all)
265 250
 }
266 251
 
267 252
 // GetAllSubsystems returns all the cgroup subsystems supported by the kernel
... ...
@@ -305,61 +117,8 @@ func GetAllSubsystems() ([]string, error) {
305 305
 	return subsystems, nil
306 306
 }
307 307
 
308
-// GetOwnCgroup returns the relative path to the cgroup docker is running in.
309
-func GetOwnCgroup(subsystem string) (string, error) {
310
-	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
311
-	if err != nil {
312
-		return "", err
313
-	}
314
-
315
-	return getControllerPath(subsystem, cgroups)
316
-}
317
-
318
-func GetOwnCgroupPath(subsystem string) (string, error) {
319
-	cgroup, err := GetOwnCgroup(subsystem)
320
-	if err != nil {
321
-		return "", err
322
-	}
323
-
324
-	return getCgroupPathHelper(subsystem, cgroup)
325
-}
326
-
327
-func GetInitCgroup(subsystem string) (string, error) {
328
-	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
329
-	if err != nil {
330
-		return "", err
331
-	}
332
-
333
-	return getControllerPath(subsystem, cgroups)
334
-}
335
-
336
-func GetInitCgroupPath(subsystem string) (string, error) {
337
-	cgroup, err := GetInitCgroup(subsystem)
338
-	if err != nil {
339
-		return "", err
340
-	}
341
-
342
-	return getCgroupPathHelper(subsystem, cgroup)
343
-}
344
-
345
-func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
346
-	mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
347
-	if err != nil {
348
-		return "", err
349
-	}
350
-
351
-	// This is needed for nested containers, because in /proc/self/cgroup we
352
-	// see paths from host, which don't exist in container.
353
-	relCgroup, err := filepath.Rel(root, cgroup)
354
-	if err != nil {
355
-		return "", err
356
-	}
357
-
358
-	return filepath.Join(mnt, relCgroup), nil
359
-}
360
-
361
-func readProcsFile(dir string) ([]int, error) {
362
-	f, err := os.Open(filepath.Join(dir, CgroupProcesses))
308
+func readProcsFile(file string) ([]int, error) {
309
+	f, err := os.Open(file)
363 310
 	if err != nil {
364 311
 		return nil, err
365 312
 	}
... ...
@@ -379,11 +138,18 @@ func readProcsFile(dir string) ([]int, error) {
379 379
 			out = append(out, pid)
380 380
 		}
381 381
 	}
382
-	return out, nil
382
+	return out, s.Err()
383 383
 }
384 384
 
385
-// ParseCgroupFile parses the given cgroup file, typically from
386
-// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
385
+// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
386
+// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
387
+//   "cpu": "/user.slice/user-1000.slice"
388
+//   "pids": "/user.slice/user-1000.slice"
389
+// etc.
390
+//
391
+// Note that for cgroup v2 unified hierarchy, there are no per-controller
392
+// cgroup paths, so the resulting map will have a single element where the key
393
+// is empty string ("") and the value is the cgroup path the <pid> is in.
387 394
 func ParseCgroupFile(path string) (map[string]string, error) {
388 395
 	f, err := os.Open(path)
389 396
 	if err != nil {
... ...
@@ -423,22 +189,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
423 423
 	return cgroups, nil
424 424
 }
425 425
 
426
-func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
427
-	if IsCgroup2UnifiedMode() {
428
-		return "/", nil
429
-	}
430
-
431
-	if p, ok := cgroups[subsystem]; ok {
432
-		return p, nil
433
-	}
434
-
435
-	if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
436
-		return p, nil
437
-	}
438
-
439
-	return "", NewNotFoundError(subsystem)
440
-}
441
-
442 426
 func PathExists(path string) bool {
443 427
 	if _, err := os.Stat(path); err != nil {
444 428
 		return false
... ...
@@ -514,8 +264,8 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
514 514
 }
515 515
 
516 516
 // GetPids returns all pids, that were added to cgroup at path.
517
-func GetPids(path string) ([]int, error) {
518
-	return readProcsFile(path)
517
+func GetPids(dir string) ([]int, error) {
518
+	return readProcsFile(filepath.Join(dir, CgroupProcesses))
519 519
 }
520 520
 
521 521
 // GetAllPids returns all pids, that were added to cgroup at path and to all its
... ...
@@ -524,14 +274,13 @@ func GetAllPids(path string) ([]int, error) {
524 524
 	var pids []int
525 525
 	// collect pids from all sub-cgroups
526 526
 	err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
527
-		dir, file := filepath.Split(p)
528
-		if file != CgroupProcesses {
529
-			return nil
530
-		}
531 527
 		if iErr != nil {
532 528
 			return iErr
533 529
 		}
534
-		cPids, err := readProcsFile(dir)
530
+		if info.IsDir() || info.Name() != CgroupProcesses {
531
+			return nil
532
+		}
533
+		cPids, err := readProcsFile(p)
535 534
 		if err != nil {
536 535
 			return err
537 536
 		}
... ...
@@ -568,7 +317,7 @@ func WriteCgroupProc(dir string, pid int) error {
568 568
 
569 569
 		// EINVAL might mean that the task being added to cgroup.procs is in state
570 570
 		// TASK_NEW. We should attempt to do so again.
571
-		if isEINVAL(err) {
571
+		if errors.Is(err, unix.EINVAL) {
572 572
 			time.Sleep(30 * time.Millisecond)
573 573
 			continue
574 574
 		}
... ...
@@ -578,11 +327,53 @@ func WriteCgroupProc(dir string, pid int) error {
578 578
 	return err
579 579
 }
580 580
 
581
-func isEINVAL(err error) bool {
582
-	switch err := err.(type) {
583
-	case *os.PathError:
584
-		return err.Err == unix.EINVAL
585
-	default:
586
-		return false
581
+// Since the OCI spec is designed for cgroup v1, in some cases
582
+// there is a need to convert from the cgroup v1 configuration to cgroup v2
583
+// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990)
584
+// convert linearly from [10-1000] to [1-10000]
585
+func ConvertBlkIOToCgroupV2Value(blkIoWeight uint16) uint64 {
586
+	if blkIoWeight == 0 {
587
+		return 0
588
+	}
589
+	return uint64(1 + (uint64(blkIoWeight)-10)*9999/990)
590
+}
591
+
592
+// Since the OCI spec is designed for cgroup v1, in some cases
593
+// there is a need to convert from the cgroup v1 configuration to cgroup v2
594
+// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142)
595
+// convert from [2-262144] to [1-10000]
596
+// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)"
597
+func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
598
+	if cpuShares == 0 {
599
+		return 0
587 600
 	}
601
+	return (1 + ((cpuShares-2)*9999)/262142)
602
+}
603
+
604
+// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec
605
+// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap
606
+// is defined as memory+swap combined, while in cgroup v2 swap is a separate value.
607
+func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
608
+	// for compatibility with cgroup1 controller, set swap to unlimited in
609
+	// case the memory is set to unlimited, and swap is not explicitly set,
610
+	// treating the request as "set both memory and swap to unlimited".
611
+	if memory == -1 && memorySwap == 0 {
612
+		return -1, nil
613
+	}
614
+	if memorySwap == -1 || memorySwap == 0 {
615
+		// -1 is "max", 0 is "unset", so treat as is
616
+		return memorySwap, nil
617
+	}
618
+	// sanity checks
619
+	if memory == 0 || memory == -1 {
620
+		return 0, errors.New("unable to set swap limit without memory limit")
621
+	}
622
+	if memory < 0 {
623
+		return 0, fmt.Errorf("invalid memory value: %d", memory)
624
+	}
625
+	if memorySwap < memory {
626
+		return 0, errors.New("memory+swap limit should be >= memory limit")
627
+	}
628
+
629
+	return memorySwap - memory, nil
588 630
 }
589 631
new file mode 100644
... ...
@@ -0,0 +1,250 @@
0
+package cgroups
1
+
2
+import (
3
+	"bufio"
4
+	"errors"
5
+	"fmt"
6
+	"io"
7
+	"os"
8
+	"path/filepath"
9
+	"strings"
10
+)
11
+
12
+// Code in this source file is specific to cgroup v1,
13
+// and must not be used from any cgroup v2 code.
14
+
15
+const (
16
+	CgroupNamePrefix = "name="
17
+)
18
+
19
+var (
20
+	errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
21
+)
22
+
23
// NotFoundError reports that no mountpoint was found for a cgroup subsystem.
type NotFoundError struct {
	// Subsystem is the name of the subsystem that was looked up.
	Subsystem string
}

// Error implements the error interface.
func (e *NotFoundError) Error() string {
	return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}

// NewNotFoundError returns a *NotFoundError for the subsystem sub.
func NewNotFoundError(sub string) error {
	return &NotFoundError{
		Subsystem: sub,
	}
}

// IsNotFound reports whether err is a *NotFoundError, or wraps one anywhere in
// its Unwrap chain. It returns false for a nil err.
func IsNotFound(err error) bool {
	// errors.As (rather than a bare type assertion) also recognizes errors
	// that were annotated with extra context before being returned.
	var notFound *NotFoundError
	return errors.As(err, &notFound)
}
44
+
45
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
46
+func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
47
+	if IsCgroup2UnifiedMode() {
48
+		return "", errUnified
49
+	}
50
+	mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
51
+	return mnt, err
52
+}
53
+
54
+func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
55
+	if IsCgroup2UnifiedMode() {
56
+		return "", "", errUnified
57
+	}
58
+
59
+	// We are not using mount.GetMounts() because it's super-inefficient,
60
+	// parsing it directly sped up x10 times because of not using Sscanf.
61
+	// It was one of two major performance drawbacks in container start.
62
+	if !isSubsystemAvailable(subsystem) {
63
+		return "", "", NewNotFoundError(subsystem)
64
+	}
65
+
66
+	f, err := os.Open("/proc/self/mountinfo")
67
+	if err != nil {
68
+		return "", "", err
69
+	}
70
+	defer f.Close()
71
+
72
+	return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
73
+}
74
+
75
+func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
76
+	scanner := bufio.NewScanner(reader)
77
+	for scanner.Scan() {
78
+		txt := scanner.Text()
79
+		fields := strings.Fields(txt)
80
+		if len(fields) < 9 {
81
+			continue
82
+		}
83
+		if strings.HasPrefix(fields[4], cgroupPath) {
84
+			for _, opt := range strings.Split(fields[len(fields)-1], ",") {
85
+				if opt == subsystem {
86
+					return fields[4], fields[3], nil
87
+				}
88
+			}
89
+		}
90
+	}
91
+	if err := scanner.Err(); err != nil {
92
+		return "", "", err
93
+	}
94
+
95
+	return "", "", NewNotFoundError(subsystem)
96
+}
97
+
98
+func isSubsystemAvailable(subsystem string) bool {
99
+	if IsCgroup2UnifiedMode() {
100
+		panic("don't call isSubsystemAvailable from cgroupv2 code")
101
+	}
102
+
103
+	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
104
+	if err != nil {
105
+		return false
106
+	}
107
+	_, avail := cgroups[subsystem]
108
+	return avail
109
+}
110
+
111
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
112
+	if len(m.Subsystems) == 0 {
113
+		return "", fmt.Errorf("no subsystem for mount")
114
+	}
115
+
116
+	return getControllerPath(m.Subsystems[0], cgroups)
117
+}
118
+
119
+func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
120
+	res := make([]Mount, 0, len(ss))
121
+	scanner := bufio.NewScanner(mi)
122
+	numFound := 0
123
+	for scanner.Scan() && numFound < len(ss) {
124
+		txt := scanner.Text()
125
+		sepIdx := strings.Index(txt, " - ")
126
+		if sepIdx == -1 {
127
+			return nil, fmt.Errorf("invalid mountinfo format")
128
+		}
129
+		if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
130
+			continue
131
+		}
132
+		fields := strings.Split(txt, " ")
133
+		m := Mount{
134
+			Mountpoint: fields[4],
135
+			Root:       fields[3],
136
+		}
137
+		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
138
+			seen, known := ss[opt]
139
+			if !known || (!all && seen) {
140
+				continue
141
+			}
142
+			ss[opt] = true
143
+			opt = strings.TrimPrefix(opt, CgroupNamePrefix)
144
+			m.Subsystems = append(m.Subsystems, opt)
145
+			numFound++
146
+		}
147
+		if len(m.Subsystems) > 0 || all {
148
+			res = append(res, m)
149
+		}
150
+	}
151
+	if err := scanner.Err(); err != nil {
152
+		return nil, err
153
+	}
154
+	return res, nil
155
+}
156
+
157
+func getCgroupMountsV1(all bool) ([]Mount, error) {
158
+	f, err := os.Open("/proc/self/mountinfo")
159
+	if err != nil {
160
+		return nil, err
161
+	}
162
+	defer f.Close()
163
+
164
+	allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
165
+	if err != nil {
166
+		return nil, err
167
+	}
168
+
169
+	allMap := make(map[string]bool)
170
+	for s := range allSubsystems {
171
+		allMap[s] = false
172
+	}
173
+	return getCgroupMountsHelper(allMap, f, all)
174
+}
175
+
176
+// GetOwnCgroup returns the relative path to the cgroup docker is running in.
177
+func GetOwnCgroup(subsystem string) (string, error) {
178
+	if IsCgroup2UnifiedMode() {
179
+		return "", errUnified
180
+	}
181
+	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
182
+	if err != nil {
183
+		return "", err
184
+	}
185
+
186
+	return getControllerPath(subsystem, cgroups)
187
+}
188
+
189
+func GetOwnCgroupPath(subsystem string) (string, error) {
190
+	cgroup, err := GetOwnCgroup(subsystem)
191
+	if err != nil {
192
+		return "", err
193
+	}
194
+
195
+	return getCgroupPathHelper(subsystem, cgroup)
196
+}
197
+
198
+func GetInitCgroup(subsystem string) (string, error) {
199
+	if IsCgroup2UnifiedMode() {
200
+		return "", errUnified
201
+	}
202
+	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
203
+	if err != nil {
204
+		return "", err
205
+	}
206
+
207
+	return getControllerPath(subsystem, cgroups)
208
+}
209
+
210
+func GetInitCgroupPath(subsystem string) (string, error) {
211
+	cgroup, err := GetInitCgroup(subsystem)
212
+	if err != nil {
213
+		return "", err
214
+	}
215
+
216
+	return getCgroupPathHelper(subsystem, cgroup)
217
+}
218
+
219
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
220
+	mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
221
+	if err != nil {
222
+		return "", err
223
+	}
224
+
225
+	// This is needed for nested containers, because in /proc/self/cgroup we
226
+	// see paths from host, which don't exist in container.
227
+	relCgroup, err := filepath.Rel(root, cgroup)
228
+	if err != nil {
229
+		return "", err
230
+	}
231
+
232
+	return filepath.Join(mnt, relCgroup), nil
233
+}
234
+
235
+func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
236
+	if IsCgroup2UnifiedMode() {
237
+		return "", errUnified
238
+	}
239
+
240
+	if p, ok := cgroups[subsystem]; ok {
241
+		return p, nil
242
+	}
243
+
244
+	if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
245
+		return p, nil
246
+	}
247
+
248
+	return "", NewNotFoundError(subsystem)
249
+}
... ...
@@ -1,5 +1,9 @@
1 1
 package configs
2 2
 
3
+import (
4
+	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
5
+)
6
+
3 7
 type FreezerState string
4 8
 
5 9
 const (
... ...
@@ -29,18 +33,16 @@ type Cgroup struct {
29 29
 
30 30
 	// Resources contains various cgroups settings to apply
31 31
 	*Resources
32
+
33
+	// SystemdProps are any additional properties for systemd,
34
+	// derived from org.systemd.property.xxx annotations.
35
+	// Ignored unless systemd is used for managing cgroups.
36
+	SystemdProps []systemdDbus.Property `json:"-"`
32 37
 }
33 38
 
34 39
 type Resources struct {
35
-	// If this is true allow access to any kind of device within the container.  If false, allow access only to devices explicitly listed in the allowed_devices list.
36
-	// Deprecated
37
-	AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
38
-	// Deprecated
39
-	AllowedDevices []*Device `json:"allowed_devices,omitempty"`
40
-	// Deprecated
41
-	DeniedDevices []*Device `json:"denied_devices,omitempty"`
42
-
43
-	Devices []*Device `json:"devices"`
40
+	// Devices is the set of access rules for devices in the container.
41
+	Devices []*DeviceRule `json:"devices"`
44 42
 
45 43
 	// Memory limit (in bytes)
46 44
 	Memory int64 `json:"memory"`
... ...
@@ -125,6 +127,10 @@ type Resources struct {
125 125
 	// CpuWeight sets a proportional bandwidth limit.
126 126
 	CpuWeight uint64 `json:"cpu_weight"`
127 127
 
128
-	// CpuMax sets she maximum bandwidth limit (format: max period).
129
-	CpuMax string `json:"cpu_max"`
128
+	// SkipDevices allows to skip configuring device permissions.
129
+	// Used by e.g. kubelet while creating a parent cgroup (kubepods)
130
+	// common for many containers.
131
+	//
132
+	// NOTE it is impossible to start a container which has this flag set.
133
+	SkipDevices bool `json:"skip_devices"`
130 134
 }
... ...
@@ -8,7 +8,7 @@ import (
8 8
 	"time"
9 9
 
10 10
 	"github.com/opencontainers/runtime-spec/specs-go"
11
-
11
+	"github.com/pkg/errors"
12 12
 	"github.com/sirupsen/logrus"
13 13
 )
14 14
 
... ...
@@ -70,9 +70,10 @@ type Arg struct {
70 70
 
71 71
 // Syscall is a rule to match a syscall in Seccomp
72 72
 type Syscall struct {
73
-	Name   string `json:"name"`
74
-	Action Action `json:"action"`
75
-	Args   []*Arg `json:"args"`
73
+	Name     string `json:"name"`
74
+	Action   Action `json:"action"`
75
+	ErrnoRet *uint  `json:"errnoRet"`
76
+	Args     []*Arg `json:"args"`
76 77
 }
77 78
 
78 79
 // TODO Windows. Many of these fields should be factored out into those parts
... ...
@@ -175,7 +176,7 @@ type Config struct {
175 175
 
176 176
 	// Hooks are a collection of actions to perform at various container lifecycle events.
177 177
 	// CommandHooks are serialized to JSON, but other hooks are not.
178
-	Hooks *Hooks
178
+	Hooks Hooks
179 179
 
180 180
 	// Version is the version of opencontainer specification that is supported.
181 181
 	Version string `json:"version"`
... ...
@@ -202,17 +203,50 @@ type Config struct {
202 202
 	RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
203 203
 }
204 204
 
205
-type Hooks struct {
205
+type HookName string
206
+type HookList []Hook
207
+type Hooks map[HookName]HookList
208
+
209
+const (
206 210
 	// Prestart commands are executed after the container namespaces are created,
207 211
 	// but before the user supplied command is executed from init.
208
-	Prestart []Hook
212
+	// Note: This hook is now deprecated
213
+	// Prestart commands are called in the Runtime namespace.
214
+	Prestart HookName = "prestart"
215
+
216
+	// CreateRuntime commands MUST be called as part of the create operation after
217
+	// the runtime environment has been created but before the pivot_root has been executed.
218
+	// CreateRuntime is called immediately after the deprecated Prestart hook.
219
+	// CreateRuntime commands are called in the Runtime Namespace.
220
+	CreateRuntime = "createRuntime"
221
+
222
+	// CreateContainer commands MUST be called as part of the create operation after
223
+	// the runtime environment has been created but before the pivot_root has been executed.
224
+	// CreateContainer commands are called in the Container namespace.
225
+	CreateContainer = "createContainer"
226
+
227
+	// StartContainer commands MUST be called as part of the start operation and before
228
+	// the container process is started.
229
+	// StartContainer commands are called in the Container namespace.
230
+	StartContainer = "startContainer"
209 231
 
210 232
 	// Poststart commands are executed after the container init process starts.
211
-	Poststart []Hook
233
+	// Poststart commands are called in the Runtime Namespace.
234
+	Poststart = "poststart"
212 235
 
213 236
 	// Poststop commands are executed after the container init process exits.
214
-	Poststop []Hook
215
-}
237
+	// Poststop commands are called in the Runtime Namespace.
238
+	Poststop = "poststop"
239
+)
240
+
241
+// TODO move this to runtime-spec
242
+// See: https://github.com/opencontainers/runtime-spec/pull/1046
243
+const (
244
+	Creating = "creating"
245
+	Created  = "created"
246
+	Running  = "running"
247
+	Stopped  = "stopped"
248
+)
216 249
 
217 250
 type Capabilities struct {
218 251
 	// Bounding is the set of capabilities checked by the kernel.
... ...
@@ -227,32 +261,39 @@ type Capabilities struct {
227 227
 	Ambient []string
228 228
 }
229 229
 
230
-func (hooks *Hooks) UnmarshalJSON(b []byte) error {
231
-	var state struct {
232
-		Prestart  []CommandHook
233
-		Poststart []CommandHook
234
-		Poststop  []CommandHook
230
+func (hooks HookList) RunHooks(state *specs.State) error {
231
+	for i, h := range hooks {
232
+		if err := h.Run(state); err != nil {
233
+			return errors.Wrapf(err, "Running hook #%d:", i)
234
+		}
235 235
 	}
236 236
 
237
+	return nil
238
+}
239
+
240
+func (hooks *Hooks) UnmarshalJSON(b []byte) error {
241
+	var state map[HookName][]CommandHook
242
+
237 243
 	if err := json.Unmarshal(b, &state); err != nil {
238 244
 		return err
239 245
 	}
240 246
 
241
-	deserialize := func(shooks []CommandHook) (hooks []Hook) {
242
-		for _, shook := range shooks {
243
-			hooks = append(hooks, shook)
247
+	*hooks = Hooks{}
248
+	for n, commandHooks := range state {
249
+		if len(commandHooks) == 0 {
250
+			continue
244 251
 		}
245 252
 
246
-		return hooks
253
+		(*hooks)[n] = HookList{}
254
+		for _, h := range commandHooks {
255
+			(*hooks)[n] = append((*hooks)[n], h)
256
+		}
247 257
 	}
248 258
 
249
-	hooks.Prestart = deserialize(state.Prestart)
250
-	hooks.Poststart = deserialize(state.Poststart)
251
-	hooks.Poststop = deserialize(state.Poststop)
252 259
 	return nil
253 260
 }
254 261
 
255
-func (hooks Hooks) MarshalJSON() ([]byte, error) {
262
+func (hooks *Hooks) MarshalJSON() ([]byte, error) {
256 263
 	serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
257 264
 		for _, hook := range hooks {
258 265
 			switch chook := hook.(type) {
... ...
@@ -267,9 +308,12 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
267 267
 	}
268 268
 
269 269
 	return json.Marshal(map[string]interface{}{
270
-		"prestart":  serialize(hooks.Prestart),
271
-		"poststart": serialize(hooks.Poststart),
272
-		"poststop":  serialize(hooks.Poststop),
270
+		"prestart":        serialize((*hooks)[Prestart]),
271
+		"createRuntime":   serialize((*hooks)[CreateRuntime]),
272
+		"createContainer": serialize((*hooks)[CreateContainer]),
273
+		"startContainer":  serialize((*hooks)[StartContainer]),
274
+		"poststart":       serialize((*hooks)[Poststart]),
275
+		"poststop":        serialize((*hooks)[Poststop]),
273 276
 	})
274 277
 }
275 278
 
... ...
@@ -3,30 +3,19 @@ package configs
3 3
 import (
4 4
 	"fmt"
5 5
 	"os"
6
+	"strconv"
6 7
 )
7 8
 
8 9
 const (
9 10
 	Wildcard = -1
10 11
 )
11 12
 
12
-// TODO Windows: This can be factored out in the future
13
-
14 13
 type Device struct {
15
-	// Device type, block, char, etc.
16
-	Type rune `json:"type"`
14
+	DeviceRule
17 15
 
18 16
 	// Path to the device.
19 17
 	Path string `json:"path"`
20 18
 
21
-	// Major is the device's major number.
22
-	Major int64 `json:"major"`
23
-
24
-	// Minor is the device's minor number.
25
-	Minor int64 `json:"minor"`
26
-
27
-	// Cgroup permissions format, rwm.
28
-	Permissions string `json:"permissions"`
29
-
30 19
 	// FileMode permission bits for the device.
31 20
 	FileMode os.FileMode `json:"file_mode"`
32 21
 
... ...
@@ -35,23 +24,147 @@ type Device struct {
35 35
 
36 36
 	// Gid of the device.
37 37
 	Gid uint32 `json:"gid"`
38
+}
38 39
 
39
-	// Write the file to the allowed list
40
-	Allow bool `json:"allow"`
40
+// DevicePermissions is a cgroupv1-style string to represent device access. It
41
+// has to be a string for backward compatibility reasons, hence why it has
42
+// methods to do set operations.
43
+type DevicePermissions string
44
+
45
+const (
46
+	deviceRead uint = (1 << iota)
47
+	deviceWrite
48
+	deviceMknod
49
+)
50
+
51
+func (p DevicePermissions) toSet() uint {
52
+	var set uint
53
+	for _, perm := range p {
54
+		switch perm {
55
+		case 'r':
56
+			set |= deviceRead
57
+		case 'w':
58
+			set |= deviceWrite
59
+		case 'm':
60
+			set |= deviceMknod
61
+		}
62
+	}
63
+	return set
64
+}
65
+
66
+func fromSet(set uint) DevicePermissions {
67
+	var perm string
68
+	if set&deviceRead == deviceRead {
69
+		perm += "r"
70
+	}
71
+	if set&deviceWrite == deviceWrite {
72
+		perm += "w"
73
+	}
74
+	if set&deviceMknod == deviceMknod {
75
+		perm += "m"
76
+	}
77
+	return DevicePermissions(perm)
78
+}
79
+
80
+// Union returns the union of the two sets of DevicePermissions.
81
+func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
82
+	lhs := p.toSet()
83
+	rhs := o.toSet()
84
+	return fromSet(lhs | rhs)
85
+}
86
+
87
+// Difference returns the set difference of the two sets of DevicePermissions.
88
+// In set notation, A.Difference(B) gives you A\B.
89
+func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
90
+	lhs := p.toSet()
91
+	rhs := o.toSet()
92
+	return fromSet(lhs &^ rhs)
93
+}
94
+
95
+// Intersection computes the intersection of the two sets of DevicePermissions.
96
+func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
97
+	lhs := p.toSet()
98
+	rhs := o.toSet()
99
+	return fromSet(lhs & rhs)
41 100
 }
42 101
 
43
-func (d *Device) CgroupString() string {
44
-	return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
102
+// IsEmpty returns whether the set of permissions in a DevicePermissions is
103
+// empty.
104
+func (p DevicePermissions) IsEmpty() bool {
105
+	return p == DevicePermissions("")
45 106
 }
46 107
 
47
-func (d *Device) Mkdev() int {
48
-	return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
108
+// IsValid returns whether the set of permissions is a subset of valid
109
+// permissions (namely, {r,w,m}).
110
+func (p DevicePermissions) IsValid() bool {
111
+	return p == fromSet(p.toSet())
49 112
 }
50 113
 
51
-// deviceNumberString converts the device number to a string return result.
52
-func deviceNumberString(number int64) string {
53
-	if number == Wildcard {
54
-		return "*"
114
+type DeviceType rune
115
+
116
+const (
117
+	WildcardDevice DeviceType = 'a'
118
+	BlockDevice    DeviceType = 'b'
119
+	CharDevice     DeviceType = 'c' // or 'u'
120
+	FifoDevice     DeviceType = 'p'
121
+)
122
+
123
+func (t DeviceType) IsValid() bool {
124
+	switch t {
125
+	case WildcardDevice, BlockDevice, CharDevice, FifoDevice:
126
+		return true
127
+	default:
128
+		return false
129
+	}
130
+}
131
+
132
+func (t DeviceType) CanMknod() bool {
133
+	switch t {
134
+	case BlockDevice, CharDevice, FifoDevice:
135
+		return true
136
+	default:
137
+		return false
138
+	}
139
+}
140
+
141
+func (t DeviceType) CanCgroup() bool {
142
+	switch t {
143
+	case WildcardDevice, BlockDevice, CharDevice:
144
+		return true
145
+	default:
146
+		return false
147
+	}
148
+}
149
+
150
+type DeviceRule struct {
151
+	// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
152
+	// acts as a wildcard and all fields other than Allow are ignored.
153
+	Type DeviceType `json:"type"`
154
+
155
+	// Major is the device's major number.
156
+	Major int64 `json:"major"`
157
+
158
+	// Minor is the device's minor number.
159
+	Minor int64 `json:"minor"`
160
+
161
+	// Permissions is the set of permissions that this rule applies to (in the
162
+	// cgroupv1 format -- any combination of "rwm").
163
+	Permissions DevicePermissions `json:"permissions"`
164
+
165
+	// Allow specifies whether this rule is allowed.
166
+	Allow bool `json:"allow"`
167
+}
168
+
169
+func (d *DeviceRule) CgroupString() string {
170
+	var (
171
+		major = strconv.FormatInt(d.Major, 10)
172
+		minor = strconv.FormatInt(d.Minor, 10)
173
+	)
174
+	if d.Major == Wildcard {
175
+		major = "*"
176
+	}
177
+	if d.Minor == Wildcard {
178
+		minor = "*"
55 179
 	}
56
-	return fmt.Sprint(number)
180
+	return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
57 181
 }
58 182
deleted file mode 100644
... ...
@@ -1,111 +0,0 @@
1
-// +build linux
2
-
3
-package configs
4
-
5
-var (
6
-	// DefaultSimpleDevices are devices that are to be both allowed and created.
7
-	DefaultSimpleDevices = []*Device{
8
-		// /dev/null and zero
9
-		{
10
-			Path:        "/dev/null",
11
-			Type:        'c',
12
-			Major:       1,
13
-			Minor:       3,
14
-			Permissions: "rwm",
15
-			FileMode:    0666,
16
-		},
17
-		{
18
-			Path:        "/dev/zero",
19
-			Type:        'c',
20
-			Major:       1,
21
-			Minor:       5,
22
-			Permissions: "rwm",
23
-			FileMode:    0666,
24
-		},
25
-
26
-		{
27
-			Path:        "/dev/full",
28
-			Type:        'c',
29
-			Major:       1,
30
-			Minor:       7,
31
-			Permissions: "rwm",
32
-			FileMode:    0666,
33
-		},
34
-
35
-		// consoles and ttys
36
-		{
37
-			Path:        "/dev/tty",
38
-			Type:        'c',
39
-			Major:       5,
40
-			Minor:       0,
41
-			Permissions: "rwm",
42
-			FileMode:    0666,
43
-		},
44
-
45
-		// /dev/urandom,/dev/random
46
-		{
47
-			Path:        "/dev/urandom",
48
-			Type:        'c',
49
-			Major:       1,
50
-			Minor:       9,
51
-			Permissions: "rwm",
52
-			FileMode:    0666,
53
-		},
54
-		{
55
-			Path:        "/dev/random",
56
-			Type:        'c',
57
-			Major:       1,
58
-			Minor:       8,
59
-			Permissions: "rwm",
60
-			FileMode:    0666,
61
-		},
62
-	}
63
-	DefaultAllowedDevices = append([]*Device{
64
-		// allow mknod for any device
65
-		{
66
-			Type:        'c',
67
-			Major:       Wildcard,
68
-			Minor:       Wildcard,
69
-			Permissions: "m",
70
-		},
71
-		{
72
-			Type:        'b',
73
-			Major:       Wildcard,
74
-			Minor:       Wildcard,
75
-			Permissions: "m",
76
-		},
77
-
78
-		{
79
-			Path:        "/dev/console",
80
-			Type:        'c',
81
-			Major:       5,
82
-			Minor:       1,
83
-			Permissions: "rwm",
84
-		},
85
-		// /dev/pts/ - pts namespaces are "coming soon"
86
-		{
87
-			Path:        "",
88
-			Type:        'c',
89
-			Major:       136,
90
-			Minor:       Wildcard,
91
-			Permissions: "rwm",
92
-		},
93
-		{
94
-			Path:        "",
95
-			Type:        'c',
96
-			Major:       5,
97
-			Minor:       2,
98
-			Permissions: "rwm",
99
-		},
100
-
101
-		// tuntap
102
-		{
103
-			Path:        "",
104
-			Type:        'c',
105
-			Major:       10,
106
-			Minor:       200,
107
-			Permissions: "rwm",
108
-		},
109
-	}, DefaultSimpleDevices...)
110
-	DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
111
-)
112 1
new file mode 100644
... ...
@@ -0,0 +1,16 @@
0
+// +build !windows
1
+
2
+package configs
3
+
4
+import (
5
+	"errors"
6
+
7
+	"golang.org/x/sys/unix"
8
+)
9
+
10
+func (d *DeviceRule) Mkdev() (uint64, error) {
11
+	if d.Major == Wildcard || d.Minor == Wildcard {
12
+		return 0, errors.New("cannot mkdev() device with wildcards")
13
+	}
14
+	return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
15
+}
0 16
new file mode 100644
... ...
@@ -0,0 +1,5 @@
0
+package configs
1
+
2
+func (d *DeviceRule) Mkdev() (uint64, error) {
3
+	return 0, nil
4
+}
... ...
@@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
31 31
 	}
32 32
 
33 33
 	var (
34
+		devType   configs.DeviceType
35
+		mode      = stat.Mode
34 36
 		devNumber = uint64(stat.Rdev)
35 37
 		major     = unix.Major(devNumber)
36 38
 		minor     = unix.Minor(devNumber)
37 39
 	)
38
-	if major == 0 {
40
+	switch mode & unix.S_IFMT {
41
+	case unix.S_IFBLK:
42
+		devType = configs.BlockDevice
43
+	case unix.S_IFCHR:
44
+		devType = configs.CharDevice
45
+	case unix.S_IFIFO:
46
+		devType = configs.FifoDevice
47
+	default:
39 48
 		return nil, ErrNotADevice
40 49
 	}
41
-
42
-	var (
43
-		devType rune
44
-		mode    = stat.Mode
45
-	)
46
-	switch {
47
-	case mode&unix.S_IFBLK == unix.S_IFBLK:
48
-		devType = 'b'
49
-	case mode&unix.S_IFCHR == unix.S_IFCHR:
50
-		devType = 'c'
51
-	}
52 50
 	return &configs.Device{
53
-		Type:        devType,
54
-		Path:        path,
55
-		Major:       int64(major),
56
-		Minor:       int64(minor),
57
-		Permissions: permissions,
58
-		FileMode:    os.FileMode(mode),
59
-		Uid:         stat.Uid,
60
-		Gid:         stat.Gid,
51
+		DeviceRule: configs.DeviceRule{
52
+			Type:        devType,
53
+			Major:       int64(major),
54
+			Minor:       int64(minor),
55
+			Permissions: configs.DevicePermissions(permissions),
56
+		},
57
+		Path:     path,
58
+		FileMode: os.FileMode(mode),
59
+		Uid:      stat.Uid,
60
+		Gid:      stat.Gid,
61 61
 	}, nil
62 62
 }
63 63
 
... ...
@@ -104,6 +104,9 @@ func GetDevices(path string) ([]*configs.Device, error) {
104 104
 			}
105 105
 			return nil, err
106 106
 		}
107
+		if device.Type == configs.FifoDevice {
108
+			continue
109
+		}
107 110
 		out = append(out, device)
108 111
 	}
109 112
 	return out, nil
... ...
@@ -1,7 +1,14 @@
1
+// SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
1 2
 /*
2 3
  * Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
3 4
  * Copyright (C) 2019 SUSE LLC
4 5
  *
6
+ * This work is dual licensed under the following licenses. You may use,
7
+ * redistribute, and/or modify the work under the conditions of either (or
8
+ * both) licenses.
9
+ *
10
+ * === Apache-2.0 ===
11
+ *
5 12
  * Licensed under the Apache License, Version 2.0 (the "License");
6 13
  * you may not use this file except in compliance with the License.
7 14
  * You may obtain a copy of the License at
... ...
@@ -13,6 +20,23 @@
13 13
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 14
  * See the License for the specific language governing permissions and
15 15
  * limitations under the License.
16
+ *
17
+ * === LGPL-2.1-or-later ===
18
+ *
19
+ * This library is free software; you can redistribute it and/or
20
+ * modify it under the terms of the GNU Lesser General Public
21
+ * License as published by the Free Software Foundation; either
22
+ * version 2.1 of the License, or (at your option) any later version.
23
+ *
24
+ * This library is distributed in the hope that it will be useful,
25
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27
+ * Lesser General Public License for more details.
28
+ *
29
+ * You should have received a copy of the GNU Lesser General Public
30
+ * License along with this library. If not, see
31
+ * <https://www.gnu.org/licenses/>.
32
+ *
16 33
  */
17 34
 
18 35
 #define _GNU_SOURCE
... ...
@@ -95,8 +119,10 @@ static int is_self_cloned(void)
95 95
 	struct statfs fsbuf = {};
96 96
 
97 97
 	fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
98
-	if (fd < 0)
98
+	if (fd < 0) {
99
+		fprintf(stderr, "you have no read access to runc binary file\n");
99 100
 		return -ENOTRECOVERABLE;
101
+	}
100 102
 
101 103
 	/*
102 104
 	 * Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for
... ...
@@ -714,12 +714,12 @@ void nsexec(void)
714 714
 			 * ready, so we can receive all possible error codes
715 715
 			 * generated by children.
716 716
 			 */
717
+			syncfd = sync_child_pipe[1];
718
+			close(sync_child_pipe[0]);
719
+
717 720
 			while (!ready) {
718 721
 				enum sync_t s;
719 722
 
720
-				syncfd = sync_child_pipe[1];
721
-				close(sync_child_pipe[0]);
722
-
723 723
 				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
724 724
 					bail("failed to sync with child: next state");
725 725
 
... ...
@@ -789,13 +789,13 @@ void nsexec(void)
789 789
 
790 790
 			/* Now sync with grandchild. */
791 791
 
792
+			syncfd = sync_grandchild_pipe[1];
793
+			close(sync_grandchild_pipe[0]);
794
+
792 795
 			ready = false;
793 796
 			while (!ready) {
794 797
 				enum sync_t s;
795 798
 
796
-				syncfd = sync_grandchild_pipe[1];
797
-				close(sync_grandchild_pipe[0]);
798
-
799 799
 				s = SYNC_GRANDCHILD;
800 800
 				if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
801 801
 					kill(child, SIGKILL);
... ...
@@ -60,7 +60,7 @@ type Group struct {
60 60
 
61 61
 // groupFromOS converts an os/user.(*Group) to local Group
62 62
 //
63
-// (This does not include Pass, Shell or Gecos)
63
+// (This does not include Pass or List)
64 64
 func groupFromOS(g *user.Group) (Group, error) {
65 65
 	newGroup := Group{
66 66
 		Name: g.Name,
... ...
@@ -162,10 +162,6 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
162 162
 	)
163 163
 
164 164
 	for s.Scan() {
165
-		if err := s.Err(); err != nil {
166
-			return nil, err
167
-		}
168
-
169 165
 		line := strings.TrimSpace(s.Text())
170 166
 		if line == "" {
171 167
 			continue
... ...
@@ -183,6 +179,9 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
183 183
 			out = append(out, p)
184 184
 		}
185 185
 	}
186
+	if err := s.Err(); err != nil {
187
+		return nil, err
188
+	}
186 189
 
187 190
 	return out, nil
188 191
 }
... ...
@@ -221,10 +220,6 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
221 221
 	)
222 222
 
223 223
 	for s.Scan() {
224
-		if err := s.Err(); err != nil {
225
-			return nil, err
226
-		}
227
-
228 224
 		text := s.Text()
229 225
 		if text == "" {
230 226
 			continue
... ...
@@ -242,6 +237,9 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
242 242
 			out = append(out, p)
243 243
 		}
244 244
 	}
245
+	if err := s.Err(); err != nil {
246
+		return nil, err
247
+	}
245 248
 
246 249
 	return out, nil
247 250
 }
... ...
@@ -532,10 +530,6 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
532 532
 	)
533 533
 
534 534
 	for s.Scan() {
535
-		if err := s.Err(); err != nil {
536
-			return nil, err
537
-		}
538
-
539 535
 		line := strings.TrimSpace(s.Text())
540 536
 		if line == "" {
541 537
 			continue
... ...
@@ -549,6 +543,9 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
549 549
 			out = append(out, p)
550 550
 		}
551 551
 	}
552
+	if err := s.Err(); err != nil {
553
+		return nil, err
554
+	}
552 555
 
553 556
 	return out, nil
554 557
 }
... ...
@@ -586,10 +583,6 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
586 586
 	)
587 587
 
588 588
 	for s.Scan() {
589
-		if err := s.Err(); err != nil {
590
-			return nil, err
591
-		}
592
-
593 589
 		line := strings.TrimSpace(s.Text())
594 590
 		if line == "" {
595 591
 			continue
... ...
@@ -603,6 +596,9 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
603 603
 			out = append(out, p)
604 604
 		}
605 605
 	}
606
+	if err := s.Err(); err != nil {
607
+		return nil, err
608
+	}
606 609
 
607 610
 	return out, nil
608 611
 }
609 612
deleted file mode 100644
... ...
@@ -1,31 +0,0 @@
1
-# OCI runtime-spec. When updating this, make sure you use a version tag rather
2
-# than a commit ID so it's much more obvious what version of the spec we are
3
-# using.
4
-github.com/opencontainers/runtime-spec  29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db
5
-
6
-# Core libcontainer functionality.
7
-github.com/checkpoint-restore/go-criu   17b0214f6c48980c45dc47ecb0cfd6d9e02df723 # v3.11
8
-github.com/mrunalp/fileutils            7d4729fb36185a7c1719923406c9d40e54fb93c7
9
-github.com/opencontainers/selinux       5215b1806f52b1fcc2070a8826c542c9d33cd3cf # v1.3.0 (+ CVE-2019-16884)
10
-github.com/seccomp/libseccomp-golang    689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1
11
-github.com/sirupsen/logrus              8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f # v1.4.1
12
-github.com/syndtr/gocapability          d98352740cb2c55f81556b63d4a1ec64c5a319c2
13
-github.com/vishvananda/netlink          1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270
14
-
15
-# systemd integration.
16
-github.com/coreos/go-systemd            95778dfbb74eb7e4dbaf43bf7d71809650ef8076 # v19
17
-github.com/godbus/dbus                  2ff6f7ffd60f0f2410b3105864bdd12c7894f844 # v5.0.1
18
-github.com/golang/protobuf              925541529c1fa6821df4e44ce2723319eb2be768 # v1.0.0
19
-
20
-# Command-line interface.
21
-github.com/cyphar/filepath-securejoin   a261ee33d7a517f054effbf451841abaafe3e0fd # v0.2.2
22
-github.com/docker/go-units              47565b4f722fb6ceae66b95f853feed578a4a51c # v0.3.3
23
-github.com/urfave/cli                   cfb38830724cc34fedffe9a2a29fb54fa9169cd1 # v1.20.0
24
-golang.org/x/sys                        9eafafc0a87e0fd0aeeba439a4573537970c44c7 https://github.com/golang/sys
25
-
26
-# console dependencies
27
-github.com/containerd/console           0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f
28
-github.com/pkg/errors                   ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1
29
-
30
-# ebpf dependencies
31
-github.com/cilium/ebpf                  95b36a581eed7b0f127306ed1d16cc0ddc06cf67
... ...
@@ -667,9 +667,10 @@ type LinuxSeccompArg struct {
667 667
 
668 668
 // LinuxSyscall is used to match a syscall in Seccomp
669 669
 type LinuxSyscall struct {
670
-	Names  []string           `json:"names"`
671
-	Action LinuxSeccompAction `json:"action"`
672
-	Args   []LinuxSeccompArg  `json:"args,omitempty"`
670
+	Names    []string           `json:"names"`
671
+	Action   LinuxSeccompAction `json:"action"`
672
+	ErrnoRet *uint              `json:"errnoRet,omitempty"`
673
+	Args     []LinuxSeccompArg  `json:"args,omitempty"`
673 674
 }
674 675
 
675 676
 // LinuxIntelRdt has container runtime resource constraints for Intel RDT
... ...
@@ -11,7 +11,7 @@ const (
11 11
 	VersionPatch = 2
12 12
 
13 13
 	// VersionDev indicates development branch. Releases will be empty string.
14
-	VersionDev = ""
14
+	VersionDev = "-dev"
15 15
 )
16 16
 
17 17
 // Version is the specification version that the package types support.
... ...
@@ -671,6 +671,7 @@ const (
671 671
 	FS_IOC_ADD_ENCRYPTION_KEY                   = 0xc0506617
672 672
 	FS_IOC_GET_ENCRYPTION_KEY_STATUS            = 0xc080661a
673 673
 	FS_IOC_GET_ENCRYPTION_POLICY_EX             = 0xc0096616
674
+	FS_IOC_MEASURE_VERITY                       = 0xc0046686
674 675
 	FS_IOC_REMOVE_ENCRYPTION_KEY                = 0xc0406618
675 676
 	FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS      = 0xc0406619
676 677
 	FS_KEY_DESCRIPTOR_SIZE                      = 0x8
... ...
@@ -683,6 +684,9 @@ const (
683 683
 	FS_POLICY_FLAGS_PAD_8                       = 0x1
684 684
 	FS_POLICY_FLAGS_PAD_MASK                    = 0x3
685 685
 	FS_POLICY_FLAGS_VALID                       = 0xf
686
+	FS_VERITY_FL                                = 0x100000
687
+	FS_VERITY_HASH_ALG_SHA256                   = 0x1
688
+	FS_VERITY_HASH_ALG_SHA512                   = 0x2
686 689
 	FUTEXFS_SUPER_MAGIC                         = 0xbad1dea
687 690
 	F_ADD_SEALS                                 = 0x409
688 691
 	F_DUPFD                                     = 0x0
... ...
@@ -73,6 +73,8 @@ const (
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x1000
75 75
 	FP_XSTATE_MAGIC2                 = 0x46505845
76
+	FS_IOC_ENABLE_VERITY             = 0x40806685
77
+	FS_IOC_GETFLAGS                  = 0x80046601
76 78
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x400c6615
77 79
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x40106614
78 80
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x800c6613
... ...
@@ -73,6 +73,8 @@ const (
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x1000
75 75
 	FP_XSTATE_MAGIC2                 = 0x46505845
76
+	FS_IOC_ENABLE_VERITY             = 0x40806685
77
+	FS_IOC_GETFLAGS                  = 0x80086601
76 78
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x400c6615
77 79
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x40106614
78 80
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x800c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x1000
75
+	FS_IOC_ENABLE_VERITY             = 0x40806685
76
+	FS_IOC_GETFLAGS                  = 0x80046601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x400c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x40106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x800c6613
... ...
@@ -75,6 +75,8 @@ const (
75 75
 	FFDLY                            = 0x8000
76 76
 	FLUSHO                           = 0x1000
77 77
 	FPSIMD_MAGIC                     = 0x46508001
78
+	FS_IOC_ENABLE_VERITY             = 0x40806685
79
+	FS_IOC_GETFLAGS                  = 0x80086601
78 80
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x400c6615
79 81
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x40106614
80 82
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x800c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x2000
75
+	FS_IOC_ENABLE_VERITY             = 0x80806685
76
+	FS_IOC_GETFLAGS                  = 0x40046601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x2000
75
+	FS_IOC_ENABLE_VERITY             = 0x80806685
76
+	FS_IOC_GETFLAGS                  = 0x40086601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x2000
75
+	FS_IOC_ENABLE_VERITY             = 0x80806685
76
+	FS_IOC_GETFLAGS                  = 0x40086601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x2000
75
+	FS_IOC_ENABLE_VERITY             = 0x80806685
76
+	FS_IOC_GETFLAGS                  = 0x40046601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x4000
73 73
 	FFDLY                            = 0x4000
74 74
 	FLUSHO                           = 0x800000
75
+	FS_IOC_ENABLE_VERITY             = 0x80806685
76
+	FS_IOC_GETFLAGS                  = 0x40086601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x4000
73 73
 	FFDLY                            = 0x4000
74 74
 	FLUSHO                           = 0x800000
75
+	FS_IOC_ENABLE_VERITY             = 0x80806685
76
+	FS_IOC_GETFLAGS                  = 0x40086601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x1000
75
+	FS_IOC_ENABLE_VERITY             = 0x40806685
76
+	FS_IOC_GETFLAGS                  = 0x80086601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x400c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x40106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x800c6613
... ...
@@ -72,6 +72,8 @@ const (
72 72
 	FF1                              = 0x8000
73 73
 	FFDLY                            = 0x8000
74 74
 	FLUSHO                           = 0x1000
75
+	FS_IOC_ENABLE_VERITY             = 0x40806685
76
+	FS_IOC_GETFLAGS                  = 0x80086601
75 77
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x400c6615
76 78
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x40106614
77 79
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x800c6613
... ...
@@ -76,6 +76,8 @@ const (
76 76
 	FF1                              = 0x8000
77 77
 	FFDLY                            = 0x8000
78 78
 	FLUSHO                           = 0x1000
79
+	FS_IOC_ENABLE_VERITY             = 0x80806685
80
+	FS_IOC_GETFLAGS                  = 0x40086601
79 81
 	FS_IOC_GET_ENCRYPTION_POLICY     = 0x800c6615
80 82
 	FS_IOC_GET_ENCRYPTION_PWSALT     = 0x80106614
81 83
 	FS_IOC_SET_ENCRYPTION_POLICY     = 0x400c6613
... ...
@@ -2291,3 +2291,20 @@ const (
2291 2291
 	DEVLINK_DPIPE_HEADER_IPV4                 = 0x1
2292 2292
 	DEVLINK_DPIPE_HEADER_IPV6                 = 0x2
2293 2293
 )
2294
+
2295
+type FsverityDigest struct {
2296
+	Algorithm uint16
2297
+	Size      uint16
2298
+}
2299
+
2300
+type FsverityEnableArg struct {
2301
+	Version        uint32
2302
+	Hash_algorithm uint32
2303
+	Block_size     uint32
2304
+	Salt_size      uint32
2305
+	Salt_ptr       uint64
2306
+	Sig_size       uint32
2307
+	_              uint32
2308
+	Sig_ptr        uint64
2309
+	_              [11]uint64
2310
+}