v1.0.0-rc91-48-g67169a9d
Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>
| ... | ... |
@@ -13,7 +13,7 @@ github.com/konsorten/go-windows-terminal-sequences edb144dfd453055e1e49a3d8b410 |
| 13 | 13 |
github.com/sirupsen/logrus 60c74ad9be0d874af0ab0daef6ab07c5c5911f0d # v1.6.0 |
| 14 | 14 |
github.com/tchap/go-patricia a7f0089c6f496e8e70402f61733606daa326cac5 # v2.3.0 |
| 15 | 15 |
golang.org/x/net 0de0cce0169b09b364e001f108dc0399ea8630b3 |
| 16 |
-golang.org/x/sys 85ca7c5b95cdf1e557abb38a283d1e61a5959c31 |
|
| 16 |
+golang.org/x/sys 9dae0f8f577553e0f21298e18926efc9644c281d |
|
| 17 | 17 |
github.com/docker/go-units 519db1ee28dcc9fd2474ae59fca29a810482bfb1 # v0.4.0 |
| 18 | 18 |
github.com/docker/go-connections 7395e3f8aa162843a74ed6d48e79627d9792ac55 # v0.4.0 |
| 19 | 19 |
github.com/moby/sys 6154f11e6840c0d6b0dbb23f4125a6134b3013c9 # mountinfo/v0.1.3 |
| ... | ... |
@@ -83,8 +83,8 @@ google.golang.org/grpc f495f5b15ae7ccda3b38c53a1bfc |
| 83 | 83 |
# the containerd project first, and update both after that is merged. |
| 84 | 84 |
# This commit does not need to match RUNC_COMMIT as it is used for helper |
| 85 | 85 |
# packages but should be newer or equal. |
| 86 |
-github.com/opencontainers/runc dc9208a3303feef5b3839f4323d9beb36df0a9dd # v1.0.0-rc10 |
|
| 87 |
-github.com/opencontainers/runtime-spec c4ee7d12c742ffe806cd9350b6af3b4b19faed6f # v1.0.2 |
|
| 86 |
+github.com/opencontainers/runc 67169a9d43456ff0d5ae12b967acb8e366e2f181 # v1.0.0-rc91-48-g67169a9d |
|
| 87 |
+github.com/opencontainers/runtime-spec 237cc4f519e2e8f9b235bacccfa8ef5a84df2875 # v1.0.3-0.20200520003142-237cc4f519e2 |
|
| 88 | 88 |
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1 |
| 89 | 89 |
github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1 |
| 90 | 90 |
|
| ... | ... |
@@ -3,6 +3,7 @@ |
| 3 | 3 |
[Build Status](https://travis-ci.org/opencontainers/runc) |
| 4 | 4 |
[Go Report Card](https://goreportcard.com/report/github.com/opencontainers/runc) |
| 5 | 5 |
[GoDoc](https://godoc.org/github.com/opencontainers/runc) |
| 6 |
+[CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588) |
|
| 6 | 7 |
|
| 7 | 8 |
## Introduction |
| 8 | 9 |
|
| ... | ... |
@@ -18,22 +19,23 @@ You can find official releases of `runc` on the [release](https://github.com/ope |
| 18 | 18 |
|
| 19 | 19 |
Currently, the following features are not considered to be production-ready: |
| 20 | 20 |
|
| 21 |
-* Support for cgroup v2 |
|
| 21 |
+* [Support for cgroup v2](./docs/cgroup-v2.md) |
|
| 22 | 22 |
|
| 23 | 23 |
## Security |
| 24 | 24 |
|
| 25 |
-The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/). |
|
| 25 |
+The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md). |
|
| 26 |
+ |
|
| 27 |
+### Security Audit |
|
| 28 |
+A third-party security audit was performed by Cure53; you can see the full report [here](https://github.com/opencontainers/runc/blob/master/docs/Security-Audit.pdf). |
|
| 26 | 29 |
|
| 27 | 30 |
## Building |
| 28 | 31 |
|
| 29 | 32 |
`runc` currently supports the Linux platform on various architectures. |
| 30 |
-It must be built with Go version 1.6 or higher in order for some features to function properly. |
|
| 33 |
+It must be built with Go version 1.13 or higher. |
|
| 31 | 34 |
|
| 32 | 35 |
In order to enable seccomp support you will need to install `libseccomp` on your platform. |
| 33 | 36 |
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu |
| 34 | 37 |
|
| 35 |
-Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make. |
|
| 36 |
- |
|
| 37 | 38 |
```bash |
| 38 | 39 |
# create a 'github.com/opencontainers' in your GOPATH/src |
| 39 | 40 |
cd github.com/opencontainers |
| ... | ... |
@@ -58,20 +60,22 @@ sudo make install |
| 58 | 58 |
|
| 59 | 59 |
#### Build Tags |
| 60 | 60 |
|
| 61 |
-`runc` supports optional build tags for compiling support of various features. |
|
| 62 |
-To add build tags to the make option the `BUILDTAGS` variable must be set. |
|
| 61 |
+`runc` supports optional build tags for compiling support of various features, |
|
| 62 |
+with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`). |
|
| 63 |
+ |
|
| 64 |
+To change build tags from the default, set the `BUILDTAGS` variable for make, |
|
| 65 |
+e.g. |
|
| 63 | 66 |
|
| 64 | 67 |
```bash |
| 65 | 68 |
make BUILDTAGS='seccomp apparmor' |
| 66 | 69 |
``` |
| 67 | 70 |
|
| 68 |
-| Build Tag | Feature | Dependency | |
|
| 69 |
-|-----------|------------------------------------|-------------| |
|
| 70 |
-| seccomp | Syscall filtering | libseccomp | |
|
| 71 |
-| selinux | selinux process and mount labeling | <none> | |
|
| 72 |
-| apparmor | apparmor profile support | <none> | |
|
| 73 |
-| ambient | ambient capability support | kernel 4.3 | |
|
| 74 |
-| nokmem | disable kernel memory account | <none> | |
|
| 71 |
+| Build Tag | Feature | Enabled by default | Dependency | |
|
| 72 |
+|-----------|------------------------------------|--------------------|------------| |
|
| 73 |
+| seccomp | Syscall filtering | yes | libseccomp | |
|
| 74 |
+| selinux | selinux process and mount labeling | yes | <none> | |
|
| 75 |
+| apparmor | apparmor profile support | yes | <none> | |
|
| 76 |
+| nokmem | disable kernel memory accounting | no | <none> | |
|
| 75 | 77 |
|
| 76 | 78 |
|
| 77 | 79 |
### Running the test suite |
| ... | ... |
@@ -97,17 +101,30 @@ You can run a specific integration test by setting the `TESTPATH` variable. |
| 97 | 97 |
# make test TESTPATH="/checkpoint.bats" |
| 98 | 98 |
``` |
| 99 | 99 |
|
| 100 |
-You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables. |
|
| 100 |
+You can run a specific rootless integration test by setting the `ROOTLESS_TESTPATH` variable. |
|
| 101 | 101 |
|
| 102 | 102 |
```bash |
| 103 |
-# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/" |
|
| 103 |
+# make test ROOTLESS_TESTPATH="/checkpoint.bats" |
|
| 104 |
+``` |
|
| 105 |
+ |
|
| 106 |
+You can run a test using your container engine's flags by setting the `CONTAINER_ENGINE_BUILD_FLAGS` and `CONTAINER_ENGINE_RUN_FLAGS` variables. |
|
| 107 |
+ |
|
| 108 |
+```bash |
|
| 109 |
+# make test CONTAINER_ENGINE_BUILD_FLAGS="--build-arg http_proxy=http://yourproxy/" CONTAINER_ENGINE_RUN_FLAGS="-e http_proxy=http://yourproxy/" |
|
| 104 | 110 |
``` |
| 105 | 111 |
|
| 106 | 112 |
### Dependencies Management |
| 107 | 113 |
|
| 108 |
-`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. |
|
| 109 |
-Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update |
|
| 110 |
-new dependencies. |
|
| 114 |
+`runc` uses [Go Modules](https://github.com/golang/go/wiki/Modules) for dependency management. |
|
| 115 |
+Please refer to [Go Modules](https://github.com/golang/go/wiki/Modules) for how to add or update |
|
| 116 |
+new dependencies. When updating dependencies, be sure that you are running Go `1.14` or newer. |
|
| 117 |
+ |
|
| 118 |
+``` |
|
| 119 |
+# Update vendored dependencies |
|
| 120 |
+make vendor |
|
| 121 |
+# Verify all dependencies |
|
| 122 |
+make verify-dependencies |
|
| 123 |
+``` |
|
| 111 | 124 |
|
| 112 | 125 |
## Using runc |
| 113 | 126 |
|
| ... | ... |
@@ -275,6 +292,9 @@ PIDFile=/run/mycontainerid.pid |
| 275 | 275 |
WantedBy=multi-user.target |
| 276 | 276 |
``` |
| 277 | 277 |
|
| 278 |
+#### cgroup v2 |
|
| 279 |
+See [`./docs/cgroup-v2.md`](./docs/cgroup-v2.md). |
|
| 280 |
+ |
|
| 278 | 281 |
## License |
| 279 | 282 |
|
| 280 | 283 |
The code and docs are released under the [Apache 2.0 license](LICENSE). |
| 281 | 284 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,26 @@ |
| 0 |
+module github.com/opencontainers/runc |
|
| 1 |
+ |
|
| 2 |
+go 1.14 |
|
| 3 |
+ |
|
| 4 |
+require ( |
|
| 5 |
+ github.com/checkpoint-restore/go-criu/v4 v4.0.2 |
|
| 6 |
+ github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775 |
|
| 7 |
+ github.com/containerd/console v1.0.0 |
|
| 8 |
+ github.com/coreos/go-systemd/v22 v22.0.0 |
|
| 9 |
+ github.com/cyphar/filepath-securejoin v0.2.2 |
|
| 10 |
+ github.com/docker/go-units v0.4.0 |
|
| 11 |
+ github.com/godbus/dbus/v5 v5.0.3 |
|
| 12 |
+ github.com/golang/protobuf v1.3.5 |
|
| 13 |
+ github.com/moby/sys/mountinfo v0.1.3 |
|
| 14 |
+ github.com/mrunalp/fileutils v0.0.0-20171103030105-7d4729fb3618 |
|
| 15 |
+ github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2 |
|
| 16 |
+ github.com/opencontainers/selinux v1.5.1 |
|
| 17 |
+ github.com/pkg/errors v0.9.1 |
|
| 18 |
+ github.com/seccomp/libseccomp-golang v0.9.1 |
|
| 19 |
+ github.com/sirupsen/logrus v1.6.0 |
|
| 20 |
+ github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 |
|
| 21 |
+ // NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092 |
|
| 22 |
+ github.com/urfave/cli v1.22.1 |
|
| 23 |
+ github.com/vishvananda/netlink v1.1.0 |
|
| 24 |
+ golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775 |
|
| 25 |
+) |
| ... | ... |
@@ -155,8 +155,7 @@ config := &configs.Config{
|
| 155 | 155 |
Parent: "system", |
| 156 | 156 |
Resources: &configs.Resources{
|
| 157 | 157 |
MemorySwappiness: nil, |
| 158 |
- AllowAllDevices: nil, |
|
| 159 |
- AllowedDevices: configs.DefaultAllowedDevices, |
|
| 158 |
+ Devices: specconv.AllowedDevices, |
|
| 160 | 159 |
}, |
| 161 | 160 |
}, |
| 162 | 161 |
MaskPaths: []string{
|
| ... | ... |
@@ -166,7 +165,7 @@ config := &configs.Config{
|
| 166 | 166 |
ReadonlyPaths: []string{
|
| 167 | 167 |
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", |
| 168 | 168 |
}, |
| 169 |
- Devices: configs.DefaultAutoCreatedDevices, |
|
| 169 |
+ Devices: specconv.AllowedDevices, |
|
| 170 | 170 |
Hostname: "testing", |
| 171 | 171 |
Mounts: []*configs.Mount{
|
| 172 | 172 |
{
|
| ... | ... |
@@ -3,8 +3,6 @@ |
| 3 | 3 |
package cgroups |
| 4 | 4 |
|
| 5 | 5 |
import ( |
| 6 |
- "fmt" |
|
| 7 |
- |
|
| 8 | 6 |
"github.com/opencontainers/runc/libcontainer/configs" |
| 9 | 7 |
) |
| 10 | 8 |
|
| ... | ... |
@@ -27,48 +25,27 @@ type Manager interface {
|
| 27 | 27 |
// Destroys the cgroup set |
| 28 | 28 |
Destroy() error |
| 29 | 29 |
|
| 30 |
- // The option func SystemdCgroups() and Cgroupfs() require following attributes: |
|
| 31 |
- // Paths map[string]string |
|
| 32 |
- // Cgroups *configs.Cgroup |
|
| 33 |
- // Paths maps cgroup subsystem to path at which it is mounted. |
|
| 34 |
- // Cgroups specifies specific cgroup settings for the various subsystems |
|
| 35 |
- |
|
| 36 |
- // Returns cgroup paths to save in a state file and to be able to |
|
| 37 |
- // restore the object later. |
|
| 38 |
- GetPaths() map[string]string |
|
| 39 |
- |
|
| 40 |
- // GetUnifiedPath returns the unified path when running in unified mode. |
|
| 41 |
- // The value corresponds to the all values of GetPaths() map. |
|
| 42 |
- // |
|
| 43 |
- // GetUnifiedPath returns error when running in hybrid mode as well as |
|
| 44 |
- // in legacy mode. |
|
| 45 |
- GetUnifiedPath() (string, error) |
|
| 30 |
+ // Path returns a cgroup path to the specified controller/subsystem. |
|
| 31 |
+ // For cgroupv2, the argument is unused and can be empty. |
|
| 32 |
+ Path(string) string |
|
| 46 | 33 |
|
| 47 | 34 |
// Sets the cgroup as configured. |
| 48 | 35 |
Set(container *configs.Config) error |
| 49 | 36 |
|
| 50 |
- // Gets the cgroup as configured. |
|
| 51 |
- GetCgroups() (*configs.Cgroup, error) |
|
| 52 |
-} |
|
| 53 |
- |
|
| 54 |
-type NotFoundError struct {
|
|
| 55 |
- Subsystem string |
|
| 56 |
-} |
|
| 37 |
+ // GetPaths returns cgroup path(s) to save in a state file in order to restore later. |
|
| 38 |
+ // |
|
| 39 |
+ // For cgroup v1, a key is cgroup subsystem name, and the value is the path |
|
| 40 |
+ // to the cgroup for this subsystem. |
|
| 41 |
+ // |
|
| 42 |
+ // For cgroup v2 unified hierarchy, a key is "", and the value is the unified path. |
|
| 43 |
+ GetPaths() map[string]string |
|
| 57 | 44 |
|
| 58 |
-func (e *NotFoundError) Error() string {
|
|
| 59 |
- return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
|
| 60 |
-} |
|
| 45 |
+ // GetCgroups returns the cgroup data as configured. |
|
| 46 |
+ GetCgroups() (*configs.Cgroup, error) |
|
| 61 | 47 |
|
| 62 |
-func NewNotFoundError(sub string) error {
|
|
| 63 |
- return &NotFoundError{
|
|
| 64 |
- Subsystem: sub, |
|
| 65 |
- } |
|
| 66 |
-} |
|
| 48 |
+ // GetFreezerState retrieves the current FreezerState of the cgroup. |
|
| 49 |
+ GetFreezerState() (configs.FreezerState, error) |
|
| 67 | 50 |
|
| 68 |
-func IsNotFound(err error) bool {
|
|
| 69 |
- if err == nil {
|
|
| 70 |
- return false |
|
| 71 |
- } |
|
| 72 |
- _, ok := err.(*NotFoundError) |
|
| 73 |
- return ok |
|
| 51 |
+ // Exists reports whether the cgroup path exists. |
|
| 52 |
+ Exists() bool |
|
| 74 | 53 |
} |
| ... | ... |
@@ -20,6 +20,12 @@ type CpuUsage struct {
|
| 20 | 20 |
// Total CPU time consumed per core. |
| 21 | 21 |
// Units: nanoseconds. |
| 22 | 22 |
PercpuUsage []uint64 `json:"percpu_usage,omitempty"` |
| 23 |
+ // CPU time consumed per core in kernel mode |
|
| 24 |
+ // Units: nanoseconds. |
|
| 25 |
+ PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"` |
|
| 26 |
+ // CPU time consumed per core in user mode |
|
| 27 |
+ // Units: nanoseconds. |
|
| 28 |
+ PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"` |
|
| 23 | 29 |
// Time spent by tasks of the cgroup in kernel mode. |
| 24 | 30 |
// Units: nanoseconds. |
| 25 | 31 |
UsageInKernelmode uint64 `json:"usage_in_kernelmode"` |
| ... | ... |
@@ -51,12 +57,33 @@ type MemoryStats struct {
|
| 51 | 51 |
KernelUsage MemoryData `json:"kernel_usage,omitempty"` |
| 52 | 52 |
// usage of kernel TCP memory |
| 53 | 53 |
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` |
| 54 |
+ // usage of memory pages by NUMA node |
|
| 55 |
+ // see chapter 5.6 of memory controller documentation |
|
| 56 |
+ PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"` |
|
| 54 | 57 |
// if true, memory usage is accounted for throughout a hierarchy of cgroups. |
| 55 | 58 |
UseHierarchy bool `json:"use_hierarchy"` |
| 56 | 59 |
|
| 57 | 60 |
Stats map[string]uint64 `json:"stats,omitempty"` |
| 58 | 61 |
} |
| 59 | 62 |
|
| 63 |
+type PageUsageByNUMA struct {
|
|
| 64 |
+ // Embedding is used as types can't be recursive. |
|
| 65 |
+ PageUsageByNUMAInner |
|
| 66 |
+ Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"` |
|
| 67 |
+} |
|
| 68 |
+ |
|
| 69 |
+type PageUsageByNUMAInner struct {
|
|
| 70 |
+ Total PageStats `json:"total,omitempty"` |
|
| 71 |
+ File PageStats `json:"file,omitempty"` |
|
| 72 |
+ Anon PageStats `json:"anon,omitempty"` |
|
| 73 |
+ Unevictable PageStats `json:"unevictable,omitempty"` |
|
| 74 |
+} |
|
| 75 |
+ |
|
| 76 |
+type PageStats struct {
|
|
| 77 |
+ Total uint64 `json:"total,omitempty"` |
|
| 78 |
+ Nodes map[uint8]uint64 `json:"nodes,omitempty"` |
|
| 79 |
+} |
|
| 80 |
+ |
|
| 60 | 81 |
type PidsStats struct {
|
| 61 | 82 |
// number of pids in the cgroup |
| 62 | 83 |
Current uint64 `json:"current,omitempty"` |
| ... | ... |
@@ -4,6 +4,7 @@ package cgroups |
| 4 | 4 |
|
| 5 | 5 |
import ( |
| 6 | 6 |
"bufio" |
| 7 |
+ "errors" |
|
| 7 | 8 |
"fmt" |
| 8 | 9 |
"io" |
| 9 | 10 |
"io/ioutil" |
| ... | ... |
@@ -12,7 +13,6 @@ import ( |
| 12 | 12 |
"strconv" |
| 13 | 13 |
"strings" |
| 14 | 14 |
"sync" |
| 15 |
- "syscall" |
|
| 16 | 15 |
"time" |
| 17 | 16 |
|
| 18 | 17 |
units "github.com/docker/go-units" |
| ... | ... |
@@ -20,7 +20,6 @@ import ( |
| 20 | 20 |
) |
| 21 | 21 |
|
| 22 | 22 |
const ( |
| 23 |
- CgroupNamePrefix = "name=" |
|
| 24 | 23 |
CgroupProcesses = "cgroup.procs" |
| 25 | 24 |
unifiedMountpoint = "/sys/fs/cgroup" |
| 26 | 25 |
) |
| ... | ... |
@@ -40,8 +39,8 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
| 40 | 40 |
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. |
| 41 | 41 |
func IsCgroup2UnifiedMode() bool {
|
| 42 | 42 |
isUnifiedOnce.Do(func() {
|
| 43 |
- var st syscall.Statfs_t |
|
| 44 |
- if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
|
|
| 43 |
+ var st unix.Statfs_t |
|
| 44 |
+ if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
|
|
| 45 | 45 |
panic("cannot statfs cgroup root")
|
| 46 | 46 |
} |
| 47 | 47 |
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC |
| ... | ... |
@@ -49,191 +48,19 @@ func IsCgroup2UnifiedMode() bool {
|
| 49 | 49 |
return isUnified |
| 50 | 50 |
} |
| 51 | 51 |
|
| 52 |
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt |
|
| 53 |
-func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
|
| 54 |
- if IsCgroup2UnifiedMode() {
|
|
| 55 |
- return unifiedMountpoint, nil |
|
| 56 |
- } |
|
| 57 |
- mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) |
|
| 58 |
- return mnt, err |
|
| 59 |
-} |
|
| 60 |
- |
|
| 61 |
-func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
|
| 62 |
- // We are not using mount.GetMounts() because it's super-inefficient, |
|
| 63 |
- // parsing it directly sped up x10 times because of not using Sscanf. |
|
| 64 |
- // It was one of two major performance drawbacks in container start. |
|
| 65 |
- if !isSubsystemAvailable(subsystem) {
|
|
| 66 |
- return "", "", NewNotFoundError(subsystem) |
|
| 67 |
- } |
|
| 68 |
- |
|
| 69 |
- f, err := os.Open("/proc/self/mountinfo")
|
|
| 70 |
- if err != nil {
|
|
| 71 |
- return "", "", err |
|
| 72 |
- } |
|
| 73 |
- defer f.Close() |
|
| 74 |
- |
|
| 75 |
- if IsCgroup2UnifiedMode() {
|
|
| 76 |
- subsystem = "" |
|
| 77 |
- } |
|
| 78 |
- |
|
| 79 |
- return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) |
|
| 80 |
-} |
|
| 81 |
- |
|
| 82 |
-func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
|
|
| 83 |
- scanner := bufio.NewScanner(reader) |
|
| 84 |
- for scanner.Scan() {
|
|
| 85 |
- txt := scanner.Text() |
|
| 86 |
- fields := strings.Fields(txt) |
|
| 87 |
- if len(fields) < 9 {
|
|
| 88 |
- continue |
|
| 89 |
- } |
|
| 90 |
- if strings.HasPrefix(fields[4], cgroupPath) {
|
|
| 91 |
- for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
|
| 92 |
- if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
|
|
| 93 |
- return fields[4], fields[3], nil |
|
| 94 |
- } |
|
| 95 |
- } |
|
| 96 |
- } |
|
| 97 |
- } |
|
| 98 |
- if err := scanner.Err(); err != nil {
|
|
| 99 |
- return "", "", err |
|
| 100 |
- } |
|
| 101 |
- |
|
| 102 |
- return "", "", NewNotFoundError(subsystem) |
|
| 103 |
-} |
|
| 104 |
- |
|
| 105 |
-func isSubsystemAvailable(subsystem string) bool {
|
|
| 106 |
- if IsCgroup2UnifiedMode() {
|
|
| 107 |
- controllers, err := GetAllSubsystems() |
|
| 108 |
- if err != nil {
|
|
| 109 |
- return false |
|
| 110 |
- } |
|
| 111 |
- for _, c := range controllers {
|
|
| 112 |
- if c == subsystem {
|
|
| 113 |
- return true |
|
| 114 |
- } |
|
| 115 |
- } |
|
| 116 |
- return false |
|
| 117 |
- } |
|
| 118 |
- |
|
| 119 |
- cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 120 |
- if err != nil {
|
|
| 121 |
- return false |
|
| 122 |
- } |
|
| 123 |
- _, avail := cgroups[subsystem] |
|
| 124 |
- return avail |
|
| 125 |
-} |
|
| 126 |
- |
|
| 127 |
-func GetClosestMountpointAncestor(dir, mountinfo string) string {
|
|
| 128 |
- deepestMountPoint := "" |
|
| 129 |
- for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
|
|
| 130 |
- mountInfoParts := strings.Fields(mountInfoEntry) |
|
| 131 |
- if len(mountInfoParts) < 5 {
|
|
| 132 |
- continue |
|
| 133 |
- } |
|
| 134 |
- mountPoint := mountInfoParts[4] |
|
| 135 |
- if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
|
|
| 136 |
- deepestMountPoint = mountPoint |
|
| 137 |
- } |
|
| 138 |
- } |
|
| 139 |
- return deepestMountPoint |
|
| 140 |
-} |
|
| 141 |
- |
|
| 142 |
-func FindCgroupMountpointDir() (string, error) {
|
|
| 143 |
- f, err := os.Open("/proc/self/mountinfo")
|
|
| 144 |
- if err != nil {
|
|
| 145 |
- return "", err |
|
| 146 |
- } |
|
| 147 |
- defer f.Close() |
|
| 148 |
- |
|
| 149 |
- scanner := bufio.NewScanner(f) |
|
| 150 |
- for scanner.Scan() {
|
|
| 151 |
- text := scanner.Text() |
|
| 152 |
- fields := strings.Split(text, " ") |
|
| 153 |
- // Safe as mountinfo encodes mountpoints with spaces as \040. |
|
| 154 |
- index := strings.Index(text, " - ") |
|
| 155 |
- postSeparatorFields := strings.Fields(text[index+3:]) |
|
| 156 |
- numPostFields := len(postSeparatorFields) |
|
| 157 |
- |
|
| 158 |
- // This is an error as we can't detect if the mount is for "cgroup" |
|
| 159 |
- if numPostFields == 0 {
|
|
| 160 |
- return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
|
| 161 |
- } |
|
| 162 |
- |
|
| 163 |
- if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
|
|
| 164 |
- // Check that the mount is properly formatted. |
|
| 165 |
- if numPostFields < 3 {
|
|
| 166 |
- return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
|
| 167 |
- } |
|
| 168 |
- |
|
| 169 |
- return filepath.Dir(fields[4]), nil |
|
| 170 |
- } |
|
| 171 |
- } |
|
| 172 |
- if err := scanner.Err(); err != nil {
|
|
| 173 |
- return "", err |
|
| 174 |
- } |
|
| 175 |
- |
|
| 176 |
- return "", NewNotFoundError("cgroup")
|
|
| 177 |
-} |
|
| 178 |
- |
|
| 179 | 52 |
type Mount struct {
|
| 180 | 53 |
Mountpoint string |
| 181 | 54 |
Root string |
| 182 | 55 |
Subsystems []string |
| 183 | 56 |
} |
| 184 | 57 |
|
| 185 |
-func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
|
| 186 |
- if len(m.Subsystems) == 0 {
|
|
| 187 |
- return "", fmt.Errorf("no subsystem for mount")
|
|
| 188 |
- } |
|
| 189 |
- |
|
| 190 |
- return getControllerPath(m.Subsystems[0], cgroups) |
|
| 191 |
-} |
|
| 192 |
- |
|
| 193 |
-func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
|
|
| 194 |
- res := make([]Mount, 0, len(ss)) |
|
| 195 |
- scanner := bufio.NewScanner(mi) |
|
| 196 |
- numFound := 0 |
|
| 197 |
- for scanner.Scan() && numFound < len(ss) {
|
|
| 198 |
- txt := scanner.Text() |
|
| 199 |
- sepIdx := strings.Index(txt, " - ") |
|
| 200 |
- if sepIdx == -1 {
|
|
| 201 |
- return nil, fmt.Errorf("invalid mountinfo format")
|
|
| 202 |
- } |
|
| 203 |
- if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
|
|
| 204 |
- continue |
|
| 205 |
- } |
|
| 206 |
- fields := strings.Split(txt, " ") |
|
| 207 |
- m := Mount{
|
|
| 208 |
- Mountpoint: fields[4], |
|
| 209 |
- Root: fields[3], |
|
| 210 |
- } |
|
| 211 |
- for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
|
| 212 |
- seen, known := ss[opt] |
|
| 213 |
- if !known || (!all && seen) {
|
|
| 214 |
- continue |
|
| 215 |
- } |
|
| 216 |
- ss[opt] = true |
|
| 217 |
- if strings.HasPrefix(opt, CgroupNamePrefix) {
|
|
| 218 |
- opt = opt[len(CgroupNamePrefix):] |
|
| 219 |
- } |
|
| 220 |
- m.Subsystems = append(m.Subsystems, opt) |
|
| 221 |
- numFound++ |
|
| 222 |
- } |
|
| 223 |
- if len(m.Subsystems) > 0 || all {
|
|
| 224 |
- res = append(res, m) |
|
| 225 |
- } |
|
| 226 |
- } |
|
| 227 |
- if err := scanner.Err(); err != nil {
|
|
| 228 |
- return nil, err |
|
| 229 |
- } |
|
| 230 |
- return res, nil |
|
| 231 |
-} |
|
| 232 |
- |
|
| 233 | 58 |
// GetCgroupMounts returns the mounts for the cgroup subsystems. |
| 234 | 59 |
// all indicates whether to return just the first instance or all the mounts. |
| 60 |
+// This function should not be used from cgroupv2 code, as in this case |
|
| 61 |
+// all the controllers are available under the constant unifiedMountpoint. |
|
| 235 | 62 |
func GetCgroupMounts(all bool) ([]Mount, error) {
|
| 236 | 63 |
if IsCgroup2UnifiedMode() {
|
| 64 |
+ // TODO: remove cgroupv2 case once all external users are converted |
|
| 237 | 65 |
availableControllers, err := GetAllSubsystems() |
| 238 | 66 |
if err != nil {
|
| 239 | 67 |
return nil, err |
| ... | ... |
@@ -246,22 +73,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
| 246 | 246 |
return []Mount{m}, nil
|
| 247 | 247 |
} |
| 248 | 248 |
|
| 249 |
- f, err := os.Open("/proc/self/mountinfo")
|
|
| 250 |
- if err != nil {
|
|
| 251 |
- return nil, err |
|
| 252 |
- } |
|
| 253 |
- defer f.Close() |
|
| 254 |
- |
|
| 255 |
- allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 256 |
- if err != nil {
|
|
| 257 |
- return nil, err |
|
| 258 |
- } |
|
| 259 |
- |
|
| 260 |
- allMap := make(map[string]bool) |
|
| 261 |
- for s := range allSubsystems {
|
|
| 262 |
- allMap[s] = false |
|
| 263 |
- } |
|
| 264 |
- return getCgroupMountsHelper(allMap, f, all) |
|
| 249 |
+ return getCgroupMountsV1(all) |
|
| 265 | 250 |
} |
| 266 | 251 |
|
| 267 | 252 |
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel |
| ... | ... |
@@ -305,61 +117,8 @@ func GetAllSubsystems() ([]string, error) {
|
| 305 | 305 |
return subsystems, nil |
| 306 | 306 |
} |
| 307 | 307 |
|
| 308 |
-// GetOwnCgroup returns the relative path to the cgroup docker is running in. |
|
| 309 |
-func GetOwnCgroup(subsystem string) (string, error) {
|
|
| 310 |
- cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 311 |
- if err != nil {
|
|
| 312 |
- return "", err |
|
| 313 |
- } |
|
| 314 |
- |
|
| 315 |
- return getControllerPath(subsystem, cgroups) |
|
| 316 |
-} |
|
| 317 |
- |
|
| 318 |
-func GetOwnCgroupPath(subsystem string) (string, error) {
|
|
| 319 |
- cgroup, err := GetOwnCgroup(subsystem) |
|
| 320 |
- if err != nil {
|
|
| 321 |
- return "", err |
|
| 322 |
- } |
|
| 323 |
- |
|
| 324 |
- return getCgroupPathHelper(subsystem, cgroup) |
|
| 325 |
-} |
|
| 326 |
- |
|
| 327 |
-func GetInitCgroup(subsystem string) (string, error) {
|
|
| 328 |
- cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
|
| 329 |
- if err != nil {
|
|
| 330 |
- return "", err |
|
| 331 |
- } |
|
| 332 |
- |
|
| 333 |
- return getControllerPath(subsystem, cgroups) |
|
| 334 |
-} |
|
| 335 |
- |
|
| 336 |
-func GetInitCgroupPath(subsystem string) (string, error) {
|
|
| 337 |
- cgroup, err := GetInitCgroup(subsystem) |
|
| 338 |
- if err != nil {
|
|
| 339 |
- return "", err |
|
| 340 |
- } |
|
| 341 |
- |
|
| 342 |
- return getCgroupPathHelper(subsystem, cgroup) |
|
| 343 |
-} |
|
| 344 |
- |
|
| 345 |
-func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
|
| 346 |
- mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
|
| 347 |
- if err != nil {
|
|
| 348 |
- return "", err |
|
| 349 |
- } |
|
| 350 |
- |
|
| 351 |
- // This is needed for nested containers, because in /proc/self/cgroup we |
|
| 352 |
- // see paths from host, which don't exist in container. |
|
| 353 |
- relCgroup, err := filepath.Rel(root, cgroup) |
|
| 354 |
- if err != nil {
|
|
| 355 |
- return "", err |
|
| 356 |
- } |
|
| 357 |
- |
|
| 358 |
- return filepath.Join(mnt, relCgroup), nil |
|
| 359 |
-} |
|
| 360 |
- |
|
| 361 |
-func readProcsFile(dir string) ([]int, error) {
|
|
| 362 |
- f, err := os.Open(filepath.Join(dir, CgroupProcesses)) |
|
| 308 |
+func readProcsFile(file string) ([]int, error) {
|
|
| 309 |
+ f, err := os.Open(file) |
|
| 363 | 310 |
if err != nil {
|
| 364 | 311 |
return nil, err |
| 365 | 312 |
} |
| ... | ... |
@@ -379,11 +138,18 @@ func readProcsFile(dir string) ([]int, error) {
|
| 379 | 379 |
out = append(out, pid) |
| 380 | 380 |
} |
| 381 | 381 |
} |
| 382 |
- return out, nil |
|
| 382 |
+ return out, s.Err() |
|
| 383 | 383 |
} |
| 384 | 384 |
|
| 385 |
-// ParseCgroupFile parses the given cgroup file, typically from |
|
| 386 |
-// /proc/<pid>/cgroup, into a map of subgroups to cgroup names. |
|
| 385 |
+// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup |
|
| 386 |
+// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g. |
|
| 387 |
+// "cpu": "/user.slice/user-1000.slice" |
|
| 388 |
+// "pids": "/user.slice/user-1000.slice" |
|
| 389 |
+// etc. |
|
| 390 |
+// |
|
| 391 |
+// Note that for cgroup v2 unified hierarchy, there are no per-controller |
|
| 392 |
+// cgroup paths, so the resulting map will have a single element where the key |
|
| 393 |
+// is empty string ("") and the value is the cgroup path the <pid> is in.
|
|
| 387 | 394 |
func ParseCgroupFile(path string) (map[string]string, error) {
|
| 388 | 395 |
f, err := os.Open(path) |
| 389 | 396 |
if err != nil {
|
| ... | ... |
@@ -423,22 +189,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
| 423 | 423 |
return cgroups, nil |
| 424 | 424 |
} |
| 425 | 425 |
|
| 426 |
-func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
|
| 427 |
- if IsCgroup2UnifiedMode() {
|
|
| 428 |
- return "/", nil |
|
| 429 |
- } |
|
| 430 |
- |
|
| 431 |
- if p, ok := cgroups[subsystem]; ok {
|
|
| 432 |
- return p, nil |
|
| 433 |
- } |
|
| 434 |
- |
|
| 435 |
- if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
|
| 436 |
- return p, nil |
|
| 437 |
- } |
|
| 438 |
- |
|
| 439 |
- return "", NewNotFoundError(subsystem) |
|
| 440 |
-} |
|
| 441 |
- |
|
| 442 | 426 |
func PathExists(path string) bool {
|
| 443 | 427 |
if _, err := os.Stat(path); err != nil {
|
| 444 | 428 |
return false |
| ... | ... |
@@ -514,8 +264,8 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
| 514 | 514 |
} |
| 515 | 515 |
|
| 516 | 516 |
// GetPids returns all pids that were added to the cgroup at path. |
| 517 |
-func GetPids(path string) ([]int, error) {
|
|
| 518 |
- return readProcsFile(path) |
|
| 517 |
+func GetPids(dir string) ([]int, error) {
|
|
| 518 |
+ return readProcsFile(filepath.Join(dir, CgroupProcesses)) |
|
| 519 | 519 |
} |
| 520 | 520 |
|
| 521 | 521 |
// GetAllPids returns all pids that were added to the cgroup at path and to all its |
| ... | ... |
@@ -524,14 +274,13 @@ func GetAllPids(path string) ([]int, error) {
|
| 524 | 524 |
var pids []int |
| 525 | 525 |
// collect pids from all sub-cgroups |
| 526 | 526 |
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
|
| 527 |
- dir, file := filepath.Split(p) |
|
| 528 |
- if file != CgroupProcesses {
|
|
| 529 |
- return nil |
|
| 530 |
- } |
|
| 531 | 527 |
if iErr != nil {
|
| 532 | 528 |
return iErr |
| 533 | 529 |
} |
| 534 |
- cPids, err := readProcsFile(dir) |
|
| 530 |
+ if info.IsDir() || info.Name() != CgroupProcesses {
|
|
| 531 |
+ return nil |
|
| 532 |
+ } |
|
| 533 |
+ cPids, err := readProcsFile(p) |
|
| 535 | 534 |
if err != nil {
|
| 536 | 535 |
return err |
| 537 | 536 |
} |
| ... | ... |
@@ -568,7 +317,7 @@ func WriteCgroupProc(dir string, pid int) error {
|
| 568 | 568 |
|
| 569 | 569 |
// EINVAL might mean that the task being added to cgroup.procs is in state |
| 570 | 570 |
// TASK_NEW. We should attempt to do so again. |
| 571 |
- if isEINVAL(err) {
|
|
| 571 |
+ if errors.Is(err, unix.EINVAL) {
|
|
| 572 | 572 |
time.Sleep(30 * time.Millisecond) |
| 573 | 573 |
continue |
| 574 | 574 |
} |
| ... | ... |
@@ -578,11 +327,53 @@ func WriteCgroupProc(dir string, pid int) error {
|
| 578 | 578 |
return err |
| 579 | 579 |
} |
| 580 | 580 |
|
| 581 |
-func isEINVAL(err error) bool {
|
|
| 582 |
- switch err := err.(type) {
|
|
| 583 |
- case *os.PathError: |
|
| 584 |
- return err.Err == unix.EINVAL |
|
| 585 |
- default: |
|
| 586 |
- return false |
|
| 581 |
+// Since the OCI spec is designed for cgroup v1, in some cases |
|
| 582 |
+// there is need to convert from the cgroup v1 configuration to cgroup v2 |
|
| 583 |
+// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990) |
|
| 584 |
+// convert linearly from [10-1000] to [1-10000] |
|
| 585 |
+func ConvertBlkIOToCgroupV2Value(blkIoWeight uint16) uint64 {
|
|
| 586 |
+ if blkIoWeight == 0 {
|
|
| 587 |
+ return 0 |
|
| 588 |
+ } |
|
| 589 |
+ return uint64(1 + (uint64(blkIoWeight)-10)*9999/990) |
|
| 590 |
+} |
|
| 591 |
+ |
|
| 592 |
+// Since the OCI spec is designed for cgroup v1, in some cases |
|
| 593 |
+// there is need to convert from the cgroup v1 configuration to cgroup v2 |
|
| 594 |
+// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142) |
|
| 595 |
+// convert from [2-262144] to [1-10000] |
|
| 596 |
+// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)" |
|
| 597 |
+func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
|
|
| 598 |
+ if cpuShares == 0 {
|
|
| 599 |
+ return 0 |
|
| 587 | 600 |
} |
| 601 |
+ return (1 + ((cpuShares-2)*9999)/262142) |
|
| 602 |
+} |
|
| 603 |
+ |
|
| 604 |
+// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec |
|
| 605 |
+// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap |
|
| 606 |
+// is defined as memory+swap combined, while in cgroup v2 swap is a separate value. |
|
| 607 |
+func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
|
|
| 608 |
+ // for compatibility with cgroup1 controller, set swap to unlimited in |
|
| 609 |
+ // case the memory is set to unlimited, and swap is not explicitly set, |
|
| 610 |
+ // treating the request as "set both memory and swap to unlimited". |
|
| 611 |
+ if memory == -1 && memorySwap == 0 {
|
|
| 612 |
+ return -1, nil |
|
| 613 |
+ } |
|
| 614 |
+ if memorySwap == -1 || memorySwap == 0 {
|
|
| 615 |
+ // -1 is "max", 0 is "unset", so treat as is |
|
| 616 |
+ return memorySwap, nil |
|
| 617 |
+ } |
|
| 618 |
+ // sanity checks |
|
| 619 |
+ if memory == 0 || memory == -1 {
|
|
| 620 |
+ return 0, errors.New("unable to set swap limit without memory limit")
|
|
| 621 |
+ } |
|
| 622 |
+ if memory < 0 {
|
|
| 623 |
+ return 0, fmt.Errorf("invalid memory value: %d", memory)
|
|
| 624 |
+ } |
|
| 625 |
+ if memorySwap < memory {
|
|
| 626 |
+ return 0, errors.New("memory+swap limit should be >= memory limit")
|
|
| 627 |
+ } |
|
| 628 |
+ |
|
| 629 |
+ return memorySwap - memory, nil |
|
| 588 | 630 |
} |
| 589 | 631 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,250 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bufio" |
|
| 4 |
+ "errors" |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "io" |
|
| 7 |
+ "os" |
|
| 8 |
+ "path/filepath" |
|
| 9 |
+ "strings" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+// Code in this source file are specific to cgroup v1, |
|
| 13 |
+// and must not be used from any cgroup v2 code. |
|
| 14 |
+ |
|
| 15 |
+const ( |
|
| 16 |
+ CgroupNamePrefix = "name=" |
|
| 17 |
+) |
|
| 18 |
+ |
|
| 19 |
+var ( |
|
| 20 |
+ errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
|
| 21 |
+) |
|
| 22 |
+ |
|
| 23 |
+type NotFoundError struct {
|
|
| 24 |
+ Subsystem string |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 27 |
+func (e *NotFoundError) Error() string {
|
|
| 28 |
+ return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
|
| 29 |
+} |
|
| 30 |
+ |
|
| 31 |
+func NewNotFoundError(sub string) error {
|
|
| 32 |
+ return &NotFoundError{
|
|
| 33 |
+ Subsystem: sub, |
|
| 34 |
+ } |
|
| 35 |
+} |
|
| 36 |
+ |
|
| 37 |
+func IsNotFound(err error) bool {
|
|
| 38 |
+ if err == nil {
|
|
| 39 |
+ return false |
|
| 40 |
+ } |
|
| 41 |
+ _, ok := err.(*NotFoundError) |
|
| 42 |
+ return ok |
|
| 43 |
+} |
|
| 44 |
+ |
|
| 45 |
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt |
|
| 46 |
+func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
|
| 47 |
+ if IsCgroup2UnifiedMode() {
|
|
| 48 |
+ return "", errUnified |
|
| 49 |
+ } |
|
| 50 |
+ mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) |
|
| 51 |
+ return mnt, err |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
|
| 55 |
+ if IsCgroup2UnifiedMode() {
|
|
| 56 |
+ return "", "", errUnified |
|
| 57 |
+ } |
|
| 58 |
+ |
|
| 59 |
+ // We are not using mount.GetMounts() because it's super-inefficient, |
|
| 60 |
+ // parsing it directly sped up x10 times because of not using Sscanf. |
|
| 61 |
+ // It was one of two major performance drawbacks in container start. |
|
| 62 |
+ if !isSubsystemAvailable(subsystem) {
|
|
| 63 |
+ return "", "", NewNotFoundError(subsystem) |
|
| 64 |
+ } |
|
| 65 |
+ |
|
| 66 |
+ f, err := os.Open("/proc/self/mountinfo")
|
|
| 67 |
+ if err != nil {
|
|
| 68 |
+ return "", "", err |
|
| 69 |
+ } |
|
| 70 |
+ defer f.Close() |
|
| 71 |
+ |
|
| 72 |
+ return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem) |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
|
|
| 76 |
+ scanner := bufio.NewScanner(reader) |
|
| 77 |
+ for scanner.Scan() {
|
|
| 78 |
+ txt := scanner.Text() |
|
| 79 |
+ fields := strings.Fields(txt) |
|
| 80 |
+ if len(fields) < 9 {
|
|
| 81 |
+ continue |
|
| 82 |
+ } |
|
| 83 |
+ if strings.HasPrefix(fields[4], cgroupPath) {
|
|
| 84 |
+ for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
|
| 85 |
+ if opt == subsystem {
|
|
| 86 |
+ return fields[4], fields[3], nil |
|
| 87 |
+ } |
|
| 88 |
+ } |
|
| 89 |
+ } |
|
| 90 |
+ } |
|
| 91 |
+ if err := scanner.Err(); err != nil {
|
|
| 92 |
+ return "", "", err |
|
| 93 |
+ } |
|
| 94 |
+ |
|
| 95 |
+ return "", "", NewNotFoundError(subsystem) |
|
| 96 |
+} |
|
| 97 |
+ |
|
| 98 |
+func isSubsystemAvailable(subsystem string) bool {
|
|
| 99 |
+ if IsCgroup2UnifiedMode() {
|
|
| 100 |
+ panic("don't call isSubsystemAvailable from cgroupv2 code")
|
|
| 101 |
+ } |
|
| 102 |
+ |
|
| 103 |
+ cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 104 |
+ if err != nil {
|
|
| 105 |
+ return false |
|
| 106 |
+ } |
|
| 107 |
+ _, avail := cgroups[subsystem] |
|
| 108 |
+ return avail |
|
| 109 |
+} |
|
| 110 |
+ |
|
| 111 |
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
|
| 112 |
+ if len(m.Subsystems) == 0 {
|
|
| 113 |
+ return "", fmt.Errorf("no subsystem for mount")
|
|
| 114 |
+ } |
|
| 115 |
+ |
|
| 116 |
+ return getControllerPath(m.Subsystems[0], cgroups) |
|
| 117 |
+} |
|
| 118 |
+ |
|
| 119 |
+func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
|
|
| 120 |
+ res := make([]Mount, 0, len(ss)) |
|
| 121 |
+ scanner := bufio.NewScanner(mi) |
|
| 122 |
+ numFound := 0 |
|
| 123 |
+ for scanner.Scan() && numFound < len(ss) {
|
|
| 124 |
+ txt := scanner.Text() |
|
| 125 |
+ sepIdx := strings.Index(txt, " - ") |
|
| 126 |
+ if sepIdx == -1 {
|
|
| 127 |
+ return nil, fmt.Errorf("invalid mountinfo format")
|
|
| 128 |
+ } |
|
| 129 |
+ if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
|
|
| 130 |
+ continue |
|
| 131 |
+ } |
|
| 132 |
+ fields := strings.Split(txt, " ") |
|
| 133 |
+ m := Mount{
|
|
| 134 |
+ Mountpoint: fields[4], |
|
| 135 |
+ Root: fields[3], |
|
| 136 |
+ } |
|
| 137 |
+ for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
|
| 138 |
+ seen, known := ss[opt] |
|
| 139 |
+ if !known || (!all && seen) {
|
|
| 140 |
+ continue |
|
| 141 |
+ } |
|
| 142 |
+ ss[opt] = true |
|
| 143 |
+ opt = strings.TrimPrefix(opt, CgroupNamePrefix) |
|
| 144 |
+ m.Subsystems = append(m.Subsystems, opt) |
|
| 145 |
+ numFound++ |
|
| 146 |
+ } |
|
| 147 |
+ if len(m.Subsystems) > 0 || all {
|
|
| 148 |
+ res = append(res, m) |
|
| 149 |
+ } |
|
| 150 |
+ } |
|
| 151 |
+ if err := scanner.Err(); err != nil {
|
|
| 152 |
+ return nil, err |
|
| 153 |
+ } |
|
| 154 |
+ return res, nil |
|
| 155 |
+} |
|
| 156 |
+ |
|
| 157 |
+func getCgroupMountsV1(all bool) ([]Mount, error) {
|
|
| 158 |
+ f, err := os.Open("/proc/self/mountinfo")
|
|
| 159 |
+ if err != nil {
|
|
| 160 |
+ return nil, err |
|
| 161 |
+ } |
|
| 162 |
+ defer f.Close() |
|
| 163 |
+ |
|
| 164 |
+ allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 165 |
+ if err != nil {
|
|
| 166 |
+ return nil, err |
|
| 167 |
+ } |
|
| 168 |
+ |
|
| 169 |
+ allMap := make(map[string]bool) |
|
| 170 |
+ for s := range allSubsystems {
|
|
| 171 |
+ allMap[s] = false |
|
| 172 |
+ } |
|
| 173 |
+ return getCgroupMountsHelper(allMap, f, all) |
|
| 174 |
+} |
|
| 175 |
+ |
|
| 176 |
+// GetOwnCgroup returns the relative path to the cgroup docker is running in. |
|
| 177 |
+func GetOwnCgroup(subsystem string) (string, error) {
|
|
| 178 |
+ if IsCgroup2UnifiedMode() {
|
|
| 179 |
+ return "", errUnified |
|
| 180 |
+ } |
|
| 181 |
+ cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 182 |
+ if err != nil {
|
|
| 183 |
+ return "", err |
|
| 184 |
+ } |
|
| 185 |
+ |
|
| 186 |
+ return getControllerPath(subsystem, cgroups) |
|
| 187 |
+} |
|
| 188 |
+ |
|
| 189 |
+func GetOwnCgroupPath(subsystem string) (string, error) {
|
|
| 190 |
+ cgroup, err := GetOwnCgroup(subsystem) |
|
| 191 |
+ if err != nil {
|
|
| 192 |
+ return "", err |
|
| 193 |
+ } |
|
| 194 |
+ |
|
| 195 |
+ return getCgroupPathHelper(subsystem, cgroup) |
|
| 196 |
+} |
|
| 197 |
+ |
|
| 198 |
+func GetInitCgroup(subsystem string) (string, error) {
|
|
| 199 |
+ if IsCgroup2UnifiedMode() {
|
|
| 200 |
+ return "", errUnified |
|
| 201 |
+ } |
|
| 202 |
+ cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
|
| 203 |
+ if err != nil {
|
|
| 204 |
+ return "", err |
|
| 205 |
+ } |
|
| 206 |
+ |
|
| 207 |
+ return getControllerPath(subsystem, cgroups) |
|
| 208 |
+} |
|
| 209 |
+ |
|
| 210 |
+func GetInitCgroupPath(subsystem string) (string, error) {
|
|
| 211 |
+ cgroup, err := GetInitCgroup(subsystem) |
|
| 212 |
+ if err != nil {
|
|
| 213 |
+ return "", err |
|
| 214 |
+ } |
|
| 215 |
+ |
|
| 216 |
+ return getCgroupPathHelper(subsystem, cgroup) |
|
| 217 |
+} |
|
| 218 |
+ |
|
| 219 |
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
|
| 220 |
+ mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
|
| 221 |
+ if err != nil {
|
|
| 222 |
+ return "", err |
|
| 223 |
+ } |
|
| 224 |
+ |
|
| 225 |
+ // This is needed for nested containers, because in /proc/self/cgroup we |
|
| 226 |
+ // see paths from host, which don't exist in container. |
|
| 227 |
+ relCgroup, err := filepath.Rel(root, cgroup) |
|
| 228 |
+ if err != nil {
|
|
| 229 |
+ return "", err |
|
| 230 |
+ } |
|
| 231 |
+ |
|
| 232 |
+ return filepath.Join(mnt, relCgroup), nil |
|
| 233 |
+} |
|
| 234 |
+ |
|
| 235 |
+func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
|
| 236 |
+ if IsCgroup2UnifiedMode() {
|
|
| 237 |
+ return "", errUnified |
|
| 238 |
+ } |
|
| 239 |
+ |
|
| 240 |
+ if p, ok := cgroups[subsystem]; ok {
|
|
| 241 |
+ return p, nil |
|
| 242 |
+ } |
|
| 243 |
+ |
|
| 244 |
+ if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
|
| 245 |
+ return p, nil |
|
| 246 |
+ } |
|
| 247 |
+ |
|
| 248 |
+ return "", NewNotFoundError(subsystem) |
|
| 249 |
+} |
| ... | ... |
@@ -1,5 +1,9 @@ |
| 1 | 1 |
package configs |
| 2 | 2 |
|
| 3 |
+import ( |
|
| 4 |
+ systemdDbus "github.com/coreos/go-systemd/v22/dbus" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 3 | 7 |
type FreezerState string |
| 4 | 8 |
|
| 5 | 9 |
const ( |
| ... | ... |
@@ -29,18 +33,16 @@ type Cgroup struct {
|
| 29 | 29 |
|
| 30 | 30 |
// Resources contains various cgroups settings to apply |
| 31 | 31 |
*Resources |
| 32 |
+ |
|
| 33 |
+ // SystemdProps are any additional properties for systemd, |
|
| 34 |
+ // derived from org.systemd.property.xxx annotations. |
|
| 35 |
+ // Ignored unless systemd is used for managing cgroups. |
|
| 36 |
+ SystemdProps []systemdDbus.Property `json:"-"` |
|
| 32 | 37 |
} |
| 33 | 38 |
|
| 34 | 39 |
type Resources struct {
|
| 35 |
- // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. |
|
| 36 |
- // Deprecated |
|
| 37 |
- AllowAllDevices *bool `json:"allow_all_devices,omitempty"` |
|
| 38 |
- // Deprecated |
|
| 39 |
- AllowedDevices []*Device `json:"allowed_devices,omitempty"` |
|
| 40 |
- // Deprecated |
|
| 41 |
- DeniedDevices []*Device `json:"denied_devices,omitempty"` |
|
| 42 |
- |
|
| 43 |
- Devices []*Device `json:"devices"` |
|
| 40 |
+ // Devices is the set of access rules for devices in the container. |
|
| 41 |
+ Devices []*DeviceRule `json:"devices"` |
|
| 44 | 42 |
|
| 45 | 43 |
// Memory limit (in bytes) |
| 46 | 44 |
Memory int64 `json:"memory"` |
| ... | ... |
@@ -125,6 +127,10 @@ type Resources struct {
|
| 125 | 125 |
// CpuWeight sets a proportional bandwidth limit. |
| 126 | 126 |
CpuWeight uint64 `json:"cpu_weight"` |
| 127 | 127 |
|
| 128 |
- // CpuMax sets she maximum bandwidth limit (format: max period). |
|
| 129 |
- CpuMax string `json:"cpu_max"` |
|
| 128 |
+ // SkipDevices allows to skip configuring device permissions. |
|
| 129 |
+ // Used by e.g. kubelet while creating a parent cgroup (kubepods) |
|
| 130 |
+ // common for many containers. |
|
| 131 |
+ // |
|
| 132 |
+ // NOTE it is impossible to start a container which has this flag set. |
|
| 133 |
+ SkipDevices bool `json:"skip_devices"` |
|
| 130 | 134 |
} |
| ... | ... |
@@ -8,7 +8,7 @@ import ( |
| 8 | 8 |
"time" |
| 9 | 9 |
|
| 10 | 10 |
"github.com/opencontainers/runtime-spec/specs-go" |
| 11 |
- |
|
| 11 |
+ "github.com/pkg/errors" |
|
| 12 | 12 |
"github.com/sirupsen/logrus" |
| 13 | 13 |
) |
| 14 | 14 |
|
| ... | ... |
@@ -70,9 +70,10 @@ type Arg struct {
|
| 70 | 70 |
|
| 71 | 71 |
// Syscall is a rule to match a syscall in Seccomp |
| 72 | 72 |
type Syscall struct {
|
| 73 |
- Name string `json:"name"` |
|
| 74 |
- Action Action `json:"action"` |
|
| 75 |
- Args []*Arg `json:"args"` |
|
| 73 |
+ Name string `json:"name"` |
|
| 74 |
+ Action Action `json:"action"` |
|
| 75 |
+ ErrnoRet *uint `json:"errnoRet"` |
|
| 76 |
+ Args []*Arg `json:"args"` |
|
| 76 | 77 |
} |
| 77 | 78 |
|
| 78 | 79 |
// TODO Windows. Many of these fields should be factored out into those parts |
| ... | ... |
@@ -175,7 +176,7 @@ type Config struct {
|
| 175 | 175 |
|
| 176 | 176 |
// Hooks are a collection of actions to perform at various container lifecycle events. |
| 177 | 177 |
// CommandHooks are serialized to JSON, but other hooks are not. |
| 178 |
- Hooks *Hooks |
|
| 178 |
+ Hooks Hooks |
|
| 179 | 179 |
|
| 180 | 180 |
// Version is the version of opencontainer specification that is supported. |
| 181 | 181 |
Version string `json:"version"` |
| ... | ... |
@@ -202,17 +203,50 @@ type Config struct {
|
| 202 | 202 |
RootlessCgroups bool `json:"rootless_cgroups,omitempty"` |
| 203 | 203 |
} |
| 204 | 204 |
|
| 205 |
-type Hooks struct {
|
|
| 205 |
+type HookName string |
|
| 206 |
+type HookList []Hook |
|
| 207 |
+type Hooks map[HookName]HookList |
|
| 208 |
+ |
|
| 209 |
+const ( |
|
| 206 | 210 |
// Prestart commands are executed after the container namespaces are created, |
| 207 | 211 |
// but before the user supplied command is executed from init. |
| 208 |
- Prestart []Hook |
|
| 212 |
+ // Note: This hook is now deprecated |
|
| 213 |
+ // Prestart commands are called in the Runtime namespace. |
|
| 214 |
+ Prestart HookName = "prestart" |
|
| 215 |
+ |
|
| 216 |
+ // CreateRuntime commands MUST be called as part of the create operation after |
|
| 217 |
+ // the runtime environment has been created but before the pivot_root has been executed. |
|
| 218 |
+ // CreateRuntime is called immediately after the deprecated Prestart hook. |
|
| 219 |
+ // CreateRuntime commands are called in the Runtime Namespace. |
|
| 220 |
+ CreateRuntime = "createRuntime" |
|
| 221 |
+ |
|
| 222 |
+ // CreateContainer commands MUST be called as part of the create operation after |
|
| 223 |
+ // the runtime environment has been created but before the pivot_root has been executed. |
|
| 224 |
+ // CreateContainer commands are called in the Container namespace. |
|
| 225 |
+ CreateContainer = "createContainer" |
|
| 226 |
+ |
|
| 227 |
+ // StartContainer commands MUST be called as part of the start operation and before |
|
| 228 |
+ // the container process is started. |
|
| 229 |
+ // StartContainer commands are called in the Container namespace. |
|
| 230 |
+ StartContainer = "startContainer" |
|
| 209 | 231 |
|
| 210 | 232 |
// Poststart commands are executed after the container init process starts. |
| 211 |
- Poststart []Hook |
|
| 233 |
+ // Poststart commands are called in the Runtime Namespace. |
|
| 234 |
+ Poststart = "poststart" |
|
| 212 | 235 |
|
| 213 | 236 |
// Poststop commands are executed after the container init process exits. |
| 214 |
- Poststop []Hook |
|
| 215 |
-} |
|
| 237 |
+ // Poststop commands are called in the Runtime Namespace. |
|
| 238 |
+ Poststop = "poststop" |
|
| 239 |
+) |
|
| 240 |
+ |
|
| 241 |
+// TODO move this to runtime-spec |
|
| 242 |
+// See: https://github.com/opencontainers/runtime-spec/pull/1046 |
|
| 243 |
+const ( |
|
| 244 |
+ Creating = "creating" |
|
| 245 |
+ Created = "created" |
|
| 246 |
+ Running = "running" |
|
| 247 |
+ Stopped = "stopped" |
|
| 248 |
+) |
|
| 216 | 249 |
|
| 217 | 250 |
type Capabilities struct {
|
| 218 | 251 |
// Bounding is the set of capabilities checked by the kernel. |
| ... | ... |
@@ -227,32 +261,39 @@ type Capabilities struct {
|
| 227 | 227 |
Ambient []string |
| 228 | 228 |
} |
| 229 | 229 |
|
| 230 |
-func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
|
| 231 |
- var state struct {
|
|
| 232 |
- Prestart []CommandHook |
|
| 233 |
- Poststart []CommandHook |
|
| 234 |
- Poststop []CommandHook |
|
| 230 |
+func (hooks HookList) RunHooks(state *specs.State) error {
|
|
| 231 |
+ for i, h := range hooks {
|
|
| 232 |
+ if err := h.Run(state); err != nil {
|
|
| 233 |
+ return errors.Wrapf(err, "Running hook #%d:", i) |
|
| 234 |
+ } |
|
| 235 | 235 |
} |
| 236 | 236 |
|
| 237 |
+ return nil |
|
| 238 |
+} |
|
| 239 |
+ |
|
| 240 |
+func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
|
| 241 |
+ var state map[HookName][]CommandHook |
|
| 242 |
+ |
|
| 237 | 243 |
if err := json.Unmarshal(b, &state); err != nil {
|
| 238 | 244 |
return err |
| 239 | 245 |
} |
| 240 | 246 |
|
| 241 |
- deserialize := func(shooks []CommandHook) (hooks []Hook) {
|
|
| 242 |
- for _, shook := range shooks {
|
|
| 243 |
- hooks = append(hooks, shook) |
|
| 247 |
+ *hooks = Hooks{}
|
|
| 248 |
+ for n, commandHooks := range state {
|
|
| 249 |
+ if len(commandHooks) == 0 {
|
|
| 250 |
+ continue |
|
| 244 | 251 |
} |
| 245 | 252 |
|
| 246 |
- return hooks |
|
| 253 |
+ (*hooks)[n] = HookList{}
|
|
| 254 |
+ for _, h := range commandHooks {
|
|
| 255 |
+ (*hooks)[n] = append((*hooks)[n], h) |
|
| 256 |
+ } |
|
| 247 | 257 |
} |
| 248 | 258 |
|
| 249 |
- hooks.Prestart = deserialize(state.Prestart) |
|
| 250 |
- hooks.Poststart = deserialize(state.Poststart) |
|
| 251 |
- hooks.Poststop = deserialize(state.Poststop) |
|
| 252 | 259 |
return nil |
| 253 | 260 |
} |
| 254 | 261 |
|
| 255 |
-func (hooks Hooks) MarshalJSON() ([]byte, error) {
|
|
| 262 |
+func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
|
| 256 | 263 |
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
| 257 | 264 |
for _, hook := range hooks {
|
| 258 | 265 |
switch chook := hook.(type) {
|
| ... | ... |
@@ -267,9 +308,12 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
|
| 267 | 267 |
} |
| 268 | 268 |
|
| 269 | 269 |
return json.Marshal(map[string]interface{}{
|
| 270 |
- "prestart": serialize(hooks.Prestart), |
|
| 271 |
- "poststart": serialize(hooks.Poststart), |
|
| 272 |
- "poststop": serialize(hooks.Poststop), |
|
| 270 |
+ "prestart": serialize((*hooks)[Prestart]), |
|
| 271 |
+ "createRuntime": serialize((*hooks)[CreateRuntime]), |
|
| 272 |
+ "createContainer": serialize((*hooks)[CreateContainer]), |
|
| 273 |
+ "startContainer": serialize((*hooks)[StartContainer]), |
|
| 274 |
+ "poststart": serialize((*hooks)[Poststart]), |
|
| 275 |
+ "poststop": serialize((*hooks)[Poststop]), |
|
| 273 | 276 |
}) |
| 274 | 277 |
} |
| 275 | 278 |
|
| ... | ... |
@@ -3,30 +3,19 @@ package configs |
| 3 | 3 |
import ( |
| 4 | 4 |
"fmt" |
| 5 | 5 |
"os" |
| 6 |
+ "strconv" |
|
| 6 | 7 |
) |
| 7 | 8 |
|
| 8 | 9 |
const ( |
| 9 | 10 |
Wildcard = -1 |
| 10 | 11 |
) |
| 11 | 12 |
|
| 12 |
-// TODO Windows: This can be factored out in the future |
|
| 13 |
- |
|
| 14 | 13 |
type Device struct {
|
| 15 |
- // Device type, block, char, etc. |
|
| 16 |
- Type rune `json:"type"` |
|
| 14 |
+ DeviceRule |
|
| 17 | 15 |
|
| 18 | 16 |
// Path to the device. |
| 19 | 17 |
Path string `json:"path"` |
| 20 | 18 |
|
| 21 |
- // Major is the device's major number. |
|
| 22 |
- Major int64 `json:"major"` |
|
| 23 |
- |
|
| 24 |
- // Minor is the device's minor number. |
|
| 25 |
- Minor int64 `json:"minor"` |
|
| 26 |
- |
|
| 27 |
- // Cgroup permissions format, rwm. |
|
| 28 |
- Permissions string `json:"permissions"` |
|
| 29 |
- |
|
| 30 | 19 |
// FileMode permission bits for the device. |
| 31 | 20 |
FileMode os.FileMode `json:"file_mode"` |
| 32 | 21 |
|
| ... | ... |
@@ -35,23 +24,147 @@ type Device struct {
|
| 35 | 35 |
|
| 36 | 36 |
// Gid of the device. |
| 37 | 37 |
Gid uint32 `json:"gid"` |
| 38 |
+} |
|
| 38 | 39 |
|
| 39 |
- // Write the file to the allowed list |
|
| 40 |
- Allow bool `json:"allow"` |
|
| 40 |
+// DevicePermissions is a cgroupv1-style string to represent device access. It |
|
| 41 |
+// has to be a string for backward compatibility reasons, hence why it has |
|
| 42 |
+// methods to do set operations. |
|
| 43 |
+type DevicePermissions string |
|
| 44 |
+ |
|
| 45 |
+const ( |
|
| 46 |
+ deviceRead uint = (1 << iota) |
|
| 47 |
+ deviceWrite |
|
| 48 |
+ deviceMknod |
|
| 49 |
+) |
|
| 50 |
+ |
|
| 51 |
+func (p DevicePermissions) toSet() uint {
|
|
| 52 |
+ var set uint |
|
| 53 |
+ for _, perm := range p {
|
|
| 54 |
+ switch perm {
|
|
| 55 |
+ case 'r': |
|
| 56 |
+ set |= deviceRead |
|
| 57 |
+ case 'w': |
|
| 58 |
+ set |= deviceWrite |
|
| 59 |
+ case 'm': |
|
| 60 |
+ set |= deviceMknod |
|
| 61 |
+ } |
|
| 62 |
+ } |
|
| 63 |
+ return set |
|
| 64 |
+} |
|
| 65 |
+ |
|
| 66 |
+func fromSet(set uint) DevicePermissions {
|
|
| 67 |
+ var perm string |
|
| 68 |
+ if set&deviceRead == deviceRead {
|
|
| 69 |
+ perm += "r" |
|
| 70 |
+ } |
|
| 71 |
+ if set&deviceWrite == deviceWrite {
|
|
| 72 |
+ perm += "w" |
|
| 73 |
+ } |
|
| 74 |
+ if set&deviceMknod == deviceMknod {
|
|
| 75 |
+ perm += "m" |
|
| 76 |
+ } |
|
| 77 |
+ return DevicePermissions(perm) |
|
| 78 |
+} |
|
| 79 |
+ |
|
| 80 |
+// Union returns the union of the two sets of DevicePermissions. |
|
| 81 |
+func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
|
|
| 82 |
+ lhs := p.toSet() |
|
| 83 |
+ rhs := o.toSet() |
|
| 84 |
+ return fromSet(lhs | rhs) |
|
| 85 |
+} |
|
| 86 |
+ |
|
| 87 |
+// Difference returns the set difference of the two sets of DevicePermissions. |
|
| 88 |
+// In set notation, A.Difference(B) gives you A\B. |
|
| 89 |
+func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
|
|
| 90 |
+ lhs := p.toSet() |
|
| 91 |
+ rhs := o.toSet() |
|
| 92 |
+ return fromSet(lhs &^ rhs) |
|
| 93 |
+} |
|
| 94 |
+ |
|
| 95 |
+// Intersection computes the intersection of the two sets of DevicePermissions. |
|
| 96 |
+func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
|
|
| 97 |
+ lhs := p.toSet() |
|
| 98 |
+ rhs := o.toSet() |
|
| 99 |
+ return fromSet(lhs & rhs) |
|
| 41 | 100 |
} |
| 42 | 101 |
|
| 43 |
-func (d *Device) CgroupString() string {
|
|
| 44 |
- return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
|
|
| 102 |
+// IsEmpty returns whether the set of permissions in a DevicePermissions is |
|
| 103 |
+// empty. |
|
| 104 |
+func (p DevicePermissions) IsEmpty() bool {
|
|
| 105 |
+ return p == DevicePermissions("")
|
|
| 45 | 106 |
} |
| 46 | 107 |
|
| 47 |
-func (d *Device) Mkdev() int {
|
|
| 48 |
- return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) |
|
| 108 |
+// IsValid returns whether the set of permissions is a subset of valid |
|
| 109 |
+// permissions (namely, {r,w,m}).
|
|
| 110 |
+func (p DevicePermissions) IsValid() bool {
|
|
| 111 |
+ return p == fromSet(p.toSet()) |
|
| 49 | 112 |
} |
| 50 | 113 |
|
| 51 |
-// deviceNumberString converts the device number to a string return result. |
|
| 52 |
-func deviceNumberString(number int64) string {
|
|
| 53 |
- if number == Wildcard {
|
|
| 54 |
- return "*" |
|
| 114 |
+type DeviceType rune |
|
| 115 |
+ |
|
| 116 |
+const ( |
|
| 117 |
+ WildcardDevice DeviceType = 'a' |
|
| 118 |
+ BlockDevice DeviceType = 'b' |
|
| 119 |
+ CharDevice DeviceType = 'c' // or 'u' |
|
| 120 |
+ FifoDevice DeviceType = 'p' |
|
| 121 |
+) |
|
| 122 |
+ |
|
| 123 |
+func (t DeviceType) IsValid() bool {
|
|
| 124 |
+ switch t {
|
|
| 125 |
+ case WildcardDevice, BlockDevice, CharDevice, FifoDevice: |
|
| 126 |
+ return true |
|
| 127 |
+ default: |
|
| 128 |
+ return false |
|
| 129 |
+ } |
|
| 130 |
+} |
|
| 131 |
+ |
|
| 132 |
+func (t DeviceType) CanMknod() bool {
|
|
| 133 |
+ switch t {
|
|
| 134 |
+ case BlockDevice, CharDevice, FifoDevice: |
|
| 135 |
+ return true |
|
| 136 |
+ default: |
|
| 137 |
+ return false |
|
| 138 |
+ } |
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
+func (t DeviceType) CanCgroup() bool {
|
|
| 142 |
+ switch t {
|
|
| 143 |
+ case WildcardDevice, BlockDevice, CharDevice: |
|
| 144 |
+ return true |
|
| 145 |
+ default: |
|
| 146 |
+ return false |
|
| 147 |
+ } |
|
| 148 |
+} |
|
| 149 |
+ |
|
| 150 |
+type DeviceRule struct {
|
|
| 151 |
+ // Type of device ('c' for char, 'b' for block). If set to 'a', this rule
|
|
| 152 |
+ // acts as a wildcard and all fields other than Allow are ignored. |
|
| 153 |
+ Type DeviceType `json:"type"` |
|
| 154 |
+ |
|
| 155 |
+ // Major is the device's major number. |
|
| 156 |
+ Major int64 `json:"major"` |
|
| 157 |
+ |
|
| 158 |
+ // Minor is the device's minor number. |
|
| 159 |
+ Minor int64 `json:"minor"` |
|
| 160 |
+ |
|
| 161 |
+ // Permissions is the set of permissions that this rule applies to (in the |
|
| 162 |
+ // cgroupv1 format -- any combination of "rwm"). |
|
| 163 |
+ Permissions DevicePermissions `json:"permissions"` |
|
| 164 |
+ |
|
| 165 |
+ // Allow specifies whether this rule is allowed. |
|
| 166 |
+ Allow bool `json:"allow"` |
|
| 167 |
+} |
|
| 168 |
+ |
|
| 169 |
+func (d *DeviceRule) CgroupString() string {
|
|
| 170 |
+ var ( |
|
| 171 |
+ major = strconv.FormatInt(d.Major, 10) |
|
| 172 |
+ minor = strconv.FormatInt(d.Minor, 10) |
|
| 173 |
+ ) |
|
| 174 |
+ if d.Major == Wildcard {
|
|
| 175 |
+ major = "*" |
|
| 176 |
+ } |
|
| 177 |
+ if d.Minor == Wildcard {
|
|
| 178 |
+ minor = "*" |
|
| 55 | 179 |
} |
| 56 |
- return fmt.Sprint(number) |
|
| 180 |
+ return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
|
| 57 | 181 |
} |
| 58 | 182 |
deleted file mode 100644 |
| ... | ... |
@@ -1,111 +0,0 @@ |
| 1 |
-// +build linux |
|
| 2 |
- |
|
| 3 |
-package configs |
|
| 4 |
- |
|
| 5 |
-var ( |
|
| 6 |
- // DefaultSimpleDevices are devices that are to be both allowed and created. |
|
| 7 |
- DefaultSimpleDevices = []*Device{
|
|
| 8 |
- // /dev/null and zero |
|
| 9 |
- {
|
|
| 10 |
- Path: "/dev/null", |
|
| 11 |
- Type: 'c', |
|
| 12 |
- Major: 1, |
|
| 13 |
- Minor: 3, |
|
| 14 |
- Permissions: "rwm", |
|
| 15 |
- FileMode: 0666, |
|
| 16 |
- }, |
|
| 17 |
- {
|
|
| 18 |
- Path: "/dev/zero", |
|
| 19 |
- Type: 'c', |
|
| 20 |
- Major: 1, |
|
| 21 |
- Minor: 5, |
|
| 22 |
- Permissions: "rwm", |
|
| 23 |
- FileMode: 0666, |
|
| 24 |
- }, |
|
| 25 |
- |
|
| 26 |
- {
|
|
| 27 |
- Path: "/dev/full", |
|
| 28 |
- Type: 'c', |
|
| 29 |
- Major: 1, |
|
| 30 |
- Minor: 7, |
|
| 31 |
- Permissions: "rwm", |
|
| 32 |
- FileMode: 0666, |
|
| 33 |
- }, |
|
| 34 |
- |
|
| 35 |
- // consoles and ttys |
|
| 36 |
- {
|
|
| 37 |
- Path: "/dev/tty", |
|
| 38 |
- Type: 'c', |
|
| 39 |
- Major: 5, |
|
| 40 |
- Minor: 0, |
|
| 41 |
- Permissions: "rwm", |
|
| 42 |
- FileMode: 0666, |
|
| 43 |
- }, |
|
| 44 |
- |
|
| 45 |
- // /dev/urandom,/dev/random |
|
| 46 |
- {
|
|
| 47 |
- Path: "/dev/urandom", |
|
| 48 |
- Type: 'c', |
|
| 49 |
- Major: 1, |
|
| 50 |
- Minor: 9, |
|
| 51 |
- Permissions: "rwm", |
|
| 52 |
- FileMode: 0666, |
|
| 53 |
- }, |
|
| 54 |
- {
|
|
| 55 |
- Path: "/dev/random", |
|
| 56 |
- Type: 'c', |
|
| 57 |
- Major: 1, |
|
| 58 |
- Minor: 8, |
|
| 59 |
- Permissions: "rwm", |
|
| 60 |
- FileMode: 0666, |
|
| 61 |
- }, |
|
| 62 |
- } |
|
| 63 |
- DefaultAllowedDevices = append([]*Device{
|
|
| 64 |
- // allow mknod for any device |
|
| 65 |
- {
|
|
| 66 |
- Type: 'c', |
|
| 67 |
- Major: Wildcard, |
|
| 68 |
- Minor: Wildcard, |
|
| 69 |
- Permissions: "m", |
|
| 70 |
- }, |
|
| 71 |
- {
|
|
| 72 |
- Type: 'b', |
|
| 73 |
- Major: Wildcard, |
|
| 74 |
- Minor: Wildcard, |
|
| 75 |
- Permissions: "m", |
|
| 76 |
- }, |
|
| 77 |
- |
|
| 78 |
- {
|
|
| 79 |
- Path: "/dev/console", |
|
| 80 |
- Type: 'c', |
|
| 81 |
- Major: 5, |
|
| 82 |
- Minor: 1, |
|
| 83 |
- Permissions: "rwm", |
|
| 84 |
- }, |
|
| 85 |
- // /dev/pts/ - pts namespaces are "coming soon" |
|
| 86 |
- {
|
|
| 87 |
- Path: "", |
|
| 88 |
- Type: 'c', |
|
| 89 |
- Major: 136, |
|
| 90 |
- Minor: Wildcard, |
|
| 91 |
- Permissions: "rwm", |
|
| 92 |
- }, |
|
| 93 |
- {
|
|
| 94 |
- Path: "", |
|
| 95 |
- Type: 'c', |
|
| 96 |
- Major: 5, |
|
| 97 |
- Minor: 2, |
|
| 98 |
- Permissions: "rwm", |
|
| 99 |
- }, |
|
| 100 |
- |
|
| 101 |
- // tuntap |
|
| 102 |
- {
|
|
| 103 |
- Path: "", |
|
| 104 |
- Type: 'c', |
|
| 105 |
- Major: 10, |
|
| 106 |
- Minor: 200, |
|
| 107 |
- Permissions: "rwm", |
|
| 108 |
- }, |
|
| 109 |
- }, DefaultSimpleDevices...) |
|
| 110 |
- DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
|
|
| 111 |
-) |
| 112 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,16 @@ |
| 0 |
+// +build !windows |
|
| 1 |
+ |
|
| 2 |
+package configs |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "errors" |
|
| 6 |
+ |
|
| 7 |
+ "golang.org/x/sys/unix" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
+func (d *DeviceRule) Mkdev() (uint64, error) {
|
|
| 11 |
+ if d.Major == Wildcard || d.Minor == Wildcard {
|
|
| 12 |
+ return 0, errors.New("cannot mkdev() device with wildcards")
|
|
| 13 |
+ } |
|
| 14 |
+ return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil |
|
| 15 |
+} |
| ... | ... |
@@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
|
| 31 | 31 |
} |
| 32 | 32 |
|
| 33 | 33 |
var ( |
| 34 |
+ devType configs.DeviceType |
|
| 35 |
+ mode = stat.Mode |
|
| 34 | 36 |
devNumber = uint64(stat.Rdev) |
| 35 | 37 |
major = unix.Major(devNumber) |
| 36 | 38 |
minor = unix.Minor(devNumber) |
| 37 | 39 |
) |
| 38 |
- if major == 0 {
|
|
| 40 |
+ switch mode & unix.S_IFMT {
|
|
| 41 |
+ case unix.S_IFBLK: |
|
| 42 |
+ devType = configs.BlockDevice |
|
| 43 |
+ case unix.S_IFCHR: |
|
| 44 |
+ devType = configs.CharDevice |
|
| 45 |
+ case unix.S_IFIFO: |
|
| 46 |
+ devType = configs.FifoDevice |
|
| 47 |
+ default: |
|
| 39 | 48 |
return nil, ErrNotADevice |
| 40 | 49 |
} |
| 41 |
- |
|
| 42 |
- var ( |
|
| 43 |
- devType rune |
|
| 44 |
- mode = stat.Mode |
|
| 45 |
- ) |
|
| 46 |
- switch {
|
|
| 47 |
- case mode&unix.S_IFBLK == unix.S_IFBLK: |
|
| 48 |
- devType = 'b' |
|
| 49 |
- case mode&unix.S_IFCHR == unix.S_IFCHR: |
|
| 50 |
- devType = 'c' |
|
| 51 |
- } |
|
| 52 | 50 |
return &configs.Device{
|
| 53 |
- Type: devType, |
|
| 54 |
- Path: path, |
|
| 55 |
- Major: int64(major), |
|
| 56 |
- Minor: int64(minor), |
|
| 57 |
- Permissions: permissions, |
|
| 58 |
- FileMode: os.FileMode(mode), |
|
| 59 |
- Uid: stat.Uid, |
|
| 60 |
- Gid: stat.Gid, |
|
| 51 |
+ DeviceRule: configs.DeviceRule{
|
|
| 52 |
+ Type: devType, |
|
| 53 |
+ Major: int64(major), |
|
| 54 |
+ Minor: int64(minor), |
|
| 55 |
+ Permissions: configs.DevicePermissions(permissions), |
|
| 56 |
+ }, |
|
| 57 |
+ Path: path, |
|
| 58 |
+ FileMode: os.FileMode(mode), |
|
| 59 |
+ Uid: stat.Uid, |
|
| 60 |
+ Gid: stat.Gid, |
|
| 61 | 61 |
}, nil |
| 62 | 62 |
} |
| 63 | 63 |
|
| ... | ... |
@@ -104,6 +104,9 @@ func GetDevices(path string) ([]*configs.Device, error) {
|
| 104 | 104 |
} |
| 105 | 105 |
return nil, err |
| 106 | 106 |
} |
| 107 |
+ if device.Type == configs.FifoDevice {
|
|
| 108 |
+ continue |
|
| 109 |
+ } |
|
| 107 | 110 |
out = append(out, device) |
| 108 | 111 |
} |
| 109 | 112 |
return out, nil |
| ... | ... |
@@ -1,7 +1,14 @@ |
| 1 |
+// SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later |
|
| 1 | 2 |
/* |
| 2 | 3 |
* Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com> |
| 3 | 4 |
* Copyright (C) 2019 SUSE LLC |
| 4 | 5 |
* |
| 6 |
+ * This work is dual licensed under the following licenses. You may use, |
|
| 7 |
+ * redistribute, and/or modify the work under the conditions of either (or |
|
| 8 |
+ * both) licenses. |
|
| 9 |
+ * |
|
| 10 |
+ * === Apache-2.0 === |
|
| 11 |
+ * |
|
| 5 | 12 |
* Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | 13 |
* you may not use this file except in compliance with the License. |
| 7 | 14 |
* You may obtain a copy of the License at |
| ... | ... |
@@ -13,6 +20,23 @@ |
| 13 | 13 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | 14 |
* See the License for the specific language governing permissions and |
| 15 | 15 |
* limitations under the License. |
| 16 |
+ * |
|
| 17 |
+ * === LGPL-2.1-or-later === |
|
| 18 |
+ * |
|
| 19 |
+ * This library is free software; you can redistribute it and/or |
|
| 20 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 21 |
+ * License as published by the Free Software Foundation; either |
|
| 22 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 23 |
+ * |
|
| 24 |
+ * This library is distributed in the hope that it will be useful, |
|
| 25 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 26 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 27 |
+ * Lesser General Public License for more details. |
|
| 28 |
+ * |
|
| 29 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 30 |
+ * License along with this library. If not, see |
|
| 31 |
+ * <https://www.gnu.org/licenses/>. |
|
| 32 |
+ * |
|
| 16 | 33 |
*/ |
| 17 | 34 |
|
| 18 | 35 |
#define _GNU_SOURCE |
| ... | ... |
@@ -95,8 +119,10 @@ static int is_self_cloned(void) |
| 95 | 95 |
struct statfs fsbuf = {};
|
| 96 | 96 |
|
| 97 | 97 |
fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
|
| 98 |
- if (fd < 0) |
|
| 98 |
+ if (fd < 0) {
|
|
| 99 |
+ fprintf(stderr, "you have no read access to runc binary file\n"); |
|
| 99 | 100 |
return -ENOTRECOVERABLE; |
| 101 |
+ } |
|
| 100 | 102 |
|
| 101 | 103 |
/* |
| 102 | 104 |
* Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for |
| ... | ... |
@@ -714,12 +714,12 @@ void nsexec(void) |
| 714 | 714 |
* ready, so we can receive all possible error codes |
| 715 | 715 |
* generated by children. |
| 716 | 716 |
*/ |
| 717 |
+ syncfd = sync_child_pipe[1]; |
|
| 718 |
+ close(sync_child_pipe[0]); |
|
| 719 |
+ |
|
| 717 | 720 |
while (!ready) {
|
| 718 | 721 |
enum sync_t s; |
| 719 | 722 |
|
| 720 |
- syncfd = sync_child_pipe[1]; |
|
| 721 |
- close(sync_child_pipe[0]); |
|
| 722 |
- |
|
| 723 | 723 |
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) |
| 724 | 724 |
bail("failed to sync with child: next state");
|
| 725 | 725 |
|
| ... | ... |
@@ -789,13 +789,13 @@ void nsexec(void) |
| 789 | 789 |
|
| 790 | 790 |
/* Now sync with grandchild. */ |
| 791 | 791 |
|
| 792 |
+ syncfd = sync_grandchild_pipe[1]; |
|
| 793 |
+ close(sync_grandchild_pipe[0]); |
|
| 794 |
+ |
|
| 792 | 795 |
ready = false; |
| 793 | 796 |
while (!ready) {
|
| 794 | 797 |
enum sync_t s; |
| 795 | 798 |
|
| 796 |
- syncfd = sync_grandchild_pipe[1]; |
|
| 797 |
- close(sync_grandchild_pipe[0]); |
|
| 798 |
- |
|
| 799 | 799 |
s = SYNC_GRANDCHILD; |
| 800 | 800 |
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
| 801 | 801 |
kill(child, SIGKILL); |
| ... | ... |
@@ -60,7 +60,7 @@ type Group struct {
|
| 60 | 60 |
|
| 61 | 61 |
// groupFromOS converts an os/user.(*Group) to local Group |
| 62 | 62 |
// |
| 63 |
-// (This does not include Pass, Shell or Gecos) |
|
| 63 |
+// (This does not include Pass or List) |
|
| 64 | 64 |
func groupFromOS(g *user.Group) (Group, error) {
|
| 65 | 65 |
newGroup := Group{
|
| 66 | 66 |
Name: g.Name, |
| ... | ... |
@@ -162,10 +162,6 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
|
| 162 | 162 |
) |
| 163 | 163 |
|
| 164 | 164 |
for s.Scan() {
|
| 165 |
- if err := s.Err(); err != nil {
|
|
| 166 |
- return nil, err |
|
| 167 |
- } |
|
| 168 |
- |
|
| 169 | 165 |
line := strings.TrimSpace(s.Text()) |
| 170 | 166 |
if line == "" {
|
| 171 | 167 |
continue |
| ... | ... |
@@ -183,6 +179,9 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
|
| 183 | 183 |
out = append(out, p) |
| 184 | 184 |
} |
| 185 | 185 |
} |
| 186 |
+ if err := s.Err(); err != nil {
|
|
| 187 |
+ return nil, err |
|
| 188 |
+ } |
|
| 186 | 189 |
|
| 187 | 190 |
return out, nil |
| 188 | 191 |
} |
| ... | ... |
@@ -221,10 +220,6 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
|
| 221 | 221 |
) |
| 222 | 222 |
|
| 223 | 223 |
for s.Scan() {
|
| 224 |
- if err := s.Err(); err != nil {
|
|
| 225 |
- return nil, err |
|
| 226 |
- } |
|
| 227 |
- |
|
| 228 | 224 |
text := s.Text() |
| 229 | 225 |
if text == "" {
|
| 230 | 226 |
continue |
| ... | ... |
@@ -242,6 +237,9 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
|
| 242 | 242 |
out = append(out, p) |
| 243 | 243 |
} |
| 244 | 244 |
} |
| 245 |
+ if err := s.Err(); err != nil {
|
|
| 246 |
+ return nil, err |
|
| 247 |
+ } |
|
| 245 | 248 |
|
| 246 | 249 |
return out, nil |
| 247 | 250 |
} |
| ... | ... |
@@ -532,10 +530,6 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
|
| 532 | 532 |
) |
| 533 | 533 |
|
| 534 | 534 |
for s.Scan() {
|
| 535 |
- if err := s.Err(); err != nil {
|
|
| 536 |
- return nil, err |
|
| 537 |
- } |
|
| 538 |
- |
|
| 539 | 535 |
line := strings.TrimSpace(s.Text()) |
| 540 | 536 |
if line == "" {
|
| 541 | 537 |
continue |
| ... | ... |
@@ -549,6 +543,9 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
|
| 549 | 549 |
out = append(out, p) |
| 550 | 550 |
} |
| 551 | 551 |
} |
| 552 |
+ if err := s.Err(); err != nil {
|
|
| 553 |
+ return nil, err |
|
| 554 |
+ } |
|
| 552 | 555 |
|
| 553 | 556 |
return out, nil |
| 554 | 557 |
} |
| ... | ... |
@@ -586,10 +583,6 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
|
| 586 | 586 |
) |
| 587 | 587 |
|
| 588 | 588 |
for s.Scan() {
|
| 589 |
- if err := s.Err(); err != nil {
|
|
| 590 |
- return nil, err |
|
| 591 |
- } |
|
| 592 |
- |
|
| 593 | 589 |
line := strings.TrimSpace(s.Text()) |
| 594 | 590 |
if line == "" {
|
| 595 | 591 |
continue |
| ... | ... |
@@ -603,6 +596,9 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
|
| 603 | 603 |
out = append(out, p) |
| 604 | 604 |
} |
| 605 | 605 |
} |
| 606 |
+ if err := s.Err(); err != nil {
|
|
| 607 |
+ return nil, err |
|
| 608 |
+ } |
|
| 606 | 609 |
|
| 607 | 610 |
return out, nil |
| 608 | 611 |
} |
| 609 | 612 |
deleted file mode 100644 |
| ... | ... |
@@ -1,31 +0,0 @@ |
| 1 |
-# OCI runtime-spec. When updating this, make sure you use a version tag rather |
|
| 2 |
-# than a commit ID so it's much more obvious what version of the spec we are |
|
| 3 |
-# using. |
|
| 4 |
-github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db |
|
| 5 |
- |
|
| 6 |
-# Core libcontainer functionality. |
|
| 7 |
-github.com/checkpoint-restore/go-criu 17b0214f6c48980c45dc47ecb0cfd6d9e02df723 # v3.11 |
|
| 8 |
-github.com/mrunalp/fileutils 7d4729fb36185a7c1719923406c9d40e54fb93c7 |
|
| 9 |
-github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf # v1.3.0 (+ CVE-2019-16884) |
|
| 10 |
-github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1 |
|
| 11 |
-github.com/sirupsen/logrus 8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f # v1.4.1 |
|
| 12 |
-github.com/syndtr/gocapability d98352740cb2c55f81556b63d4a1ec64c5a319c2 |
|
| 13 |
-github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270 |
|
| 14 |
- |
|
| 15 |
-# systemd integration. |
|
| 16 |
-github.com/coreos/go-systemd 95778dfbb74eb7e4dbaf43bf7d71809650ef8076 # v19 |
|
| 17 |
-github.com/godbus/dbus 2ff6f7ffd60f0f2410b3105864bdd12c7894f844 # v5.0.1 |
|
| 18 |
-github.com/golang/protobuf 925541529c1fa6821df4e44ce2723319eb2be768 # v1.0.0 |
|
| 19 |
- |
|
| 20 |
-# Command-line interface. |
|
| 21 |
-github.com/cyphar/filepath-securejoin a261ee33d7a517f054effbf451841abaafe3e0fd # v0.2.2 |
|
| 22 |
-github.com/docker/go-units 47565b4f722fb6ceae66b95f853feed578a4a51c # v0.3.3 |
|
| 23 |
-github.com/urfave/cli cfb38830724cc34fedffe9a2a29fb54fa9169cd1 # v1.20.0 |
|
| 24 |
-golang.org/x/sys 9eafafc0a87e0fd0aeeba439a4573537970c44c7 https://github.com/golang/sys |
|
| 25 |
- |
|
| 26 |
-# console dependencies |
|
| 27 |
-github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f |
|
| 28 |
-github.com/pkg/errors ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1 |
|
| 29 |
- |
|
| 30 |
-# ebpf dependencies |
|
| 31 |
-github.com/cilium/ebpf 95b36a581eed7b0f127306ed1d16cc0ddc06cf67 |
| ... | ... |
@@ -667,9 +667,10 @@ type LinuxSeccompArg struct {
|
| 667 | 667 |
|
| 668 | 668 |
// LinuxSyscall is used to match a syscall in Seccomp |
| 669 | 669 |
type LinuxSyscall struct {
|
| 670 |
- Names []string `json:"names"` |
|
| 671 |
- Action LinuxSeccompAction `json:"action"` |
|
| 672 |
- Args []LinuxSeccompArg `json:"args,omitempty"` |
|
| 670 |
+ Names []string `json:"names"` |
|
| 671 |
+ Action LinuxSeccompAction `json:"action"` |
|
| 672 |
+ ErrnoRet *uint `json:"errnoRet,omitempty"` |
|
| 673 |
+ Args []LinuxSeccompArg `json:"args,omitempty"` |
|
| 673 | 674 |
} |
| 674 | 675 |
|
| 675 | 676 |
// LinuxIntelRdt has container runtime resource constraints for Intel RDT |
| ... | ... |
@@ -671,6 +671,7 @@ const ( |
| 671 | 671 |
FS_IOC_ADD_ENCRYPTION_KEY = 0xc0506617 |
| 672 | 672 |
FS_IOC_GET_ENCRYPTION_KEY_STATUS = 0xc080661a |
| 673 | 673 |
FS_IOC_GET_ENCRYPTION_POLICY_EX = 0xc0096616 |
| 674 |
+ FS_IOC_MEASURE_VERITY = 0xc0046686 |
|
| 674 | 675 |
FS_IOC_REMOVE_ENCRYPTION_KEY = 0xc0406618 |
| 675 | 676 |
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS = 0xc0406619 |
| 676 | 677 |
FS_KEY_DESCRIPTOR_SIZE = 0x8 |
| ... | ... |
@@ -683,6 +684,9 @@ const ( |
| 683 | 683 |
FS_POLICY_FLAGS_PAD_8 = 0x1 |
| 684 | 684 |
FS_POLICY_FLAGS_PAD_MASK = 0x3 |
| 685 | 685 |
FS_POLICY_FLAGS_VALID = 0xf |
| 686 |
+ FS_VERITY_FL = 0x100000 |
|
| 687 |
+ FS_VERITY_HASH_ALG_SHA256 = 0x1 |
|
| 688 |
+ FS_VERITY_HASH_ALG_SHA512 = 0x2 |
|
| 686 | 689 |
FUTEXFS_SUPER_MAGIC = 0xbad1dea |
| 687 | 690 |
F_ADD_SEALS = 0x409 |
| 688 | 691 |
F_DUPFD = 0x0 |
| ... | ... |
@@ -73,6 +73,8 @@ const ( |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x1000 |
| 75 | 75 |
FP_XSTATE_MAGIC2 = 0x46505845 |
| 76 |
+ FS_IOC_ENABLE_VERITY = 0x40806685 |
|
| 77 |
+ FS_IOC_GETFLAGS = 0x80046601 |
|
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615 |
| 77 | 79 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614 |
| 78 | 80 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613 |
| ... | ... |
@@ -73,6 +73,8 @@ const ( |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x1000 |
| 75 | 75 |
FP_XSTATE_MAGIC2 = 0x46505845 |
| 76 |
+ FS_IOC_ENABLE_VERITY = 0x40806685 |
|
| 77 |
+ FS_IOC_GETFLAGS = 0x80086601 |
|
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615 |
| 77 | 79 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614 |
| 78 | 80 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x1000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x40806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x80046601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613 |
| ... | ... |
@@ -75,6 +75,8 @@ const ( |
| 75 | 75 |
FFDLY = 0x8000 |
| 76 | 76 |
FLUSHO = 0x1000 |
| 77 | 77 |
FPSIMD_MAGIC = 0x46508001 |
| 78 |
+ FS_IOC_ENABLE_VERITY = 0x40806685 |
|
| 79 |
+ FS_IOC_GETFLAGS = 0x80086601 |
|
| 78 | 80 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615 |
| 79 | 81 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614 |
| 80 | 82 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x2000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x40046601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x2000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x40086601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x2000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x40086601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x2000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x40046601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x4000 |
| 73 | 73 |
FFDLY = 0x4000 |
| 74 | 74 |
FLUSHO = 0x800000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x40086601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x4000 |
| 73 | 73 |
FFDLY = 0x4000 |
| 74 | 74 |
FLUSHO = 0x800000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x40086601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x1000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x40806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x80086601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613 |
| ... | ... |
@@ -72,6 +72,8 @@ const ( |
| 72 | 72 |
FF1 = 0x8000 |
| 73 | 73 |
FFDLY = 0x8000 |
| 74 | 74 |
FLUSHO = 0x1000 |
| 75 |
+ FS_IOC_ENABLE_VERITY = 0x40806685 |
|
| 76 |
+ FS_IOC_GETFLAGS = 0x80086601 |
|
| 75 | 77 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615 |
| 76 | 78 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614 |
| 77 | 79 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613 |
| ... | ... |
@@ -76,6 +76,8 @@ const ( |
| 76 | 76 |
FF1 = 0x8000 |
| 77 | 77 |
FFDLY = 0x8000 |
| 78 | 78 |
FLUSHO = 0x1000 |
| 79 |
+ FS_IOC_ENABLE_VERITY = 0x80806685 |
|
| 80 |
+ FS_IOC_GETFLAGS = 0x40086601 |
|
| 79 | 81 |
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615 |
| 80 | 82 |
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614 |
| 81 | 83 |
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613 |
| ... | ... |
@@ -2291,3 +2291,20 @@ const ( |
| 2291 | 2291 |
DEVLINK_DPIPE_HEADER_IPV4 = 0x1 |
| 2292 | 2292 |
DEVLINK_DPIPE_HEADER_IPV6 = 0x2 |
| 2293 | 2293 |
) |
| 2294 |
+ |
|
| 2295 |
+type FsverityDigest struct {
|
|
| 2296 |
+ Algorithm uint16 |
|
| 2297 |
+ Size uint16 |
|
| 2298 |
+} |
|
| 2299 |
+ |
|
| 2300 |
+type FsverityEnableArg struct {
|
|
| 2301 |
+ Version uint32 |
|
| 2302 |
+ Hash_algorithm uint32 |
|
| 2303 |
+ Block_size uint32 |
|
| 2304 |
+ Salt_size uint32 |
|
| 2305 |
+ Salt_ptr uint64 |
|
| 2306 |
+ Sig_size uint32 |
|
| 2307 |
+ _ uint32 |
|
| 2308 |
+ Sig_ptr uint64 |
|
| 2309 |
+ _ [11]uint64 |
|
| 2310 |
+} |