Notable changes:
* Fix CVE-2019-19921 (Volume mount race condition with shared mounts): https://github.com/opencontainers/runc/pull/2207
* Fix exec FIFO race: https://github.com/opencontainers/runc/pull/2185
* Basic support for cgroup v2. Almost feature-complete, but still missing support for systemd mode in rootless.
See also https://github.com/opencontainers/runc/issues/2209 for the known issues.
Full changes: https://github.com/opencontainers/runc/compare/v1.0.0-rc9...v1.0.0-rc10
Also updates go-selinux: https://github.com/opencontainers/selinux/compare/3a1f366feb7aecbf7a0e71ac4cea88b31597de9e...5215b1806f52b1fcc2070a8826c542c9d33cd3cf
(See https://github.com/containerd/cri/pull/1383#issuecomment-578227009)
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
(cherry picked from commit 6d6808090736ac76e908e78aa6894f5586c7d243)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
| ... | ... |
@@ -80,7 +80,7 @@ google.golang.org/grpc 6eaf6f47437a6b4e2153a190160e |
| 80 | 80 |
# the containerd project first, and update both after that is merged. |
| 81 | 81 |
# This commit does not need to match RUNC_COMMIT as it is used for helper |
| 82 | 82 |
# packages but should be newer or equal. |
| 83 |
-github.com/opencontainers/runc 3e425f80a8c931f88e6d94a8c831b9d5aa481657 # v1.0.0-rc8-92-g84373aaa |
|
| 83 |
+github.com/opencontainers/runc dc9208a3303feef5b3839f4323d9beb36df0a9dd # v1.0.0-rc10 |
|
| 84 | 84 |
github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db |
| 85 | 85 |
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1 |
| 86 | 86 |
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 |
| ... | ... |
@@ -161,6 +161,6 @@ github.com/morikuni/aec 39771216ff4c63d11f5e604076f9 |
| 161 | 161 |
# metrics |
| 162 | 162 |
github.com/docker/go-metrics d466d4f6fd960e01820085bd7e1a24426ee7ef18 |
| 163 | 163 |
|
| 164 |
-github.com/opencontainers/selinux 3a1f366feb7aecbf7a0e71ac4cea88b31597de9e # v1.2.2 |
|
| 164 |
+github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf |
|
| 165 | 165 |
|
| 166 | 166 |
# DO NOT EDIT BELOW THIS LINE -------- reserved for downstream projects -------- |
| ... | ... |
@@ -16,9 +16,13 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati |
| 16 | 16 |
|
| 17 | 17 |
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. |
| 18 | 18 |
|
| 19 |
+Currently, the following features are not considered to be production-ready: |
|
| 20 |
+ |
|
| 21 |
+* Support for cgroup v2 |
|
| 22 |
+ |
|
| 19 | 23 |
## Security |
| 20 | 24 |
|
| 21 |
-Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/) |
|
| 25 |
+The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/). |
|
| 22 | 26 |
|
| 23 | 27 |
## Building |
| 24 | 28 |
|
| ... | ... |
@@ -229,7 +233,14 @@ runc delete mycontainerid |
| 229 | 229 |
This allows higher-level systems to augment the container's creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`. |
| 230 | 230 |
|
| 231 | 231 |
#### Rootless containers |
| 232 |
-`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user: |
|
| 232 |
+`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. |
|
| 233 |
+ |
|
| 234 |
+**Note:** In order to use this feature, "User Namespaces" must be compiled and enabled in your kernel. There are various ways to do this depending on your distribution: |
|
| 235 |
+- Confirm `CONFIG_USER_NS=y` is set in your kernel configuration (normally found in `/proc/config.gz`) |
|
| 236 |
+- Arch/Debian: `echo 1 > /proc/sys/kernel/unprivileged_userns_clone` |
|
| 237 |
+- RHEL/CentOS 7: `echo 28633 > /proc/sys/user/max_user_namespaces` |
|
| 238 |
+ |
|
| 239 |
+Run the following commands as an ordinary user: |
|
| 233 | 240 |
```bash |
| 234 | 241 |
# Same as the first example |
| 235 | 242 |
mkdir ~/mycontainer |
| ... | ... |
@@ -6,6 +6,8 @@ import ( |
| 6 | 6 |
"fmt" |
| 7 | 7 |
"io/ioutil" |
| 8 | 8 |
"os" |
| 9 |
+ |
|
| 10 |
+ "github.com/opencontainers/runc/libcontainer/utils" |
|
| 9 | 11 |
) |
| 10 | 12 |
|
| 11 | 13 |
// IsEnabled returns true if apparmor is enabled for the host. |
| ... | ... |
@@ -19,7 +21,7 @@ func IsEnabled() bool {
|
| 19 | 19 |
return false |
| 20 | 20 |
} |
| 21 | 21 |
|
| 22 |
-func setprocattr(attr, value string) error {
|
|
| 22 |
+func setProcAttr(attr, value string) error {
|
|
| 23 | 23 |
// Under AppArmor you can only change your own attr, so use /proc/self/ |
| 24 | 24 |
// instead of /proc/<tid>/ like libapparmor does |
| 25 | 25 |
path := fmt.Sprintf("/proc/self/attr/%s", attr)
|
| ... | ... |
@@ -30,6 +32,10 @@ func setprocattr(attr, value string) error {
|
| 30 | 30 |
} |
| 31 | 31 |
defer f.Close() |
| 32 | 32 |
|
| 33 |
+ if err := utils.EnsureProcHandle(f); err != nil {
|
|
| 34 |
+ return err |
|
| 35 |
+ } |
|
| 36 |
+ |
|
| 33 | 37 |
_, err = fmt.Fprintf(f, "%s", value) |
| 34 | 38 |
return err |
| 35 | 39 |
} |
| ... | ... |
@@ -37,7 +43,7 @@ func setprocattr(attr, value string) error {
|
| 37 | 37 |
// changeOnExec reimplements aa_change_onexec from libapparmor in Go |
| 38 | 38 |
func changeOnExec(name string) error {
|
| 39 | 39 |
value := "exec " + name |
| 40 |
- if err := setprocattr("exec", value); err != nil {
|
|
| 40 |
+ if err := setProcAttr("exec", value); err != nil {
|
|
| 41 | 41 |
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
| 42 | 42 |
} |
| 43 | 43 |
return nil |
| ... | ... |
@@ -37,8 +37,18 @@ type Manager interface {
|
| 37 | 37 |
// restore the object later. |
| 38 | 38 |
GetPaths() map[string]string |
| 39 | 39 |
|
| 40 |
+ // GetUnifiedPath returns the unified path when running in unified mode. |
|
| 41 |
+ // The value corresponds to the all values of GetPaths() map. |
|
| 42 |
+ // |
|
| 43 |
+ // GetUnifiedPath returns error when running in hybrid mode as well as |
|
| 44 |
+ // in legacy mode. |
|
| 45 |
+ GetUnifiedPath() (string, error) |
|
| 46 |
+ |
|
| 40 | 47 |
// Sets the cgroup as configured. |
| 41 | 48 |
Set(container *configs.Config) error |
| 49 |
+ |
|
| 50 |
+ // Gets the cgroup as configured. |
|
| 51 |
+ GetCgroups() (*configs.Cgroup, error) |
|
| 42 | 52 |
} |
| 43 | 53 |
|
| 44 | 54 |
type NotFoundError struct {
|
| ... | ... |
@@ -20,8 +20,9 @@ import ( |
| 20 | 20 |
) |
| 21 | 21 |
|
| 22 | 22 |
const ( |
| 23 |
- CgroupNamePrefix = "name=" |
|
| 24 |
- CgroupProcesses = "cgroup.procs" |
|
| 23 |
+ CgroupNamePrefix = "name=" |
|
| 24 |
+ CgroupProcesses = "cgroup.procs" |
|
| 25 |
+ unifiedMountpoint = "/sys/fs/cgroup" |
|
| 25 | 26 |
) |
| 26 | 27 |
|
| 27 | 28 |
var ( |
| ... | ... |
@@ -40,7 +41,7 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
| 40 | 40 |
func IsCgroup2UnifiedMode() bool {
|
| 41 | 41 |
isUnifiedOnce.Do(func() {
|
| 42 | 42 |
var st syscall.Statfs_t |
| 43 |
- if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil {
|
|
| 43 |
+ if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
|
|
| 44 | 44 |
panic("cannot statfs cgroup root")
|
| 45 | 45 |
} |
| 46 | 46 |
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC |
| ... | ... |
@@ -50,6 +51,9 @@ func IsCgroup2UnifiedMode() bool {
|
| 50 | 50 |
|
| 51 | 51 |
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt |
| 52 | 52 |
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
| 53 |
+ if IsCgroup2UnifiedMode() {
|
|
| 54 |
+ return unifiedMountpoint, nil |
|
| 55 |
+ } |
|
| 53 | 56 |
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) |
| 54 | 57 |
return mnt, err |
| 55 | 58 |
} |
| ... | ... |
@@ -235,8 +239,8 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
| 235 | 235 |
return nil, err |
| 236 | 236 |
} |
| 237 | 237 |
m := Mount{
|
| 238 |
- Mountpoint: "/sys/fs/cgroup", |
|
| 239 |
- Root: "/sys/fs/cgroup", |
|
| 238 |
+ Mountpoint: unifiedMountpoint, |
|
| 239 |
+ Root: unifiedMountpoint, |
|
| 240 | 240 |
Subsystems: availableControllers, |
| 241 | 241 |
} |
| 242 | 242 |
return []Mount{m}, nil
|
| ... | ... |
@@ -262,6 +266,21 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
| 262 | 262 |
|
| 263 | 263 |
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel |
| 264 | 264 |
func GetAllSubsystems() ([]string, error) {
|
| 265 |
+ // /proc/cgroups is meaningless for v2 |
|
| 266 |
+ // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features |
|
| 267 |
+ if IsCgroup2UnifiedMode() {
|
|
| 268 |
+ // "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers. |
|
| 269 |
+ // - devices: implemented in kernel 4.15 |
|
| 270 |
+ // - freezer: implemented in kernel 5.2 |
|
| 271 |
+ // We assume these are always available, as it is hard to detect availability. |
|
| 272 |
+ pseudo := []string{"devices", "freezer"}
|
|
| 273 |
+ data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
|
| 274 |
+ if err != nil {
|
|
| 275 |
+ return nil, err |
|
| 276 |
+ } |
|
| 277 |
+ subsystems := append(pseudo, strings.Fields(string(data))...) |
|
| 278 |
+ return subsystems, nil |
|
| 279 |
+ } |
|
| 265 | 280 |
f, err := os.Open("/proc/cgroups")
|
| 266 | 281 |
if err != nil {
|
| 267 | 282 |
return nil, err |
| 50 | 51 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,93 @@ |
| 0 |
+// +build linux |
|
| 1 |
+ |
|
| 2 |
+package utils |
|
| 3 |
+ |
|
| 4 |
+/* |
|
| 5 |
+ * Copyright 2016, 2017 SUSE LLC |
|
| 6 |
+ * |
|
| 7 |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 8 |
+ * you may not use this file except in compliance with the License. |
|
| 9 |
+ * You may obtain a copy of the License at |
|
| 10 |
+ * |
|
| 11 |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 12 |
+ * |
|
| 13 |
+ * Unless required by applicable law or agreed to in writing, software |
|
| 14 |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 15 |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 16 |
+ * See the License for the specific language governing permissions and |
|
| 17 |
+ * limitations under the License. |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+import ( |
|
| 21 |
+ "fmt" |
|
| 22 |
+ "os" |
|
| 23 |
+ |
|
| 24 |
+ "golang.org/x/sys/unix" |
|
| 25 |
+) |
|
| 26 |
+ |
|
| 27 |
+// MaxNameLen is the maximum length of the name of a file descriptor being |
|
| 28 |
+// sent using SendFd. The name of the file handle returned by RecvFd will never |
|
| 29 |
+// be larger than this value. |
|
| 30 |
+const MaxNameLen = 4096 |
|
| 31 |
+ |
|
| 32 |
+// oobSpace is the size of the oob slice required to store a single FD. Note |
|
| 33 |
+// that unix.UnixRights appears to make the assumption that fd is always int32, |
|
| 34 |
+// so sizeof(fd) = 4. |
|
| 35 |
+var oobSpace = unix.CmsgSpace(4) |
|
| 36 |
+ |
|
| 37 |
+// RecvFd waits for a file descriptor to be sent over the given AF_UNIX |
|
| 38 |
+// socket. The file name of the remote file descriptor will be recreated |
|
| 39 |
+// locally (it is sent as non-auxiliary data in the same payload). |
|
| 40 |
+func RecvFd(socket *os.File) (*os.File, error) {
|
|
| 41 |
+ // For some reason, unix.Recvmsg uses the length rather than the capacity |
|
| 42 |
+ // when passing the msg_controllen and other attributes to recvmsg. So we |
|
| 43 |
+ // have to actually set the length. |
|
| 44 |
+ name := make([]byte, MaxNameLen) |
|
| 45 |
+ oob := make([]byte, oobSpace) |
|
| 46 |
+ |
|
| 47 |
+ sockfd := socket.Fd() |
|
| 48 |
+ n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) |
|
| 49 |
+ if err != nil {
|
|
| 50 |
+ return nil, err |
|
| 51 |
+ } |
|
| 52 |
+ |
|
| 53 |
+ if n >= MaxNameLen || oobn != oobSpace {
|
|
| 54 |
+ return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
|
| 55 |
+ } |
|
| 56 |
+ |
|
| 57 |
+ // Truncate. |
|
| 58 |
+ name = name[:n] |
|
| 59 |
+ oob = oob[:oobn] |
|
| 60 |
+ |
|
| 61 |
+ scms, err := unix.ParseSocketControlMessage(oob) |
|
| 62 |
+ if err != nil {
|
|
| 63 |
+ return nil, err |
|
| 64 |
+ } |
|
| 65 |
+ if len(scms) != 1 {
|
|
| 66 |
+ return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
|
| 67 |
+ } |
|
| 68 |
+ scm := scms[0] |
|
| 69 |
+ |
|
| 70 |
+ fds, err := unix.ParseUnixRights(&scm) |
|
| 71 |
+ if err != nil {
|
|
| 72 |
+ return nil, err |
|
| 73 |
+ } |
|
| 74 |
+ if len(fds) != 1 {
|
|
| 75 |
+ return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
|
| 76 |
+ } |
|
| 77 |
+ fd := uintptr(fds[0]) |
|
| 78 |
+ |
|
| 79 |
+ return os.NewFile(fd, string(name)), nil |
|
| 80 |
+} |
|
| 81 |
+ |
|
| 82 |
+// SendFd sends a file descriptor over the given AF_UNIX socket. In |
|
| 83 |
+// addition, the file.Name() of the given file will also be sent as |
|
| 84 |
+// non-auxiliary data in the same payload (allowing to send contextual |
|
| 85 |
+// information for a file descriptor). |
|
| 86 |
+func SendFd(socket *os.File, name string, fd uintptr) error {
|
|
| 87 |
+ if len(name) >= MaxNameLen {
|
|
| 88 |
+ return fmt.Errorf("sendfd: filename too long: %s", name)
|
|
| 89 |
+ } |
|
| 90 |
+ oob := unix.UnixRights(int(fd)) |
|
| 91 |
+ return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0) |
|
| 92 |
+} |
| 0 | 93 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,112 @@ |
| 0 |
+package utils |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "encoding/json" |
|
| 4 |
+ "io" |
|
| 5 |
+ "os" |
|
| 6 |
+ "path/filepath" |
|
| 7 |
+ "strings" |
|
| 8 |
+ "unsafe" |
|
| 9 |
+ |
|
| 10 |
+ "golang.org/x/sys/unix" |
|
| 11 |
+) |
|
| 12 |
+ |
|
| 13 |
+const ( |
|
| 14 |
+ exitSignalOffset = 128 |
|
| 15 |
+) |
|
| 16 |
+ |
|
| 17 |
+// ResolveRootfs converts the given rootfs path to an absolute path and |
|
| 18 |
+// returns it with all symlinks resolved |
|
| 19 |
+func ResolveRootfs(uncleanRootfs string) (string, error) {
|
|
| 20 |
+ rootfs, err := filepath.Abs(uncleanRootfs) |
|
| 21 |
+ if err != nil {
|
|
| 22 |
+ return "", err |
|
| 23 |
+ } |
|
| 24 |
+ return filepath.EvalSymlinks(rootfs) |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 27 |
+// ExitStatus returns the correct exit status for a process based on if it |
|
| 28 |
+// was signaled or exited cleanly |
|
| 29 |
+func ExitStatus(status unix.WaitStatus) int {
|
|
| 30 |
+ if status.Signaled() {
|
|
| 31 |
+ return exitSignalOffset + int(status.Signal()) |
|
| 32 |
+ } |
|
| 33 |
+ return status.ExitStatus() |
|
| 34 |
+} |
|
| 35 |
+ |
|
| 36 |
+// WriteJSON writes the provided struct v to w using standard json marshaling |
|
| 37 |
+func WriteJSON(w io.Writer, v interface{}) error {
|
|
| 38 |
+ data, err := json.Marshal(v) |
|
| 39 |
+ if err != nil {
|
|
| 40 |
+ return err |
|
| 41 |
+ } |
|
| 42 |
+ _, err = w.Write(data) |
|
| 43 |
+ return err |
|
| 44 |
+} |
|
| 45 |
+ |
|
| 46 |
+// CleanPath makes a path safe for use with filepath.Join. This is done by not |
|
| 47 |
+// only cleaning the path, but also (if the path is relative) adding a leading |
|
| 48 |
+// '/' and cleaning it (then removing the leading '/'). This ensures that a |
|
| 49 |
+// path resulting from prepending another path will always resolve to lexically |
|
| 50 |
+// be a subdirectory of the prefixed path. This is all done lexically, so paths |
|
| 51 |
+// that include symlinks won't be safe as a result of using CleanPath. |
|
| 52 |
+func CleanPath(path string) string {
|
|
| 53 |
+ // Deal with empty strings nicely. |
|
| 54 |
+ if path == "" {
|
|
| 55 |
+ return "" |
|
| 56 |
+ } |
|
| 57 |
+ |
|
| 58 |
+ // Ensure that all paths are cleaned (especially problematic ones like |
|
| 59 |
+ // "/../../../../../" which can cause lots of issues). |
|
| 60 |
+ path = filepath.Clean(path) |
|
| 61 |
+ |
|
| 62 |
+ // If the path isn't absolute, we need to do more processing to fix paths |
|
| 63 |
+ // such as "../../../../<etc>/some/path". We also shouldn't convert absolute |
|
| 64 |
+ // paths to relative ones. |
|
| 65 |
+ if !filepath.IsAbs(path) {
|
|
| 66 |
+ path = filepath.Clean(string(os.PathSeparator) + path) |
|
| 67 |
+ // This can't fail, as (by definition) all paths are relative to root. |
|
| 68 |
+ path, _ = filepath.Rel(string(os.PathSeparator), path) |
|
| 69 |
+ } |
|
| 70 |
+ |
|
| 71 |
+ // Clean the path again for good measure. |
|
| 72 |
+ return filepath.Clean(path) |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+// SearchLabels searches a list of key-value pairs for the provided key and |
|
| 76 |
+// returns the corresponding value. The pairs must be separated with '='. |
|
| 77 |
+func SearchLabels(labels []string, query string) string {
|
|
| 78 |
+ for _, l := range labels {
|
|
| 79 |
+ parts := strings.SplitN(l, "=", 2) |
|
| 80 |
+ if len(parts) < 2 {
|
|
| 81 |
+ continue |
|
| 82 |
+ } |
|
| 83 |
+ if parts[0] == query {
|
|
| 84 |
+ return parts[1] |
|
| 85 |
+ } |
|
| 86 |
+ } |
|
| 87 |
+ return "" |
|
| 88 |
+} |
|
| 89 |
+ |
|
| 90 |
+// Annotations returns the bundle path and user defined annotations from the |
|
| 91 |
+// libcontainer state. We need to remove the bundle because that is a label |
|
| 92 |
+// added by libcontainer. |
|
| 93 |
+func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
|
|
| 94 |
+ userAnnotations = make(map[string]string) |
|
| 95 |
+ for _, l := range labels {
|
|
| 96 |
+ parts := strings.SplitN(l, "=", 2) |
|
| 97 |
+ if len(parts) < 2 {
|
|
| 98 |
+ continue |
|
| 99 |
+ } |
|
| 100 |
+ if parts[0] == "bundle" {
|
|
| 101 |
+ bundle = parts[1] |
|
| 102 |
+ } else {
|
|
| 103 |
+ userAnnotations[parts[0]] = parts[1] |
|
| 104 |
+ } |
|
| 105 |
+ } |
|
| 106 |
+ return |
|
| 107 |
+} |
|
| 108 |
+ |
|
| 109 |
+func GetIntSize() int {
|
|
| 110 |
+ return int(unsafe.Sizeof(1)) |
|
| 111 |
+} |
| 0 | 112 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,68 @@ |
| 0 |
+// +build !windows |
|
| 1 |
+ |
|
| 2 |
+package utils |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "os" |
|
| 7 |
+ "strconv" |
|
| 8 |
+ |
|
| 9 |
+ "golang.org/x/sys/unix" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+// EnsureProcHandle returns an error unless the given file handle is on procfs. |
|
| 13 |
+func EnsureProcHandle(fh *os.File) error {
|
|
| 14 |
+ var buf unix.Statfs_t |
|
| 15 |
+ if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
|
|
| 16 |
+ return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err)
|
|
| 17 |
+ } |
|
| 18 |
+ if buf.Type != unix.PROC_SUPER_MAGIC {
|
|
| 19 |
+ return fmt.Errorf("%s is not on procfs", fh.Name())
|
|
| 20 |
+ } |
|
| 21 |
+ return nil |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for |
|
| 25 |
+// the process (except for those below the given fd value). |
|
| 26 |
+func CloseExecFrom(minFd int) error {
|
|
| 27 |
+ fdDir, err := os.Open("/proc/self/fd")
|
|
| 28 |
+ if err != nil {
|
|
| 29 |
+ return err |
|
| 30 |
+ } |
|
| 31 |
+ defer fdDir.Close() |
|
| 32 |
+ |
|
| 33 |
+ if err := EnsureProcHandle(fdDir); err != nil {
|
|
| 34 |
+ return err |
|
| 35 |
+ } |
|
| 36 |
+ |
|
| 37 |
+ fdList, err := fdDir.Readdirnames(-1) |
|
| 38 |
+ if err != nil {
|
|
| 39 |
+ return err |
|
| 40 |
+ } |
|
| 41 |
+ for _, fdStr := range fdList {
|
|
| 42 |
+ fd, err := strconv.Atoi(fdStr) |
|
| 43 |
+ // Ignore non-numeric file names. |
|
| 44 |
+ if err != nil {
|
|
| 45 |
+ continue |
|
| 46 |
+ } |
|
| 47 |
+ // Ignore descriptors lower than our specified minimum. |
|
| 48 |
+ if fd < minFd {
|
|
| 49 |
+ continue |
|
| 50 |
+ } |
|
| 51 |
+ // Intentionally ignore errors from unix.CloseOnExec -- the cases where |
|
| 52 |
+ // this might fail are basically file descriptors that have already |
|
| 53 |
+ // been closed (including and especially the one that was created when |
|
| 54 |
+ // ioutil.ReadDir did the "opendir" syscall). |
|
| 55 |
+ unix.CloseOnExec(fd) |
|
| 56 |
+ } |
|
| 57 |
+ return nil |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 60 |
+// NewSockPair returns a new unix socket pair |
|
| 61 |
+func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
|
| 62 |
+ fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) |
|
| 63 |
+ if err != nil {
|
|
| 64 |
+ return nil, nil, err |
|
| 65 |
+ } |
|
| 66 |
+ return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil |
|
| 67 |
+} |
| ... | ... |
@@ -6,7 +6,7 @@ github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 |
| 6 | 6 |
# Core libcontainer functionality. |
| 7 | 7 |
github.com/checkpoint-restore/go-criu 17b0214f6c48980c45dc47ecb0cfd6d9e02df723 # v3.11 |
| 8 | 8 |
github.com/mrunalp/fileutils 7d4729fb36185a7c1719923406c9d40e54fb93c7 |
| 9 |
-github.com/opencontainers/selinux 3a1f366feb7aecbf7a0e71ac4cea88b31597de9e # v1.2.2 |
|
| 9 |
+github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf # v1.3.0 (+ CVE-2019-16884) |
|
| 10 | 10 |
github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1 |
| 11 | 11 |
github.com/sirupsen/logrus 8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f # v1.4.1 |
| 12 | 12 |
github.com/syndtr/gocapability d98352740cb2c55f81556b63d4a1ec64c5a319c2 |
| ... | ... |
@@ -26,3 +26,6 @@ golang.org/x/sys 9eafafc0a87e0fd0aeeba439a4573537970c44c7 |
| 26 | 26 |
# console dependencies |
| 27 | 27 |
github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f |
| 28 | 28 |
github.com/pkg/errors ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1 |
| 29 |
+ |
|
| 30 |
+# ebpf dependencies |
|
| 31 |
+github.com/cilium/ebpf 95b36a581eed7b0f127306ed1d16cc0ddc06cf67 |
| ... | ... |
@@ -13,11 +13,12 @@ import ( |
| 13 | 13 |
|
| 14 | 14 |
// Valid Label Options |
| 15 | 15 |
var validOptions = map[string]bool{
|
| 16 |
- "disable": true, |
|
| 17 |
- "type": true, |
|
| 18 |
- "user": true, |
|
| 19 |
- "role": true, |
|
| 20 |
- "level": true, |
|
| 16 |
+ "disable": true, |
|
| 17 |
+ "type": true, |
|
| 18 |
+ "filetype": true, |
|
| 19 |
+ "user": true, |
|
| 20 |
+ "role": true, |
|
| 21 |
+ "level": true, |
|
| 21 | 22 |
} |
| 22 | 23 |
|
| 23 | 24 |
var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
|
| ... | ... |
@@ -51,13 +52,16 @@ func InitLabels(options []string) (plabel string, mlabel string, Err error) {
|
| 51 | 51 |
return "", mountLabel, nil |
| 52 | 52 |
} |
| 53 | 53 |
if i := strings.Index(opt, ":"); i == -1 {
|
| 54 |
- return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt)
|
|
| 54 |
+ return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt)
|
|
| 55 | 55 |
} |
| 56 | 56 |
con := strings.SplitN(opt, ":", 2) |
| 57 | 57 |
if !validOptions[con[0]] {
|
| 58 |
- return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type'", con[0])
|
|
| 58 |
+ return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0])
|
|
| 59 | 59 |
|
| 60 | 60 |
} |
| 61 |
+ if con[0] == "filetype" {
|
|
| 62 |
+ mcon["type"] = con[1] |
|
| 63 |
+ } |
|
| 61 | 64 |
pcon[con[0]] = con[1] |
| 62 | 65 |
if con[0] == "level" || con[0] == "user" {
|
| 63 | 66 |
mcon[con[0]] = con[1] |
| ... | ... |
@@ -18,6 +18,8 @@ import ( |
| 18 | 18 |
"strings" |
| 19 | 19 |
"sync" |
| 20 | 20 |
"syscall" |
| 21 |
+ |
|
| 22 |
+ "golang.org/x/sys/unix" |
|
| 21 | 23 |
) |
| 22 | 24 |
|
| 23 | 25 |
const ( |
| ... | ... |
@@ -252,6 +254,12 @@ func getSELinuxPolicyRoot() string {
|
| 252 | 252 |
return filepath.Join(selinuxDir, readConfig(selinuxTypeTag)) |
| 253 | 253 |
} |
| 254 | 254 |
|
| 255 |
+func isProcHandle(fh *os.File) (bool, error) {
|
|
| 256 |
+ var buf unix.Statfs_t |
|
| 257 |
+ err := unix.Fstatfs(int(fh.Fd()), &buf) |
|
| 258 |
+ return buf.Type == unix.PROC_SUPER_MAGIC, err |
|
| 259 |
+} |
|
| 260 |
+ |
|
| 255 | 261 |
func readCon(fpath string) (string, error) {
|
| 256 | 262 |
if fpath == "" {
|
| 257 | 263 |
return "", ErrEmptyPath |
| ... | ... |
@@ -263,6 +271,12 @@ func readCon(fpath string) (string, error) {
|
| 263 | 263 |
} |
| 264 | 264 |
defer in.Close() |
| 265 | 265 |
|
| 266 |
+ if ok, err := isProcHandle(in); err != nil {
|
|
| 267 |
+ return "", err |
|
| 268 |
+ } else if !ok {
|
|
| 269 |
+ return "", fmt.Errorf("%s not on procfs", fpath)
|
|
| 270 |
+ } |
|
| 271 |
+ |
|
| 266 | 272 |
var retval string |
| 267 | 273 |
if _, err := fmt.Fscanf(in, "%s", &retval); err != nil {
|
| 268 | 274 |
return "", err |
| ... | ... |
@@ -345,6 +359,12 @@ func writeCon(fpath string, val string) error {
|
| 345 | 345 |
} |
| 346 | 346 |
defer out.Close() |
| 347 | 347 |
|
| 348 |
+ if ok, err := isProcHandle(out); err != nil {
|
|
| 349 |
+ return err |
|
| 350 |
+ } else if !ok {
|
|
| 351 |
+ return fmt.Errorf("%s not on procfs", fpath)
|
|
| 352 |
+ } |
|
| 353 |
+ |
|
| 348 | 354 |
if val != "" {
|
| 349 | 355 |
_, err = out.Write([]byte(val)) |
| 350 | 356 |
} else {
|
| ... | ... |
@@ -392,6 +412,14 @@ func SetExecLabel(label string) error {
|
| 392 | 392 |
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label)
|
| 393 | 393 |
} |
| 394 | 394 |
|
| 395 |
+/* |
|
| 396 |
+SetTaskLabel sets the SELinux label for the current thread and returns an error on failure. |
|
| 397 |
+This requires the dyntransition permission. |
|
| 398 |
+*/ |
|
| 399 |
+func SetTaskLabel(label string) error {
|
|
| 400 |
+ return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()), label)
|
|
| 401 |
+} |
|
| 402 |
+ |
|
| 395 | 403 |
// SetSocketLabel takes a process label and tells the kernel to assign the |
| 396 | 404 |
// label to the next socket that gets created |
| 397 | 405 |
func SetSocketLabel(label string) error {
|
| ... | ... |
@@ -403,6 +431,11 @@ func SocketLabel() (string, error) {
|
| 403 | 403 |
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/sockcreate", syscall.Gettid()))
|
| 404 | 404 |
} |
| 405 | 405 |
|
| 406 |
+// PeerLabel retrieves the label of the client on the other side of a socket |
|
| 407 |
+func PeerLabel(fd uintptr) (string, error) {
|
|
| 408 |
+ return unix.GetsockoptString(int(fd), syscall.SOL_SOCKET, syscall.SO_PEERSEC) |
|
| 409 |
+} |
|
| 410 |
+ |
|
| 406 | 411 |
// SetKeyLabel takes a process label and tells the kernel to assign the |
| 407 | 412 |
// label to the next kernel keyring that gets created |
| 408 | 413 |
func SetKeyLabel(label string) error {
|
| ... | ... |
@@ -97,6 +97,14 @@ func SetExecLabel(label string) error {
|
| 97 | 97 |
} |
| 98 | 98 |
|
| 99 | 99 |
/* |
| 100 |
+SetTaskLabel sets the SELinux label for the current thread, or an error. |
|
| 101 |
+This requires the dyntransition permission. |
|
| 102 |
+*/ |
|
| 103 |
+func SetTaskLabel(label string) error {
|
|
| 104 |
+ return nil |
|
| 105 |
+} |
|
| 106 |
+ |
|
| 107 |
+/* |
|
| 100 | 108 |
SetSocketLabel sets the SELinux label that the kernel will use for any programs |
| 101 | 109 |
that are executed by the current process thread, or an error. |
| 102 | 110 |
*/ |
| ... | ... |
@@ -109,6 +117,11 @@ func SocketLabel() (string, error) {
|
| 109 | 109 |
return "", nil |
| 110 | 110 |
} |
| 111 | 111 |
|
| 112 |
+// PeerLabel retrieves the label of the client on the other side of a socket |
|
| 113 |
+func PeerLabel(fd uintptr) (string, error) {
|
|
| 114 |
+ return "", nil |
|
| 115 |
+} |
|
| 116 |
+ |
|
| 112 | 117 |
// SetKeyLabel takes a process label and tells the kernel to assign the |
| 113 | 118 |
// label to the next kernel keyring that gets created |
| 114 | 119 |
func SetKeyLabel(label string) error {
|