Removes the dependency on various libcontainer packages:
- github.com/opencontainers/runc/libcontainer/system
- github.com/opencontainers/runc/libcontainer/user
- github.com/opencontainers/runc/libcontainer/userns
full diff: https://github.com/opencontainers/runc/compare/v1.1.14..v1.2.0
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
| ... | ... |
@@ -81,7 +81,7 @@ require ( |
| 81 | 81 |
github.com/morikuni/aec v1.0.0 |
| 82 | 82 |
github.com/opencontainers/go-digest v1.0.0 |
| 83 | 83 |
github.com/opencontainers/image-spec v1.1.0 |
| 84 |
- github.com/opencontainers/runc v1.1.14 |
|
| 84 |
+ github.com/opencontainers/runc v1.2.0 |
|
| 85 | 85 |
github.com/opencontainers/runtime-spec v1.2.0 |
| 86 | 86 |
github.com/opencontainers/selinux v1.11.1 |
| 87 | 87 |
github.com/pelletier/go-toml v1.9.5 |
| ... | ... |
@@ -422,8 +422,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 |
| 422 | 422 |
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= |
| 423 | 423 |
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= |
| 424 | 424 |
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= |
| 425 |
-github.com/opencontainers/runc v1.1.14 h1:rgSuzbmgz5DUJjeSnw337TxDbRuqjs6iqQck/2weR6w= |
|
| 426 |
-github.com/opencontainers/runc v1.1.14/go.mod h1:E4C2z+7BxR7GHXp0hAY53mek+x49X1LjPNeMTfRGvOA= |
|
| 425 |
+github.com/opencontainers/runc v1.2.0 h1:qke7ZVCmJcKrJVY2iHJVC+0kql9uYdkusOPsQOOeBw4= |
|
| 426 |
+github.com/opencontainers/runc v1.2.0/go.mod h1:/PXzF0h531HTMsYQnmxXkBD7YaGShm/2zcRB79dksUc= |
|
| 427 | 427 |
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= |
| 428 | 428 |
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= |
| 429 | 429 |
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= |
| ... | ... |
@@ -8,9 +8,9 @@ The following is courtesy of our legal counsel: |
| 8 | 8 |
|
| 9 | 9 |
|
| 10 | 10 |
Use and transfer of Docker may be subject to certain restrictions by the |
| 11 |
-United States and other governments. |
|
| 11 |
+United States and other governments. |
|
| 12 | 12 |
It is your responsibility to ensure that your use and/or transfer does not |
| 13 |
-violate applicable laws. |
|
| 13 |
+violate applicable laws. |
|
| 14 | 14 |
|
| 15 | 15 |
For more information, please see http://www.bis.doc.gov |
| 16 | 16 |
|
| ... | ... |
@@ -1,9 +1,30 @@ |
| 1 | 1 |
package cgroups |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 |
+ "errors" |
|
| 5 |
+ |
|
| 4 | 6 |
"github.com/opencontainers/runc/libcontainer/configs" |
| 5 | 7 |
) |
| 6 | 8 |
|
| 9 |
+var ( |
|
| 10 |
+ // ErrDevicesUnsupported is an error returned when a cgroup manager |
|
| 11 |
+ // is not configured to set device rules. |
|
| 12 |
+ ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
|
|
| 13 |
+ |
|
| 14 |
+ // ErrRootless is returned by [Manager.Apply] when there is an error |
|
| 15 |
+ // creating cgroup directory, and cgroup.Rootless is set. In general, |
|
| 16 |
+ // this error is to be ignored. |
|
| 17 |
+ ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")
|
|
| 18 |
+ |
|
| 19 |
+ // DevicesSetV1 and DevicesSetV2 are functions to set devices for |
|
| 20 |
+ // cgroup v1 and v2, respectively. Unless |
|
| 21 |
+ // [github.com/opencontainers/runc/libcontainer/cgroups/devices] |
|
| 22 |
+ // package is imported, it is set to nil, so cgroup managers can't |
|
| 23 |
+ // manage devices. |
|
| 24 |
+ DevicesSetV1 func(path string, r *configs.Resources) error |
|
| 25 |
+ DevicesSetV2 func(path string, r *configs.Resources) error |
|
| 26 |
+) |
|
| 27 |
+ |
|
| 7 | 28 |
type Manager interface {
|
| 8 | 29 |
// Apply creates a cgroup, if not yet created, and adds a process |
| 9 | 30 |
// with the specified pid into that cgroup. A special value of -1 |
| ... | ... |
@@ -50,22 +50,45 @@ func WriteFile(dir, file, data string) error {
|
| 50 | 50 |
return err |
| 51 | 51 |
} |
| 52 | 52 |
defer fd.Close() |
| 53 |
- if err := retryingWriteFile(fd, data); err != nil {
|
|
| 53 |
+ if _, err := fd.WriteString(data); err != nil {
|
|
| 54 | 54 |
// Having data in the error message helps in debugging. |
| 55 | 55 |
return fmt.Errorf("failed to write %q: %w", data, err)
|
| 56 | 56 |
} |
| 57 | 57 |
return nil |
| 58 | 58 |
} |
| 59 | 59 |
|
| 60 |
-func retryingWriteFile(fd *os.File, data string) error {
|
|
| 60 |
+// WriteFileByLine is the same as WriteFile, except if data contains newlines, |
|
| 61 |
+// it is written line by line. |
|
| 62 |
+func WriteFileByLine(dir, file, data string) error {
|
|
| 63 |
+ i := strings.Index(data, "\n") |
|
| 64 |
+ if i == -1 {
|
|
| 65 |
+ return WriteFile(dir, file, data) |
|
| 66 |
+ } |
|
| 67 |
+ |
|
| 68 |
+ fd, err := OpenFile(dir, file, unix.O_WRONLY) |
|
| 69 |
+ if err != nil {
|
|
| 70 |
+ return err |
|
| 71 |
+ } |
|
| 72 |
+ defer fd.Close() |
|
| 73 |
+ start := 0 |
|
| 61 | 74 |
for {
|
| 62 |
- _, err := fd.Write([]byte(data)) |
|
| 63 |
- if errors.Is(err, unix.EINTR) {
|
|
| 64 |
- logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
|
| 65 |
- continue |
|
| 75 |
+ var line string |
|
| 76 |
+ if i == -1 {
|
|
| 77 |
+ line = data[start:] |
|
| 78 |
+ } else {
|
|
| 79 |
+ line = data[start : start+i+1] |
|
| 66 | 80 |
} |
| 67 |
- return err |
|
| 81 |
+ _, err := fd.WriteString(line) |
|
| 82 |
+ if err != nil {
|
|
| 83 |
+ return fmt.Errorf("failed to write %q: %w", line, err)
|
|
| 84 |
+ } |
|
| 85 |
+ if i == -1 {
|
|
| 86 |
+ break |
|
| 87 |
+ } |
|
| 88 |
+ start += i + 1 |
|
| 89 |
+ i = strings.Index(data[start:], "\n") |
|
| 68 | 90 |
} |
| 91 |
+ return nil |
|
| 69 | 92 |
} |
| 70 | 93 |
|
| 71 | 94 |
const ( |
| ... | ... |
@@ -90,7 +113,7 @@ func prepareOpenat2() error {
|
| 90 | 90 |
}) |
| 91 | 91 |
if err != nil {
|
| 92 | 92 |
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
| 93 |
- if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
|
|
| 93 |
+ if err != unix.ENOSYS {
|
|
| 94 | 94 |
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
| 95 | 95 |
} else {
|
| 96 | 96 |
logrus.Debug("openat2 not available, falling back to securejoin")
|
| ... | ... |
@@ -148,8 +171,9 @@ func openFile(dir, file string, flags int) (*os.File, error) {
|
| 148 | 148 |
// |
| 149 | 149 |
// TODO: if such usage will ever be common, amend this |
| 150 | 150 |
// to reopen cgroupRootHandle and retry openat2. |
| 151 |
- fdStr := strconv.Itoa(int(cgroupRootHandle.Fd())) |
|
| 152 |
- fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
|
| 151 |
+ fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
|
|
| 152 |
+ defer closer() |
|
| 153 |
+ fdDest, _ := os.Readlink(fdPath) |
|
| 153 | 154 |
if fdDest != cgroupfsDir {
|
| 154 | 155 |
// Wrap the error so it is clear that cgroupRootHandle |
| 155 | 156 |
// is opened to an unexpected/wrong directory. |
| ... | ... |
@@ -32,9 +32,22 @@ type CpuUsage struct {
|
| 32 | 32 |
UsageInUsermode uint64 `json:"usage_in_usermode"` |
| 33 | 33 |
} |
| 34 | 34 |
|
| 35 |
+type PSIData struct {
|
|
| 36 |
+ Avg10 float64 `json:"avg10"` |
|
| 37 |
+ Avg60 float64 `json:"avg60"` |
|
| 38 |
+ Avg300 float64 `json:"avg300"` |
|
| 39 |
+ Total uint64 `json:"total"` |
|
| 40 |
+} |
|
| 41 |
+ |
|
| 42 |
+type PSIStats struct {
|
|
| 43 |
+ Some PSIData `json:"some,omitempty"` |
|
| 44 |
+ Full PSIData `json:"full,omitempty"` |
|
| 45 |
+} |
|
| 46 |
+ |
|
| 35 | 47 |
type CpuStats struct {
|
| 36 | 48 |
CpuUsage CpuUsage `json:"cpu_usage,omitempty"` |
| 37 | 49 |
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` |
| 50 |
+ PSI *PSIStats `json:"psi,omitempty"` |
|
| 38 | 51 |
} |
| 39 | 52 |
|
| 40 | 53 |
type CPUSetStats struct {
|
| ... | ... |
@@ -91,6 +104,7 @@ type MemoryStats struct {
|
| 91 | 91 |
UseHierarchy bool `json:"use_hierarchy"` |
| 92 | 92 |
|
| 93 | 93 |
Stats map[string]uint64 `json:"stats,omitempty"` |
| 94 |
+ PSI *PSIStats `json:"psi,omitempty"` |
|
| 94 | 95 |
} |
| 95 | 96 |
|
| 96 | 97 |
type PageUsageByNUMA struct {
|
| ... | ... |
@@ -135,6 +149,7 @@ type BlkioStats struct {
|
| 135 | 135 |
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` |
| 136 | 136 |
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` |
| 137 | 137 |
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` |
| 138 |
+ PSI *PSIStats `json:"psi,omitempty"` |
|
| 138 | 139 |
} |
| 139 | 140 |
|
| 140 | 141 |
type HugetlbStats struct {
|
| ... | ... |
@@ -157,6 +172,13 @@ type RdmaStats struct {
|
| 157 | 157 |
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` |
| 158 | 158 |
} |
| 159 | 159 |
|
| 160 |
+type MiscStats struct {
|
|
| 161 |
+ // current resource usage for a key in misc |
|
| 162 |
+ Usage uint64 `json:"usage,omitempty"` |
|
| 163 |
+ // number of times the resource usage was about to go over the max boundary |
|
| 164 |
+ Events uint64 `json:"events,omitempty"` |
|
| 165 |
+} |
|
| 166 |
+ |
|
| 160 | 167 |
type Stats struct {
|
| 161 | 168 |
CpuStats CpuStats `json:"cpu_stats,omitempty"` |
| 162 | 169 |
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` |
| ... | ... |
@@ -166,10 +188,13 @@ type Stats struct {
|
| 166 | 166 |
// the map is in the format "size of hugepage: stats of the hugepage" |
| 167 | 167 |
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` |
| 168 | 168 |
RdmaStats RdmaStats `json:"rdma_stats,omitempty"` |
| 169 |
+ // the map is in the format "misc resource name: stats of the key" |
|
| 170 |
+ MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` |
|
| 169 | 171 |
} |
| 170 | 172 |
|
| 171 | 173 |
func NewStats() *Stats {
|
| 172 | 174 |
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
| 173 | 175 |
hugetlbStats := make(map[string]HugetlbStats) |
| 174 |
- return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
|
|
| 176 |
+ miscStats := make(map[string]MiscStats) |
|
| 177 |
+ return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
|
|
| 175 | 178 |
} |
| ... | ... |
@@ -12,7 +12,7 @@ import ( |
| 12 | 12 |
"sync" |
| 13 | 13 |
"time" |
| 14 | 14 |
|
| 15 |
- "github.com/opencontainers/runc/libcontainer/userns" |
|
| 15 |
+ "github.com/moby/sys/userns" |
|
| 16 | 16 |
"github.com/sirupsen/logrus" |
| 17 | 17 |
"golang.org/x/sys/unix" |
| 18 | 18 |
) |
| ... | ... |
@@ -36,13 +36,13 @@ func IsCgroup2UnifiedMode() bool {
|
| 36 | 36 |
var st unix.Statfs_t |
| 37 | 37 |
err := unix.Statfs(unifiedMountpoint, &st) |
| 38 | 38 |
if err != nil {
|
| 39 |
+ level := logrus.WarnLevel |
|
| 39 | 40 |
if os.IsNotExist(err) && userns.RunningInUserNS() {
|
| 40 |
- // ignore the "not found" error if running in userns |
|
| 41 |
- logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint)
|
|
| 42 |
- isUnified = false |
|
| 43 |
- return |
|
| 41 |
+ // For rootless containers, sweep it under the rug. |
|
| 42 |
+ level = logrus.DebugLevel |
|
| 44 | 43 |
} |
| 45 |
- panic(fmt.Sprintf("cannot statfs cgroup root: %s", err))
|
|
| 44 |
+ logrus.StandardLogger().Logf(level, |
|
| 45 |
+ "statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err) |
|
| 46 | 46 |
} |
| 47 | 47 |
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC |
| 48 | 48 |
}) |
| ... | ... |
@@ -136,18 +136,18 @@ func GetAllSubsystems() ([]string, error) {
|
| 136 | 136 |
return subsystems, nil |
| 137 | 137 |
} |
| 138 | 138 |
|
| 139 |
-func readProcsFile(dir string) ([]int, error) {
|
|
| 140 |
- f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY) |
|
| 139 |
+func readProcsFile(dir string) (out []int, _ error) {
|
|
| 140 |
+ file := CgroupProcesses |
|
| 141 |
+ retry := true |
|
| 142 |
+ |
|
| 143 |
+again: |
|
| 144 |
+ f, err := OpenFile(dir, file, os.O_RDONLY) |
|
| 141 | 145 |
if err != nil {
|
| 142 | 146 |
return nil, err |
| 143 | 147 |
} |
| 144 | 148 |
defer f.Close() |
| 145 | 149 |
|
| 146 |
- var ( |
|
| 147 |
- s = bufio.NewScanner(f) |
|
| 148 |
- out = []int{}
|
|
| 149 |
- ) |
|
| 150 |
- |
|
| 150 |
+ s := bufio.NewScanner(f) |
|
| 151 | 151 |
for s.Scan() {
|
| 152 | 152 |
if t := s.Text(); t != "" {
|
| 153 | 153 |
pid, err := strconv.Atoi(t) |
| ... | ... |
@@ -157,6 +157,13 @@ func readProcsFile(dir string) ([]int, error) {
|
| 157 | 157 |
out = append(out, pid) |
| 158 | 158 |
} |
| 159 | 159 |
} |
| 160 |
+ if errors.Is(s.Err(), unix.ENOTSUP) && retry {
|
|
| 161 |
+ // For a threaded cgroup, read returns ENOTSUP, and we should |
|
| 162 |
+ // read from cgroup.threads instead. |
|
| 163 |
+ file = "cgroup.threads" |
|
| 164 |
+ retry = false |
|
| 165 |
+ goto again |
|
| 166 |
+ } |
|
| 160 | 167 |
return out, s.Err() |
| 161 | 168 |
} |
| 162 | 169 |
|
| ... | ... |
@@ -217,21 +224,26 @@ func PathExists(path string) bool {
|
| 217 | 217 |
return true |
| 218 | 218 |
} |
| 219 | 219 |
|
| 220 |
-func EnterPid(cgroupPaths map[string]string, pid int) error {
|
|
| 221 |
- for _, path := range cgroupPaths {
|
|
| 222 |
- if PathExists(path) {
|
|
| 223 |
- if err := WriteCgroupProc(path, pid); err != nil {
|
|
| 224 |
- return err |
|
| 225 |
- } |
|
| 226 |
- } |
|
| 227 |
- } |
|
| 228 |
- return nil |
|
| 229 |
-} |
|
| 220 |
+// rmdir tries to remove a directory, optionally retrying on EBUSY. |
|
| 221 |
+func rmdir(path string, retry bool) error {
|
|
| 222 |
+ delay := time.Millisecond |
|
| 223 |
+ tries := 10 |
|
| 230 | 224 |
|
| 231 |
-func rmdir(path string) error {
|
|
| 225 |
+again: |
|
| 232 | 226 |
err := unix.Rmdir(path) |
| 233 |
- if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
|
|
| 227 |
+ switch err { // nolint:errorlint // unix errors are bare
|
|
| 228 |
+ case nil, unix.ENOENT: |
|
| 234 | 229 |
return nil |
| 230 |
+ case unix.EINTR: |
|
| 231 |
+ goto again |
|
| 232 |
+ case unix.EBUSY: |
|
| 233 |
+ if retry && tries > 0 {
|
|
| 234 |
+ time.Sleep(delay) |
|
| 235 |
+ delay *= 2 |
|
| 236 |
+ tries-- |
|
| 237 |
+ goto again |
|
| 238 |
+ |
|
| 239 |
+ } |
|
| 235 | 240 |
} |
| 236 | 241 |
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
| 237 | 242 |
} |
| ... | ... |
@@ -239,68 +251,40 @@ func rmdir(path string) error {
|
| 239 | 239 |
// RemovePath aims to remove cgroup path. It does so recursively, |
| 240 | 240 |
// by removing any subdirectories (sub-cgroups) first. |
| 241 | 241 |
func RemovePath(path string) error {
|
| 242 |
- // try the fast path first |
|
| 243 |
- if err := rmdir(path); err == nil {
|
|
| 242 |
+ // Try the fast path first. |
|
| 243 |
+ if err := rmdir(path, false); err == nil {
|
|
| 244 | 244 |
return nil |
| 245 | 245 |
} |
| 246 | 246 |
|
| 247 | 247 |
infos, err := os.ReadDir(path) |
| 248 |
- if err != nil {
|
|
| 249 |
- if os.IsNotExist(err) {
|
|
| 250 |
- err = nil |
|
| 251 |
- } |
|
| 248 |
+ if err != nil && !os.IsNotExist(err) {
|
|
| 252 | 249 |
return err |
| 253 | 250 |
} |
| 254 | 251 |
for _, info := range infos {
|
| 255 | 252 |
if info.IsDir() {
|
| 256 |
- // We should remove subcgroups dir first |
|
| 253 |
+ // We should remove subcgroup first. |
|
| 257 | 254 |
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
| 258 | 255 |
break |
| 259 | 256 |
} |
| 260 | 257 |
} |
| 261 | 258 |
} |
| 262 | 259 |
if err == nil {
|
| 263 |
- err = rmdir(path) |
|
| 260 |
+ err = rmdir(path, true) |
|
| 264 | 261 |
} |
| 265 | 262 |
return err |
| 266 | 263 |
} |
| 267 | 264 |
|
| 268 | 265 |
// RemovePaths iterates over the provided paths removing them. |
| 269 |
-// We trying to remove all paths five times with increasing delay between tries. |
|
| 270 |
-// If after all there are not removed cgroups - appropriate error will be |
|
| 271 |
-// returned. |
|
| 272 | 266 |
func RemovePaths(paths map[string]string) (err error) {
|
| 273 |
- const retries = 5 |
|
| 274 |
- delay := 10 * time.Millisecond |
|
| 275 |
- for i := 0; i < retries; i++ {
|
|
| 276 |
- if i != 0 {
|
|
| 277 |
- time.Sleep(delay) |
|
| 278 |
- delay *= 2 |
|
| 279 |
- } |
|
| 280 |
- for s, p := range paths {
|
|
| 281 |
- if err := RemovePath(p); err != nil {
|
|
| 282 |
- // do not log intermediate iterations |
|
| 283 |
- switch i {
|
|
| 284 |
- case 0: |
|
| 285 |
- logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
|
|
| 286 |
- case retries - 1: |
|
| 287 |
- logrus.WithError(err).Error("Failed to remove cgroup")
|
|
| 288 |
- } |
|
| 289 |
- } |
|
| 290 |
- _, err := os.Stat(p) |
|
| 291 |
- // We need this strange way of checking cgroups existence because |
|
| 292 |
- // RemoveAll almost always returns error, even on already removed |
|
| 293 |
- // cgroups |
|
| 294 |
- if os.IsNotExist(err) {
|
|
| 295 |
- delete(paths, s) |
|
| 296 |
- } |
|
| 297 |
- } |
|
| 298 |
- if len(paths) == 0 {
|
|
| 299 |
- //nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 |
|
| 300 |
- paths = make(map[string]string) |
|
| 301 |
- return nil |
|
| 267 |
+ for s, p := range paths {
|
|
| 268 |
+ if err := RemovePath(p); err == nil {
|
|
| 269 |
+ delete(paths, s) |
|
| 302 | 270 |
} |
| 303 | 271 |
} |
| 272 |
+ if len(paths) == 0 {
|
|
| 273 |
+ clear(paths) |
|
| 274 |
+ return nil |
|
| 275 |
+ } |
|
| 304 | 276 |
return fmt.Errorf("Failed to remove paths: %v", paths)
|
| 305 | 277 |
} |
| 306 | 278 |
|
| ... | ... |
@@ -99,11 +99,12 @@ func tryDefaultPath(cgroupPath, subsystem string) string {
|
| 99 | 99 |
// expensive), so it is assumed that cgroup mounts are not being changed. |
| 100 | 100 |
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
| 101 | 101 |
readMountinfoOnce.Do(func() {
|
| 102 |
+ // mountinfo.GetMounts uses /proc/thread-self, so we can use it without |
|
| 103 |
+ // issues. |
|
| 102 | 104 |
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( |
| 103 | 105 |
mountinfo.FSTypeFilter("cgroup"),
|
| 104 | 106 |
) |
| 105 | 107 |
}) |
| 106 |
- |
|
| 107 | 108 |
return cgroupMountinfo, readMountinfoErr |
| 108 | 109 |
} |
| 109 | 110 |
|
| ... | ... |
@@ -196,6 +197,9 @@ func getCgroupMountsV1(all bool) ([]Mount, error) {
|
| 196 | 196 |
return nil, err |
| 197 | 197 |
} |
| 198 | 198 |
|
| 199 |
+ // We don't need to use /proc/thread-self here because runc always runs |
|
| 200 |
+ // with every thread in the same cgroup. This lets us avoid having to do |
|
| 201 |
+ // runtime.LockOSThread. |
|
| 199 | 202 |
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
| 200 | 203 |
if err != nil {
|
| 201 | 204 |
return nil, err |
| ... | ... |
@@ -214,6 +218,10 @@ func GetOwnCgroup(subsystem string) (string, error) {
|
| 214 | 214 |
if IsCgroup2UnifiedMode() {
|
| 215 | 215 |
return "", errUnified |
| 216 | 216 |
} |
| 217 |
+ |
|
| 218 |
+ // We don't need to use /proc/thread-self here because runc always runs |
|
| 219 |
+ // with every thread in the same cgroup. This lets us avoid having to do |
|
| 220 |
+ // runtime.LockOSThread. |
|
| 217 | 221 |
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
| 218 | 222 |
if err != nil {
|
| 219 | 223 |
return "", err |
| ... | ... |
@@ -236,27 +244,6 @@ func GetOwnCgroupPath(subsystem string) (string, error) {
|
| 236 | 236 |
return getCgroupPathHelper(subsystem, cgroup) |
| 237 | 237 |
} |
| 238 | 238 |
|
| 239 |
-func GetInitCgroup(subsystem string) (string, error) {
|
|
| 240 |
- if IsCgroup2UnifiedMode() {
|
|
| 241 |
- return "", errUnified |
|
| 242 |
- } |
|
| 243 |
- cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
|
| 244 |
- if err != nil {
|
|
| 245 |
- return "", err |
|
| 246 |
- } |
|
| 247 |
- |
|
| 248 |
- return getControllerPath(subsystem, cgroups) |
|
| 249 |
-} |
|
| 250 |
- |
|
| 251 |
-func GetInitCgroupPath(subsystem string) (string, error) {
|
|
| 252 |
- cgroup, err := GetInitCgroup(subsystem) |
|
| 253 |
- if err != nil {
|
|
| 254 |
- return "", err |
|
| 255 |
- } |
|
| 256 |
- |
|
| 257 |
- return getCgroupPathHelper(subsystem, cgroup) |
|
| 258 |
-} |
|
| 259 |
- |
|
| 260 | 239 |
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
| 261 | 240 |
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
| 262 | 241 |
if err != nil {
|
| ... | ... |
@@ -2,8 +2,8 @@ package configs |
| 2 | 2 |
|
| 3 | 3 |
import "fmt" |
| 4 | 4 |
|
| 5 |
-// blockIODevice holds major:minor format supported in blkio cgroup |
|
| 6 |
-type blockIODevice struct {
|
|
| 5 |
+// BlockIODevice holds major:minor format supported in blkio cgroup. |
|
| 6 |
+type BlockIODevice struct {
|
|
| 7 | 7 |
// Major is the device's major number |
| 8 | 8 |
Major int64 `json:"major"` |
| 9 | 9 |
// Minor is the device's minor number |
| ... | ... |
@@ -12,7 +12,7 @@ type blockIODevice struct {
|
| 12 | 12 |
|
| 13 | 13 |
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair |
| 14 | 14 |
type WeightDevice struct {
|
| 15 |
- blockIODevice |
|
| 15 |
+ BlockIODevice |
|
| 16 | 16 |
// Weight is the bandwidth rate for the device, range is from 10 to 1000 |
| 17 | 17 |
Weight uint16 `json:"weight"` |
| 18 | 18 |
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only |
| ... | ... |
@@ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string {
|
| 41 | 41 |
|
| 42 | 42 |
// ThrottleDevice struct holds a `major:minor rate_per_second` pair |
| 43 | 43 |
type ThrottleDevice struct {
|
| 44 |
- blockIODevice |
|
| 44 |
+ BlockIODevice |
|
| 45 | 45 |
// Rate is the IO rate limit per cgroup per device |
| 46 | 46 |
Rate uint64 `json:"rate"` |
| 47 | 47 |
} |
| ... | ... |
@@ -69,6 +69,9 @@ type Resources struct {
|
| 69 | 69 |
// CPU hardcap limit (in usecs). Allowed cpu time in a given period. |
| 70 | 70 |
CpuQuota int64 `json:"cpu_quota"` |
| 71 | 71 |
|
| 72 |
+ // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. |
|
| 73 |
+ CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive |
|
| 74 |
+ |
|
| 72 | 75 |
// CPU period to be used for hardcapping (in usecs). 0 to use system default. |
| 73 | 76 |
CpuPeriod uint64 `json:"cpu_period"` |
| 74 | 77 |
|
| ... | ... |
@@ -84,6 +87,9 @@ type Resources struct {
|
| 84 | 84 |
// MEM to use |
| 85 | 85 |
CpusetMems string `json:"cpuset_mems"` |
| 86 | 86 |
|
| 87 |
+ // cgroup SCHED_IDLE |
|
| 88 |
+ CPUIdle *int64 `json:"cpu_idle,omitempty"` |
|
| 89 |
+ |
|
| 87 | 90 |
// Process limit; set <= `0' to disable limit. |
| 88 | 91 |
PidsLimit int64 `json:"pids_limit"` |
| 89 | 92 |
|
| ... | ... |
@@ -155,4 +161,9 @@ type Resources struct {
|
| 155 | 155 |
// during Set() to figure out whether the freeze is required. Those |
| 156 | 156 |
// methods may be relatively slow, thus this flag. |
| 157 | 157 |
SkipFreezeOnSet bool `json:"-"` |
| 158 |
+ |
|
| 159 |
+ // MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check |
|
| 160 |
+ // if the new memory limits (Memory and MemorySwap) being set are lower |
|
| 161 |
+ // than the current memory usage, and reject if so. |
|
| 162 |
+ MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"` |
|
| 158 | 163 |
} |
| ... | ... |
@@ -8,6 +8,7 @@ import ( |
| 8 | 8 |
"time" |
| 9 | 9 |
|
| 10 | 10 |
"github.com/sirupsen/logrus" |
| 11 |
+ "golang.org/x/sys/unix" |
|
| 11 | 12 |
|
| 12 | 13 |
"github.com/opencontainers/runc/libcontainer/devices" |
| 13 | 14 |
"github.com/opencontainers/runtime-spec/specs-go" |
| ... | ... |
@@ -31,12 +32,13 @@ type IDMap struct {
|
| 31 | 31 |
// for syscalls. Additional architectures can be added by specifying them in |
| 32 | 32 |
// Architectures. |
| 33 | 33 |
type Seccomp struct {
|
| 34 |
- DefaultAction Action `json:"default_action"` |
|
| 35 |
- Architectures []string `json:"architectures"` |
|
| 36 |
- Syscalls []*Syscall `json:"syscalls"` |
|
| 37 |
- DefaultErrnoRet *uint `json:"default_errno_ret"` |
|
| 38 |
- ListenerPath string `json:"listener_path,omitempty"` |
|
| 39 |
- ListenerMetadata string `json:"listener_metadata,omitempty"` |
|
| 34 |
+ DefaultAction Action `json:"default_action"` |
|
| 35 |
+ Architectures []string `json:"architectures"` |
|
| 36 |
+ Flags []specs.LinuxSeccompFlag `json:"flags"` |
|
| 37 |
+ Syscalls []*Syscall `json:"syscalls"` |
|
| 38 |
+ DefaultErrnoRet *uint `json:"default_errno_ret"` |
|
| 39 |
+ ListenerPath string `json:"listener_path,omitempty"` |
|
| 40 |
+ ListenerMetadata string `json:"listener_metadata,omitempty"` |
|
| 40 | 41 |
} |
| 41 | 42 |
|
| 42 | 43 |
// Action is taken upon rule match in Seccomp |
| ... | ... |
@@ -83,9 +85,6 @@ type Syscall struct {
|
| 83 | 83 |
Args []*Arg `json:"args"` |
| 84 | 84 |
} |
| 85 | 85 |
|
| 86 |
-// TODO Windows. Many of these fields should be factored out into those parts |
|
| 87 |
-// which are common across platforms, and those which are platform specific. |
|
| 88 |
- |
|
| 89 | 86 |
// Config defines configuration options for executing a process inside a contained environment. |
| 90 | 87 |
type Config struct {
|
| 91 | 88 |
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs |
| ... | ... |
@@ -121,6 +120,9 @@ type Config struct {
|
| 121 | 121 |
// Hostname optionally sets the container's hostname if provided |
| 122 | 122 |
Hostname string `json:"hostname"` |
| 123 | 123 |
|
| 124 |
+ // Domainname optionally sets the container's domainname if provided |
|
| 125 |
+ Domainname string `json:"domainname"` |
|
| 126 |
+ |
|
| 124 | 127 |
// Namespaces specifies the container's namespaces that it should setup when cloning the init process |
| 125 | 128 |
// If a namespace is not provided that namespace is shared from the container's parent process |
| 126 | 129 |
Namespaces Namespaces `json:"namespaces"` |
| ... | ... |
@@ -158,11 +160,11 @@ type Config struct {
|
| 158 | 158 |
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ |
| 159 | 159 |
OomScoreAdj *int `json:"oom_score_adj,omitempty"` |
| 160 | 160 |
|
| 161 |
- // UidMappings is an array of User ID mappings for User Namespaces |
|
| 162 |
- UidMappings []IDMap `json:"uid_mappings"` |
|
| 161 |
+ // UIDMappings is an array of User ID mappings for User Namespaces |
|
| 162 |
+ UIDMappings []IDMap `json:"uid_mappings"` |
|
| 163 | 163 |
|
| 164 |
- // GidMappings is an array of Group ID mappings for User Namespaces |
|
| 165 |
- GidMappings []IDMap `json:"gid_mappings"` |
|
| 164 |
+ // GIDMappings is an array of Group ID mappings for User Namespaces |
|
| 165 |
+ GIDMappings []IDMap `json:"gid_mappings"` |
|
| 166 | 166 |
|
| 167 | 167 |
// MaskPaths specifies paths within the container's rootfs to mask over with a bind |
| 168 | 168 |
// mount pointing to /dev/null as to prevent reads of the file. |
| ... | ... |
@@ -211,8 +213,87 @@ type Config struct {
|
| 211 | 211 |
// RootlessCgroups is set when unlikely to have the full access to cgroups. |
| 212 | 212 |
// When RootlessCgroups is set, cgroups errors are ignored. |
| 213 | 213 |
RootlessCgroups bool `json:"rootless_cgroups,omitempty"` |
| 214 |
+ |
|
| 215 |
+ // TimeOffsets specifies the offset for supporting time namespaces. |
|
| 216 |
+ TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` |
|
| 217 |
+ |
|
| 218 |
+ // Scheduler represents the scheduling attributes for a process. |
|
| 219 |
+ Scheduler *Scheduler `json:"scheduler,omitempty"` |
|
| 220 |
+ |
|
| 221 |
+ // Personality contains configuration for the Linux personality syscall. |
|
| 222 |
+ Personality *LinuxPersonality `json:"personality,omitempty"` |
|
| 223 |
+ |
|
| 224 |
+ // IOPriority is the container's I/O priority. |
|
| 225 |
+ IOPriority *IOPriority `json:"io_priority,omitempty"` |
|
| 214 | 226 |
} |
| 215 | 227 |
|
| 228 |
+// Scheduler is based on the Linux sched_setattr(2) syscall. |
|
| 229 |
+type Scheduler = specs.Scheduler |
|
| 230 |
+ |
|
| 231 |
+// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr. |
|
| 232 |
+func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
|
|
| 233 |
+ var policy uint32 |
|
| 234 |
+ switch scheduler.Policy {
|
|
| 235 |
+ case specs.SchedOther: |
|
| 236 |
+ policy = 0 |
|
| 237 |
+ case specs.SchedFIFO: |
|
| 238 |
+ policy = 1 |
|
| 239 |
+ case specs.SchedRR: |
|
| 240 |
+ policy = 2 |
|
| 241 |
+ case specs.SchedBatch: |
|
| 242 |
+ policy = 3 |
|
| 243 |
+ case specs.SchedISO: |
|
| 244 |
+ policy = 4 |
|
| 245 |
+ case specs.SchedIdle: |
|
| 246 |
+ policy = 5 |
|
| 247 |
+ case specs.SchedDeadline: |
|
| 248 |
+ policy = 6 |
|
| 249 |
+ default: |
|
| 250 |
+ return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
|
|
| 251 |
+ } |
|
| 252 |
+ |
|
| 253 |
+ var flags uint64 |
|
| 254 |
+ for _, flag := range scheduler.Flags {
|
|
| 255 |
+ switch flag {
|
|
| 256 |
+ case specs.SchedFlagResetOnFork: |
|
| 257 |
+ flags |= 0x01 |
|
| 258 |
+ case specs.SchedFlagReclaim: |
|
| 259 |
+ flags |= 0x02 |
|
| 260 |
+ case specs.SchedFlagDLOverrun: |
|
| 261 |
+ flags |= 0x04 |
|
| 262 |
+ case specs.SchedFlagKeepPolicy: |
|
| 263 |
+ flags |= 0x08 |
|
| 264 |
+ case specs.SchedFlagKeepParams: |
|
| 265 |
+ flags |= 0x10 |
|
| 266 |
+ case specs.SchedFlagUtilClampMin: |
|
| 267 |
+ flags |= 0x20 |
|
| 268 |
+ case specs.SchedFlagUtilClampMax: |
|
| 269 |
+ flags |= 0x40 |
|
| 270 |
+ default: |
|
| 271 |
+ return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
|
|
| 272 |
+ } |
|
| 273 |
+ } |
|
| 274 |
+ |
|
| 275 |
+ return &unix.SchedAttr{
|
|
| 276 |
+ Size: unix.SizeofSchedAttr, |
|
| 277 |
+ Policy: policy, |
|
| 278 |
+ Flags: flags, |
|
| 279 |
+ Nice: scheduler.Nice, |
|
| 280 |
+ Priority: uint32(scheduler.Priority), |
|
| 281 |
+ Runtime: scheduler.Runtime, |
|
| 282 |
+ Deadline: scheduler.Deadline, |
|
| 283 |
+ Period: scheduler.Period, |
|
| 284 |
+ }, nil |
|
| 285 |
+} |
|
| 286 |
+ |
|
| 287 |
+var IOPrioClassMapping = map[specs.IOPriorityClass]int{
|
|
| 288 |
+ specs.IOPRIO_CLASS_RT: 1, |
|
| 289 |
+ specs.IOPRIO_CLASS_BE: 2, |
|
| 290 |
+ specs.IOPRIO_CLASS_IDLE: 3, |
|
| 291 |
+} |
|
| 292 |
+ |
|
| 293 |
+type IOPriority = specs.LinuxIOPriority |
|
| 294 |
+ |
|
| 216 | 295 |
type ( |
| 217 | 296 |
HookName string |
| 218 | 297 |
HookList []Hook |
| ... | ... |
@@ -277,6 +358,7 @@ type Capabilities struct {
|
| 277 | 277 |
Ambient []string |
| 278 | 278 |
} |
| 279 | 279 |
|
| 280 |
+// Deprecated: use (Hooks).Run instead. |
|
| 280 | 281 |
func (hooks HookList) RunHooks(state *specs.State) error {
|
| 281 | 282 |
for i, h := range hooks {
|
| 282 | 283 |
if err := h.Run(state); err != nil {
|
| ... | ... |
@@ -333,6 +415,18 @@ func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
| 333 | 333 |
}) |
| 334 | 334 |
} |
| 335 | 335 |
|
| 336 |
+// Run executes all hooks for the given hook name. |
|
| 337 |
+func (hooks Hooks) Run(name HookName, state *specs.State) error {
|
|
| 338 |
+ list := hooks[name] |
|
| 339 |
+ for i, h := range list {
|
|
| 340 |
+ if err := h.Run(state); err != nil {
|
|
| 341 |
+ return fmt.Errorf("error running %s hook #%d: %w", name, i, err)
|
|
| 342 |
+ } |
|
| 343 |
+ } |
|
| 344 |
+ |
|
| 345 |
+ return nil |
|
| 346 |
+} |
|
| 347 |
+ |
|
| 336 | 348 |
type Hook interface {
|
| 337 | 349 |
// Run executes the hook with the provided state. |
| 338 | 350 |
Run(*specs.State) error |
| ... | ... |
@@ -393,7 +487,7 @@ func (c Command) Run(s *specs.State) error {
|
| 393 | 393 |
go func() {
|
| 394 | 394 |
err := cmd.Wait() |
| 395 | 395 |
if err != nil {
|
| 396 |
- err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
|
| 396 |
+ err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
|
| 397 | 397 |
} |
| 398 | 398 |
errC <- err |
| 399 | 399 |
}() |
| ... | ... |
@@ -7,22 +7,33 @@ import ( |
| 7 | 7 |
) |
| 8 | 8 |
|
| 9 | 9 |
var ( |
| 10 |
- errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
|
|
| 11 |
- errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.")
|
|
| 12 |
- errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.")
|
|
| 13 |
- errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
|
|
| 10 |
+ errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found")
|
|
| 11 |
+ errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found")
|
|
| 14 | 12 |
) |
| 15 | 13 |
|
| 14 |
+// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details. |
|
| 15 |
+// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h |
|
| 16 |
+const ( |
|
| 17 |
+ PerLinux = 0x0000 |
|
| 18 |
+ PerLinux32 = 0x0008 |
|
| 19 |
+) |
|
| 20 |
+ |
|
| 21 |
+type LinuxPersonality struct {
|
|
| 22 |
+ // Domain for the personality |
|
| 23 |
+ // can only contain values "LINUX" and "LINUX32" |
|
| 24 |
+ Domain int `json:"domain"` |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 16 | 27 |
// HostUID gets the translated uid for the process on host which could be |
| 17 | 28 |
// different when user namespaces are enabled. |
| 18 | 29 |
func (c Config) HostUID(containerId int) (int, error) {
|
| 19 | 30 |
if c.Namespaces.Contains(NEWUSER) {
|
| 20 |
- if c.UidMappings == nil {
|
|
| 31 |
+ if len(c.UIDMappings) == 0 {
|
|
| 21 | 32 |
return -1, errNoUIDMap |
| 22 | 33 |
} |
| 23 |
- id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings) |
|
| 34 |
+ id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings) |
|
| 24 | 35 |
if !found {
|
| 25 |
- return -1, errNoUserMap |
|
| 36 |
+ return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId)
|
|
| 26 | 37 |
} |
| 27 | 38 |
// If we are a 32-bit binary running on a 64-bit system, it's possible |
| 28 | 39 |
// the mapped user is too large to store in an int, which means we |
| ... | ... |
@@ -47,12 +58,12 @@ func (c Config) HostRootUID() (int, error) {
|
| 47 | 47 |
// different when user namespaces are enabled. |
| 48 | 48 |
func (c Config) HostGID(containerId int) (int, error) {
|
| 49 | 49 |
if c.Namespaces.Contains(NEWUSER) {
|
| 50 |
- if c.GidMappings == nil {
|
|
| 50 |
+ if len(c.GIDMappings) == 0 {
|
|
| 51 | 51 |
return -1, errNoGIDMap |
| 52 | 52 |
} |
| 53 |
- id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings) |
|
| 53 |
+ id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings) |
|
| 54 | 54 |
if !found {
|
| 55 |
- return -1, errNoGroupMap |
|
| 55 |
+ return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId)
|
|
| 56 | 56 |
} |
| 57 | 57 |
// If we are a 32-bit binary running on a 64-bit system, it's possible |
| 58 | 58 |
// the mapped user is too large to store in an int, which means we |
| ... | ... |
@@ -1,48 +1,7 @@ |
| 1 | 1 |
package configs |
| 2 | 2 |
|
| 3 |
-import "golang.org/x/sys/unix" |
|
| 4 |
- |
|
| 5 | 3 |
const ( |
| 6 | 4 |
// EXT_COPYUP is a directive to copy up the contents of a directory when |
| 7 | 5 |
// a tmpfs is mounted over it. |
| 8 |
- EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning |
|
| 6 |
+ EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning |
|
| 9 | 7 |
) |
| 10 |
- |
|
| 11 |
-type Mount struct {
|
|
| 12 |
- // Source path for the mount. |
|
| 13 |
- Source string `json:"source"` |
|
| 14 |
- |
|
| 15 |
- // Destination path for the mount inside the container. |
|
| 16 |
- Destination string `json:"destination"` |
|
| 17 |
- |
|
| 18 |
- // Device the mount is for. |
|
| 19 |
- Device string `json:"device"` |
|
| 20 |
- |
|
| 21 |
- // Mount flags. |
|
| 22 |
- Flags int `json:"flags"` |
|
| 23 |
- |
|
| 24 |
- // Propagation Flags |
|
| 25 |
- PropagationFlags []int `json:"propagation_flags"` |
|
| 26 |
- |
|
| 27 |
- // Mount data applied to the mount. |
|
| 28 |
- Data string `json:"data"` |
|
| 29 |
- |
|
| 30 |
- // Relabel source if set, "z" indicates shared, "Z" indicates unshared. |
|
| 31 |
- Relabel string `json:"relabel"` |
|
| 32 |
- |
|
| 33 |
- // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). |
|
| 34 |
- RecAttr *unix.MountAttr `json:"rec_attr"` |
|
| 35 |
- |
|
| 36 |
- // Extensions are additional flags that are specific to runc. |
|
| 37 |
- Extensions int `json:"extensions"` |
|
| 38 |
- |
|
| 39 |
- // Optional Command to be run before Source is mounted. |
|
| 40 |
- PremountCmds []Command `json:"premount_cmds"` |
|
| 41 |
- |
|
| 42 |
- // Optional Command to be run after Source is mounted. |
|
| 43 |
- PostmountCmds []Command `json:"postmount_cmds"` |
|
| 44 |
-} |
|
| 45 |
- |
|
| 46 |
-func (m *Mount) IsBind() bool {
|
|
| 47 |
- return m.Flags&unix.MS_BIND != 0 |
|
| 48 |
-} |
| 49 | 8 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,66 @@ |
| 0 |
+package configs |
|
| 1 |
+ |
|
| 2 |
+import "golang.org/x/sys/unix" |
|
| 3 |
+ |
|
| 4 |
+type MountIDMapping struct {
|
|
| 5 |
+ // Recursive indicates if the mapping needs to be recursive. |
|
| 6 |
+ Recursive bool `json:"recursive"` |
|
| 7 |
+ |
|
| 8 |
+ // UserNSPath is a path to a user namespace that indicates the necessary |
|
| 9 |
+ // id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and |
|
| 10 |
+ // GIDMappings must be set to nil. |
|
| 11 |
+ UserNSPath string `json:"userns_path,omitempty"` |
|
| 12 |
+ |
|
| 13 |
+ // UIDMappings is the uid mapping set for this mount, to be used with |
|
| 14 |
+ // MOUNT_ATTR_IDMAP. |
|
| 15 |
+ UIDMappings []IDMap `json:"uid_mappings,omitempty"` |
|
| 16 |
+ |
|
| 17 |
+ // GIDMappings is the gid mapping set for this mount, to be used with |
|
| 18 |
+ // MOUNT_ATTR_IDMAP. |
|
| 19 |
+ GIDMappings []IDMap `json:"gid_mappings,omitempty"` |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+type Mount struct {
|
|
| 23 |
+ // Source path for the mount. |
|
| 24 |
+ Source string `json:"source"` |
|
| 25 |
+ |
|
| 26 |
+ // Destination path for the mount inside the container. |
|
| 27 |
+ Destination string `json:"destination"` |
|
| 28 |
+ |
|
| 29 |
+ // Device the mount is for. |
|
| 30 |
+ Device string `json:"device"` |
|
| 31 |
+ |
|
| 32 |
+ // Mount flags. |
|
| 33 |
+ Flags int `json:"flags"` |
|
| 34 |
+ |
|
| 35 |
+ // Mount flags that were explicitly cleared in the configuration (meaning |
|
| 36 |
+ // the user explicitly requested that these flags *not* be set). |
|
| 37 |
+ ClearedFlags int `json:"cleared_flags"` |
|
| 38 |
+ |
|
| 39 |
+ // Propagation Flags |
|
| 40 |
+ PropagationFlags []int `json:"propagation_flags"` |
|
| 41 |
+ |
|
| 42 |
+ // Mount data applied to the mount. |
|
| 43 |
+ Data string `json:"data"` |
|
| 44 |
+ |
|
| 45 |
+ // Relabel source if set, "z" indicates shared, "Z" indicates unshared. |
|
| 46 |
+ Relabel string `json:"relabel"` |
|
| 47 |
+ |
|
| 48 |
+ // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). |
|
| 49 |
+ RecAttr *unix.MountAttr `json:"rec_attr"` |
|
| 50 |
+ |
|
| 51 |
+ // Extensions are additional flags that are specific to runc. |
|
| 52 |
+ Extensions int `json:"extensions"` |
|
| 53 |
+ |
|
| 54 |
+ // Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil, |
|
| 55 |
+ // the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings. |
|
| 56 |
+ IDMapping *MountIDMapping `json:"id_mapping,omitempty"` |
|
| 57 |
+} |
|
| 58 |
+ |
|
| 59 |
+func (m *Mount) IsBind() bool {
|
|
| 60 |
+ return m.Flags&unix.MS_BIND != 0 |
|
| 61 |
+} |
|
| 62 |
+ |
|
| 63 |
+func (m *Mount) IsIDMapped() bool {
|
|
| 64 |
+ return m.IDMapping != nil |
|
| 65 |
+} |
| ... | ... |
@@ -14,6 +14,7 @@ const ( |
| 14 | 14 |
NEWIPC NamespaceType = "NEWIPC" |
| 15 | 15 |
NEWUSER NamespaceType = "NEWUSER" |
| 16 | 16 |
NEWCGROUP NamespaceType = "NEWCGROUP" |
| 17 |
+ NEWTIME NamespaceType = "NEWTIME" |
|
| 17 | 18 |
) |
| 18 | 19 |
|
| 19 | 20 |
var ( |
| ... | ... |
@@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string {
|
| 38 | 38 |
return "uts" |
| 39 | 39 |
case NEWCGROUP: |
| 40 | 40 |
return "cgroup" |
| 41 |
+ case NEWTIME: |
|
| 42 |
+ return "time" |
|
| 41 | 43 |
} |
| 42 | 44 |
return "" |
| 43 | 45 |
} |
| ... | ... |
@@ -56,6 +59,9 @@ func IsNamespaceSupported(ns NamespaceType) bool {
|
| 56 | 56 |
if nsFile == "" {
|
| 57 | 57 |
return false |
| 58 | 58 |
} |
| 59 |
+ // We don't need to use /proc/thread-self here because the list of |
|
| 60 |
+ // namespace types is unrelated to the thread. This lets us avoid having to |
|
| 61 |
+ // do runtime.LockOSThread. |
|
| 59 | 62 |
_, err := os.Stat("/proc/self/ns/" + nsFile)
|
| 60 | 63 |
// a namespace is supported if it exists and we have permissions to read it |
| 61 | 64 |
supported = err == nil |
| ... | ... |
@@ -72,6 +78,7 @@ func NamespaceTypes() []NamespaceType {
|
| 72 | 72 |
NEWPID, |
| 73 | 73 |
NEWNS, |
| 74 | 74 |
NEWCGROUP, |
| 75 |
+ NEWTIME, |
|
| 75 | 76 |
} |
| 76 | 77 |
} |
| 77 | 78 |
|
| ... | ... |
@@ -1,5 +1,4 @@ |
| 1 | 1 |
//go:build linux |
| 2 |
-// +build linux |
|
| 3 | 2 |
|
| 4 | 3 |
package configs |
| 5 | 4 |
|
| ... | ... |
@@ -17,6 +16,7 @@ var namespaceInfo = map[NamespaceType]int{
|
| 17 | 17 |
NEWUTS: unix.CLONE_NEWUTS, |
| 18 | 18 |
NEWPID: unix.CLONE_NEWPID, |
| 19 | 19 |
NEWCGROUP: unix.CLONE_NEWCGROUP, |
| 20 |
+ NEWTIME: unix.CLONE_NEWTIME, |
|
| 20 | 21 |
} |
| 21 | 22 |
|
| 22 | 23 |
// CloneFlags parses the container's Namespaces options to set the correct |
| ... | ... |
@@ -31,3 +31,15 @@ func (n *Namespaces) CloneFlags() uintptr {
|
| 31 | 31 |
} |
| 32 | 32 |
return uintptr(flag) |
| 33 | 33 |
} |
| 34 |
+ |
|
| 35 |
+// IsPrivate tells whether the namespace of type t is configured as private |
|
| 36 |
+// (i.e. it exists and is not shared). |
|
| 37 |
+func (n Namespaces) IsPrivate(t NamespaceType) bool {
|
|
| 38 |
+ for _, v := range n {
|
|
| 39 |
+ if v.Type == t {
|
|
| 40 |
+ return v.Path == "" |
|
| 41 |
+ } |
|
| 42 |
+ } |
|
| 43 |
+ // Not found, so implicitly sharing a parent namespace. |
|
| 44 |
+ return false |
|
| 45 |
+} |
| 6 | 5 |
deleted file mode 100644 |
| ... | ... |
@@ -1,145 +0,0 @@ |
| 1 |
-//go:build linux |
|
| 2 |
-// +build linux |
|
| 3 |
- |
|
| 4 |
-package system |
|
| 5 |
- |
|
| 6 |
-import ( |
|
| 7 |
- "os" |
|
| 8 |
- "os/exec" |
|
| 9 |
- "runtime" |
|
| 10 |
- "strings" |
|
| 11 |
- "unsafe" |
|
| 12 |
- |
|
| 13 |
- "golang.org/x/sys/unix" |
|
| 14 |
-) |
|
| 15 |
- |
|
| 16 |
-type ParentDeathSignal int |
|
| 17 |
- |
|
| 18 |
-func (p ParentDeathSignal) Restore() error {
|
|
| 19 |
- if p == 0 {
|
|
| 20 |
- return nil |
|
| 21 |
- } |
|
| 22 |
- current, err := GetParentDeathSignal() |
|
| 23 |
- if err != nil {
|
|
| 24 |
- return err |
|
| 25 |
- } |
|
| 26 |
- if p == current {
|
|
| 27 |
- return nil |
|
| 28 |
- } |
|
| 29 |
- return p.Set() |
|
| 30 |
-} |
|
| 31 |
- |
|
| 32 |
-func (p ParentDeathSignal) Set() error {
|
|
| 33 |
- return SetParentDeathSignal(uintptr(p)) |
|
| 34 |
-} |
|
| 35 |
- |
|
| 36 |
-// Deprecated: Execv is not used in runc anymore, it will be removed in v1.2.0. |
|
| 37 |
-func Execv(cmd string, args []string, env []string) error {
|
|
| 38 |
- name, err := exec.LookPath(cmd) |
|
| 39 |
- if err != nil {
|
|
| 40 |
- return err |
|
| 41 |
- } |
|
| 42 |
- return Exec(name, args, env) |
|
| 43 |
-} |
|
| 44 |
- |
|
| 45 |
-func Exec(cmd string, args []string, env []string) error {
|
|
| 46 |
- for {
|
|
| 47 |
- err := unix.Exec(cmd, args, env) |
|
| 48 |
- if err != unix.EINTR { //nolint:errorlint // unix errors are bare
|
|
| 49 |
- return &os.PathError{Op: "exec", Path: cmd, Err: err}
|
|
| 50 |
- } |
|
| 51 |
- } |
|
| 52 |
-} |
|
| 53 |
- |
|
| 54 |
-func SetParentDeathSignal(sig uintptr) error {
|
|
| 55 |
- if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
|
|
| 56 |
- return err |
|
| 57 |
- } |
|
| 58 |
- return nil |
|
| 59 |
-} |
|
| 60 |
- |
|
| 61 |
-func GetParentDeathSignal() (ParentDeathSignal, error) {
|
|
| 62 |
- var sig int |
|
| 63 |
- if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
|
|
| 64 |
- return -1, err |
|
| 65 |
- } |
|
| 66 |
- return ParentDeathSignal(sig), nil |
|
| 67 |
-} |
|
| 68 |
- |
|
| 69 |
-func SetKeepCaps() error {
|
|
| 70 |
- if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
|
|
| 71 |
- return err |
|
| 72 |
- } |
|
| 73 |
- |
|
| 74 |
- return nil |
|
| 75 |
-} |
|
| 76 |
- |
|
| 77 |
-func ClearKeepCaps() error {
|
|
| 78 |
- if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
|
|
| 79 |
- return err |
|
| 80 |
- } |
|
| 81 |
- |
|
| 82 |
- return nil |
|
| 83 |
-} |
|
| 84 |
- |
|
| 85 |
-func Setctty() error {
|
|
| 86 |
- if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
|
|
| 87 |
- return err |
|
| 88 |
- } |
|
| 89 |
- return nil |
|
| 90 |
-} |
|
| 91 |
- |
|
| 92 |
-// SetSubreaper sets the value i as the subreaper setting for the calling process |
|
| 93 |
-func SetSubreaper(i int) error {
|
|
| 94 |
- return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) |
|
| 95 |
-} |
|
| 96 |
- |
|
| 97 |
-// GetSubreaper returns the subreaper setting for the calling process |
|
| 98 |
-func GetSubreaper() (int, error) {
|
|
| 99 |
- var i uintptr |
|
| 100 |
- |
|
| 101 |
- if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
|
| 102 |
- return -1, err |
|
| 103 |
- } |
|
| 104 |
- |
|
| 105 |
- return int(i), nil |
|
| 106 |
-} |
|
| 107 |
- |
|
| 108 |
-func prepareAt(dir *os.File, path string) (int, string) {
|
|
| 109 |
- if dir == nil {
|
|
| 110 |
- return unix.AT_FDCWD, path |
|
| 111 |
- } |
|
| 112 |
- |
|
| 113 |
- // Rather than just filepath.Join-ing path here, do it manually so the |
|
| 114 |
- // error and handle correctly indicate cases like path=".." as being |
|
| 115 |
- // relative to the correct directory. The handle.Name() might end up being |
|
| 116 |
- // wrong but because this is (currently) only used in MkdirAllInRoot, that |
|
| 117 |
- // isn't a problem. |
|
| 118 |
- dirName := dir.Name() |
|
| 119 |
- if !strings.HasSuffix(dirName, "/") {
|
|
| 120 |
- dirName += "/" |
|
| 121 |
- } |
|
| 122 |
- fullPath := dirName + path |
|
| 123 |
- |
|
| 124 |
- return int(dir.Fd()), fullPath |
|
| 125 |
-} |
|
| 126 |
- |
|
| 127 |
-func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
|
| 128 |
- dirFd, fullPath := prepareAt(dir, path) |
|
| 129 |
- fd, err := unix.Openat(dirFd, path, flags, mode) |
|
| 130 |
- if err != nil {
|
|
| 131 |
- return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
|
|
| 132 |
- } |
|
| 133 |
- runtime.KeepAlive(dir) |
|
| 134 |
- return os.NewFile(uintptr(fd), fullPath), nil |
|
| 135 |
-} |
|
| 136 |
- |
|
| 137 |
-func Mkdirat(dir *os.File, path string, mode uint32) error {
|
|
| 138 |
- dirFd, fullPath := prepareAt(dir, path) |
|
| 139 |
- err := unix.Mkdirat(dirFd, path, mode) |
|
| 140 |
- if err != nil {
|
|
| 141 |
- err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
|
|
| 142 |
- } |
|
| 143 |
- runtime.KeepAlive(dir) |
|
| 144 |
- return err |
|
| 145 |
-} |
| 146 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,127 +0,0 @@ |
| 1 |
-package system |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- "os" |
|
| 6 |
- "path/filepath" |
|
| 7 |
- "strconv" |
|
| 8 |
- "strings" |
|
| 9 |
-) |
|
| 10 |
- |
|
| 11 |
-// State is the status of a process. |
|
| 12 |
-type State rune |
|
| 13 |
- |
|
| 14 |
-const ( // Only values for Linux 3.14 and later are listed here |
|
| 15 |
- Dead State = 'X' |
|
| 16 |
- DiskSleep State = 'D' |
|
| 17 |
- Running State = 'R' |
|
| 18 |
- Sleeping State = 'S' |
|
| 19 |
- Stopped State = 'T' |
|
| 20 |
- TracingStop State = 't' |
|
| 21 |
- Zombie State = 'Z' |
|
| 22 |
- Parked State = 'P' |
|
| 23 |
- Idle State = 'I' |
|
| 24 |
-) |
|
| 25 |
- |
|
| 26 |
-// String forms of the state from proc(5)'s documentation for |
|
| 27 |
-// /proc/[pid]/status' "State" field. |
|
| 28 |
-func (s State) String() string {
|
|
| 29 |
- switch s {
|
|
| 30 |
- case Dead: |
|
| 31 |
- return "dead" |
|
| 32 |
- case DiskSleep: |
|
| 33 |
- return "disk sleep" |
|
| 34 |
- case Running: |
|
| 35 |
- return "running" |
|
| 36 |
- case Sleeping: |
|
| 37 |
- return "sleeping" |
|
| 38 |
- case Stopped: |
|
| 39 |
- return "stopped" |
|
| 40 |
- case TracingStop: |
|
| 41 |
- return "tracing stop" |
|
| 42 |
- case Zombie: |
|
| 43 |
- return "zombie" |
|
| 44 |
- case Parked: |
|
| 45 |
- return "parked" |
|
| 46 |
- case Idle: |
|
| 47 |
- return "idle" // kernel thread |
|
| 48 |
- default: |
|
| 49 |
- return fmt.Sprintf("unknown (%c)", s)
|
|
| 50 |
- } |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-// Stat_t represents the information from /proc/[pid]/stat, as |
|
| 54 |
-// described in proc(5) with names based on the /proc/[pid]/status |
|
| 55 |
-// fields. |
|
| 56 |
-type Stat_t struct {
|
|
| 57 |
- // Name is the command run by the process. |
|
| 58 |
- Name string |
|
| 59 |
- |
|
| 60 |
- // State is the state of the process. |
|
| 61 |
- State State |
|
| 62 |
- |
|
| 63 |
- // StartTime is the number of clock ticks after system boot (since |
|
| 64 |
- // Linux 2.6). |
|
| 65 |
- StartTime uint64 |
|
| 66 |
-} |
|
| 67 |
- |
|
| 68 |
-// Stat returns a Stat_t instance for the specified process. |
|
| 69 |
-func Stat(pid int) (stat Stat_t, err error) {
|
|
| 70 |
- bytes, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
|
|
| 71 |
- if err != nil {
|
|
| 72 |
- return stat, err |
|
| 73 |
- } |
|
| 74 |
- return parseStat(string(bytes)) |
|
| 75 |
-} |
|
| 76 |
- |
|
| 77 |
-func parseStat(data string) (stat Stat_t, err error) {
|
|
| 78 |
- // Example: |
|
| 79 |
- // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 |
|
| 80 |
- // The fields are space-separated, see full description in proc(5). |
|
| 81 |
- // |
|
| 82 |
- // We are only interested in: |
|
| 83 |
- // * field 2: process name. It is the only field enclosed into |
|
| 84 |
- // parenthesis, as it can contain spaces (and parenthesis) inside. |
|
| 85 |
- // * field 3: process state, a single character (%c) |
|
| 86 |
- // * field 22: process start time, a long unsigned integer (%llu). |
|
| 87 |
- |
|
| 88 |
- // 1. Look for the first '(' and the last ')' first, what's in between is Name.
|
|
| 89 |
- // We expect at least 20 fields and a space after the last one. |
|
| 90 |
- |
|
| 91 |
- const minAfterName = 20*2 + 1 // the min field is '0 '. |
|
| 92 |
- |
|
| 93 |
- first := strings.IndexByte(data, '(')
|
|
| 94 |
- if first < 0 || first+minAfterName >= len(data) {
|
|
| 95 |
- return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
|
|
| 96 |
- } |
|
| 97 |
- |
|
| 98 |
- last := strings.LastIndexByte(data, ')') |
|
| 99 |
- if last <= first || last+minAfterName >= len(data) {
|
|
| 100 |
- return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data)
|
|
| 101 |
- } |
|
| 102 |
- |
|
| 103 |
- stat.Name = data[first+1 : last] |
|
| 104 |
- |
|
| 105 |
- // 2. Remove fields 1 and 2 and a space after. State is right after. |
|
| 106 |
- data = data[last+2:] |
|
| 107 |
- stat.State = State(data[0]) |
|
| 108 |
- |
|
| 109 |
- // 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces. |
|
| 110 |
- skipSpaces := 22 - 3 |
|
| 111 |
- for first = 0; skipSpaces > 0 && first < len(data); first++ {
|
|
| 112 |
- if data[first] == ' ' {
|
|
| 113 |
- skipSpaces-- |
|
| 114 |
- } |
|
| 115 |
- } |
|
| 116 |
- // Now first points to StartTime; look for space right after. |
|
| 117 |
- i := strings.IndexByte(data[first:], ' ') |
|
| 118 |
- if i < 0 {
|
|
| 119 |
- return stat, fmt.Errorf("invalid stat data (too short): %q", data)
|
|
| 120 |
- } |
|
| 121 |
- stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64) |
|
| 122 |
- if err != nil {
|
|
| 123 |
- return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
|
|
| 124 |
- } |
|
| 125 |
- |
|
| 126 |
- return stat, nil |
|
| 127 |
-} |
| 128 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,15 +0,0 @@ |
| 1 |
-//go:build go1.23 |
|
| 2 |
- |
|
| 3 |
-package system |
|
| 4 |
- |
|
| 5 |
-import ( |
|
| 6 |
- "syscall" |
|
| 7 |
-) |
|
| 8 |
- |
|
| 9 |
-// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. The argument |
|
| 10 |
-// is process RLIMIT_NOFILE values. Relies on go.dev/cl/588076. |
|
| 11 |
-func ClearRlimitNofileCache(lim *syscall.Rlimit) {
|
|
| 12 |
- // Ignore the return values since we only need to clean the cache, |
|
| 13 |
- // the limit is going to be set via unix.Prlimit elsewhere. |
|
| 14 |
- _ = syscall.Setrlimit(syscall.RLIMIT_NOFILE, lim) |
|
| 15 |
-} |
| 16 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,27 +0,0 @@ |
| 1 |
-//go:build go1.19 && !go1.23 |
|
| 2 |
- |
|
| 3 |
-// TODO: remove this file once go 1.22 is no longer supported. |
|
| 4 |
- |
|
| 5 |
-package system |
|
| 6 |
- |
|
| 7 |
-import ( |
|
| 8 |
- "sync/atomic" |
|
| 9 |
- "syscall" |
|
| 10 |
- _ "unsafe" // Needed for go:linkname to work. |
|
| 11 |
-) |
|
| 12 |
- |
|
| 13 |
-//go:linkname syscallOrigRlimitNofile syscall.origRlimitNofile |
|
| 14 |
-var syscallOrigRlimitNofile atomic.Pointer[syscall.Rlimit] |
|
| 15 |
- |
|
| 16 |
-// ClearRlimitNofileCache clears go runtime's nofile rlimit cache. |
|
| 17 |
-// The argument is process RLIMIT_NOFILE values. |
|
| 18 |
-func ClearRlimitNofileCache(_ *syscall.Rlimit) {
|
|
| 19 |
- // As reported in issue #4195, the new version of go runtime(since 1.19) |
|
| 20 |
- // will cache rlimit-nofile. Before executing execve, the rlimit-nofile |
|
| 21 |
- // of the process will be restored with the cache. In runc, this will |
|
| 22 |
- // cause the rlimit-nofile setting by the parent process for the container |
|
| 23 |
- // to become invalid. It can be solved by clearing this cache. But |
|
| 24 |
- // unfortunately, go stdlib doesn't provide such function, so we need to |
|
| 25 |
- // link to the private var `origRlimitNofile` in package syscall to hack. |
|
| 26 |
- syscallOrigRlimitNofile.Store(nil) |
|
| 27 |
-} |
| 8 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,27 +0,0 @@ |
| 1 |
-//go:build linux && (386 || arm) |
|
| 2 |
-// +build linux |
|
| 3 |
-// +build 386 arm |
|
| 4 |
- |
|
| 5 |
-package system |
|
| 6 |
- |
|
| 7 |
-import ( |
|
| 8 |
- "golang.org/x/sys/unix" |
|
| 9 |
-) |
|
| 10 |
- |
|
| 11 |
-// Setuid sets the uid of the calling thread to the specified uid. |
|
| 12 |
-func Setuid(uid int) (err error) {
|
|
| 13 |
- _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) |
|
| 14 |
- if e1 != 0 {
|
|
| 15 |
- err = e1 |
|
| 16 |
- } |
|
| 17 |
- return |
|
| 18 |
-} |
|
| 19 |
- |
|
| 20 |
-// Setgid sets the gid of the calling thread to the specified gid. |
|
| 21 |
-func Setgid(gid int) (err error) {
|
|
| 22 |
- _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) |
|
| 23 |
- if e1 != 0 {
|
|
| 24 |
- err = e1 |
|
| 25 |
- } |
|
| 26 |
- return |
|
| 27 |
-} |
| 28 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,27 +0,0 @@ |
| 1 |
-//go:build linux && (arm64 || amd64 || mips || mipsle || mips64 || mips64le || ppc || ppc64 || ppc64le || riscv64 || s390x) |
|
| 2 |
-// +build linux |
|
| 3 |
-// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x |
|
| 4 |
- |
|
| 5 |
-package system |
|
| 6 |
- |
|
| 7 |
-import ( |
|
| 8 |
- "golang.org/x/sys/unix" |
|
| 9 |
-) |
|
| 10 |
- |
|
| 11 |
-// Setuid sets the uid of the calling thread to the specified uid. |
|
| 12 |
-func Setuid(uid int) (err error) {
|
|
| 13 |
- _, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0) |
|
| 14 |
- if e1 != 0 {
|
|
| 15 |
- err = e1 |
|
| 16 |
- } |
|
| 17 |
- return |
|
| 18 |
-} |
|
| 19 |
- |
|
| 20 |
-// Setgid sets the gid of the calling thread to the specified gid. |
|
| 21 |
-func Setgid(gid int) (err error) {
|
|
| 22 |
- _, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0) |
|
| 23 |
- if e1 != 0 {
|
|
| 24 |
- err = e1 |
|
| 25 |
- } |
|
| 26 |
- return |
|
| 27 |
-} |
| 28 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,157 +0,0 @@ |
| 1 |
-//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris |
|
| 2 |
-// +build darwin dragonfly freebsd linux netbsd openbsd solaris |
|
| 3 |
- |
|
| 4 |
-package user |
|
| 5 |
- |
|
| 6 |
-import ( |
|
| 7 |
- "io" |
|
| 8 |
- "os" |
|
| 9 |
- "strconv" |
|
| 10 |
- |
|
| 11 |
- "golang.org/x/sys/unix" |
|
| 12 |
-) |
|
| 13 |
- |
|
| 14 |
-// Unix-specific path to the passwd and group formatted files. |
|
| 15 |
-const ( |
|
| 16 |
- unixPasswdPath = "/etc/passwd" |
|
| 17 |
- unixGroupPath = "/etc/group" |
|
| 18 |
-) |
|
| 19 |
- |
|
| 20 |
-// LookupUser looks up a user by their username in /etc/passwd. If the user |
|
| 21 |
-// cannot be found (or there is no /etc/passwd file on the filesystem), then |
|
| 22 |
-// LookupUser returns an error. |
|
| 23 |
-func LookupUser(username string) (User, error) {
|
|
| 24 |
- return lookupUserFunc(func(u User) bool {
|
|
| 25 |
- return u.Name == username |
|
| 26 |
- }) |
|
| 27 |
-} |
|
| 28 |
- |
|
| 29 |
-// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot |
|
| 30 |
-// be found (or there is no /etc/passwd file on the filesystem), then LookupId |
|
| 31 |
-// returns an error. |
|
| 32 |
-func LookupUid(uid int) (User, error) {
|
|
| 33 |
- return lookupUserFunc(func(u User) bool {
|
|
| 34 |
- return u.Uid == uid |
|
| 35 |
- }) |
|
| 36 |
-} |
|
| 37 |
- |
|
| 38 |
-func lookupUserFunc(filter func(u User) bool) (User, error) {
|
|
| 39 |
- // Get operating system-specific passwd reader-closer. |
|
| 40 |
- passwd, err := GetPasswd() |
|
| 41 |
- if err != nil {
|
|
| 42 |
- return User{}, err
|
|
| 43 |
- } |
|
| 44 |
- defer passwd.Close() |
|
| 45 |
- |
|
| 46 |
- // Get the users. |
|
| 47 |
- users, err := ParsePasswdFilter(passwd, filter) |
|
| 48 |
- if err != nil {
|
|
| 49 |
- return User{}, err
|
|
| 50 |
- } |
|
| 51 |
- |
|
| 52 |
- // No user entries found. |
|
| 53 |
- if len(users) == 0 {
|
|
| 54 |
- return User{}, ErrNoPasswdEntries
|
|
| 55 |
- } |
|
| 56 |
- |
|
| 57 |
- // Assume the first entry is the "correct" one. |
|
| 58 |
- return users[0], nil |
|
| 59 |
-} |
|
| 60 |
- |
|
| 61 |
-// LookupGroup looks up a group by its name in /etc/group. If the group cannot |
|
| 62 |
-// be found (or there is no /etc/group file on the filesystem), then LookupGroup |
|
| 63 |
-// returns an error. |
|
| 64 |
-func LookupGroup(groupname string) (Group, error) {
|
|
| 65 |
- return lookupGroupFunc(func(g Group) bool {
|
|
| 66 |
- return g.Name == groupname |
|
| 67 |
- }) |
|
| 68 |
-} |
|
| 69 |
- |
|
| 70 |
-// LookupGid looks up a group by its group id in /etc/group. If the group cannot |
|
| 71 |
-// be found (or there is no /etc/group file on the filesystem), then LookupGid |
|
| 72 |
-// returns an error. |
|
| 73 |
-func LookupGid(gid int) (Group, error) {
|
|
| 74 |
- return lookupGroupFunc(func(g Group) bool {
|
|
| 75 |
- return g.Gid == gid |
|
| 76 |
- }) |
|
| 77 |
-} |
|
| 78 |
- |
|
| 79 |
-func lookupGroupFunc(filter func(g Group) bool) (Group, error) {
|
|
| 80 |
- // Get operating system-specific group reader-closer. |
|
| 81 |
- group, err := GetGroup() |
|
| 82 |
- if err != nil {
|
|
| 83 |
- return Group{}, err
|
|
| 84 |
- } |
|
| 85 |
- defer group.Close() |
|
| 86 |
- |
|
| 87 |
- // Get the users. |
|
| 88 |
- groups, err := ParseGroupFilter(group, filter) |
|
| 89 |
- if err != nil {
|
|
| 90 |
- return Group{}, err
|
|
| 91 |
- } |
|
| 92 |
- |
|
| 93 |
- // No user entries found. |
|
| 94 |
- if len(groups) == 0 {
|
|
| 95 |
- return Group{}, ErrNoGroupEntries
|
|
| 96 |
- } |
|
| 97 |
- |
|
| 98 |
- // Assume the first entry is the "correct" one. |
|
| 99 |
- return groups[0], nil |
|
| 100 |
-} |
|
| 101 |
- |
|
| 102 |
-func GetPasswdPath() (string, error) {
|
|
| 103 |
- return unixPasswdPath, nil |
|
| 104 |
-} |
|
| 105 |
- |
|
| 106 |
-func GetPasswd() (io.ReadCloser, error) {
|
|
| 107 |
- return os.Open(unixPasswdPath) |
|
| 108 |
-} |
|
| 109 |
- |
|
| 110 |
-func GetGroupPath() (string, error) {
|
|
| 111 |
- return unixGroupPath, nil |
|
| 112 |
-} |
|
| 113 |
- |
|
| 114 |
-func GetGroup() (io.ReadCloser, error) {
|
|
| 115 |
- return os.Open(unixGroupPath) |
|
| 116 |
-} |
|
| 117 |
- |
|
| 118 |
-// CurrentUser looks up the current user by their user id in /etc/passwd. If the |
|
| 119 |
-// user cannot be found (or there is no /etc/passwd file on the filesystem), |
|
| 120 |
-// then CurrentUser returns an error. |
|
| 121 |
-func CurrentUser() (User, error) {
|
|
| 122 |
- return LookupUid(unix.Getuid()) |
|
| 123 |
-} |
|
| 124 |
- |
|
| 125 |
-// CurrentGroup looks up the current user's group by their primary group id's |
|
| 126 |
-// entry in /etc/passwd. If the group cannot be found (or there is no |
|
| 127 |
-// /etc/group file on the filesystem), then CurrentGroup returns an error. |
|
| 128 |
-func CurrentGroup() (Group, error) {
|
|
| 129 |
- return LookupGid(unix.Getgid()) |
|
| 130 |
-} |
|
| 131 |
- |
|
| 132 |
-func currentUserSubIDs(fileName string) ([]SubID, error) {
|
|
| 133 |
- u, err := CurrentUser() |
|
| 134 |
- if err != nil {
|
|
| 135 |
- return nil, err |
|
| 136 |
- } |
|
| 137 |
- filter := func(entry SubID) bool {
|
|
| 138 |
- return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid) |
|
| 139 |
- } |
|
| 140 |
- return ParseSubIDFileFilter(fileName, filter) |
|
| 141 |
-} |
|
| 142 |
- |
|
| 143 |
-func CurrentUserSubUIDs() ([]SubID, error) {
|
|
| 144 |
- return currentUserSubIDs("/etc/subuid")
|
|
| 145 |
-} |
|
| 146 |
- |
|
| 147 |
-func CurrentUserSubGIDs() ([]SubID, error) {
|
|
| 148 |
- return currentUserSubIDs("/etc/subgid")
|
|
| 149 |
-} |
|
| 150 |
- |
|
| 151 |
-func CurrentProcessUIDMap() ([]IDMap, error) {
|
|
| 152 |
- return ParseIDMapFile("/proc/self/uid_map")
|
|
| 153 |
-} |
|
| 154 |
- |
|
| 155 |
-func CurrentProcessGIDMap() ([]IDMap, error) {
|
|
| 156 |
- return ParseIDMapFile("/proc/self/gid_map")
|
|
| 157 |
-} |
| 158 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,604 +0,0 @@ |
| 1 |
-package user |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "bufio" |
|
| 5 |
- "bytes" |
|
| 6 |
- "errors" |
|
| 7 |
- "fmt" |
|
| 8 |
- "io" |
|
| 9 |
- "os" |
|
| 10 |
- "strconv" |
|
| 11 |
- "strings" |
|
| 12 |
-) |
|
| 13 |
- |
|
| 14 |
-const ( |
|
| 15 |
- minID = 0 |
|
| 16 |
- maxID = 1<<31 - 1 // for 32-bit systems compatibility |
|
| 17 |
-) |
|
| 18 |
- |
|
| 19 |
-var ( |
|
| 20 |
- // ErrNoPasswdEntries is returned if no matching entries were found in /etc/group. |
|
| 21 |
- ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
|
| 22 |
- // ErrNoGroupEntries is returned if no matching entries were found in /etc/passwd. |
|
| 23 |
- ErrNoGroupEntries = errors.New("no matching entries in group file")
|
|
| 24 |
- // ErrRange is returned if a UID or GID is outside of the valid range. |
|
| 25 |
- ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minID, maxID)
|
|
| 26 |
-) |
|
| 27 |
- |
|
| 28 |
-type User struct {
|
|
| 29 |
- Name string |
|
| 30 |
- Pass string |
|
| 31 |
- Uid int |
|
| 32 |
- Gid int |
|
| 33 |
- Gecos string |
|
| 34 |
- Home string |
|
| 35 |
- Shell string |
|
| 36 |
-} |
|
| 37 |
- |
|
| 38 |
-type Group struct {
|
|
| 39 |
- Name string |
|
| 40 |
- Pass string |
|
| 41 |
- Gid int |
|
| 42 |
- List []string |
|
| 43 |
-} |
|
| 44 |
- |
|
| 45 |
-// SubID represents an entry in /etc/sub{u,g}id
|
|
| 46 |
-type SubID struct {
|
|
| 47 |
- Name string |
|
| 48 |
- SubID int64 |
|
| 49 |
- Count int64 |
|
| 50 |
-} |
|
| 51 |
- |
|
| 52 |
-// IDMap represents an entry in /proc/PID/{u,g}id_map
|
|
| 53 |
-type IDMap struct {
|
|
| 54 |
- ID int64 |
|
| 55 |
- ParentID int64 |
|
| 56 |
- Count int64 |
|
| 57 |
-} |
|
| 58 |
- |
|
| 59 |
-func parseLine(line []byte, v ...interface{}) {
|
|
| 60 |
- parseParts(bytes.Split(line, []byte(":")), v...)
|
|
| 61 |
-} |
|
| 62 |
- |
|
| 63 |
-func parseParts(parts [][]byte, v ...interface{}) {
|
|
| 64 |
- if len(parts) == 0 {
|
|
| 65 |
- return |
|
| 66 |
- } |
|
| 67 |
- |
|
| 68 |
- for i, p := range parts {
|
|
| 69 |
- // Ignore cases where we don't have enough fields to populate the arguments. |
|
| 70 |
- // Some configuration files like to misbehave. |
|
| 71 |
- if len(v) <= i {
|
|
| 72 |
- break |
|
| 73 |
- } |
|
| 74 |
- |
|
| 75 |
- // Use the type of the argument to figure out how to parse it, scanf() style. |
|
| 76 |
- // This is legit. |
|
| 77 |
- switch e := v[i].(type) {
|
|
| 78 |
- case *string: |
|
| 79 |
- *e = string(p) |
|
| 80 |
- case *int: |
|
| 81 |
- // "numbers", with conversion errors ignored because of some misbehaving configuration files. |
|
| 82 |
- *e, _ = strconv.Atoi(string(p)) |
|
| 83 |
- case *int64: |
|
| 84 |
- *e, _ = strconv.ParseInt(string(p), 10, 64) |
|
| 85 |
- case *[]string: |
|
| 86 |
- // Comma-separated lists. |
|
| 87 |
- if len(p) != 0 {
|
|
| 88 |
- *e = strings.Split(string(p), ",") |
|
| 89 |
- } else {
|
|
| 90 |
- *e = []string{}
|
|
| 91 |
- } |
|
| 92 |
- default: |
|
| 93 |
- // Someone goof'd when writing code using this function. Scream so they can hear us. |
|
| 94 |
- panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e))
|
|
| 95 |
- } |
|
| 96 |
- } |
|
| 97 |
-} |
|
| 98 |
- |
|
| 99 |
-func ParsePasswdFile(path string) ([]User, error) {
|
|
| 100 |
- passwd, err := os.Open(path) |
|
| 101 |
- if err != nil {
|
|
| 102 |
- return nil, err |
|
| 103 |
- } |
|
| 104 |
- defer passwd.Close() |
|
| 105 |
- return ParsePasswd(passwd) |
|
| 106 |
-} |
|
| 107 |
- |
|
| 108 |
-func ParsePasswd(passwd io.Reader) ([]User, error) {
|
|
| 109 |
- return ParsePasswdFilter(passwd, nil) |
|
| 110 |
-} |
|
| 111 |
- |
|
| 112 |
-func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
|
|
| 113 |
- passwd, err := os.Open(path) |
|
| 114 |
- if err != nil {
|
|
| 115 |
- return nil, err |
|
| 116 |
- } |
|
| 117 |
- defer passwd.Close() |
|
| 118 |
- return ParsePasswdFilter(passwd, filter) |
|
| 119 |
-} |
|
| 120 |
- |
|
| 121 |
-func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
|
|
| 122 |
- if r == nil {
|
|
| 123 |
- return nil, errors.New("nil source for passwd-formatted data")
|
|
| 124 |
- } |
|
| 125 |
- |
|
| 126 |
- var ( |
|
| 127 |
- s = bufio.NewScanner(r) |
|
| 128 |
- out = []User{}
|
|
| 129 |
- ) |
|
| 130 |
- |
|
| 131 |
- for s.Scan() {
|
|
| 132 |
- line := bytes.TrimSpace(s.Bytes()) |
|
| 133 |
- if len(line) == 0 {
|
|
| 134 |
- continue |
|
| 135 |
- } |
|
| 136 |
- |
|
| 137 |
- // see: man 5 passwd |
|
| 138 |
- // name:password:UID:GID:GECOS:directory:shell |
|
| 139 |
- // Name:Pass:Uid:Gid:Gecos:Home:Shell |
|
| 140 |
- // root:x:0:0:root:/root:/bin/bash |
|
| 141 |
- // adm:x:3:4:adm:/var/adm:/bin/false |
|
| 142 |
- p := User{}
|
|
| 143 |
- parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell) |
|
| 144 |
- |
|
| 145 |
- if filter == nil || filter(p) {
|
|
| 146 |
- out = append(out, p) |
|
| 147 |
- } |
|
| 148 |
- } |
|
| 149 |
- if err := s.Err(); err != nil {
|
|
| 150 |
- return nil, err |
|
| 151 |
- } |
|
| 152 |
- |
|
| 153 |
- return out, nil |
|
| 154 |
-} |
|
| 155 |
- |
|
| 156 |
-func ParseGroupFile(path string) ([]Group, error) {
|
|
| 157 |
- group, err := os.Open(path) |
|
| 158 |
- if err != nil {
|
|
| 159 |
- return nil, err |
|
| 160 |
- } |
|
| 161 |
- |
|
| 162 |
- defer group.Close() |
|
| 163 |
- return ParseGroup(group) |
|
| 164 |
-} |
|
| 165 |
- |
|
| 166 |
-func ParseGroup(group io.Reader) ([]Group, error) {
|
|
| 167 |
- return ParseGroupFilter(group, nil) |
|
| 168 |
-} |
|
| 169 |
- |
|
| 170 |
-func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
|
|
| 171 |
- group, err := os.Open(path) |
|
| 172 |
- if err != nil {
|
|
| 173 |
- return nil, err |
|
| 174 |
- } |
|
| 175 |
- defer group.Close() |
|
| 176 |
- return ParseGroupFilter(group, filter) |
|
| 177 |
-} |
|
| 178 |
- |
|
| 179 |
-func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
|
|
| 180 |
- if r == nil {
|
|
| 181 |
- return nil, errors.New("nil source for group-formatted data")
|
|
| 182 |
- } |
|
| 183 |
- rd := bufio.NewReader(r) |
|
| 184 |
- out := []Group{}
|
|
| 185 |
- |
|
| 186 |
- // Read the file line-by-line. |
|
| 187 |
- for {
|
|
| 188 |
- var ( |
|
| 189 |
- isPrefix bool |
|
| 190 |
- wholeLine []byte |
|
| 191 |
- err error |
|
| 192 |
- ) |
|
| 193 |
- |
|
| 194 |
- // Read the next line. We do so in chunks (as much as reader's |
|
| 195 |
- // buffer is able to keep), check if we read enough columns |
|
| 196 |
- // already on each step and store final result in wholeLine. |
|
| 197 |
- for {
|
|
| 198 |
- var line []byte |
|
| 199 |
- line, isPrefix, err = rd.ReadLine() |
|
| 200 |
- if err != nil {
|
|
| 201 |
- // We should return no error if EOF is reached |
|
| 202 |
- // without a match. |
|
| 203 |
- if err == io.EOF {
|
|
| 204 |
- err = nil |
|
| 205 |
- } |
|
| 206 |
- return out, err |
|
| 207 |
- } |
|
| 208 |
- |
|
| 209 |
- // Simple common case: line is short enough to fit in a |
|
| 210 |
- // single reader's buffer. |
|
| 211 |
- if !isPrefix && len(wholeLine) == 0 {
|
|
| 212 |
- wholeLine = line |
|
| 213 |
- break |
|
| 214 |
- } |
|
| 215 |
- |
|
| 216 |
- wholeLine = append(wholeLine, line...) |
|
| 217 |
- |
|
| 218 |
- // Check if we read the whole line already. |
|
| 219 |
- if !isPrefix {
|
|
| 220 |
- break |
|
| 221 |
- } |
|
| 222 |
- } |
|
| 223 |
- |
|
| 224 |
- // There's no spec for /etc/passwd or /etc/group, but we try to follow |
|
| 225 |
- // the same rules as the glibc parser, which allows comments and blank |
|
| 226 |
- // space at the beginning of a line. |
|
| 227 |
- wholeLine = bytes.TrimSpace(wholeLine) |
|
| 228 |
- if len(wholeLine) == 0 || wholeLine[0] == '#' {
|
|
| 229 |
- continue |
|
| 230 |
- } |
|
| 231 |
- |
|
| 232 |
- // see: man 5 group |
|
| 233 |
- // group_name:password:GID:user_list |
|
| 234 |
- // Name:Pass:Gid:List |
|
| 235 |
- // root:x:0:root |
|
| 236 |
- // adm:x:4:root,adm,daemon |
|
| 237 |
- p := Group{}
|
|
| 238 |
- parseLine(wholeLine, &p.Name, &p.Pass, &p.Gid, &p.List) |
|
| 239 |
- |
|
| 240 |
- if filter == nil || filter(p) {
|
|
| 241 |
- out = append(out, p) |
|
| 242 |
- } |
|
| 243 |
- } |
|
| 244 |
-} |
|
| 245 |
- |
|
| 246 |
-type ExecUser struct {
|
|
| 247 |
- Uid int |
|
| 248 |
- Gid int |
|
| 249 |
- Sgids []int |
|
| 250 |
- Home string |
|
| 251 |
-} |
|
| 252 |
- |
|
| 253 |
-// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the |
|
| 254 |
-// given file paths and uses that data as the arguments to GetExecUser. If the |
|
| 255 |
-// files cannot be opened for any reason, the error is ignored and a nil |
|
| 256 |
-// io.Reader is passed instead. |
|
| 257 |
-func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
|
|
| 258 |
- var passwd, group io.Reader |
|
| 259 |
- |
|
| 260 |
- if passwdFile, err := os.Open(passwdPath); err == nil {
|
|
| 261 |
- passwd = passwdFile |
|
| 262 |
- defer passwdFile.Close() |
|
| 263 |
- } |
|
| 264 |
- |
|
| 265 |
- if groupFile, err := os.Open(groupPath); err == nil {
|
|
| 266 |
- group = groupFile |
|
| 267 |
- defer groupFile.Close() |
|
| 268 |
- } |
|
| 269 |
- |
|
| 270 |
- return GetExecUser(userSpec, defaults, passwd, group) |
|
| 271 |
-} |
|
| 272 |
- |
|
| 273 |
-// GetExecUser parses a user specification string (using the passwd and group |
|
| 274 |
-// readers as sources for /etc/passwd and /etc/group data, respectively). In |
|
| 275 |
-// the case of blank fields or missing data from the sources, the values in |
|
| 276 |
-// defaults is used. |
|
| 277 |
-// |
|
| 278 |
-// GetExecUser will return an error if a user or group literal could not be |
|
| 279 |
-// found in any entry in passwd and group respectively. |
|
| 280 |
-// |
|
| 281 |
-// Examples of valid user specifications are: |
|
| 282 |
-// - "" |
|
| 283 |
-// - "user" |
|
| 284 |
-// - "uid" |
|
| 285 |
-// - "user:group" |
|
| 286 |
-// - "uid:gid |
|
| 287 |
-// - "user:gid" |
|
| 288 |
-// - "uid:group" |
|
| 289 |
-// |
|
| 290 |
-// It should be noted that if you specify a numeric user or group id, they will |
|
| 291 |
-// not be evaluated as usernames (only the metadata will be filled). So attempting |
|
| 292 |
-// to parse a user with user.Name = "1337" will produce the user with a UID of |
|
| 293 |
-// 1337. |
|
| 294 |
-func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
|
|
| 295 |
- if defaults == nil {
|
|
| 296 |
- defaults = new(ExecUser) |
|
| 297 |
- } |
|
| 298 |
- |
|
| 299 |
- // Copy over defaults. |
|
| 300 |
- user := &ExecUser{
|
|
| 301 |
- Uid: defaults.Uid, |
|
| 302 |
- Gid: defaults.Gid, |
|
| 303 |
- Sgids: defaults.Sgids, |
|
| 304 |
- Home: defaults.Home, |
|
| 305 |
- } |
|
| 306 |
- |
|
| 307 |
- // Sgids slice *cannot* be nil. |
|
| 308 |
- if user.Sgids == nil {
|
|
| 309 |
- user.Sgids = []int{}
|
|
| 310 |
- } |
|
| 311 |
- |
|
| 312 |
- // Allow for userArg to have either "user" syntax, or optionally "user:group" syntax |
|
| 313 |
- var userArg, groupArg string |
|
| 314 |
- parseLine([]byte(userSpec), &userArg, &groupArg) |
|
| 315 |
- |
|
| 316 |
- // Convert userArg and groupArg to be numeric, so we don't have to execute |
|
| 317 |
- // Atoi *twice* for each iteration over lines. |
|
| 318 |
- uidArg, uidErr := strconv.Atoi(userArg) |
|
| 319 |
- gidArg, gidErr := strconv.Atoi(groupArg) |
|
| 320 |
- |
|
| 321 |
- // Find the matching user. |
|
| 322 |
- users, err := ParsePasswdFilter(passwd, func(u User) bool {
|
|
| 323 |
- if userArg == "" {
|
|
| 324 |
- // Default to current state of the user. |
|
| 325 |
- return u.Uid == user.Uid |
|
| 326 |
- } |
|
| 327 |
- |
|
| 328 |
- if uidErr == nil {
|
|
| 329 |
- // If the userArg is numeric, always treat it as a UID. |
|
| 330 |
- return uidArg == u.Uid |
|
| 331 |
- } |
|
| 332 |
- |
|
| 333 |
- return u.Name == userArg |
|
| 334 |
- }) |
|
| 335 |
- |
|
| 336 |
- // If we can't find the user, we have to bail. |
|
| 337 |
- if err != nil && passwd != nil {
|
|
| 338 |
- if userArg == "" {
|
|
| 339 |
- userArg = strconv.Itoa(user.Uid) |
|
| 340 |
- } |
|
| 341 |
- return nil, fmt.Errorf("unable to find user %s: %w", userArg, err)
|
|
| 342 |
- } |
|
| 343 |
- |
|
| 344 |
- var matchedUserName string |
|
| 345 |
- if len(users) > 0 {
|
|
| 346 |
- // First match wins, even if there's more than one matching entry. |
|
| 347 |
- matchedUserName = users[0].Name |
|
| 348 |
- user.Uid = users[0].Uid |
|
| 349 |
- user.Gid = users[0].Gid |
|
| 350 |
- user.Home = users[0].Home |
|
| 351 |
- } else if userArg != "" {
|
|
| 352 |
- // If we can't find a user with the given username, the only other valid |
|
| 353 |
- // option is if it's a numeric username with no associated entry in passwd. |
|
| 354 |
- |
|
| 355 |
- if uidErr != nil {
|
|
| 356 |
- // Not numeric. |
|
| 357 |
- return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries)
|
|
| 358 |
- } |
|
| 359 |
- user.Uid = uidArg |
|
| 360 |
- |
|
| 361 |
- // Must be inside valid uid range. |
|
| 362 |
- if user.Uid < minID || user.Uid > maxID {
|
|
| 363 |
- return nil, ErrRange |
|
| 364 |
- } |
|
| 365 |
- |
|
| 366 |
- // Okay, so it's numeric. We can just roll with this. |
|
| 367 |
- } |
|
| 368 |
- |
|
| 369 |
- // On to the groups. If we matched a username, we need to do this because of |
|
| 370 |
- // the supplementary group IDs. |
|
| 371 |
- if groupArg != "" || matchedUserName != "" {
|
|
| 372 |
- groups, err := ParseGroupFilter(group, func(g Group) bool {
|
|
| 373 |
- // If the group argument isn't explicit, we'll just search for it. |
|
| 374 |
- if groupArg == "" {
|
|
| 375 |
- // Check if user is a member of this group. |
|
| 376 |
- for _, u := range g.List {
|
|
| 377 |
- if u == matchedUserName {
|
|
| 378 |
- return true |
|
| 379 |
- } |
|
| 380 |
- } |
|
| 381 |
- return false |
|
| 382 |
- } |
|
| 383 |
- |
|
| 384 |
- if gidErr == nil {
|
|
| 385 |
- // If the groupArg is numeric, always treat it as a GID. |
|
| 386 |
- return gidArg == g.Gid |
|
| 387 |
- } |
|
| 388 |
- |
|
| 389 |
- return g.Name == groupArg |
|
| 390 |
- }) |
|
| 391 |
- if err != nil && group != nil {
|
|
| 392 |
- return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err)
|
|
| 393 |
- } |
|
| 394 |
- |
|
| 395 |
- // Only start modifying user.Gid if it is in explicit form. |
|
| 396 |
- if groupArg != "" {
|
|
| 397 |
- if len(groups) > 0 {
|
|
| 398 |
- // First match wins, even if there's more than one matching entry. |
|
| 399 |
- user.Gid = groups[0].Gid |
|
| 400 |
- } else {
|
|
| 401 |
- // If we can't find a group with the given name, the only other valid |
|
| 402 |
- // option is if it's a numeric group name with no associated entry in group. |
|
| 403 |
- |
|
| 404 |
- if gidErr != nil {
|
|
| 405 |
- // Not numeric. |
|
| 406 |
- return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries)
|
|
| 407 |
- } |
|
| 408 |
- user.Gid = gidArg |
|
| 409 |
- |
|
| 410 |
- // Must be inside valid gid range. |
|
| 411 |
- if user.Gid < minID || user.Gid > maxID {
|
|
| 412 |
- return nil, ErrRange |
|
| 413 |
- } |
|
| 414 |
- |
|
| 415 |
- // Okay, so it's numeric. We can just roll with this. |
|
| 416 |
- } |
|
| 417 |
- } else if len(groups) > 0 {
|
|
| 418 |
- // Supplementary group ids only make sense if in the implicit form. |
|
| 419 |
- user.Sgids = make([]int, len(groups)) |
|
| 420 |
- for i, group := range groups {
|
|
| 421 |
- user.Sgids[i] = group.Gid |
|
| 422 |
- } |
|
| 423 |
- } |
|
| 424 |
- } |
|
| 425 |
- |
|
| 426 |
- return user, nil |
|
| 427 |
-} |
|
| 428 |
- |
|
| 429 |
-// GetAdditionalGroups looks up a list of groups by name or group id |
|
| 430 |
-// against the given /etc/group formatted data. If a group name cannot |
|
| 431 |
-// be found, an error will be returned. If a group id cannot be found, |
|
| 432 |
-// or the given group data is nil, the id will be returned as-is |
|
| 433 |
-// provided it is in the legal range. |
|
| 434 |
-func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) {
|
|
| 435 |
- groups := []Group{}
|
|
| 436 |
- if group != nil {
|
|
| 437 |
- var err error |
|
| 438 |
- groups, err = ParseGroupFilter(group, func(g Group) bool {
|
|
| 439 |
- for _, ag := range additionalGroups {
|
|
| 440 |
- if g.Name == ag || strconv.Itoa(g.Gid) == ag {
|
|
| 441 |
- return true |
|
| 442 |
- } |
|
| 443 |
- } |
|
| 444 |
- return false |
|
| 445 |
- }) |
|
| 446 |
- if err != nil {
|
|
| 447 |
- return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err)
|
|
| 448 |
- } |
|
| 449 |
- } |
|
| 450 |
- |
|
| 451 |
- gidMap := make(map[int]struct{})
|
|
| 452 |
- for _, ag := range additionalGroups {
|
|
| 453 |
- var found bool |
|
| 454 |
- for _, g := range groups {
|
|
| 455 |
- // if we found a matched group either by name or gid, take the |
|
| 456 |
- // first matched as correct |
|
| 457 |
- if g.Name == ag || strconv.Itoa(g.Gid) == ag {
|
|
| 458 |
- if _, ok := gidMap[g.Gid]; !ok {
|
|
| 459 |
- gidMap[g.Gid] = struct{}{}
|
|
| 460 |
- found = true |
|
| 461 |
- break |
|
| 462 |
- } |
|
| 463 |
- } |
|
| 464 |
- } |
|
| 465 |
- // we asked for a group but didn't find it. let's check to see |
|
| 466 |
- // if we wanted a numeric group |
|
| 467 |
- if !found {
|
|
| 468 |
- gid, err := strconv.ParseInt(ag, 10, 64) |
|
| 469 |
- if err != nil {
|
|
| 470 |
- // Not a numeric ID either. |
|
| 471 |
- return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries)
|
|
| 472 |
- } |
|
| 473 |
- // Ensure gid is inside gid range. |
|
| 474 |
- if gid < minID || gid > maxID {
|
|
| 475 |
- return nil, ErrRange |
|
| 476 |
- } |
|
| 477 |
- gidMap[int(gid)] = struct{}{}
|
|
| 478 |
- } |
|
| 479 |
- } |
|
| 480 |
- gids := []int{}
|
|
| 481 |
- for gid := range gidMap {
|
|
| 482 |
- gids = append(gids, gid) |
|
| 483 |
- } |
|
| 484 |
- return gids, nil |
|
| 485 |
-} |
|
| 486 |
- |
|
| 487 |
-// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups |
|
| 488 |
-// that opens the groupPath given and gives it as an argument to |
|
| 489 |
-// GetAdditionalGroups. |
|
| 490 |
-func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
|
|
| 491 |
- var group io.Reader |
|
| 492 |
- |
|
| 493 |
- if groupFile, err := os.Open(groupPath); err == nil {
|
|
| 494 |
- group = groupFile |
|
| 495 |
- defer groupFile.Close() |
|
| 496 |
- } |
|
| 497 |
- return GetAdditionalGroups(additionalGroups, group) |
|
| 498 |
-} |
|
| 499 |
- |
|
| 500 |
-func ParseSubIDFile(path string) ([]SubID, error) {
|
|
| 501 |
- subid, err := os.Open(path) |
|
| 502 |
- if err != nil {
|
|
| 503 |
- return nil, err |
|
| 504 |
- } |
|
| 505 |
- defer subid.Close() |
|
| 506 |
- return ParseSubID(subid) |
|
| 507 |
-} |
|
| 508 |
- |
|
| 509 |
-func ParseSubID(subid io.Reader) ([]SubID, error) {
|
|
| 510 |
- return ParseSubIDFilter(subid, nil) |
|
| 511 |
-} |
|
| 512 |
- |
|
| 513 |
-func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) {
|
|
| 514 |
- subid, err := os.Open(path) |
|
| 515 |
- if err != nil {
|
|
| 516 |
- return nil, err |
|
| 517 |
- } |
|
| 518 |
- defer subid.Close() |
|
| 519 |
- return ParseSubIDFilter(subid, filter) |
|
| 520 |
-} |
|
| 521 |
- |
|
| 522 |
-func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
|
|
| 523 |
- if r == nil {
|
|
| 524 |
- return nil, errors.New("nil source for subid-formatted data")
|
|
| 525 |
- } |
|
| 526 |
- |
|
| 527 |
- var ( |
|
| 528 |
- s = bufio.NewScanner(r) |
|
| 529 |
- out = []SubID{}
|
|
| 530 |
- ) |
|
| 531 |
- |
|
| 532 |
- for s.Scan() {
|
|
| 533 |
- line := bytes.TrimSpace(s.Bytes()) |
|
| 534 |
- if len(line) == 0 {
|
|
| 535 |
- continue |
|
| 536 |
- } |
|
| 537 |
- |
|
| 538 |
- // see: man 5 subuid |
|
| 539 |
- p := SubID{}
|
|
| 540 |
- parseLine(line, &p.Name, &p.SubID, &p.Count) |
|
| 541 |
- |
|
| 542 |
- if filter == nil || filter(p) {
|
|
| 543 |
- out = append(out, p) |
|
| 544 |
- } |
|
| 545 |
- } |
|
| 546 |
- if err := s.Err(); err != nil {
|
|
| 547 |
- return nil, err |
|
| 548 |
- } |
|
| 549 |
- |
|
| 550 |
- return out, nil |
|
| 551 |
-} |
|
| 552 |
- |
|
| 553 |
-func ParseIDMapFile(path string) ([]IDMap, error) {
|
|
| 554 |
- r, err := os.Open(path) |
|
| 555 |
- if err != nil {
|
|
| 556 |
- return nil, err |
|
| 557 |
- } |
|
| 558 |
- defer r.Close() |
|
| 559 |
- return ParseIDMap(r) |
|
| 560 |
-} |
|
| 561 |
- |
|
| 562 |
-func ParseIDMap(r io.Reader) ([]IDMap, error) {
|
|
| 563 |
- return ParseIDMapFilter(r, nil) |
|
| 564 |
-} |
|
| 565 |
- |
|
| 566 |
-func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) {
|
|
| 567 |
- r, err := os.Open(path) |
|
| 568 |
- if err != nil {
|
|
| 569 |
- return nil, err |
|
| 570 |
- } |
|
| 571 |
- defer r.Close() |
|
| 572 |
- return ParseIDMapFilter(r, filter) |
|
| 573 |
-} |
|
| 574 |
- |
|
| 575 |
-func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
|
|
| 576 |
- if r == nil {
|
|
| 577 |
- return nil, errors.New("nil source for idmap-formatted data")
|
|
| 578 |
- } |
|
| 579 |
- |
|
| 580 |
- var ( |
|
| 581 |
- s = bufio.NewScanner(r) |
|
| 582 |
- out = []IDMap{}
|
|
| 583 |
- ) |
|
| 584 |
- |
|
| 585 |
- for s.Scan() {
|
|
| 586 |
- line := bytes.TrimSpace(s.Bytes()) |
|
| 587 |
- if len(line) == 0 {
|
|
| 588 |
- continue |
|
| 589 |
- } |
|
| 590 |
- |
|
| 591 |
- // see: man 7 user_namespaces |
|
| 592 |
- p := IDMap{}
|
|
| 593 |
- parseParts(bytes.Fields(line), &p.ID, &p.ParentID, &p.Count) |
|
| 594 |
- |
|
| 595 |
- if filter == nil || filter(p) {
|
|
| 596 |
- out = append(out, p) |
|
| 597 |
- } |
|
| 598 |
- } |
|
| 599 |
- if err := s.Err(); err != nil {
|
|
| 600 |
- return nil, err |
|
| 601 |
- } |
|
| 602 |
- |
|
| 603 |
- return out, nil |
|
| 604 |
-} |
| 605 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,43 +0,0 @@ |
| 1 |
-//go:build gofuzz |
|
| 2 |
-// +build gofuzz |
|
| 3 |
- |
|
| 4 |
-package user |
|
| 5 |
- |
|
| 6 |
-import ( |
|
| 7 |
- "io" |
|
| 8 |
- "strings" |
|
| 9 |
-) |
|
| 10 |
- |
|
| 11 |
-func IsDivisbleBy(n int, divisibleby int) bool {
|
|
| 12 |
- return (n % divisibleby) == 0 |
|
| 13 |
-} |
|
| 14 |
- |
|
| 15 |
-func FuzzUser(data []byte) int {
|
|
| 16 |
- if len(data) == 0 {
|
|
| 17 |
- return -1 |
|
| 18 |
- } |
|
| 19 |
- if !IsDivisbleBy(len(data), 5) {
|
|
| 20 |
- return -1 |
|
| 21 |
- } |
|
| 22 |
- |
|
| 23 |
- var divided [][]byte |
|
| 24 |
- |
|
| 25 |
- chunkSize := len(data) / 5 |
|
| 26 |
- |
|
| 27 |
- for i := 0; i < len(data); i += chunkSize {
|
|
| 28 |
- end := i + chunkSize |
|
| 29 |
- |
|
| 30 |
- divided = append(divided, data[i:end]) |
|
| 31 |
- } |
|
| 32 |
- |
|
| 33 |
- _, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil) |
|
| 34 |
- |
|
| 35 |
- var passwd, group io.Reader |
|
| 36 |
- |
|
| 37 |
- group = strings.NewReader(string(divided[1])) |
|
| 38 |
- _, _ = GetAdditionalGroups([]string{string(divided[2])}, group)
|
|
| 39 |
- |
|
| 40 |
- passwd = strings.NewReader(string(divided[3])) |
|
| 41 |
- _, _ = GetExecUser(string(divided[4]), nil, passwd, group) |
|
| 42 |
- return 1 |
|
| 43 |
-} |
| 6 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,16 +0,0 @@ |
| 1 |
-//go:build gofuzz |
|
| 2 |
-// +build gofuzz |
|
| 3 |
- |
|
| 4 |
-package userns |
|
| 5 |
- |
|
| 6 |
-import ( |
|
| 7 |
- "strings" |
|
| 8 |
- |
|
| 9 |
- "github.com/opencontainers/runc/libcontainer/user" |
|
| 10 |
-) |
|
| 11 |
- |
|
| 12 |
-func FuzzUIDMap(data []byte) int {
|
|
| 13 |
- uidmap, _ := user.ParseIDMap(strings.NewReader(string(data))) |
|
| 14 |
- _ = uidMapInUserNS(uidmap) |
|
| 15 |
- return 1 |
|
| 16 |
-} |
| 17 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,37 +0,0 @@ |
| 1 |
-package userns |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "sync" |
|
| 5 |
- |
|
| 6 |
- "github.com/opencontainers/runc/libcontainer/user" |
|
| 7 |
-) |
|
| 8 |
- |
|
| 9 |
-var ( |
|
| 10 |
- inUserNS bool |
|
| 11 |
- nsOnce sync.Once |
|
| 12 |
-) |
|
| 13 |
- |
|
| 14 |
-// runningInUserNS detects whether we are currently running in a user namespace. |
|
| 15 |
-// Originally copied from github.com/lxc/lxd/shared/util.go |
|
| 16 |
-func runningInUserNS() bool {
|
|
| 17 |
- nsOnce.Do(func() {
|
|
| 18 |
- uidmap, err := user.CurrentProcessUIDMap() |
|
| 19 |
- if err != nil {
|
|
| 20 |
- // This kernel-provided file only exists if user namespaces are supported |
|
| 21 |
- return |
|
| 22 |
- } |
|
| 23 |
- inUserNS = uidMapInUserNS(uidmap) |
|
| 24 |
- }) |
|
| 25 |
- return inUserNS |
|
| 26 |
-} |
|
| 27 |
- |
|
| 28 |
-func uidMapInUserNS(uidmap []user.IDMap) bool {
|
|
| 29 |
- /* |
|
| 30 |
- * We assume we are in the initial user namespace if we have a full |
|
| 31 |
- * range - 4294967295 uids starting at uid 0. |
|
| 32 |
- */ |
|
| 33 |
- if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
|
|
| 34 |
- return false |
|
| 35 |
- } |
|
| 36 |
- return true |
|
| 37 |
-} |
| 38 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,79 +0,0 @@ |
| 1 |
-#define _GNU_SOURCE |
|
| 2 |
-#include <fcntl.h> |
|
| 3 |
-#include <sched.h> |
|
| 4 |
-#include <stdio.h> |
|
| 5 |
-#include <unistd.h> |
|
| 6 |
-#include <stdarg.h> |
|
| 7 |
-#include <stdlib.h> |
|
| 8 |
- |
|
| 9 |
-/* |
|
| 10 |
- * All of the code here is run inside an aync-signal-safe context, so we need |
|
| 11 |
- * to be careful to not call any functions that could cause issues. In theory, |
|
| 12 |
- * since we are a Go program, there are fewer restrictions in practice, it's |
|
| 13 |
- * better to be safe than sorry. |
|
| 14 |
- * |
|
| 15 |
- * The only exception is exit, which we need to call to make sure we don't |
|
| 16 |
- * return into runc. |
|
| 17 |
- */ |
|
| 18 |
- |
|
| 19 |
-void bail(int pipefd, const char *fmt, ...) |
|
| 20 |
-{
|
|
| 21 |
- va_list args; |
|
| 22 |
- |
|
| 23 |
- va_start(args, fmt); |
|
| 24 |
- vdprintf(pipefd, fmt, args); |
|
| 25 |
- va_end(args); |
|
| 26 |
- |
|
| 27 |
- exit(1); |
|
| 28 |
-} |
|
| 29 |
- |
|
| 30 |
-int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd) |
|
| 31 |
-{
|
|
| 32 |
- char buffer[4096] = { 0 };
|
|
| 33 |
- |
|
| 34 |
- pid_t child = fork(); |
|
| 35 |
- if (child != 0) |
|
| 36 |
- return child; |
|
| 37 |
- /* in child */ |
|
| 38 |
- |
|
| 39 |
- /* Join the target userns. */ |
|
| 40 |
- int nsfd = open(userns_path, O_RDONLY); |
|
| 41 |
- if (nsfd < 0) |
|
| 42 |
- bail(errfd, "open userns path %s failed: %m", userns_path); |
|
| 43 |
- |
|
| 44 |
- int err = setns(nsfd, CLONE_NEWUSER); |
|
| 45 |
- if (err < 0) |
|
| 46 |
- bail(errfd, "setns %s failed: %m", userns_path); |
|
| 47 |
- |
|
| 48 |
- close(nsfd); |
|
| 49 |
- |
|
| 50 |
- /* Pipe the requested file contents. */ |
|
| 51 |
- int fd = open(path, O_RDONLY); |
|
| 52 |
- if (fd < 0) |
|
| 53 |
- bail(errfd, "open %s in userns %s failed: %m", path, userns_path); |
|
| 54 |
- |
|
| 55 |
- int nread, ntotal = 0; |
|
| 56 |
- while ((nread = read(fd, buffer, sizeof(buffer))) != 0) {
|
|
| 57 |
- if (nread < 0) |
|
| 58 |
- bail(errfd, "read bytes from %s failed (after %d total bytes read): %m", path, ntotal); |
|
| 59 |
- ntotal += nread; |
|
| 60 |
- |
|
| 61 |
- int nwritten = 0; |
|
| 62 |
- while (nwritten < nread) {
|
|
| 63 |
- int n = write(outfd, buffer, nread - nwritten); |
|
| 64 |
- if (n < 0) |
|
| 65 |
- bail(errfd, "write %d bytes from %s failed (after %d bytes written): %m", |
|
| 66 |
- nread - nwritten, path, nwritten); |
|
| 67 |
- nwritten += n; |
|
| 68 |
- } |
|
| 69 |
- if (nread != nwritten) |
|
| 70 |
- bail(errfd, "mismatch for bytes read and written: %d read != %d written", nread, nwritten); |
|
| 71 |
- } |
|
| 72 |
- |
|
| 73 |
- close(fd); |
|
| 74 |
- close(outfd); |
|
| 75 |
- close(errfd); |
|
| 76 |
- |
|
| 77 |
- /* We must exit here, otherwise we would return into a forked runc. */ |
|
| 78 |
- exit(0); |
|
| 79 |
-} |
| 80 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,186 +0,0 @@ |
| 1 |
-//go:build linux |
|
| 2 |
- |
|
| 3 |
-package userns |
|
| 4 |
- |
|
| 5 |
-import ( |
|
| 6 |
- "bufio" |
|
| 7 |
- "bytes" |
|
| 8 |
- "fmt" |
|
| 9 |
- "io" |
|
| 10 |
- "os" |
|
| 11 |
- "unsafe" |
|
| 12 |
- |
|
| 13 |
- "github.com/opencontainers/runc/libcontainer/configs" |
|
| 14 |
- "github.com/sirupsen/logrus" |
|
| 15 |
-) |
|
| 16 |
- |
|
| 17 |
-/* |
|
| 18 |
-#include <stdlib.h> |
|
| 19 |
-extern int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd); |
|
| 20 |
-*/ |
|
| 21 |
-import "C" |
|
| 22 |
- |
|
| 23 |
-func parseIdmapData(data []byte) (ms []configs.IDMap, err error) {
|
|
| 24 |
- scanner := bufio.NewScanner(bytes.NewReader(data)) |
|
| 25 |
- for scanner.Scan() {
|
|
| 26 |
- var m configs.IDMap |
|
| 27 |
- line := scanner.Text() |
|
| 28 |
- if _, err := fmt.Sscanf(line, "%d %d %d", &m.ContainerID, &m.HostID, &m.Size); err != nil {
|
|
| 29 |
- return nil, fmt.Errorf("parsing id map failed: invalid format in line %q: %w", line, err)
|
|
| 30 |
- } |
|
| 31 |
- ms = append(ms, m) |
|
| 32 |
- } |
|
| 33 |
- if err := scanner.Err(); err != nil {
|
|
| 34 |
- return nil, fmt.Errorf("parsing id map failed: %w", err)
|
|
| 35 |
- } |
|
| 36 |
- return ms, nil |
|
| 37 |
-} |
|
| 38 |
- |
|
| 39 |
-// Do something equivalent to nsenter --user=<nsPath> cat <path>, but more |
|
| 40 |
-// efficiently. Returns the contents of the requested file from within the user |
|
| 41 |
-// namespace. |
|
| 42 |
-func spawnUserNamespaceCat(nsPath string, path string) ([]byte, error) {
|
|
| 43 |
- rdr, wtr, err := os.Pipe() |
|
| 44 |
- if err != nil {
|
|
| 45 |
- return nil, fmt.Errorf("create pipe for userns spawn failed: %w", err)
|
|
| 46 |
- } |
|
| 47 |
- defer rdr.Close() |
|
| 48 |
- defer wtr.Close() |
|
| 49 |
- |
|
| 50 |
- errRdr, errWtr, err := os.Pipe() |
|
| 51 |
- if err != nil {
|
|
| 52 |
- return nil, fmt.Errorf("create error pipe for userns spawn failed: %w", err)
|
|
| 53 |
- } |
|
| 54 |
- defer errRdr.Close() |
|
| 55 |
- defer errWtr.Close() |
|
| 56 |
- |
|
| 57 |
- cNsPath := C.CString(nsPath) |
|
| 58 |
- defer C.free(unsafe.Pointer(cNsPath)) |
|
| 59 |
- cPath := C.CString(path) |
|
| 60 |
- defer C.free(unsafe.Pointer(cPath)) |
|
| 61 |
- |
|
| 62 |
- childPid := C.spawn_userns_cat(cNsPath, cPath, C.int(wtr.Fd()), C.int(errWtr.Fd())) |
|
| 63 |
- |
|
| 64 |
- if childPid < 0 {
|
|
| 65 |
- return nil, fmt.Errorf("failed to spawn fork for userns")
|
|
| 66 |
- } else if childPid == 0 {
|
|
| 67 |
- // this should never happen |
|
| 68 |
- panic("runc executing inside fork child -- unsafe state!")
|
|
| 69 |
- } |
|
| 70 |
- |
|
| 71 |
- // We are in the parent -- close the write end of the pipe before reading. |
|
| 72 |
- wtr.Close() |
|
| 73 |
- output, err := io.ReadAll(rdr) |
|
| 74 |
- rdr.Close() |
|
| 75 |
- if err != nil {
|
|
| 76 |
- return nil, fmt.Errorf("reading from userns spawn failed: %w", err)
|
|
| 77 |
- } |
|
| 78 |
- |
|
| 79 |
- // Ditto for the error pipe. |
|
| 80 |
- errWtr.Close() |
|
| 81 |
- errOutput, err := io.ReadAll(errRdr) |
|
| 82 |
- errRdr.Close() |
|
| 83 |
- if err != nil {
|
|
| 84 |
- return nil, fmt.Errorf("reading from userns spawn error pipe failed: %w", err)
|
|
| 85 |
- } |
|
| 86 |
- errOutput = bytes.TrimSpace(errOutput) |
|
| 87 |
- |
|
| 88 |
- // Clean up the child. |
|
| 89 |
- child, err := os.FindProcess(int(childPid)) |
|
| 90 |
- if err != nil {
|
|
| 91 |
- return nil, fmt.Errorf("could not find userns spawn process: %w", err)
|
|
| 92 |
- } |
|
| 93 |
- state, err := child.Wait() |
|
| 94 |
- if err != nil {
|
|
| 95 |
- return nil, fmt.Errorf("failed to wait for userns spawn process: %w", err)
|
|
| 96 |
- } |
|
| 97 |
- if !state.Success() {
|
|
| 98 |
- errStr := string(errOutput) |
|
| 99 |
- if errStr == "" {
|
|
| 100 |
- errStr = fmt.Sprintf("unknown error (status code %d)", state.ExitCode())
|
|
| 101 |
- } |
|
| 102 |
- return nil, fmt.Errorf("userns spawn: %s", errStr)
|
|
| 103 |
- } else if len(errOutput) > 0 {
|
|
| 104 |
- // We can just ignore weird output in the error pipe if the process |
|
| 105 |
- // didn't bail(), but for completeness output for debugging. |
|
| 106 |
- logrus.Debugf("userns spawn succeeded but unexpected error message found: %s", string(errOutput))
|
|
| 107 |
- } |
|
| 108 |
- // The subprocess succeeded, return whatever it wrote to the pipe. |
|
| 109 |
- return output, nil |
|
| 110 |
-} |
|
| 111 |
- |
|
| 112 |
-func GetUserNamespaceMappings(nsPath string) (uidMap, gidMap []configs.IDMap, err error) {
|
|
| 113 |
- var ( |
|
| 114 |
- pid int |
|
| 115 |
- extra rune |
|
| 116 |
- tryFastPath bool |
|
| 117 |
- ) |
|
| 118 |
- |
|
| 119 |
- // nsPath is usually of the form /proc/<pid>/ns/user, which means that we |
|
| 120 |
- // already have a pid that is part of the user namespace and thus we can |
|
| 121 |
- // just use the pid to read from /proc/<pid>/*id_map. |
|
| 122 |
- // |
|
| 123 |
- // Note that Sscanf doesn't consume the whole input, so we check for any |
|
| 124 |
- // trailing data with %c. That way, we can be sure the pattern matched |
|
| 125 |
- // /proc/$pid/ns/user _exactly_ iff n === 1. |
|
| 126 |
- if n, _ := fmt.Sscanf(nsPath, "/proc/%d/ns/user%c", &pid, &extra); n == 1 {
|
|
| 127 |
- tryFastPath = pid > 0 |
|
| 128 |
- } |
|
| 129 |
- |
|
| 130 |
- for _, mapType := range []struct {
|
|
| 131 |
- name string |
|
| 132 |
- idMap *[]configs.IDMap |
|
| 133 |
- }{
|
|
| 134 |
- {"uid_map", &uidMap},
|
|
| 135 |
- {"gid_map", &gidMap},
|
|
| 136 |
- } {
|
|
| 137 |
- var mapData []byte |
|
| 138 |
- |
|
| 139 |
- if tryFastPath {
|
|
| 140 |
- path := fmt.Sprintf("/proc/%d/%s", pid, mapType.name)
|
|
| 141 |
- data, err := os.ReadFile(path) |
|
| 142 |
- if err != nil {
|
|
| 143 |
- // Do not error out here -- we need to try the slow path if the |
|
| 144 |
- // fast path failed. |
|
| 145 |
- logrus.Debugf("failed to use fast path to read %s from userns %s (error: %s), falling back to slow userns-join path", mapType.name, nsPath, err)
|
|
| 146 |
- } else {
|
|
| 147 |
- mapData = data |
|
| 148 |
- } |
|
| 149 |
- } else {
|
|
| 150 |
- logrus.Debugf("cannot use fast path to read %s from userns %s, falling back to slow userns-join path", mapType.name, nsPath)
|
|
| 151 |
- } |
|
| 152 |
- |
|
| 153 |
- if mapData == nil {
|
|
| 154 |
- // We have to actually join the namespace if we cannot take the |
|
| 155 |
- // fast path. The path is resolved with respect to the child |
|
| 156 |
- // process, so just use /proc/self. |
|
| 157 |
- data, err := spawnUserNamespaceCat(nsPath, "/proc/self/"+mapType.name) |
|
| 158 |
- if err != nil {
|
|
| 159 |
- return nil, nil, err |
|
| 160 |
- } |
|
| 161 |
- mapData = data |
|
| 162 |
- } |
|
| 163 |
- idMap, err := parseIdmapData(mapData) |
|
| 164 |
- if err != nil {
|
|
| 165 |
- return nil, nil, fmt.Errorf("failed to parse %s of userns %s: %w", mapType.name, nsPath, err)
|
|
| 166 |
- } |
|
| 167 |
- *mapType.idMap = idMap |
|
| 168 |
- } |
|
| 169 |
- |
|
| 170 |
- return uidMap, gidMap, nil |
|
| 171 |
-} |
|
| 172 |
- |
|
| 173 |
-// IsSameMapping returns whether or not the two id mappings are the same. Note |
|
| 174 |
-// that if the order of the mappings is different, or a mapping has been split, |
|
| 175 |
-// the mappings will be considered different. |
|
| 176 |
-func IsSameMapping(a, b []configs.IDMap) bool {
|
|
| 177 |
- if len(a) != len(b) {
|
|
| 178 |
- return false |
|
| 179 |
- } |
|
| 180 |
- for idx := range a {
|
|
| 181 |
- if a[idx] != b[idx] {
|
|
| 182 |
- return false |
|
| 183 |
- } |
|
| 184 |
- } |
|
| 185 |
- return true |
|
| 186 |
-} |
| 187 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,18 +0,0 @@ |
| 1 |
-//go:build !linux |
|
| 2 |
-// +build !linux |
|
| 3 |
- |
|
| 4 |
-package userns |
|
| 5 |
- |
|
| 6 |
-import "github.com/opencontainers/runc/libcontainer/user" |
|
| 7 |
- |
|
| 8 |
-// runningInUserNS is a stub for non-Linux systems |
|
| 9 |
-// Always returns false |
|
| 10 |
-func runningInUserNS() bool {
|
|
| 11 |
- return false |
|
| 12 |
-} |
|
| 13 |
- |
|
| 14 |
-// uidMapInUserNS is a stub for non-Linux systems |
|
| 15 |
-// Always returns false |
|
| 16 |
-func uidMapInUserNS(uidmap []user.IDMap) bool {
|
|
| 17 |
- return false |
|
| 18 |
-} |
| ... | ... |
@@ -19,13 +19,14 @@ package utils |
| 19 | 19 |
import ( |
| 20 | 20 |
"fmt" |
| 21 | 21 |
"os" |
| 22 |
+ "runtime" |
|
| 22 | 23 |
|
| 23 | 24 |
"golang.org/x/sys/unix" |
| 24 | 25 |
) |
| 25 | 26 |
|
| 26 |
-// MaxSendfdLen is the maximum length of the name of a file descriptor being |
|
| 27 |
-// sent using SendFd. The name of the file handle returned by RecvFd will never |
|
| 28 |
-// be larger than this value. |
|
| 27 |
+// MaxNameLen is the maximum length of the name of a file descriptor being sent |
|
| 28 |
+// using SendFile. The name of the file handle returned by RecvFile will never be |
|
| 29 |
+// larger than this value. |
|
| 29 | 30 |
const MaxNameLen = 4096 |
| 30 | 31 |
|
| 31 | 32 |
// oobSpace is the size of the oob slice required to store a single FD. Note |
| ... | ... |
@@ -33,26 +34,21 @@ const MaxNameLen = 4096 |
| 33 | 33 |
// so sizeof(fd) = 4. |
| 34 | 34 |
var oobSpace = unix.CmsgSpace(4) |
| 35 | 35 |
|
| 36 |
-// RecvFd waits for a file descriptor to be sent over the given AF_UNIX |
|
| 36 |
+// RecvFile waits for a file descriptor to be sent over the given AF_UNIX |
|
| 37 | 37 |
// socket. The file name of the remote file descriptor will be recreated |
| 38 | 38 |
// locally (it is sent as non-auxiliary data in the same payload). |
| 39 |
-func RecvFd(socket *os.File) (*os.File, error) {
|
|
| 40 |
- // For some reason, unix.Recvmsg uses the length rather than the capacity |
|
| 41 |
- // when passing the msg_controllen and other attributes to recvmsg. So we |
|
| 42 |
- // have to actually set the length. |
|
| 39 |
+func RecvFile(socket *os.File) (_ *os.File, Err error) {
|
|
| 43 | 40 |
name := make([]byte, MaxNameLen) |
| 44 | 41 |
oob := make([]byte, oobSpace) |
| 45 | 42 |
|
| 46 | 43 |
sockfd := socket.Fd() |
| 47 |
- n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) |
|
| 44 |
+ n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) |
|
| 48 | 45 |
if err != nil {
|
| 49 | 46 |
return nil, err |
| 50 | 47 |
} |
| 51 |
- |
|
| 52 | 48 |
if n >= MaxNameLen || oobn != oobSpace {
|
| 53 |
- return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
|
| 49 |
+ return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
|
| 54 | 50 |
} |
| 55 |
- |
|
| 56 | 51 |
// Truncate. |
| 57 | 52 |
name = name[:n] |
| 58 | 53 |
oob = oob[:oobn] |
| ... | ... |
@@ -61,36 +57,63 @@ func RecvFd(socket *os.File) (*os.File, error) {
|
| 61 | 61 |
if err != nil {
|
| 62 | 62 |
return nil, err |
| 63 | 63 |
} |
| 64 |
- if len(scms) != 1 {
|
|
| 65 |
- return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
|
| 64 |
+ |
|
| 65 |
+ // We cannot control how many SCM_RIGHTS we receive, and upon receiving |
|
| 66 |
+ // them all of the descriptors are installed in our fd table, so we need to |
|
| 67 |
+ // parse all of the SCM_RIGHTS we received in order to close all of the |
|
| 68 |
+ // descriptors on error. |
|
| 69 |
+ var fds []int |
|
| 70 |
+ defer func() {
|
|
| 71 |
+ for i, fd := range fds {
|
|
| 72 |
+ if i == 0 && Err == nil {
|
|
| 73 |
+ // Only close the first one on error. |
|
| 74 |
+ continue |
|
| 75 |
+ } |
|
| 76 |
+ // Always close extra ones. |
|
| 77 |
+ _ = unix.Close(fd) |
|
| 78 |
+ } |
|
| 79 |
+ }() |
|
| 80 |
+ var lastErr error |
|
| 81 |
+ for _, scm := range scms {
|
|
| 82 |
+ if scm.Header.Type == unix.SCM_RIGHTS {
|
|
| 83 |
+ scmFds, err := unix.ParseUnixRights(&scm) |
|
| 84 |
+ if err != nil {
|
|
| 85 |
+ lastErr = err |
|
| 86 |
+ } else {
|
|
| 87 |
+ fds = append(fds, scmFds...) |
|
| 88 |
+ } |
|
| 89 |
+ } |
|
| 90 |
+ } |
|
| 91 |
+ if lastErr != nil {
|
|
| 92 |
+ return nil, lastErr |
|
| 66 | 93 |
} |
| 67 |
- scm := scms[0] |
|
| 68 | 94 |
|
| 69 |
- fds, err := unix.ParseUnixRights(&scm) |
|
| 70 |
- if err != nil {
|
|
| 71 |
- return nil, err |
|
| 95 |
+ // We do this after collecting the fds to make sure we close them all when |
|
| 96 |
+ // returning an error here. |
|
| 97 |
+ if len(scms) != 1 {
|
|
| 98 |
+ return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
|
| 72 | 99 |
} |
| 73 | 100 |
if len(fds) != 1 {
|
| 74 | 101 |
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
| 75 | 102 |
} |
| 76 |
- fd := uintptr(fds[0]) |
|
| 77 |
- |
|
| 78 |
- return os.NewFile(fd, string(name)), nil |
|
| 103 |
+ return os.NewFile(uintptr(fds[0]), string(name)), nil |
|
| 79 | 104 |
} |
| 80 | 105 |
|
| 81 |
-// SendFd sends a file descriptor over the given AF_UNIX socket. In |
|
| 82 |
-// addition, the file.Name() of the given file will also be sent as |
|
| 83 |
-// non-auxiliary data in the same payload (allowing to send contextual |
|
| 84 |
-// information for a file descriptor). |
|
| 85 |
-func SendFd(socket *os.File, name string, fd uintptr) error {
|
|
| 106 |
+// SendFile sends a file over the given AF_UNIX socket. file.Name() is also |
|
| 107 |
+// included so that if the other end uses RecvFile, the file will have the same |
|
| 108 |
+// name information. |
|
| 109 |
+func SendFile(socket *os.File, file *os.File) error {
|
|
| 110 |
+ name := file.Name() |
|
| 86 | 111 |
if len(name) >= MaxNameLen {
|
| 87 | 112 |
return fmt.Errorf("sendfd: filename too long: %s", name)
|
| 88 | 113 |
} |
| 89 |
- return SendFds(socket, []byte(name), int(fd)) |
|
| 114 |
+ err := SendRawFd(socket, name, file.Fd()) |
|
| 115 |
+ runtime.KeepAlive(file) |
|
| 116 |
+ return err |
|
| 90 | 117 |
} |
| 91 | 118 |
|
| 92 |
-// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket. |
|
| 93 |
-func SendFds(socket *os.File, msg []byte, fds ...int) error {
|
|
| 94 |
- oob := unix.UnixRights(fds...) |
|
| 95 |
- return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0) |
|
| 119 |
+// SendRawFd sends a specific file descriptor over the given AF_UNIX socket. |
|
| 120 |
+func SendRawFd(socket *os.File, msg string, fd uintptr) error {
|
|
| 121 |
+ oob := unix.UnixRights(int(fd)) |
|
| 122 |
+ return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) |
|
| 96 | 123 |
} |
| ... | ... |
@@ -1,17 +1,12 @@ |
| 1 | 1 |
package utils |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 |
- "encoding/binary" |
|
| 5 | 4 |
"encoding/json" |
| 6 |
- "fmt" |
|
| 7 | 5 |
"io" |
| 8 | 6 |
"os" |
| 9 | 7 |
"path/filepath" |
| 10 |
- "strconv" |
|
| 11 | 8 |
"strings" |
| 12 |
- "unsafe" |
|
| 13 | 9 |
|
| 14 |
- securejoin "github.com/cyphar/filepath-securejoin" |
|
| 15 | 10 |
"golang.org/x/sys/unix" |
| 16 | 11 |
) |
| 17 | 12 |
|
| ... | ... |
@@ -19,20 +14,6 @@ const ( |
| 19 | 19 |
exitSignalOffset = 128 |
| 20 | 20 |
) |
| 21 | 21 |
|
| 22 |
-// NativeEndian is the native byte order of the host system. |
|
| 23 |
-var NativeEndian binary.ByteOrder |
|
| 24 |
- |
|
| 25 |
-func init() {
|
|
| 26 |
- // Copied from <golang.org/x/net/internal/socket/sys.go>. |
|
| 27 |
- i := uint32(1) |
|
| 28 |
- b := (*[4]byte)(unsafe.Pointer(&i)) |
|
| 29 |
- if b[0] == 1 {
|
|
| 30 |
- NativeEndian = binary.LittleEndian |
|
| 31 |
- } else {
|
|
| 32 |
- NativeEndian = binary.BigEndian |
|
| 33 |
- } |
|
| 34 |
-} |
|
| 35 |
- |
|
| 36 | 22 |
// ExitStatus returns the correct exit status for a process based on if it |
| 37 | 23 |
// was signaled or exited cleanly |
| 38 | 24 |
func ExitStatus(status unix.WaitStatus) int {
|
| ... | ... |
@@ -43,6 +24,9 @@ func ExitStatus(status unix.WaitStatus) int {
|
| 43 | 43 |
} |
| 44 | 44 |
|
| 45 | 45 |
// WriteJSON writes the provided struct v to w using standard json marshaling |
| 46 |
+// without a trailing newline. This is used instead of json.Encoder because |
|
| 47 |
+// there might be a problem in json decoder in some cases, see: |
|
| 48 |
+// https://github.com/docker/docker/issues/14203#issuecomment-174177790 |
|
| 46 | 49 |
func WriteJSON(w io.Writer, v interface{}) error {
|
| 47 | 50 |
data, err := json.Marshal(v) |
| 48 | 51 |
if err != nil {
|
| ... | ... |
@@ -99,52 +83,16 @@ func stripRoot(root, path string) string {
|
| 99 | 99 |
return CleanPath("/" + path)
|
| 100 | 100 |
} |
| 101 | 101 |
|
| 102 |
-// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) |
|
| 103 |
-// corresponding to the unsafePath resolved within the root. Before passing the |
|
| 104 |
-// fd, this path is verified to have been inside the root -- so operating on it |
|
| 105 |
-// through the passed fdpath should be safe. Do not access this path through |
|
| 106 |
-// the original path strings, and do not attempt to use the pathname outside of |
|
| 107 |
-// the passed closure (the file handle will be freed once the closure returns). |
|
| 108 |
-func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
|
| 109 |
- // Remove the root then forcefully resolve inside the root. |
|
| 110 |
- unsafePath = stripRoot(root, unsafePath) |
|
| 111 |
- path, err := securejoin.SecureJoin(root, unsafePath) |
|
| 112 |
- if err != nil {
|
|
| 113 |
- return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
|
| 114 |
- } |
|
| 115 |
- |
|
| 116 |
- // Open the target path. |
|
| 117 |
- fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) |
|
| 118 |
- if err != nil {
|
|
| 119 |
- return fmt.Errorf("open o_path procfd: %w", err)
|
|
| 120 |
- } |
|
| 121 |
- defer fh.Close() |
|
| 122 |
- |
|
| 123 |
- // Double-check the path is the one we expected. |
|
| 124 |
- procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd())) |
|
| 125 |
- if realpath, err := os.Readlink(procfd); err != nil {
|
|
| 126 |
- return fmt.Errorf("procfd verification failed: %w", err)
|
|
| 127 |
- } else if realpath != path {
|
|
| 128 |
- return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
|
| 129 |
- } |
|
| 130 |
- |
|
| 131 |
- // Run the closure. |
|
| 132 |
- return fn(procfd) |
|
| 133 |
-} |
|
| 134 |
- |
|
| 135 |
-// SearchLabels searches a list of key-value pairs for the provided key and |
|
| 136 |
-// returns the corresponding value. The pairs must be separated with '='. |
|
| 137 |
-func SearchLabels(labels []string, query string) string {
|
|
| 138 |
- for _, l := range labels {
|
|
| 139 |
- parts := strings.SplitN(l, "=", 2) |
|
| 140 |
- if len(parts) < 2 {
|
|
| 141 |
- continue |
|
| 142 |
- } |
|
| 143 |
- if parts[0] == query {
|
|
| 144 |
- return parts[1] |
|
| 102 |
+// SearchLabels searches through a list of key=value pairs for a given key, |
|
| 103 |
+// returning its value, and the binary flag telling whether the key exist. |
|
| 104 |
+func SearchLabels(labels []string, key string) (string, bool) {
|
|
| 105 |
+ key += "=" |
|
| 106 |
+ for _, s := range labels {
|
|
| 107 |
+ if strings.HasPrefix(s, key) {
|
|
| 108 |
+ return s[len(key):], true |
|
| 145 | 109 |
} |
| 146 | 110 |
} |
| 147 |
- return "" |
|
| 111 |
+ return "", false |
|
| 148 | 112 |
} |
| 149 | 113 |
|
| 150 | 114 |
// Annotations returns the bundle path and user defined annotations from the |
| ... | ... |
@@ -153,14 +101,14 @@ func SearchLabels(labels []string, query string) string {
|
| 153 | 153 |
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
|
| 154 | 154 |
userAnnotations = make(map[string]string) |
| 155 | 155 |
for _, l := range labels {
|
| 156 |
- parts := strings.SplitN(l, "=", 2) |
|
| 157 |
- if len(parts) < 2 {
|
|
| 156 |
+ name, value, ok := strings.Cut(l, "=") |
|
| 157 |
+ if !ok {
|
|
| 158 | 158 |
continue |
| 159 | 159 |
} |
| 160 |
- if parts[0] == "bundle" {
|
|
| 161 |
- bundle = parts[1] |
|
| 160 |
+ if name == "bundle" {
|
|
| 161 |
+ bundle = value |
|
| 162 | 162 |
} else {
|
| 163 |
- userAnnotations[parts[0]] = parts[1] |
|
| 163 |
+ userAnnotations[name] = value |
|
| 164 | 164 |
} |
| 165 | 165 |
} |
| 166 | 166 |
return |
| ... | ... |
@@ -1,20 +1,20 @@ |
| 1 | 1 |
//go:build !windows |
| 2 |
-// +build !windows |
|
| 3 | 2 |
|
| 4 | 3 |
package utils |
| 5 | 4 |
|
| 6 | 5 |
import ( |
| 7 |
- "errors" |
|
| 8 | 6 |
"fmt" |
| 7 |
+ "math" |
|
| 9 | 8 |
"os" |
| 10 | 9 |
"path/filepath" |
| 10 |
+ "runtime" |
|
| 11 | 11 |
"strconv" |
| 12 | 12 |
"strings" |
| 13 |
+ "sync" |
|
| 13 | 14 |
_ "unsafe" // for go:linkname |
| 14 | 15 |
|
| 15 |
- "github.com/opencontainers/runc/libcontainer/system" |
|
| 16 |
- |
|
| 17 | 16 |
securejoin "github.com/cyphar/filepath-securejoin" |
| 17 |
+ "github.com/sirupsen/logrus" |
|
| 18 | 18 |
"golang.org/x/sys/unix" |
| 19 | 19 |
) |
| 20 | 20 |
|
| ... | ... |
@@ -30,12 +30,39 @@ func EnsureProcHandle(fh *os.File) error {
|
| 30 | 30 |
return nil |
| 31 | 31 |
} |
| 32 | 32 |
|
| 33 |
+var ( |
|
| 34 |
+ haveCloseRangeCloexecBool bool |
|
| 35 |
+ haveCloseRangeCloexecOnce sync.Once |
|
| 36 |
+) |
|
| 37 |
+ |
|
| 38 |
+func haveCloseRangeCloexec() bool {
|
|
| 39 |
+ haveCloseRangeCloexecOnce.Do(func() {
|
|
| 40 |
+ // Make sure we're not closing a random file descriptor. |
|
| 41 |
+ tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0) |
|
| 42 |
+ if err != nil {
|
|
| 43 |
+ return |
|
| 44 |
+ } |
|
| 45 |
+ defer unix.Close(tmpFd) |
|
| 46 |
+ |
|
| 47 |
+ err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC) |
|
| 48 |
+ // Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC). |
|
| 49 |
+ // -ENOSYS and -EINVAL ultimately mean we don't have support, but any |
|
| 50 |
+ // other potential error would imply that even the most basic close |
|
| 51 |
+ // operation wouldn't work. |
|
| 52 |
+ haveCloseRangeCloexecBool = err == nil |
|
| 53 |
+ }) |
|
| 54 |
+ return haveCloseRangeCloexecBool |
|
| 55 |
+} |
|
| 56 |
+ |
|
| 33 | 57 |
type fdFunc func(fd int) |
| 34 | 58 |
|
| 35 | 59 |
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in |
| 36 | 60 |
// the current process. |
| 37 | 61 |
func fdRangeFrom(minFd int, fn fdFunc) error {
|
| 38 |
- fdDir, err := os.Open("/proc/self/fd")
|
|
| 62 |
+ procSelfFd, closer := ProcThreadSelf("fd")
|
|
| 63 |
+ defer closer() |
|
| 64 |
+ |
|
| 65 |
+ fdDir, err := os.Open(procSelfFd) |
|
| 39 | 66 |
if err != nil {
|
| 40 | 67 |
return err |
| 41 | 68 |
} |
| ... | ... |
@@ -73,6 +100,12 @@ func fdRangeFrom(minFd int, fn fdFunc) error {
|
| 73 | 73 |
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or |
| 74 | 74 |
// equal to minFd in the current process. |
| 75 | 75 |
func CloseExecFrom(minFd int) error {
|
| 76 |
+ // Use close_range(CLOSE_RANGE_CLOEXEC) if possible. |
|
| 77 |
+ if haveCloseRangeCloexec() {
|
|
| 78 |
+ err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC) |
|
| 79 |
+ return os.NewSyscallError("close_range", err)
|
|
| 80 |
+ } |
|
| 81 |
+ // Otherwise, fall back to the standard loop. |
|
| 76 | 82 |
return fdRangeFrom(minFd, unix.CloseOnExec) |
| 77 | 83 |
} |
| 78 | 84 |
|
| ... | ... |
@@ -95,7 +128,8 @@ func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive |
| 95 | 95 |
// *os.File operations would apply to the wrong file). This function is only |
| 96 | 96 |
// intended to be called from the last stage of runc init. |
| 97 | 97 |
func UnsafeCloseFrom(minFd int) error {
|
| 98 |
- // We must not close some file descriptors. |
|
| 98 |
+ // We cannot use close_range(2) even if it is available, because we must |
|
| 99 |
+ // not close some file descriptors. |
|
| 99 | 100 |
return fdRangeFrom(minFd, func(fd int) {
|
| 100 | 101 |
if runtime_IsPollDescriptor(uintptr(fd)) {
|
| 101 | 102 |
// These are the Go runtimes internal netpoll file descriptors. |
| ... | ... |
@@ -113,8 +147,8 @@ func UnsafeCloseFrom(minFd int) error {
|
| 113 | 113 |
}) |
| 114 | 114 |
} |
| 115 | 115 |
|
| 116 |
-// NewSockPair returns a new unix socket pair |
|
| 117 |
-func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
|
| 116 |
+// NewSockPair returns a new SOCK_STREAM unix socket pair. |
|
| 117 |
+func NewSockPair(name string) (parent, child *os.File, err error) {
|
|
| 118 | 118 |
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) |
| 119 | 119 |
if err != nil {
|
| 120 | 120 |
return nil, nil, err |
| ... | ... |
@@ -122,6 +156,112 @@ func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
| 122 | 122 |
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil |
| 123 | 123 |
} |
| 124 | 124 |
|
| 125 |
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) |
|
| 126 |
+// corresponding to the unsafePath resolved within the root. Before passing the |
|
| 127 |
+// fd, this path is verified to have been inside the root -- so operating on it |
|
| 128 |
+// through the passed fdpath should be safe. Do not access this path through |
|
| 129 |
+// the original path strings, and do not attempt to use the pathname outside of |
|
| 130 |
+// the passed closure (the file handle will be freed once the closure returns). |
|
| 131 |
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
|
| 132 |
+ // Remove the root then forcefully resolve inside the root. |
|
| 133 |
+ unsafePath = stripRoot(root, unsafePath) |
|
| 134 |
+ path, err := securejoin.SecureJoin(root, unsafePath) |
|
| 135 |
+ if err != nil {
|
|
| 136 |
+ return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
|
| 137 |
+ } |
|
| 138 |
+ |
|
| 139 |
+ procSelfFd, closer := ProcThreadSelf("fd/")
|
|
| 140 |
+ defer closer() |
|
| 141 |
+ |
|
| 142 |
+ // Open the target path. |
|
| 143 |
+ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) |
|
| 144 |
+ if err != nil {
|
|
| 145 |
+ return fmt.Errorf("open o_path procfd: %w", err)
|
|
| 146 |
+ } |
|
| 147 |
+ defer fh.Close() |
|
| 148 |
+ |
|
| 149 |
+ procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd()))) |
|
| 150 |
+ // Double-check the path is the one we expected. |
|
| 151 |
+ if realpath, err := os.Readlink(procfd); err != nil {
|
|
| 152 |
+ return fmt.Errorf("procfd verification failed: %w", err)
|
|
| 153 |
+ } else if realpath != path {
|
|
| 154 |
+ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
|
| 155 |
+ } |
|
| 156 |
+ |
|
| 157 |
+ return fn(procfd) |
|
| 158 |
+} |
|
| 159 |
+ |
|
| 160 |
+type ProcThreadSelfCloser func() |
|
| 161 |
+ |
|
| 162 |
+var ( |
|
| 163 |
+ haveProcThreadSelf bool |
|
| 164 |
+ haveProcThreadSelfOnce sync.Once |
|
| 165 |
+) |
|
| 166 |
+ |
|
| 167 |
+// ProcThreadSelf returns a string that is equivalent to |
|
| 168 |
+// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where |
|
| 169 |
+// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin, |
|
| 170 |
+// meaning that the passed string needs to be trusted. The caller _must_ call |
|
| 171 |
+// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread) |
|
| 172 |
+// *only once* after it has finished using the returned path string. |
|
| 173 |
+func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
|
|
| 174 |
+ haveProcThreadSelfOnce.Do(func() {
|
|
| 175 |
+ if _, err := os.Stat("/proc/thread-self/"); err == nil {
|
|
| 176 |
+ haveProcThreadSelf = true |
|
| 177 |
+ } else {
|
|
| 178 |
+ logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
|
|
| 179 |
+ } |
|
| 180 |
+ }) |
|
| 181 |
+ |
|
| 182 |
+ // We need to lock our thread until the caller is done with the path string |
|
| 183 |
+ // because any non-atomic operation on the path (such as opening a file, |
|
| 184 |
+ // then reading it) could be interrupted by the Go runtime where the |
|
| 185 |
+ // underlying thread is swapped out and the original thread is killed, |
|
| 186 |
+ // resulting in pull-your-hair-out-hard-to-debug issues in the caller. In |
|
| 187 |
+ // addition, the pre-3.17 fallback makes everything non-atomic because the |
|
| 188 |
+ // same thing could happen between unix.Gettid() and the path operations. |
|
| 189 |
+ // |
|
| 190 |
+ // In theory, we don't need to lock in the atomic user case when using |
|
| 191 |
+ // /proc/thread-self/, but it's better to be safe than sorry (and there are |
|
| 192 |
+ // only one or two truly atomic users of /proc/thread-self/). |
|
| 193 |
+ runtime.LockOSThread() |
|
| 194 |
+ |
|
| 195 |
+ threadSelf := "/proc/thread-self/" |
|
| 196 |
+ if !haveProcThreadSelf {
|
|
| 197 |
+ // Pre-3.17 kernels did not have /proc/thread-self, so do it manually. |
|
| 198 |
+ threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/" |
|
| 199 |
+ if _, err := os.Stat(threadSelf); err != nil {
|
|
| 200 |
+ // Unfortunately, this code is called from rootfs_linux.go where we |
|
| 201 |
+ // are running inside the pid namespace of the container but /proc |
|
| 202 |
+ // is the host's procfs. Unfortunately there is no real way to get |
|
| 203 |
+ // the correct tid to use here (the kernel age means we cannot do |
|
| 204 |
+ // things like set up a private fsopen("proc") -- even scanning
|
|
| 205 |
+ // NSpid in all of the tasks in /proc/self/task/*/status requires |
|
| 206 |
+ // Linux 4.1). |
|
| 207 |
+ // |
|
| 208 |
+ // So, we just have to assume that /proc/self is acceptable in this |
|
| 209 |
+ // one specific case. |
|
| 210 |
+ if os.Getpid() == 1 {
|
|
| 211 |
+ logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
|
|
| 212 |
+ } else {
|
|
| 213 |
+ // This should never happen, but the fallback should work in most cases... |
|
| 214 |
+ logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
|
|
| 215 |
+ } |
|
| 216 |
+ threadSelf = "/proc/self/" |
|
| 217 |
+ } |
|
| 218 |
+ } |
|
| 219 |
+ return threadSelf + subpath, runtime.UnlockOSThread |
|
| 220 |
+} |
|
| 221 |
+ |
|
| 222 |
+// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to |
|
| 223 |
+// create a /proc/thread-self handle for given file descriptor. |
|
| 224 |
+// |
|
| 225 |
+// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
|
|
| 226 |
+// without using fmt.Sprintf to avoid unneeded overhead. |
|
| 227 |
+func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
|
|
| 228 |
+ return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
|
|
| 229 |
+} |
|
| 230 |
+ |
|
| 125 | 231 |
// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), |
| 126 | 232 |
// but properly handling the case where path or root are "/". |
| 127 | 233 |
// |
| ... | ... |
@@ -156,83 +296,45 @@ func IsLexicallyInRoot(root, path string) bool {
|
| 156 | 156 |
// This means that the path also must not contain ".." elements, otherwise an |
| 157 | 157 |
// error will occur. |
| 158 | 158 |
// |
| 159 |
-// This is a somewhat less safe alternative to |
|
| 160 |
-// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should |
|
| 161 |
-// detect attempts to trick us into creating directories outside of the root. |
|
| 162 |
-// We should migrate to securejoin.MkdirAll once it is merged. |
|
| 159 |
+// This uses securejoin.MkdirAllHandle under the hood, but it has special |
|
| 160 |
+// handling if unsafePath has already been scoped within the rootfs (this is |
|
| 161 |
+// needed for a lot of runc callers and fixing this would require reworking a |
|
| 162 |
+// lot of path logic). |
|
| 163 | 163 |
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
|
| 164 |
- // If the path is already "within" the root, use it verbatim. |
|
| 165 |
- fullPath := unsafePath |
|
| 166 |
- if !IsLexicallyInRoot(root, unsafePath) {
|
|
| 167 |
- var err error |
|
| 168 |
- fullPath, err = securejoin.SecureJoin(root, unsafePath) |
|
| 164 |
+ // If the path is already "within" the root, get the path relative to the |
|
| 165 |
+ // root and use that as the unsafe path. This is necessary because a lot of |
|
| 166 |
+ // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring |
|
| 167 |
+ // all of them to stop using these SecureJoin'd paths would require a fair |
|
| 168 |
+ // amount of work. |
|
| 169 |
+ // TODO(cyphar): Do the refactor to libpathrs once it's ready. |
|
| 170 |
+ if IsLexicallyInRoot(root, unsafePath) {
|
|
| 171 |
+ subPath, err := filepath.Rel(root, unsafePath) |
|
| 169 | 172 |
if err != nil {
|
| 170 | 173 |
return nil, err |
| 171 | 174 |
} |
| 172 |
- } |
|
| 173 |
- subPath, err := filepath.Rel(root, fullPath) |
|
| 174 |
- if err != nil {
|
|
| 175 |
- return nil, err |
|
| 175 |
+ unsafePath = subPath |
|
| 176 | 176 |
} |
| 177 | 177 |
|
| 178 | 178 |
// Check for any silly mode bits. |
| 179 | 179 |
if mode&^0o7777 != 0 {
|
| 180 | 180 |
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
|
| 181 | 181 |
} |
| 182 |
+ // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if |
|
| 183 |
+ // passed. While it would make sense to return an error in that case (since |
|
| 184 |
+ // the user has asked for a mode that won't be applied), for compatibility |
|
| 185 |
+ // reasons we have to ignore these bits. |
|
| 186 |
+ if ignoredBits := mode &^ 0o1777; ignoredBits != 0 {
|
|
| 187 |
+ logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits)
|
|
| 188 |
+ mode &= 0o1777 |
|
| 189 |
+ } |
|
| 182 | 190 |
|
| 183 |
- currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) |
|
| 191 |
+ rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) |
|
| 184 | 192 |
if err != nil {
|
| 185 | 193 |
return nil, fmt.Errorf("open root handle: %w", err)
|
| 186 | 194 |
} |
| 187 |
- defer func() {
|
|
| 188 |
- if Err != nil {
|
|
| 189 |
- currentDir.Close() |
|
| 190 |
- } |
|
| 191 |
- }() |
|
| 192 |
- |
|
| 193 |
- for _, part := range strings.Split(subPath, string(filepath.Separator)) {
|
|
| 194 |
- switch part {
|
|
| 195 |
- case "", ".": |
|
| 196 |
- // Skip over no-op components. |
|
| 197 |
- continue |
|
| 198 |
- case "..": |
|
| 199 |
- return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
|
|
| 200 |
- } |
|
| 195 |
+ defer rootDir.Close() |
|
| 201 | 196 |
|
| 202 |
- nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) |
|
| 203 |
- switch {
|
|
| 204 |
- case err == nil: |
|
| 205 |
- // Update the currentDir. |
|
| 206 |
- _ = currentDir.Close() |
|
| 207 |
- currentDir = nextDir |
|
| 208 |
- |
|
| 209 |
- case errors.Is(err, unix.ENOTDIR): |
|
| 210 |
- // This might be a symlink or some other random file. Either way, |
|
| 211 |
- // error out. |
|
| 212 |
- return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
|
|
| 213 |
- |
|
| 214 |
- case errors.Is(err, os.ErrNotExist): |
|
| 215 |
- // Luckily, mkdirat will not follow trailing symlinks, so this is |
|
| 216 |
- // safe to do as-is. |
|
| 217 |
- if err := system.Mkdirat(currentDir, part, mode); err != nil {
|
|
| 218 |
- return nil, err |
|
| 219 |
- } |
|
| 220 |
- // Open the new directory. There is a race here where an attacker |
|
| 221 |
- // could swap the directory with a different directory, but |
|
| 222 |
- // MkdirAll's fuzzy semantics mean we don't care about that. |
|
| 223 |
- nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) |
|
| 224 |
- if err != nil {
|
|
| 225 |
- return nil, fmt.Errorf("open newly created directory: %w", err)
|
|
| 226 |
- } |
|
| 227 |
- // Update the currentDir. |
|
| 228 |
- _ = currentDir.Close() |
|
| 229 |
- currentDir = nextDir |
|
| 230 |
- |
|
| 231 |
- default: |
|
| 232 |
- return nil, err |
|
| 233 |
- } |
|
| 234 |
- } |
|
| 235 |
- return currentDir, nil |
|
| 197 |
+ return securejoin.MkdirAllHandle(rootDir, unsafePath, int(mode)) |
|
| 236 | 198 |
} |
| 237 | 199 |
|
| 238 | 200 |
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the |
| ... | ... |
@@ -244,3 +346,18 @@ func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
|
| 244 | 244 |
} |
| 245 | 245 |
return err |
| 246 | 246 |
} |
| 247 |
+ |
|
| 248 |
+// Openat is a Go-friendly openat(2) wrapper. |
|
| 249 |
+func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
|
| 250 |
+ dirFd := unix.AT_FDCWD |
|
| 251 |
+ if dir != nil {
|
|
| 252 |
+ dirFd = int(dir.Fd()) |
|
| 253 |
+ } |
|
| 254 |
+ flags |= unix.O_CLOEXEC |
|
| 255 |
+ |
|
| 256 |
+ fd, err := unix.Openat(dirFd, path, flags, mode) |
|
| 257 |
+ if err != nil {
|
|
| 258 |
+ return nil, &os.PathError{Op: "openat", Path: path, Err: err}
|
|
| 259 |
+ } |
|
| 260 |
+ return os.NewFile(uintptr(fd), dir.Name()+"/"+path), nil |
|
| 261 |
+} |
| ... | ... |
@@ -986,14 +986,11 @@ github.com/opencontainers/go-digest/digestset |
| 986 | 986 |
github.com/opencontainers/image-spec/identity |
| 987 | 987 |
github.com/opencontainers/image-spec/specs-go |
| 988 | 988 |
github.com/opencontainers/image-spec/specs-go/v1 |
| 989 |
-# github.com/opencontainers/runc v1.1.14 |
|
| 990 |
-## explicit; go 1.18 |
|
| 989 |
+# github.com/opencontainers/runc v1.2.0 |
|
| 990 |
+## explicit; go 1.22 |
|
| 991 | 991 |
github.com/opencontainers/runc/libcontainer/cgroups |
| 992 | 992 |
github.com/opencontainers/runc/libcontainer/configs |
| 993 | 993 |
github.com/opencontainers/runc/libcontainer/devices |
| 994 |
-github.com/opencontainers/runc/libcontainer/system |
|
| 995 |
-github.com/opencontainers/runc/libcontainer/user |
|
| 996 |
-github.com/opencontainers/runc/libcontainer/userns |
|
| 997 | 994 |
github.com/opencontainers/runc/libcontainer/utils |
| 998 | 995 |
# github.com/opencontainers/runtime-spec v1.2.0 |
| 999 | 996 |
## explicit |