The runc libcontainer/cgroups package was moved to a separate
module; switch our use of the runc module to use the new
location.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
| ... | ... |
@@ -27,7 +27,7 @@ import ( |
| 27 | 27 |
"github.com/moby/sys/mountinfo" |
| 28 | 28 |
"github.com/moby/sys/user" |
| 29 | 29 |
"github.com/moby/sys/userns" |
| 30 |
- "github.com/opencontainers/runc/libcontainer/cgroups" |
|
| 30 |
+ "github.com/opencontainers/cgroups" |
|
| 31 | 31 |
"github.com/opencontainers/runtime-spec/specs-go" |
| 32 | 32 |
"github.com/pkg/errors" |
| 33 | 33 |
) |
| ... | ... |
@@ -80,9 +80,9 @@ require ( |
| 80 | 80 |
github.com/moby/sys/userns v0.1.0 |
| 81 | 81 |
github.com/moby/term v0.5.2 |
| 82 | 82 |
github.com/morikuni/aec v1.0.0 |
| 83 |
+ github.com/opencontainers/cgroups v0.0.1 |
|
| 83 | 84 |
github.com/opencontainers/go-digest v1.0.0 |
| 84 | 85 |
github.com/opencontainers/image-spec v1.1.0 |
| 85 |
- github.com/opencontainers/runc v1.2.6 |
|
| 86 | 86 |
github.com/opencontainers/runtime-spec v1.2.0 |
| 87 | 87 |
github.com/opencontainers/selinux v1.11.1 |
| 88 | 88 |
github.com/pelletier/go-toml v1.9.5 |
| ... | ... |
@@ -435,12 +435,12 @@ github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C |
| 435 | 435 |
github.com/onsi/gomega v1.36.0 h1:Pb12RlruUtj4XUuPUqeEWc6j5DkVVVA49Uf6YLfC95Y= |
| 436 | 436 |
github.com/onsi/gomega v1.36.0/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= |
| 437 | 437 |
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= |
| 438 |
+github.com/opencontainers/cgroups v0.0.1 h1:MXjMkkFpKv6kpuirUa4USFBas573sSAY082B4CiHEVA= |
|
| 439 |
+github.com/opencontainers/cgroups v0.0.1/go.mod h1:s8lktyhlGUqM7OSRL5P7eAW6Wb+kWPNvt4qvVfzA5vs= |
|
| 438 | 440 |
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= |
| 439 | 441 |
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= |
| 440 | 442 |
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= |
| 441 | 443 |
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= |
| 442 |
-github.com/opencontainers/runc v1.2.6 h1:P7Hqg40bsMvQGCS4S7DJYhUZOISMLJOB2iGX5COWiPk= |
|
| 443 |
-github.com/opencontainers/runc v1.2.6/go.mod h1:dOQeFo29xZKBNeRBI0B19mJtfHv68YgCTh1X+YphA+4= |
|
| 444 | 444 |
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= |
| 445 | 445 |
github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= |
| 446 | 446 |
github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= |
| 0 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,150 @@ |
| 0 |
+# Contribution Guidelines |
|
| 1 |
+ |
|
| 2 |
+Development happens on GitHub. |
|
| 3 |
+Issues are used for bugs and actionable items and longer discussions can happen on the [mailing list](#mailing-list). |
|
| 4 |
+ |
|
| 5 |
+The content of this repository is licensed under the [Apache License, Version 2.0](LICENSE). |
|
| 6 |
+ |
|
| 7 |
+## Code of Conduct |
|
| 8 |
+ |
|
| 9 |
+Participation in the Open Container community is governed by [Open Container Code of Conduct][code-of-conduct]. |
|
| 10 |
+ |
|
| 11 |
+## Meetings |
|
| 12 |
+ |
|
| 13 |
+The contributors and maintainers of all OCI projects have monthly meetings at 2:00 PM (USA Pacific) on the first Wednesday of every month. |
|
| 14 |
+There is an [iCalendar][rfc5545] format for the meetings [here][meeting.ics]. |
|
| 15 |
+Everyone is welcome to participate via [UberConference web][UberConference] or audio-only: +1 415 968 0849 (no PIN needed). |
|
| 16 |
+An initial agenda will be posted to the [mailing list](#mailing-list) in the week before each meeting, and everyone is welcome to propose additional topics or suggest other agenda alterations there. |
|
| 17 |
+Minutes from past meetings are archived [here][minutes]. |
|
| 18 |
+ |
|
| 19 |
+## Mailing list |
|
| 20 |
+ |
|
| 21 |
+You can subscribe and browse the mailing list on [Google Groups][mailing-list]. |
|
| 22 |
+ |
|
| 23 |
+## IRC |
|
| 24 |
+ |
|
| 25 |
+OCI discussion happens on #opencontainers on [Freenode][] ([logs][irc-logs]). |
|
| 26 |
+ |
|
| 27 |
+## Git |
|
| 28 |
+ |
|
| 29 |
+### Security issues |
|
| 30 |
+ |
|
| 31 |
+If you are reporting a security issue, do not create an issue or file a pull |
|
| 32 |
+request on GitHub. Instead, disclose the issue responsibly by sending an email |
|
| 33 |
+to security@opencontainers.org (which is inhabited only by the maintainers of |
|
| 34 |
+the various OCI projects). |
|
| 35 |
+ |
|
| 36 |
+### Pull requests are always welcome |
|
| 37 |
+ |
|
| 38 |
+We are always thrilled to receive pull requests, and do our best to |
|
| 39 |
+process them as fast as possible. Not sure if that typo is worth a pull |
|
| 40 |
+request? Do it! We will appreciate it. |
|
| 41 |
+ |
|
| 42 |
+If your pull request is not accepted on the first try, don't be |
|
| 43 |
+discouraged! If there's a problem with the implementation, hopefully you |
|
| 44 |
+received feedback on what to improve. |
|
| 45 |
+ |
|
| 46 |
+We're trying very hard to keep the project lean and focused. We don't want it |
|
| 47 |
+to do everything for everybody. This means that we might decide against |
|
| 48 |
+incorporating a new feature. |
|
| 49 |
+ |
|
| 50 |
+### Conventions |
|
| 51 |
+ |
|
| 52 |
+Fork the repo and make changes on your fork in a feature branch. |
|
| 53 |
+For larger bugs and enhancements, consider filing a leader issue or mailing-list thread for discussion that is independent of the implementation. |
|
| 54 |
+Small changes or changes that have been discussed on the [project mailing list](#mailing-list) may be submitted without a leader issue. |
|
| 55 |
+ |
|
| 56 |
+If the project has a test suite, submit unit tests for your changes. Take a |
|
| 57 |
+look at existing tests for inspiration. Run the full test suite on your branch |
|
| 58 |
+before submitting a pull request. |
|
| 59 |
+ |
|
| 60 |
+Update the documentation when creating or modifying features. Test |
|
| 61 |
+your documentation changes for clarity, concision, and correctness, as |
|
| 62 |
+well as a clean documentation build. |
|
| 63 |
+ |
|
| 64 |
+Pull request descriptions should be as clear as possible and include a |
|
| 65 |
+reference to all the issues that they address. |
|
| 66 |
+ |
|
| 67 |
+Commit messages must start with a capitalized and short summary |
|
| 68 |
+written in the imperative, followed by an optional, more detailed |
|
| 69 |
+explanatory text which is separated from the summary by an empty line. |
|
| 70 |
+ |
|
| 71 |
+Code review comments may be added to your pull request. Discuss, then make the |
|
| 72 |
+suggested modifications and push additional commits to your feature branch. Be |
|
| 73 |
+sure to post a comment after pushing. The new commits will show up in the pull |
|
| 74 |
+request automatically, but the reviewers will not be notified unless you |
|
| 75 |
+comment. |
|
| 76 |
+ |
|
| 77 |
+Before the pull request is merged, make sure that you squash your commits into |
|
| 78 |
+logical units of work using `git rebase -i` and `git push -f`. After every |
|
| 79 |
+commit the test suite (if any) should be passing. Include documentation changes |
|
| 80 |
+in the same commit so that a revert would remove all traces of the feature or |
|
| 81 |
+fix. |
|
| 82 |
+ |
|
| 83 |
+Commits that fix or close an issue should include a reference like `Closes #XXX` |
|
| 84 |
+or `Fixes #XXX`, which will automatically close the issue when merged. |
|
| 85 |
+ |
|
| 86 |
+### Sign your work |
|
| 87 |
+ |
|
| 88 |
+The sign-off is a simple line at the end of the explanation for the |
|
| 89 |
+patch, which certifies that you wrote it or otherwise have the right to |
|
| 90 |
+pass it on as an open-source patch. The rules are pretty simple: if you |
|
| 91 |
+can certify the below (from [developercertificate.org][]): |
|
| 92 |
+ |
|
| 93 |
+``` |
|
| 94 |
+Developer Certificate of Origin |
|
| 95 |
+Version 1.1 |
|
| 96 |
+ |
|
| 97 |
+Copyright (C) 2004, 2006 The Linux Foundation and its contributors. |
|
| 98 |
+1 Letterman Drive |
|
| 99 |
+Suite D4700 |
|
| 100 |
+San Francisco, CA, 94129 |
|
| 101 |
+ |
|
| 102 |
+Everyone is permitted to copy and distribute verbatim copies of this |
|
| 103 |
+license document, but changing it is not allowed. |
|
| 104 |
+ |
|
| 105 |
+ |
|
| 106 |
+Developer's Certificate of Origin 1.1 |
|
| 107 |
+ |
|
| 108 |
+By making a contribution to this project, I certify that: |
|
| 109 |
+ |
|
| 110 |
+(a) The contribution was created in whole or in part by me and I |
|
| 111 |
+ have the right to submit it under the open source license |
|
| 112 |
+ indicated in the file; or |
|
| 113 |
+ |
|
| 114 |
+(b) The contribution is based upon previous work that, to the best |
|
| 115 |
+ of my knowledge, is covered under an appropriate open source |
|
| 116 |
+ license and I have the right under that license to submit that |
|
| 117 |
+ work with modifications, whether created in whole or in part |
|
| 118 |
+ by me, under the same open source license (unless I am |
|
| 119 |
+ permitted to submit under a different license), as indicated |
|
| 120 |
+ in the file; or |
|
| 121 |
+ |
|
| 122 |
+(c) The contribution was provided directly to me by some other |
|
| 123 |
+ person who certified (a), (b) or (c) and I have not modified |
|
| 124 |
+ it. |
|
| 125 |
+ |
|
| 126 |
+(d) I understand and agree that this project and the contribution |
|
| 127 |
+ are public and that a record of the contribution (including all |
|
| 128 |
+ personal information I submit with it, including my sign-off) is |
|
| 129 |
+ maintained indefinitely and may be redistributed consistent with |
|
| 130 |
+ this project or the open source license(s) involved. |
|
| 131 |
+``` |
|
| 132 |
+ |
|
| 133 |
+then you just add a line to every git commit message: |
|
| 134 |
+ |
|
| 135 |
+ Signed-off-by: Joe Smith <joe@gmail.com> |
|
| 136 |
+ |
|
| 137 |
+using your real name (sorry, no pseudonyms or anonymous contributions.) |
|
| 138 |
+ |
|
| 139 |
+You can add the sign off when creating the git commit via `git commit -s`. |
|
| 140 |
+ |
|
| 141 |
+[code-of-conduct]: https://github.com/opencontainers/tob/blob/d2f9d68c1332870e40693fe077d311e0742bc73d/code-of-conduct.md |
|
| 142 |
+[developercertificate.org]: http://developercertificate.org/ |
|
| 143 |
+[Freenode]: https://freenode.net/ |
|
| 144 |
+[irc-logs]: http://ircbot.wl.linuxfoundation.org/eavesdrop/%23opencontainers/ |
|
| 145 |
+[mailing-list]: https://groups.google.com/a/opencontainers.org/forum/#!forum/dev |
|
| 146 |
+[meeting.ics]: https://github.com/opencontainers/runtime-spec/blob/master/meeting.ics |
|
| 147 |
+[minutes]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/ |
|
| 148 |
+[rfc5545]: https://tools.ietf.org/html/rfc5545 |
|
| 149 |
+[UberConference]: https://www.uberconference.com/opencontainers |
| 0 | 150 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,63 @@ |
| 0 |
+# Project governance |
|
| 1 |
+ |
|
| 2 |
+The [OCI charter][charter] §5.b.viii tasks an OCI Project's maintainers (listed in the repository's MAINTAINERS file and sometimes referred to as "the TDC", [§5.e][charter]) with: |
|
| 3 |
+ |
|
| 4 |
+> Creating, maintaining and enforcing governance guidelines for the TDC, approved by the maintainers, and which shall be posted visibly for the TDC. |
|
| 5 |
+ |
|
| 6 |
+This section describes generic rules and procedures for fulfilling that mandate. |
|
| 7 |
+ |
|
| 8 |
+## Proposing a motion |
|
| 9 |
+ |
|
| 10 |
+A maintainer SHOULD propose a motion on the dev@opencontainers.org mailing list (except [security issues](#security-issues)) with another maintainer as a co-sponsor. |
|
| 11 |
+ |
|
| 12 |
+## Voting |
|
| 13 |
+ |
|
| 14 |
+Voting on a proposed motion SHOULD happen on the dev@opencontainers.org mailing list (except [security issues](#security-issues)) with maintainers posting LGTM or REJECT. |
|
| 15 |
+Maintainers MAY also explicitly not vote by posting ABSTAIN (which is useful to revert a previous vote). |
|
| 16 |
+Maintainers MAY post multiple times (e.g. as they revise their position based on feedback), but only their final post counts in the tally. |
|
| 17 |
+A proposed motion is adopted if two-thirds of votes cast, a quorum having voted, are in favor of the motion. |
|
| 18 |
+ |
|
| 19 |
+Voting SHOULD remain open for a week to collect feedback from the wider community and allow the maintainers to digest the proposed motion. |
|
| 20 |
+Under exceptional conditions (e.g. non-major security fix releases) proposals which reach quorum with unanimous support MAY be adopted earlier. |
|
| 21 |
+ |
|
| 22 |
+A maintainer MAY choose to reply with REJECT. |
|
| 23 |
+A maintainer posting a REJECT MUST include a list of concerns or links to written documentation for those concerns (e.g. GitHub issues or mailing-list threads). |
|
| 24 |
+The maintainers SHOULD try to resolve the concerns and wait for the rejecting maintainer to change their opinion to LGTM. |
|
| 25 |
+However, a motion MAY be adopted with REJECTs, as outlined in the previous paragraphs. |
|
| 26 |
+ |
|
| 27 |
+## Quorum |
|
| 28 |
+ |
|
| 29 |
+A quorum is established when at least two-thirds of maintainers have voted. |
|
| 30 |
+ |
|
| 31 |
+For projects that are not specifications, a [motion to release](#release-approval) MAY be adopted if the tally is at least three LGTMs and no REJECTs, even if three votes do not meet the usual two-thirds quorum. |
|
| 32 |
+ |
|
| 33 |
+## Amendments |
|
| 34 |
+ |
|
| 35 |
+The [project governance](#project-governance) rules and procedures MAY be amended or replaced using the procedures themselves. |
|
| 36 |
+The MAINTAINERS of this project governance document is the total set of MAINTAINERS from all Open Containers projects (go-digest, image-spec, image-tools, runC, runtime-spec, runtime-tools, and selinux). |
|
| 37 |
+ |
|
| 38 |
+## Subject templates |
|
| 39 |
+ |
|
| 40 |
+Maintainers are busy and get lots of email. |
|
| 41 |
+To make project proposals recognizable, proposed motions SHOULD use the following subject templates. |
|
| 42 |
+ |
|
| 43 |
+### Proposing a motion |
|
| 44 |
+ |
|
| 45 |
+> [{project} VOTE]: {motion description} (closes {end of voting window})
|
|
| 46 |
+ |
|
| 47 |
+For example: |
|
| 48 |
+ |
|
| 49 |
+> [runtime-spec VOTE]: Tag 0647920 as 1.0.0-rc (closes 2016-06-03 20:00 UTC) |
|
| 50 |
+ |
|
| 51 |
+### Tallying results |
|
| 52 |
+ |
|
| 53 |
+After voting closes, a maintainer SHOULD post a tally to the motion thread with a subject template like: |
|
| 54 |
+ |
|
| 55 |
+> [{project} {status}]: {motion description} (+{LGTMs} -{REJECTs} #{ABSTAINs})
|
|
| 56 |
+ |
|
| 57 |
+Where `{status}` is either `adopted` or `rejected`.
|
|
| 58 |
+For example: |
|
| 59 |
+ |
|
| 60 |
+> [runtime-spec adopted]: Tag 0647920 as 1.0.0-rc (+6 -0 #3) |
|
| 61 |
+ |
|
| 62 |
+[charter]: https://www.opencontainers.org/about/governance |
| 0 | 63 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,201 @@ |
| 0 |
+ Apache License |
|
| 1 |
+ Version 2.0, January 2004 |
|
| 2 |
+ http://www.apache.org/licenses/ |
|
| 3 |
+ |
|
| 4 |
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
|
| 5 |
+ |
|
| 6 |
+ 1. Definitions. |
|
| 7 |
+ |
|
| 8 |
+ "License" shall mean the terms and conditions for use, reproduction, |
|
| 9 |
+ and distribution as defined by Sections 1 through 9 of this document. |
|
| 10 |
+ |
|
| 11 |
+ "Licensor" shall mean the copyright owner or entity authorized by |
|
| 12 |
+ the copyright owner that is granting the License. |
|
| 13 |
+ |
|
| 14 |
+ "Legal Entity" shall mean the union of the acting entity and all |
|
| 15 |
+ other entities that control, are controlled by, or are under common |
|
| 16 |
+ control with that entity. For the purposes of this definition, |
|
| 17 |
+ "control" means (i) the power, direct or indirect, to cause the |
|
| 18 |
+ direction or management of such entity, whether by contract or |
|
| 19 |
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the |
|
| 20 |
+ outstanding shares, or (iii) beneficial ownership of such entity. |
|
| 21 |
+ |
|
| 22 |
+ "You" (or "Your") shall mean an individual or Legal Entity |
|
| 23 |
+ exercising permissions granted by this License. |
|
| 24 |
+ |
|
| 25 |
+ "Source" form shall mean the preferred form for making modifications, |
|
| 26 |
+ including but not limited to software source code, documentation |
|
| 27 |
+ source, and configuration files. |
|
| 28 |
+ |
|
| 29 |
+ "Object" form shall mean any form resulting from mechanical |
|
| 30 |
+ transformation or translation of a Source form, including but |
|
| 31 |
+ not limited to compiled object code, generated documentation, |
|
| 32 |
+ and conversions to other media types. |
|
| 33 |
+ |
|
| 34 |
+ "Work" shall mean the work of authorship, whether in Source or |
|
| 35 |
+ Object form, made available under the License, as indicated by a |
|
| 36 |
+ copyright notice that is included in or attached to the work |
|
| 37 |
+ (an example is provided in the Appendix below). |
|
| 38 |
+ |
|
| 39 |
+ "Derivative Works" shall mean any work, whether in Source or Object |
|
| 40 |
+ form, that is based on (or derived from) the Work and for which the |
|
| 41 |
+ editorial revisions, annotations, elaborations, or other modifications |
|
| 42 |
+ represent, as a whole, an original work of authorship. For the purposes |
|
| 43 |
+ of this License, Derivative Works shall not include works that remain |
|
| 44 |
+ separable from, or merely link (or bind by name) to the interfaces of, |
|
| 45 |
+ the Work and Derivative Works thereof. |
|
| 46 |
+ |
|
| 47 |
+ "Contribution" shall mean any work of authorship, including |
|
| 48 |
+ the original version of the Work and any modifications or additions |
|
| 49 |
+ to that Work or Derivative Works thereof, that is intentionally |
|
| 50 |
+ submitted to Licensor for inclusion in the Work by the copyright owner |
|
| 51 |
+ or by an individual or Legal Entity authorized to submit on behalf of |
|
| 52 |
+ the copyright owner. For the purposes of this definition, "submitted" |
|
| 53 |
+ means any form of electronic, verbal, or written communication sent |
|
| 54 |
+ to the Licensor or its representatives, including but not limited to |
|
| 55 |
+ communication on electronic mailing lists, source code control systems, |
|
| 56 |
+ and issue tracking systems that are managed by, or on behalf of, the |
|
| 57 |
+ Licensor for the purpose of discussing and improving the Work, but |
|
| 58 |
+ excluding communication that is conspicuously marked or otherwise |
|
| 59 |
+ designated in writing by the copyright owner as "Not a Contribution." |
|
| 60 |
+ |
|
| 61 |
+ "Contributor" shall mean Licensor and any individual or Legal Entity |
|
| 62 |
+ on behalf of whom a Contribution has been received by Licensor and |
|
| 63 |
+ subsequently incorporated within the Work. |
|
| 64 |
+ |
|
| 65 |
+ 2. Grant of Copyright License. Subject to the terms and conditions of |
|
| 66 |
+ this License, each Contributor hereby grants to You a perpetual, |
|
| 67 |
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
|
| 68 |
+ copyright license to reproduce, prepare Derivative Works of, |
|
| 69 |
+ publicly display, publicly perform, sublicense, and distribute the |
|
| 70 |
+ Work and such Derivative Works in Source or Object form. |
|
| 71 |
+ |
|
| 72 |
+ 3. Grant of Patent License. Subject to the terms and conditions of |
|
| 73 |
+ this License, each Contributor hereby grants to You a perpetual, |
|
| 74 |
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
|
| 75 |
+ (except as stated in this section) patent license to make, have made, |
|
| 76 |
+ use, offer to sell, sell, import, and otherwise transfer the Work, |
|
| 77 |
+ where such license applies only to those patent claims licensable |
|
| 78 |
+ by such Contributor that are necessarily infringed by their |
|
| 79 |
+ Contribution(s) alone or by combination of their Contribution(s) |
|
| 80 |
+ with the Work to which such Contribution(s) was submitted. If You |
|
| 81 |
+ institute patent litigation against any entity (including a |
|
| 82 |
+ cross-claim or counterclaim in a lawsuit) alleging that the Work |
|
| 83 |
+ or a Contribution incorporated within the Work constitutes direct |
|
| 84 |
+ or contributory patent infringement, then any patent licenses |
|
| 85 |
+ granted to You under this License for that Work shall terminate |
|
| 86 |
+ as of the date such litigation is filed. |
|
| 87 |
+ |
|
| 88 |
+ 4. Redistribution. You may reproduce and distribute copies of the |
|
| 89 |
+ Work or Derivative Works thereof in any medium, with or without |
|
| 90 |
+ modifications, and in Source or Object form, provided that You |
|
| 91 |
+ meet the following conditions: |
|
| 92 |
+ |
|
| 93 |
+ (a) You must give any other recipients of the Work or |
|
| 94 |
+ Derivative Works a copy of this License; and |
|
| 95 |
+ |
|
| 96 |
+ (b) You must cause any modified files to carry prominent notices |
|
| 97 |
+ stating that You changed the files; and |
|
| 98 |
+ |
|
| 99 |
+ (c) You must retain, in the Source form of any Derivative Works |
|
| 100 |
+ that You distribute, all copyright, patent, trademark, and |
|
| 101 |
+ attribution notices from the Source form of the Work, |
|
| 102 |
+ excluding those notices that do not pertain to any part of |
|
| 103 |
+ the Derivative Works; and |
|
| 104 |
+ |
|
| 105 |
+ (d) If the Work includes a "NOTICE" text file as part of its |
|
| 106 |
+ distribution, then any Derivative Works that You distribute must |
|
| 107 |
+ include a readable copy of the attribution notices contained |
|
| 108 |
+ within such NOTICE file, excluding those notices that do not |
|
| 109 |
+ pertain to any part of the Derivative Works, in at least one |
|
| 110 |
+ of the following places: within a NOTICE text file distributed |
|
| 111 |
+ as part of the Derivative Works; within the Source form or |
|
| 112 |
+ documentation, if provided along with the Derivative Works; or, |
|
| 113 |
+ within a display generated by the Derivative Works, if and |
|
| 114 |
+ wherever such third-party notices normally appear. The contents |
|
| 115 |
+ of the NOTICE file are for informational purposes only and |
|
| 116 |
+ do not modify the License. You may add Your own attribution |
|
| 117 |
+ notices within Derivative Works that You distribute, alongside |
|
| 118 |
+ or as an addendum to the NOTICE text from the Work, provided |
|
| 119 |
+ that such additional attribution notices cannot be construed |
|
| 120 |
+ as modifying the License. |
|
| 121 |
+ |
|
| 122 |
+ You may add Your own copyright statement to Your modifications and |
|
| 123 |
+ may provide additional or different license terms and conditions |
|
| 124 |
+ for use, reproduction, or distribution of Your modifications, or |
|
| 125 |
+ for any such Derivative Works as a whole, provided Your use, |
|
| 126 |
+ reproduction, and distribution of the Work otherwise complies with |
|
| 127 |
+ the conditions stated in this License. |
|
| 128 |
+ |
|
| 129 |
+ 5. Submission of Contributions. Unless You explicitly state otherwise, |
|
| 130 |
+ any Contribution intentionally submitted for inclusion in the Work |
|
| 131 |
+ by You to the Licensor shall be under the terms and conditions of |
|
| 132 |
+ this License, without any additional terms or conditions. |
|
| 133 |
+ Notwithstanding the above, nothing herein shall supersede or modify |
|
| 134 |
+ the terms of any separate license agreement you may have executed |
|
| 135 |
+ with Licensor regarding such Contributions. |
|
| 136 |
+ |
|
| 137 |
+ 6. Trademarks. This License does not grant permission to use the trade |
|
| 138 |
+ names, trademarks, service marks, or product names of the Licensor, |
|
| 139 |
+ except as required for reasonable and customary use in describing the |
|
| 140 |
+ origin of the Work and reproducing the content of the NOTICE file. |
|
| 141 |
+ |
|
| 142 |
+ 7. Disclaimer of Warranty. Unless required by applicable law or |
|
| 143 |
+ agreed to in writing, Licensor provides the Work (and each |
|
| 144 |
+ Contributor provides its Contributions) on an "AS IS" BASIS, |
|
| 145 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
|
| 146 |
+ implied, including, without limitation, any warranties or conditions |
|
| 147 |
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
|
| 148 |
+ PARTICULAR PURPOSE. You are solely responsible for determining the |
|
| 149 |
+ appropriateness of using or redistributing the Work and assume any |
|
| 150 |
+ risks associated with Your exercise of permissions under this License. |
|
| 151 |
+ |
|
| 152 |
+ 8. Limitation of Liability. In no event and under no legal theory, |
|
| 153 |
+ whether in tort (including negligence), contract, or otherwise, |
|
| 154 |
+ unless required by applicable law (such as deliberate and grossly |
|
| 155 |
+ negligent acts) or agreed to in writing, shall any Contributor be |
|
| 156 |
+ liable to You for damages, including any direct, indirect, special, |
|
| 157 |
+ incidental, or consequential damages of any character arising as a |
|
| 158 |
+ result of this License or out of the use or inability to use the |
|
| 159 |
+ Work (including but not limited to damages for loss of goodwill, |
|
| 160 |
+ work stoppage, computer failure or malfunction, or any and all |
|
| 161 |
+ other commercial damages or losses), even if such Contributor |
|
| 162 |
+ has been advised of the possibility of such damages. |
|
| 163 |
+ |
|
| 164 |
+ 9. Accepting Warranty or Additional Liability. While redistributing |
|
| 165 |
+ the Work or Derivative Works thereof, You may choose to offer, |
|
| 166 |
+ and charge a fee for, acceptance of support, warranty, indemnity, |
|
| 167 |
+ or other liability obligations and/or rights consistent with this |
|
| 168 |
+ License. However, in accepting such obligations, You may act only |
|
| 169 |
+ on Your own behalf and on Your sole responsibility, not on behalf |
|
| 170 |
+ of any other Contributor, and only if You agree to indemnify, |
|
| 171 |
+ defend, and hold each Contributor harmless for any liability |
|
| 172 |
+ incurred by, or claims asserted against, such Contributor by reason |
|
| 173 |
+ of your accepting any such warranty or additional liability. |
|
| 174 |
+ |
|
| 175 |
+ END OF TERMS AND CONDITIONS |
|
| 176 |
+ |
|
| 177 |
+ APPENDIX: How to apply the Apache License to your work. |
|
| 178 |
+ |
|
| 179 |
+ To apply the Apache License to your work, attach the following |
|
| 180 |
+ boilerplate notice, with the fields enclosed by brackets "{}"
|
|
| 181 |
+ replaced with your own identifying information. (Don't include |
|
| 182 |
+ the brackets!) The text should be enclosed in the appropriate |
|
| 183 |
+ comment syntax for the file format. We also recommend that a |
|
| 184 |
+ file or class name and description of purpose be included on the |
|
| 185 |
+ same "printed page" as the copyright notice for easier |
|
| 186 |
+ identification within third-party archives. |
|
| 187 |
+ |
|
| 188 |
+ Copyright {yyyy} {name of copyright owner}
|
|
| 189 |
+ |
|
| 190 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 191 |
+ you may not use this file except in compliance with the License. |
|
| 192 |
+ You may obtain a copy of the License at |
|
| 193 |
+ |
|
| 194 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 195 |
+ |
|
| 196 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 197 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 198 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 199 |
+ See the License for the specific language governing permissions and |
|
| 200 |
+ limitations under the License. |
| 0 | 201 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,8 @@ |
| 0 |
+Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> (@AkihiroSuda) |
|
| 1 |
+Aleksa Sarai <cyphar@cyphar.com> (@cyphar) |
|
| 2 |
+Kir Kolyshkin <kolyshkin@gmail.com> (@kolyshkin) |
|
| 3 |
+Mrunal Patel <mpatel@redhat.com> (@mrunalp) |
|
| 4 |
+Sebastiaan van Stijn <github@gone.nl> (@thaJeztah) |
|
| 5 |
+Odin Ugedal <odin@uged.al> (@odinuge) |
|
| 6 |
+Peter Hunt <pehunt@redhat.com> (@haircommander) |
|
| 7 |
+Davanum Srinivas <davanum@gmail.com> (@dims) |
| 0 | 8 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,92 @@ |
| 0 |
+## Introduction |
|
| 1 |
+ |
|
| 2 |
+Dear maintainer. Thank you for investing the time and energy to help |
|
| 3 |
+make this project as useful as possible. Maintaining a project is difficult, |
|
| 4 |
+sometimes unrewarding work. Sure, you will get to contribute cool |
|
| 5 |
+features to the project. But most of your time will be spent reviewing, |
|
| 6 |
+cleaning up, documenting, answering questions, justifying design |
|
| 7 |
+decisions - while everyone else has all the fun! But remember - the quality |
|
| 8 |
+of the maintainers' work is what distinguishes the good projects from the |
|
| 9 |
+great. So please be proud of your work, even the unglamorous parts, |
|
| 10 |
+and encourage a culture of appreciation and respect for *every* aspect |
|
| 11 |
+of improving the project - not just the hot new features. |
|
| 12 |
+ |
|
| 13 |
+This document is a manual for maintainers old and new. It explains what |
|
| 14 |
+is expected of maintainers, how they should work, and what tools are |
|
| 15 |
+available to them. |
|
| 16 |
+ |
|
| 17 |
+This is a living document - if you see something out of date or missing, |
|
| 18 |
+speak up! |
|
| 19 |
+ |
|
| 20 |
+## What are a maintainer's responsibilities? |
|
| 21 |
+ |
|
| 22 |
+It is every maintainer's responsibility to: |
|
| 23 |
+ |
|
| 24 |
+* Expose a clear roadmap for improving their component. |
|
| 25 |
+* Deliver prompt feedback and decisions on pull requests. |
|
| 26 |
+* Be available to anyone with questions, bug reports, criticism etc. on their component. |
|
| 27 |
+ This includes IRC and GitHub issues and pull requests. |
|
| 28 |
+* Make sure their component respects the philosophy, design and roadmap of the project. |
|
| 29 |
+ |
|
| 30 |
+## How are decisions made? |
|
| 31 |
+ |
|
| 32 |
+This project is an open-source project with an open design philosophy. This |
|
| 33 |
+means that the repository is the source of truth for EVERY aspect of the |
|
| 34 |
+project, including its philosophy, design, roadmap and APIs. *If it's |
|
| 35 |
+part of the project, it's in the repo. It's in the repo, it's part of |
|
| 36 |
+the project.* |
|
| 37 |
+ |
|
| 38 |
+As a result, all decisions can be expressed as changes to the |
|
| 39 |
+repository. An implementation change is a change to the source code. An |
|
| 40 |
+API change is a change to the API specification. A philosophy change is |
|
| 41 |
+a change to the philosophy manifesto. And so on. |
|
| 42 |
+ |
|
| 43 |
+All decisions affecting this project, big and small, follow the same procedure: |
|
| 44 |
+ |
|
| 45 |
+1. Discuss a proposal on the [mailing list](CONTRIBUTING.md#mailing-list). |
|
| 46 |
+ Anyone can do this. |
|
| 47 |
+2. Open a pull request. |
|
| 48 |
+ Anyone can do this. |
|
| 49 |
+3. Discuss the pull request. |
|
| 50 |
+ Anyone can do this. |
|
| 51 |
+4. Endorse (`LGTM`) or oppose (`Rejected`) the pull request. |
|
| 52 |
+ The relevant maintainers do this (see below [Who decides what?](#who-decides-what)). |
|
| 53 |
+ Changes that affect project management (changing policy, cutting releases, etc.) are [proposed and voted on the mailing list](GOVERNANCE.md). |
|
| 54 |
+5. Merge or close the pull request. |
|
| 55 |
+ The relevant maintainers do this. |
|
| 56 |
+ |
|
| 57 |
+### I'm a maintainer, should I make pull requests too? |
|
| 58 |
+ |
|
| 59 |
+Yes. Nobody should ever push to master directly. All changes should be |
|
| 60 |
+made through a pull request. |
|
| 61 |
+ |
|
| 62 |
+## Who decides what? |
|
| 63 |
+ |
|
| 64 |
+All decisions are pull requests, and the relevant maintainers make |
|
| 65 |
+decisions by accepting or refusing the pull request. Review and acceptance |
|
| 66 |
+by anyone is denoted by adding a comment in the pull request: `LGTM`. |
|
| 67 |
+However, only currently listed `MAINTAINERS` are counted towards the required |
|
| 68 |
+two LGTMs. In addition, if a maintainer has created a pull request, they cannot |
|
| 69 |
+count toward the two LGTM rule (to ensure equal amounts of review for every pull |
|
| 70 |
+request, no matter who wrote it). |
|
| 71 |
+ |
|
| 72 |
+Overall the maintainer system works because of mutual respect. |
|
| 73 |
+The maintainers trust one another to act in the best interests of the project. |
|
| 74 |
+Sometimes maintainers can disagree and this is part of a healthy project to represent the points of view of various people. |
|
| 75 |
+In the case where maintainers cannot find agreement on a specific change, maintainers should use the [governance procedure](GOVERNANCE.md) to attempt to reach a consensus. |
|
| 76 |
+ |
|
| 77 |
+### How are maintainers added? |
|
| 78 |
+ |
|
| 79 |
+The best maintainers have a vested interest in the project. Maintainers |
|
| 80 |
+are first and foremost contributors that have shown they are committed to |
|
| 81 |
+the long term success of the project. Contributors wanting to become |
|
| 82 |
+maintainers are expected to be deeply involved in contributing code, |
|
| 83 |
+pull request review, and triage of issues in the project for more than two months. |
|
| 84 |
+ |
|
| 85 |
+Just contributing does not make you a maintainer; it is about building trust with the current maintainers of the project and being a person that they can depend on to act in the best interest of the project. |
|
| 86 |
+The final vote to add a new maintainer should be approved by the [governance procedure](GOVERNANCE.md). |
|
| 87 |
+ |
|
| 88 |
+### How are maintainers removed? |
|
| 89 |
+ |
|
| 90 |
+When a maintainer is unable to perform the [required duties](#what-are-a-maintainers-responsibilities) they can be removed by the [governance procedure](GOVERNANCE.md). |
|
| 91 |
+Issues related to a maintainer's performance should be discussed with them among the other maintainers so that they are not surprised by a pull request removing them. |
| 0 | 92 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,11 @@ |
| 0 |
+# OCI Project Template |
|
| 1 |
+ |
|
| 2 |
+Useful boilerplate and organizational information for all OCI projects. |
|
| 3 |
+ |
|
| 4 |
+* README (this file) |
|
| 5 |
+* [The Apache License, Version 2.0](LICENSE) |
|
| 6 |
+* [A list of maintainers](MAINTAINERS) |
|
| 7 |
+* [Maintainer guidelines](MAINTAINERS_GUIDE.md) |
|
| 8 |
+* [Contributor guidelines](CONTRIBUTING.md) |
|
| 9 |
+* [Project governance](GOVERNANCE.md) |
|
| 10 |
+* [Release procedures](RELEASES.md) |
| 0 | 11 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,51 @@ |
| 0 |
+# Releases |
|
| 1 |
+ |
|
| 2 |
+The release process hopes to encourage early, consistent consensus-building during project development. |
|
| 3 |
+The mechanisms used are regular community communication on the mailing list about progress, scheduled meetings for issue resolution and release triage, and regularly paced and communicated releases. |
|
| 4 |
+Releases are proposed and adopted or rejected using the usual [project governance](GOVERNANCE.md) rules and procedures. |
|
| 5 |
+ |
|
| 6 |
+An anti-pattern that we want to avoid is heavy development or discussions "late cycle" around major releases. |
|
| 7 |
+We want to build a community that is involved and communicates consistently through all releases instead of relying on "silent periods" as a judge of stability. |
|
| 8 |
+ |
|
| 9 |
+## Parallel releases |
|
| 10 |
+ |
|
| 11 |
+A single project MAY consider several motions to release in parallel. |
|
| 12 |
+However, each motion to release after the initial 0.1.0 MUST be based on a previous release that has already landed. |
|
| 13 |
+ |
|
| 14 |
+For example, runtime-spec maintainers may propose a v1.0.0-rc2 on the 1st of the month and a v0.9.1 bugfix on the 2nd of the month. |
|
| 15 |
+They may not propose a v1.0.0-rc3 until the v1.0.0-rc2 is accepted (on the 7th if the vote initiated on the 1st passes). |
|
| 16 |
+ |
|
| 17 |
+## Specifications |
|
| 18 |
+ |
|
| 19 |
+The OCI maintains three categories of projects: specifications, applications, and conformance-testing tools. |
|
| 20 |
+However, specification releases have special restrictions in the [OCI charter][charter]: |
|
| 21 |
+ |
|
| 22 |
+* They are the target of backwards compatibility (§7.g), and |
|
| 23 |
+* They are subject to the OWFa patent grant (§8.d and e). |
|
| 24 |
+ |
|
| 25 |
+To avoid unfortunate side effects (onerous backwards compatibility requirements or Member resignations), the following additional procedures apply to specification releases: |
|
| 26 |
+ |
|
| 27 |
+### Planning a release |
|
| 28 |
+ |
|
| 29 |
+Every OCI specification project SHOULD hold meetings that involve maintainers reviewing pull requests, debating outstanding issues, and planning releases. |
|
| 30 |
+This meeting MUST be advertised on the project README and MAY happen on a phone call, video conference, or on IRC. |
|
| 31 |
+Maintainers MUST send updates to the dev@opencontainers.org mailing list with results of these meetings. |
|
| 32 |
+ |
|
| 33 |
+Before the specification reaches v1.0.0, the meetings SHOULD be weekly. |
|
| 34 |
+Once a specification has reached v1.0.0, the maintainers may alter the cadence, but a meeting MUST be held within four weeks of the previous meeting. |
|
| 35 |
+ |
|
| 36 |
+The release plans, corresponding milestones and estimated due dates MUST be published on GitHub (e.g. https://github.com/opencontainers/runtime-spec/milestones). |
|
| 37 |
+GitHub milestones and issues are only used for community organization and all releases MUST follow the [project governance](GOVERNANCE.md) rules and procedures. |
|
| 38 |
+ |
|
| 39 |
+### Timelines |
|
| 40 |
+ |
|
| 41 |
+Specifications have a variety of different timelines in their lifecycle. |
|
| 42 |
+ |
|
| 43 |
+* Pre-v1.0.0 specifications SHOULD release on a monthly cadence to garner feedback. |
|
| 44 |
+* Major specification releases MUST release at least three release candidates spaced a minimum of one week apart. |
|
| 45 |
+ This means a major release like a v1.0.0 or v2.0.0 release will take 1 month at minimum: one week for rc1, one week for rc2, one week for rc3, and one week for the major release itself. |
|
| 46 |
+ Maintainers SHOULD strive to make zero breaking changes during this cycle of release candidates and SHOULD restart the three-candidate count when a breaking change is introduced. |
|
| 47 |
+ For example if a breaking change is introduced in v1.0.0-rc2 then the series would end with v1.0.0-rc4 and v1.0.0. |
|
| 48 |
+* Minor and patch releases SHOULD be made on an as-needed basis. |
|
| 49 |
+ |
|
| 50 |
+[charter]: https://www.opencontainers.org/about/governance |
| 0 | 51 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,78 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+) |
|
| 5 |
+ |
|
| 6 |
+var ( |
|
| 7 |
+ // ErrDevicesUnsupported is an error returned when a cgroup manager |
|
| 8 |
+ // is not configured to set device rules. |
|
| 9 |
+ ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
|
|
| 10 |
+ |
|
| 11 |
+ // ErrRootless is returned by [Manager.Apply] when there is an error |
|
| 12 |
+ // creating cgroup directory, and cgroup.Rootless is set. In general, |
|
| 13 |
+ // this error is to be ignored. |
|
| 14 |
+ ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")
|
|
| 15 |
+ |
|
| 16 |
+ // DevicesSetV1 and DevicesSetV2 are functions to set devices for |
|
| 17 |
+ // cgroup v1 and v2, respectively. Unless |
|
| 18 |
+ // [github.com/opencontainers/cgroups/devices] |
|
| 19 |
+ // package is imported, they are set to nil, so cgroup managers can't |
|
| 20 |
+ // manage devices. |
|
| 21 |
+ DevicesSetV1 func(path string, r *Resources) error |
|
| 22 |
+ DevicesSetV2 func(path string, r *Resources) error |
|
| 23 |
+) |
|
| 24 |
+ |
|
| 25 |
+type Manager interface {
|
|
| 26 |
+ // Apply creates a cgroup, if not yet created, and adds a process |
|
| 27 |
+ // with the specified pid into that cgroup. A special value of -1 |
|
| 28 |
+ // can be used to merely create a cgroup. |
|
| 29 |
+ Apply(pid int) error |
|
| 30 |
+ |
|
| 31 |
+ // GetPids returns the PIDs of all processes inside the cgroup. |
|
| 32 |
+ GetPids() ([]int, error) |
|
| 33 |
+ |
|
| 34 |
+ // GetAllPids returns the PIDs of all processes inside the cgroup |
|
| 35 |
+ // and all its sub-cgroups. |
|
| 36 |
+ GetAllPids() ([]int, error) |
|
| 37 |
+ |
|
| 38 |
+ // GetStats returns cgroups statistics. |
|
| 39 |
+ GetStats() (*Stats, error) |
|
| 40 |
+ |
|
| 41 |
+ // Freeze sets the freezer cgroup to the specified state. |
|
| 42 |
+ Freeze(state FreezerState) error |
|
| 43 |
+ |
|
| 44 |
+ // Destroy removes cgroup. |
|
| 45 |
+ Destroy() error |
|
| 46 |
+ |
|
| 47 |
+ // Path returns a cgroup path to the specified controller/subsystem. |
|
| 48 |
+ // For cgroupv2, the argument is unused and can be empty. |
|
| 49 |
+ Path(string) string |
|
| 50 |
+ |
|
| 51 |
+ // Set sets cgroup resources parameters/limits. If the argument is nil, |
|
| 52 |
+ // the resources specified during Manager creation (or the previous call |
|
| 53 |
+ // to Set) are used. |
|
| 54 |
+ Set(r *Resources) error |
|
| 55 |
+ |
|
| 56 |
+ // GetPaths returns cgroup path(s) to save in a state file in order to |
|
| 57 |
+ // restore later. |
|
| 58 |
+ // |
|
| 59 |
+ // For cgroup v1, a key is cgroup subsystem name, and the value is the |
|
| 60 |
+ // path to the cgroup for this subsystem. |
|
| 61 |
+ // |
|
| 62 |
+ // For cgroup v2 unified hierarchy, a key is "", and the value is the |
|
| 63 |
+ // unified path. |
|
| 64 |
+ GetPaths() map[string]string |
|
| 65 |
+ |
|
| 66 |
+ // GetCgroups returns the cgroup data as configured. |
|
| 67 |
+ GetCgroups() (*Cgroup, error) |
|
| 68 |
+ |
|
| 69 |
+ // GetFreezerState retrieves the current FreezerState of the cgroup. |
|
| 70 |
+ GetFreezerState() (FreezerState, error) |
|
| 71 |
+ |
|
| 72 |
+ // Exists returns whether the cgroup path exists or not. |
|
| 73 |
+ Exists() bool |
|
| 74 |
+ |
|
| 75 |
+ // OOMKillCount reports OOM kill count for the cgroup. |
|
| 76 |
+ OOMKillCount() (uint64, error) |
|
| 77 |
+} |
| 0 | 78 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,66 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import "fmt" |
|
| 3 |
+ |
|
| 4 |
+// BlockIODevice holds major:minor format supported in blkio cgroup. |
|
| 5 |
+type BlockIODevice struct {
|
|
| 6 |
+ // Major is the device's major number |
|
| 7 |
+ Major int64 `json:"major"` |
|
| 8 |
+ // Minor is the device's minor number |
|
| 9 |
+ Minor int64 `json:"minor"` |
|
| 10 |
+} |
|
| 11 |
+ |
|
| 12 |
+// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair |
|
| 13 |
+type WeightDevice struct {
|
|
| 14 |
+ BlockIODevice |
|
| 15 |
+ // Weight is the bandwidth rate for the device, range is from 10 to 1000 |
|
| 16 |
+ Weight uint16 `json:"weight"` |
|
| 17 |
+ // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only |
|
| 18 |
+ LeafWeight uint16 `json:"leafWeight"` |
|
| 19 |
+} |
|
| 20 |
+ |
|
| 21 |
+// NewWeightDevice returns a configured WeightDevice pointer |
|
| 22 |
+func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
|
|
| 23 |
+ wd := &WeightDevice{}
|
|
| 24 |
+ wd.Major = major |
|
| 25 |
+ wd.Minor = minor |
|
| 26 |
+ wd.Weight = weight |
|
| 27 |
+ wd.LeafWeight = leafWeight |
|
| 28 |
+ return wd |
|
| 29 |
+} |
|
| 30 |
+ |
|
| 31 |
+// WeightString formats the struct to be writable to the cgroup specific file |
|
| 32 |
+func (wd *WeightDevice) WeightString() string {
|
|
| 33 |
+ return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
|
|
| 34 |
+} |
|
| 35 |
+ |
|
| 36 |
+// LeafWeightString formats the struct to be writable to the cgroup specific file |
|
| 37 |
+func (wd *WeightDevice) LeafWeightString() string {
|
|
| 38 |
+ return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
|
|
| 39 |
+} |
|
| 40 |
+ |
|
| 41 |
+// ThrottleDevice struct holds a `major:minor rate_per_second` pair |
|
| 42 |
+type ThrottleDevice struct {
|
|
| 43 |
+ BlockIODevice |
|
| 44 |
+ // Rate is the IO rate limit per cgroup per device |
|
| 45 |
+ Rate uint64 `json:"rate"` |
|
| 46 |
+} |
|
| 47 |
+ |
|
| 48 |
+// NewThrottleDevice returns a configured ThrottleDevice pointer |
|
| 49 |
+func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
|
|
| 50 |
+ td := &ThrottleDevice{}
|
|
| 51 |
+ td.Major = major |
|
| 52 |
+ td.Minor = minor |
|
| 53 |
+ td.Rate = rate |
|
| 54 |
+ return td |
|
| 55 |
+} |
|
| 56 |
+ |
|
| 57 |
+// String formats the struct to be writable to the cgroup specific file |
|
| 58 |
+func (td *ThrottleDevice) String() string {
|
|
| 59 |
+ return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
|
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
+// StringName formats the struct to be writable to the cgroup specific file |
|
| 63 |
+func (td *ThrottleDevice) StringName(name string) string {
|
|
| 64 |
+ return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
|
|
| 65 |
+} |
| 0 | 9 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,14 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+) |
|
| 5 |
+ |
|
| 6 |
+type IfPrioMap struct {
|
|
| 7 |
+ Interface string `json:"interface"` |
|
| 8 |
+ Priority int64 `json:"priority"` |
|
| 9 |
+} |
|
| 10 |
+ |
|
| 11 |
+func (i *IfPrioMap) CgroupString() string {
|
|
| 12 |
+ return fmt.Sprintf("%s %d", i.Interface, i.Priority)
|
|
| 13 |
+} |
| 0 | 14 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,169 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ systemdDbus "github.com/coreos/go-systemd/v22/dbus" |
|
| 4 |
+ devices "github.com/opencontainers/cgroups/devices/config" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
+type FreezerState string |
|
| 8 |
+ |
|
| 9 |
+const ( |
|
| 10 |
+ Undefined FreezerState = "" |
|
| 11 |
+ Frozen FreezerState = "FROZEN" |
|
| 12 |
+ Thawed FreezerState = "THAWED" |
|
| 13 |
+) |
|
| 14 |
+ |
|
| 15 |
+// Cgroup holds properties of a cgroup on Linux. |
|
| 16 |
+type Cgroup struct {
|
|
| 17 |
+ // Name specifies the name of the cgroup |
|
| 18 |
+ Name string `json:"name,omitempty"` |
|
| 19 |
+ |
|
| 20 |
+ // Parent specifies the name of parent of cgroup or slice |
|
| 21 |
+ Parent string `json:"parent,omitempty"` |
|
| 22 |
+ |
|
| 23 |
+ // Path specifies the path to cgroups that are created and/or joined by the container. |
|
| 24 |
+ // The path is assumed to be relative to the host system cgroup mountpoint. |
|
| 25 |
+ Path string `json:"path"` |
|
| 26 |
+ |
|
| 27 |
+ // ScopePrefix describes prefix for the scope name |
|
| 28 |
+ ScopePrefix string `json:"scope_prefix"` |
|
| 29 |
+ |
|
| 30 |
+ // Resources contains various cgroups settings to apply |
|
| 31 |
+ *Resources |
|
| 32 |
+ |
|
| 33 |
+ // Systemd tells if systemd should be used to manage cgroups. |
|
| 34 |
+ Systemd bool |
|
| 35 |
+ |
|
| 36 |
+ // SystemdProps are any additional properties for systemd, |
|
| 37 |
+ // derived from org.systemd.property.xxx annotations. |
|
| 38 |
+ // Ignored unless systemd is used for managing cgroups. |
|
| 39 |
+ SystemdProps []systemdDbus.Property `json:"-"` |
|
| 40 |
+ |
|
| 41 |
+ // Rootless tells if rootless cgroups should be used. |
|
| 42 |
+ Rootless bool |
|
| 43 |
+ |
|
| 44 |
+ // The host UID that should own the cgroup, or nil to accept |
|
| 45 |
+ // the default ownership. This should only be set when the |
|
| 46 |
+ // cgroupfs is to be mounted read/write. |
|
| 47 |
+ // Not all cgroup manager implementations support changing |
|
| 48 |
+ // the ownership. |
|
| 49 |
+ OwnerUID *int `json:"owner_uid,omitempty"` |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 52 |
+type Resources struct {
|
|
| 53 |
+ // Devices is the set of access rules for devices in the container. |
|
| 54 |
+ Devices []*devices.Rule `json:"devices"` |
|
| 55 |
+ |
|
| 56 |
+ // Memory limit (in bytes) |
|
| 57 |
+ Memory int64 `json:"memory"` |
|
| 58 |
+ |
|
| 59 |
+ // Memory reservation or soft_limit (in bytes) |
|
| 60 |
+ MemoryReservation int64 `json:"memory_reservation"` |
|
| 61 |
+ |
|
| 62 |
+ // Total memory usage (memory + swap); set `-1` to enable unlimited swap |
|
| 63 |
+ MemorySwap int64 `json:"memory_swap"` |
|
| 64 |
+ |
|
| 65 |
+ // CPU shares (relative weight vs. other containers) |
|
| 66 |
+ CpuShares uint64 `json:"cpu_shares"` |
|
| 67 |
+ |
|
| 68 |
+ // CPU hardcap limit (in usecs). Allowed cpu time in a given period. |
|
| 69 |
+ CpuQuota int64 `json:"cpu_quota"` |
|
| 70 |
+ |
|
| 71 |
+ // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. |
|
| 72 |
+ CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive |
|
| 73 |
+ |
|
| 74 |
+ // CPU period to be used for hardcapping (in usecs). 0 to use system default. |
|
| 75 |
+ CpuPeriod uint64 `json:"cpu_period"` |
|
| 76 |
+ |
|
| 77 |
+ // How much time the CPU will use for realtime scheduling (in usecs). |
|
| 78 |
+ CpuRtRuntime int64 `json:"cpu_rt_quota"` |
|
| 79 |
+ |
|
| 80 |
+ // CPU period to be used for realtime scheduling (in usecs). |
|
| 81 |
+ CpuRtPeriod uint64 `json:"cpu_rt_period"` |
|
| 82 |
+ |
|
| 83 |
+ // CPU to use |
|
| 84 |
+ CpusetCpus string `json:"cpuset_cpus"` |
|
| 85 |
+ |
|
| 86 |
+ // MEM to use |
|
| 87 |
+ CpusetMems string `json:"cpuset_mems"` |
|
| 88 |
+ |
|
| 89 |
+ // cgroup SCHED_IDLE |
|
| 90 |
+ CPUIdle *int64 `json:"cpu_idle,omitempty"` |
|
| 91 |
+ |
|
| 92 |
+ // Process limit; set <= `0` to disable limit. |
|
| 93 |
+ PidsLimit int64 `json:"pids_limit"` |
|
| 94 |
+ |
|
| 95 |
+ // Specifies per cgroup weight, range is from 10 to 1000. |
|
| 96 |
+ BlkioWeight uint16 `json:"blkio_weight"` |
|
| 97 |
+ |
|
| 98 |
+ // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only |
|
| 99 |
+ BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` |
|
| 100 |
+ |
|
| 101 |
+ // Weight per cgroup per device, can override BlkioWeight. |
|
| 102 |
+ BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` |
|
| 103 |
+ |
|
| 104 |
+ // IO read rate limit per cgroup per device, bytes per second. |
|
| 105 |
+ BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` |
|
| 106 |
+ |
|
| 107 |
+ // IO write rate limit per cgroup per device, bytes per second. |
|
| 108 |
+ BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` |
|
| 109 |
+ |
|
| 110 |
+ // IO read rate limit per cgroup per device, IO per second. |
|
| 111 |
+ BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` |
|
| 112 |
+ |
|
| 113 |
+ // IO write rate limit per cgroup per device, IO per second. |
|
| 114 |
+ BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` |
|
| 115 |
+ |
|
| 116 |
+ // set the freeze value for the process |
|
| 117 |
+ Freezer FreezerState `json:"freezer"` |
|
| 118 |
+ |
|
| 119 |
+ // Hugetlb limit (in bytes) |
|
| 120 |
+ HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` |
|
| 121 |
+ |
|
| 122 |
+ // Whether to disable OOM Killer |
|
| 123 |
+ OomKillDisable bool `json:"oom_kill_disable"` |
|
| 124 |
+ |
|
| 125 |
+ // Tuning swappiness behaviour per cgroup |
|
| 126 |
+ MemorySwappiness *uint64 `json:"memory_swappiness"` |
|
| 127 |
+ |
|
| 128 |
+ // Set priority of network traffic for container |
|
| 129 |
+ NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` |
|
| 130 |
+ |
|
| 131 |
+ // Set class identifier for container's network packets |
|
| 132 |
+ NetClsClassid uint32 `json:"net_cls_classid_u"` |
|
| 133 |
+ |
|
| 134 |
+ // Rdma resource restriction configuration |
|
| 135 |
+ Rdma map[string]LinuxRdma `json:"rdma"` |
|
| 136 |
+ |
|
| 137 |
+ // Used on cgroups v2: |
|
| 138 |
+ |
|
| 139 |
+ // CpuWeight sets a proportional bandwidth limit. |
|
| 140 |
+ CpuWeight uint64 `json:"cpu_weight"` |
|
| 141 |
+ |
|
| 142 |
+ // Unified is cgroupv2-only key-value map. |
|
| 143 |
+ Unified map[string]string `json:"unified"` |
|
| 144 |
+ |
|
| 145 |
+ // SkipDevices allows to skip configuring device permissions. |
|
| 146 |
+ // Used by e.g. kubelet while creating a parent cgroup (kubepods) |
|
| 147 |
+ // common for many containers, and by runc update. |
|
| 148 |
+ // |
|
| 149 |
+ // NOTE it is impossible to start a container which has this flag set. |
|
| 150 |
+ SkipDevices bool `json:"-"` |
|
| 151 |
+ |
|
| 152 |
+ // SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup |
|
| 153 |
+ // freeze when setting resources. Only applicable to systemd legacy |
|
| 154 |
+ // (i.e. cgroup v1) manager (which uses freeze by default to avoid |
|
| 155 |
+ // spurious permission errors caused by systemd inability to update |
|
| 156 |
+ // device rules in a non-disruptive manner). |
|
| 157 |
+ // |
|
| 158 |
+ // If not set, a few methods (such as looking into cgroup's |
|
| 159 |
+ // devices.list and querying the systemd unit properties) are used |
|
| 160 |
+ // during Set() to figure out whether the freeze is required. Those |
|
| 161 |
+ // methods may be relatively slow, thus this flag. |
|
| 162 |
+ SkipFreezeOnSet bool `json:"-"` |
|
| 163 |
+ |
|
| 164 |
+ // MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check |
|
| 165 |
+ // if the new memory limits (Memory and MemorySwap) being set are lower |
|
| 166 |
+ // than the current memory usage, and reject if so. |
|
| 167 |
+ MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"` |
|
| 168 |
+} |
| 0 | 169 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,9 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11) |
|
| 3 |
+type LinuxRdma struct {
|
|
| 4 |
+ // Maximum number of HCA handles that can be opened. Default is "no limit". |
|
| 5 |
+ HcaHandles *uint32 `json:"hca_handles,omitempty"` |
|
| 6 |
+ // Maximum number of HCA objects that can be created. Default is "no limit". |
|
| 7 |
+ HcaObjects *uint32 `json:"hca_objects,omitempty"` |
|
| 8 |
+} |
| 0 | 8 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,174 @@ |
| 0 |
+package config |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "os" |
|
| 5 |
+ "strconv" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
+const ( |
|
| 9 |
+ Wildcard = -1 |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+type Device struct {
|
|
| 13 |
+ Rule |
|
| 14 |
+ |
|
| 15 |
+ // Path to the device. |
|
| 16 |
+ Path string `json:"path"` |
|
| 17 |
+ |
|
| 18 |
+ // FileMode permission bits for the device. |
|
| 19 |
+ FileMode os.FileMode `json:"file_mode"` |
|
| 20 |
+ |
|
| 21 |
+ // Uid of the device. |
|
| 22 |
+ Uid uint32 `json:"uid"` |
|
| 23 |
+ |
|
| 24 |
+ // Gid of the device. |
|
| 25 |
+ Gid uint32 `json:"gid"` |
|
| 26 |
+} |
|
| 27 |
+ |
|
| 28 |
+// Permissions is a cgroupv1-style string to represent device access. It |
|
| 29 |
+// has to be a string for backward compatibility reasons, hence why it has |
|
| 30 |
+// methods to do set operations. |
|
| 31 |
+type Permissions string |
|
| 32 |
+ |
|
| 33 |
+const ( |
|
| 34 |
+ deviceRead uint = (1 << iota) |
|
| 35 |
+ deviceWrite |
|
| 36 |
+ deviceMknod |
|
| 37 |
+) |
|
| 38 |
+ |
|
| 39 |
+func (p Permissions) toSet() uint {
|
|
| 40 |
+ var set uint |
|
| 41 |
+ for _, perm := range p {
|
|
| 42 |
+ switch perm {
|
|
| 43 |
+ case 'r': |
|
| 44 |
+ set |= deviceRead |
|
| 45 |
+ case 'w': |
|
| 46 |
+ set |= deviceWrite |
|
| 47 |
+ case 'm': |
|
| 48 |
+ set |= deviceMknod |
|
| 49 |
+ } |
|
| 50 |
+ } |
|
| 51 |
+ return set |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+func fromSet(set uint) Permissions {
|
|
| 55 |
+ var perm string |
|
| 56 |
+ if set&deviceRead == deviceRead {
|
|
| 57 |
+ perm += "r" |
|
| 58 |
+ } |
|
| 59 |
+ if set&deviceWrite == deviceWrite {
|
|
| 60 |
+ perm += "w" |
|
| 61 |
+ } |
|
| 62 |
+ if set&deviceMknod == deviceMknod {
|
|
| 63 |
+ perm += "m" |
|
| 64 |
+ } |
|
| 65 |
+ return Permissions(perm) |
|
| 66 |
+} |
|
| 67 |
+ |
|
| 68 |
+// Union returns the union of the two sets of Permissions. |
|
| 69 |
+func (p Permissions) Union(o Permissions) Permissions {
|
|
| 70 |
+ lhs := p.toSet() |
|
| 71 |
+ rhs := o.toSet() |
|
| 72 |
+ return fromSet(lhs | rhs) |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+// Difference returns the set difference of the two sets of Permissions. |
|
| 76 |
+// In set notation, A.Difference(B) gives you A\B. |
|
| 77 |
+func (p Permissions) Difference(o Permissions) Permissions {
|
|
| 78 |
+ lhs := p.toSet() |
|
| 79 |
+ rhs := o.toSet() |
|
| 80 |
+ return fromSet(lhs &^ rhs) |
|
| 81 |
+} |
|
| 82 |
+ |
|
| 83 |
+// Intersection computes the intersection of the two sets of Permissions. |
|
| 84 |
+func (p Permissions) Intersection(o Permissions) Permissions {
|
|
| 85 |
+ lhs := p.toSet() |
|
| 86 |
+ rhs := o.toSet() |
|
| 87 |
+ return fromSet(lhs & rhs) |
|
| 88 |
+} |
|
| 89 |
+ |
|
| 90 |
+// IsEmpty returns whether the set of permissions in a Permissions is |
|
| 91 |
+// empty. |
|
| 92 |
+func (p Permissions) IsEmpty() bool {
|
|
| 93 |
+ return p == Permissions("")
|
|
| 94 |
+} |
|
| 95 |
+ |
|
| 96 |
+// IsValid returns whether the set of permissions is a subset of valid |
|
| 97 |
+// permissions (namely, {r,w,m}).
|
|
| 98 |
+func (p Permissions) IsValid() bool {
|
|
| 99 |
+ return p == fromSet(p.toSet()) |
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+type Type rune |
|
| 103 |
+ |
|
| 104 |
+const ( |
|
| 105 |
+ WildcardDevice Type = 'a' |
|
| 106 |
+ BlockDevice Type = 'b' |
|
| 107 |
+ CharDevice Type = 'c' // or 'u' |
|
| 108 |
+ FifoDevice Type = 'p' |
|
| 109 |
+) |
|
| 110 |
+ |
|
| 111 |
+func (t Type) IsValid() bool {
|
|
| 112 |
+ switch t {
|
|
| 113 |
+ case WildcardDevice, BlockDevice, CharDevice, FifoDevice: |
|
| 114 |
+ return true |
|
| 115 |
+ default: |
|
| 116 |
+ return false |
|
| 117 |
+ } |
|
| 118 |
+} |
|
| 119 |
+ |
|
| 120 |
+func (t Type) CanMknod() bool {
|
|
| 121 |
+ switch t {
|
|
| 122 |
+ case BlockDevice, CharDevice, FifoDevice: |
|
| 123 |
+ return true |
|
| 124 |
+ default: |
|
| 125 |
+ return false |
|
| 126 |
+ } |
|
| 127 |
+} |
|
| 128 |
+ |
|
| 129 |
+func (t Type) CanCgroup() bool {
|
|
| 130 |
+ switch t {
|
|
| 131 |
+ case WildcardDevice, BlockDevice, CharDevice: |
|
| 132 |
+ return true |
|
| 133 |
+ default: |
|
| 134 |
+ return false |
|
| 135 |
+ } |
|
| 136 |
+} |
|
| 137 |
+ |
|
| 138 |
+type Rule struct {
|
|
| 139 |
+ // Type of device ('c' for char, 'b' for block). If set to 'a', this rule
|
|
| 140 |
+ // acts as a wildcard and all fields other than Allow are ignored. |
|
| 141 |
+ Type Type `json:"type"` |
|
| 142 |
+ |
|
| 143 |
+ // Major is the device's major number. |
|
| 144 |
+ Major int64 `json:"major"` |
|
| 145 |
+ |
|
| 146 |
+ // Minor is the device's minor number. |
|
| 147 |
+ Minor int64 `json:"minor"` |
|
| 148 |
+ |
|
| 149 |
+ // Permissions is the set of permissions that this rule applies to (in the |
|
| 150 |
+ // cgroupv1 format -- any combination of "rwm"). |
|
| 151 |
+ Permissions Permissions `json:"permissions"` |
|
| 152 |
+ |
|
| 153 |
+ // Allow specifies whether this rule is allowed. |
|
| 154 |
+ Allow bool `json:"allow"` |
|
| 155 |
+} |
|
| 156 |
+ |
|
| 157 |
+func (d *Rule) CgroupString() string {
|
|
| 158 |
+ var ( |
|
| 159 |
+ major = strconv.FormatInt(d.Major, 10) |
|
| 160 |
+ minor = strconv.FormatInt(d.Minor, 10) |
|
| 161 |
+ ) |
|
| 162 |
+ if d.Major == Wildcard {
|
|
| 163 |
+ major = "*" |
|
| 164 |
+ } |
|
| 165 |
+ if d.Minor == Wildcard {
|
|
| 166 |
+ minor = "*" |
|
| 167 |
+ } |
|
| 168 |
+ return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
|
| 169 |
+} |
|
| 170 |
+ |
|
| 171 |
+func (d *Rule) Mkdev() (uint64, error) {
|
|
| 172 |
+ return mkDev(d) |
|
| 173 |
+} |
| 0 | 174 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,14 @@ |
| 0 |
+package config |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ |
|
| 5 |
+ "golang.org/x/sys/unix" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
+func mkDev(d *Rule) (uint64, error) {
|
|
| 9 |
+ if d.Major == Wildcard || d.Minor == Wildcard {
|
|
| 10 |
+ return 0, errors.New("cannot mkdev() device with wildcards")
|
|
| 11 |
+ } |
|
| 12 |
+ return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil |
|
| 13 |
+} |
| 0 | 14 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,216 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bytes" |
|
| 4 |
+ "errors" |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "os" |
|
| 7 |
+ "path/filepath" |
|
| 8 |
+ "strconv" |
|
| 9 |
+ "strings" |
|
| 10 |
+ "sync" |
|
| 11 |
+ |
|
| 12 |
+ "github.com/sirupsen/logrus" |
|
| 13 |
+ "golang.org/x/sys/unix" |
|
| 14 |
+) |
|
| 15 |
+ |
|
| 16 |
+// OpenFile opens a cgroup file in a given dir with given flags. |
|
| 17 |
+// It is supposed to be used for cgroup files only, and returns |
|
| 18 |
+// an error if the file is not a cgroup file. |
|
| 19 |
+// |
|
| 20 |
+// Arguments dir and file are joined together to form an absolute path |
|
| 21 |
+// to a file being opened. |
|
| 22 |
+func OpenFile(dir, file string, flags int) (*os.File, error) {
|
|
| 23 |
+ if dir == "" {
|
|
| 24 |
+ return nil, fmt.Errorf("no directory specified for %s", file)
|
|
| 25 |
+ } |
|
| 26 |
+ return openFile(dir, file, flags) |
|
| 27 |
+} |
|
| 28 |
+ |
|
| 29 |
+// ReadFile reads data from a cgroup file in dir. |
|
| 30 |
+// It is supposed to be used for cgroup files only. |
|
| 31 |
+func ReadFile(dir, file string) (string, error) {
|
|
| 32 |
+ fd, err := OpenFile(dir, file, unix.O_RDONLY) |
|
| 33 |
+ if err != nil {
|
|
| 34 |
+ return "", err |
|
| 35 |
+ } |
|
| 36 |
+ defer fd.Close() |
|
| 37 |
+ var buf bytes.Buffer |
|
| 38 |
+ |
|
| 39 |
+ _, err = buf.ReadFrom(fd) |
|
| 40 |
+ return buf.String(), err |
|
| 41 |
+} |
|
| 42 |
+ |
|
| 43 |
+// WriteFile writes data to a cgroup file in dir. |
|
| 44 |
+// It is supposed to be used for cgroup files only. |
|
| 45 |
+func WriteFile(dir, file, data string) error {
|
|
| 46 |
+ fd, err := OpenFile(dir, file, unix.O_WRONLY) |
|
| 47 |
+ if err != nil {
|
|
| 48 |
+ return err |
|
| 49 |
+ } |
|
| 50 |
+ defer fd.Close() |
|
| 51 |
+ if _, err := fd.WriteString(data); err != nil {
|
|
| 52 |
+ // Having data in the error message helps in debugging. |
|
| 53 |
+ return fmt.Errorf("failed to write %q: %w", data, err)
|
|
| 54 |
+ } |
|
| 55 |
+ return nil |
|
| 56 |
+} |
|
| 57 |
+ |
|
| 58 |
+// WriteFileByLine is the same as WriteFile, except if data contains newlines, |
|
| 59 |
+// it is written line by line. |
|
| 60 |
+func WriteFileByLine(dir, file, data string) error {
|
|
| 61 |
+ i := strings.Index(data, "\n") |
|
| 62 |
+ if i == -1 {
|
|
| 63 |
+ return WriteFile(dir, file, data) |
|
| 64 |
+ } |
|
| 65 |
+ |
|
| 66 |
+ fd, err := OpenFile(dir, file, unix.O_WRONLY) |
|
| 67 |
+ if err != nil {
|
|
| 68 |
+ return err |
|
| 69 |
+ } |
|
| 70 |
+ defer fd.Close() |
|
| 71 |
+ start := 0 |
|
| 72 |
+ for {
|
|
| 73 |
+ var line string |
|
| 74 |
+ if i == -1 {
|
|
| 75 |
+ line = data[start:] |
|
| 76 |
+ } else {
|
|
| 77 |
+ line = data[start : start+i+1] |
|
| 78 |
+ } |
|
| 79 |
+ _, err := fd.WriteString(line) |
|
| 80 |
+ if err != nil {
|
|
| 81 |
+ return fmt.Errorf("failed to write %q: %w", line, err)
|
|
| 82 |
+ } |
|
| 83 |
+ if i == -1 {
|
|
| 84 |
+ break |
|
| 85 |
+ } |
|
| 86 |
+ start += i + 1 |
|
| 87 |
+ i = strings.Index(data[start:], "\n") |
|
| 88 |
+ } |
|
| 89 |
+ return nil |
|
| 90 |
+} |
|
| 91 |
+ |
|
| 92 |
+const ( |
|
| 93 |
+ cgroupfsDir = "/sys/fs/cgroup" |
|
| 94 |
+ cgroupfsPrefix = cgroupfsDir + "/" |
|
| 95 |
+) |
|
| 96 |
+ |
|
| 97 |
+var ( |
|
| 98 |
+ // TestMode is set to true by unit tests that need "fake" cgroupfs. |
|
| 99 |
+ TestMode bool |
|
| 100 |
+ |
|
| 101 |
+ cgroupRootHandle *os.File |
|
| 102 |
+ prepOnce sync.Once |
|
| 103 |
+ prepErr error |
|
| 104 |
+ resolveFlags uint64 |
|
| 105 |
+) |
|
| 106 |
+ |
|
| 107 |
+func prepareOpenat2() error {
|
|
| 108 |
+ prepOnce.Do(func() {
|
|
| 109 |
+ fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
|
| 110 |
+ Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC, |
|
| 111 |
+ }) |
|
| 112 |
+ if err != nil {
|
|
| 113 |
+ prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
|
| 114 |
+ if err != unix.ENOSYS {
|
|
| 115 |
+ logrus.Warnf("falling back to securejoin: %s", prepErr)
|
|
| 116 |
+ } else {
|
|
| 117 |
+ logrus.Debug("openat2 not available, falling back to securejoin")
|
|
| 118 |
+ } |
|
| 119 |
+ return |
|
| 120 |
+ } |
|
| 121 |
+ file := os.NewFile(uintptr(fd), cgroupfsDir) |
|
| 122 |
+ |
|
| 123 |
+ var st unix.Statfs_t |
|
| 124 |
+ if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
|
|
| 125 |
+ prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
|
| 126 |
+ logrus.Warnf("falling back to securejoin: %s", prepErr)
|
|
| 127 |
+ return |
|
| 128 |
+ } |
|
| 129 |
+ |
|
| 130 |
+ cgroupRootHandle = file |
|
| 131 |
+ resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS |
|
| 132 |
+ if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
|
| 133 |
+ // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks |
|
| 134 |
+ resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS |
|
| 135 |
+ } |
|
| 136 |
+ }) |
|
| 137 |
+ |
|
| 138 |
+ return prepErr |
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
+func openFile(dir, file string, flags int) (*os.File, error) {
|
|
| 142 |
+ mode := os.FileMode(0) |
|
| 143 |
+ if TestMode && flags&os.O_WRONLY != 0 {
|
|
| 144 |
+ // "emulate" cgroup fs for unit tests |
|
| 145 |
+ flags |= os.O_TRUNC | os.O_CREATE |
|
| 146 |
+ mode = 0o600 |
|
| 147 |
+ } |
|
| 148 |
+ // NOTE it is important to use filepath.Clean("/"+file) here
|
|
| 149 |
+ // (see https://github.com/opencontainers/runc/issues/4103)! |
|
| 150 |
+ path := filepath.Join(dir, filepath.Clean("/"+file))
|
|
| 151 |
+ |
|
| 152 |
+ if prepareOpenat2() != nil {
|
|
| 153 |
+ return openFallback(path, flags, mode) |
|
| 154 |
+ } |
|
| 155 |
+ relPath, ok := strings.CutPrefix(path, cgroupfsPrefix) |
|
| 156 |
+ if !ok { // Non-standard path, old system?
|
|
| 157 |
+ return openFallback(path, flags, mode) |
|
| 158 |
+ } |
|
| 159 |
+ |
|
| 160 |
+ fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath, |
|
| 161 |
+ &unix.OpenHow{
|
|
| 162 |
+ Resolve: resolveFlags, |
|
| 163 |
+ Flags: uint64(flags) | unix.O_CLOEXEC, |
|
| 164 |
+ Mode: uint64(mode), |
|
| 165 |
+ }) |
|
| 166 |
+ if err != nil {
|
|
| 167 |
+ err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
|
| 168 |
+ // Check if cgroupRootHandle is still opened to cgroupfsDir |
|
| 169 |
+ // (happens when this package is incorrectly used |
|
| 170 |
+ // across the chroot/pivot_root/mntns boundary, or |
|
| 171 |
+ // when /sys/fs/cgroup is remounted). |
|
| 172 |
+ // |
|
| 173 |
+ // TODO: if such usage will ever be common, amend this |
|
| 174 |
+ // to reopen cgroupRootHandle and retry openat2. |
|
| 175 |
+ fdDest, fdErr := os.Readlink("/proc/thread-self/fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
|
|
| 176 |
+ if fdErr == nil && fdDest != cgroupfsDir {
|
|
| 177 |
+ // Wrap the error so it is clear that cgroupRootHandle |
|
| 178 |
+ // is opened to an unexpected/wrong directory. |
|
| 179 |
+ err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
|
|
| 180 |
+ cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err) |
|
| 181 |
+ } |
|
| 182 |
+ return nil, err |
|
| 183 |
+ } |
|
| 184 |
+ |
|
| 185 |
+ return os.NewFile(uintptr(fd), path), nil |
|
| 186 |
+} |
|
| 187 |
+ |
|
| 188 |
+var errNotCgroupfs = errors.New("not a cgroup file")
|
|
| 189 |
+ |
|
| 190 |
+// Can be changed by unit tests. |
|
| 191 |
+var openFallback = openAndCheck |
|
| 192 |
+ |
|
| 193 |
+// openAndCheck is used when openat2(2) is not available. It checks the opened |
|
| 194 |
+// file is on cgroupfs, returning an error otherwise. |
|
| 195 |
+func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
|
| 196 |
+ fd, err := os.OpenFile(path, flags, mode) |
|
| 197 |
+ if err != nil {
|
|
| 198 |
+ return nil, err |
|
| 199 |
+ } |
|
| 200 |
+ if TestMode {
|
|
| 201 |
+ return fd, nil |
|
| 202 |
+ } |
|
| 203 |
+ // Check this is a cgroupfs file. |
|
| 204 |
+ var st unix.Statfs_t |
|
| 205 |
+ if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
|
| 206 |
+ _ = fd.Close() |
|
| 207 |
+ return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
|
| 208 |
+ } |
|
| 209 |
+ if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
|
| 210 |
+ _ = fd.Close() |
|
| 211 |
+ return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
|
| 212 |
+ } |
|
| 213 |
+ |
|
| 214 |
+ return fd, nil |
|
| 215 |
+} |
| 0 | 216 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,27 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "io/fs" |
|
| 4 |
+ "path/filepath" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
+// GetAllPids returns all pids from the cgroup identified by path, and all its |
|
| 8 |
+// sub-cgroups. |
|
| 9 |
+func GetAllPids(path string) ([]int, error) {
|
|
| 10 |
+ var pids []int |
|
| 11 |
+ err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
|
| 12 |
+ if iErr != nil {
|
|
| 13 |
+ return iErr |
|
| 14 |
+ } |
|
| 15 |
+ if !d.IsDir() {
|
|
| 16 |
+ return nil |
|
| 17 |
+ } |
|
| 18 |
+ cPids, err := readProcsFile(p) |
|
| 19 |
+ if err != nil {
|
|
| 20 |
+ return err |
|
| 21 |
+ } |
|
| 22 |
+ pids = append(pids, cPids...) |
|
| 23 |
+ return nil |
|
| 24 |
+ }) |
|
| 25 |
+ return pids, err |
|
| 26 |
+} |
| 0 | 27 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,200 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+type ThrottlingData struct {
|
|
| 3 |
+ // Number of periods with throttling active |
|
| 4 |
+ Periods uint64 `json:"periods,omitempty"` |
|
| 5 |
+ // Number of periods when the container hit its throttling limit. |
|
| 6 |
+ ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` |
|
| 7 |
+ // Aggregate time the container was throttled for in nanoseconds. |
|
| 8 |
+ ThrottledTime uint64 `json:"throttled_time,omitempty"` |
|
| 9 |
+} |
|
| 10 |
+ |
|
| 11 |
+// CpuUsage denotes the usage of a CPU. |
|
| 12 |
+// All CPU stats are aggregate since container inception. |
|
| 13 |
+type CpuUsage struct {
|
|
| 14 |
+ // Total CPU time consumed. |
|
| 15 |
+ // Units: nanoseconds. |
|
| 16 |
+ TotalUsage uint64 `json:"total_usage,omitempty"` |
|
| 17 |
+ // Total CPU time consumed per core. |
|
| 18 |
+ // Units: nanoseconds. |
|
| 19 |
+ PercpuUsage []uint64 `json:"percpu_usage,omitempty"` |
|
| 20 |
+ // CPU time consumed per core in kernel mode |
|
| 21 |
+ // Units: nanoseconds. |
|
| 22 |
+ PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"` |
|
| 23 |
+ // CPU time consumed per core in user mode |
|
| 24 |
+ // Units: nanoseconds. |
|
| 25 |
+ PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"` |
|
| 26 |
+ // Time spent by tasks of the cgroup in kernel mode. |
|
| 27 |
+ // Units: nanoseconds. |
|
| 28 |
+ UsageInKernelmode uint64 `json:"usage_in_kernelmode"` |
|
| 29 |
+ // Time spent by tasks of the cgroup in user mode. |
|
| 30 |
+ // Units: nanoseconds. |
|
| 31 |
+ UsageInUsermode uint64 `json:"usage_in_usermode"` |
|
| 32 |
+} |
|
| 33 |
+ |
|
| 34 |
+type PSIData struct {
|
|
| 35 |
+ Avg10 float64 `json:"avg10"` |
|
| 36 |
+ Avg60 float64 `json:"avg60"` |
|
| 37 |
+ Avg300 float64 `json:"avg300"` |
|
| 38 |
+ Total uint64 `json:"total"` |
|
| 39 |
+} |
|
| 40 |
+ |
|
| 41 |
+type PSIStats struct {
|
|
| 42 |
+ Some PSIData `json:"some,omitempty"` |
|
| 43 |
+ Full PSIData `json:"full,omitempty"` |
|
| 44 |
+} |
|
| 45 |
+ |
|
| 46 |
+type CpuStats struct {
|
|
| 47 |
+ CpuUsage CpuUsage `json:"cpu_usage,omitempty"` |
|
| 48 |
+ ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` |
|
| 49 |
+ PSI *PSIStats `json:"psi,omitempty"` |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 52 |
+type CPUSetStats struct {
|
|
| 53 |
+ // List of the physical numbers of the CPUs on which processes |
|
| 54 |
+ // in that cpuset are allowed to execute |
|
| 55 |
+ CPUs []uint16 `json:"cpus,omitempty"` |
|
| 56 |
+ // cpu_exclusive flag |
|
| 57 |
+ CPUExclusive uint64 `json:"cpu_exclusive"` |
|
| 58 |
+ // List of memory nodes on which processes in that cpuset |
|
| 59 |
+ // are allowed to allocate memory |
|
| 60 |
+ Mems []uint16 `json:"mems,omitempty"` |
|
| 61 |
+ // mem_hardwall flag |
|
| 62 |
+ MemHardwall uint64 `json:"mem_hardwall"` |
|
| 63 |
+ // mem_exclusive flag |
|
| 64 |
+ MemExclusive uint64 `json:"mem_exclusive"` |
|
| 65 |
+ // memory_migrate flag |
|
| 66 |
+ MemoryMigrate uint64 `json:"memory_migrate"` |
|
| 67 |
+ // memory_spread page flag |
|
| 68 |
+ MemorySpreadPage uint64 `json:"memory_spread_page"` |
|
| 69 |
+ // memory_spread slab flag |
|
| 70 |
+ MemorySpreadSlab uint64 `json:"memory_spread_slab"` |
|
| 71 |
+ // memory_pressure |
|
| 72 |
+ MemoryPressure uint64 `json:"memory_pressure"` |
|
| 73 |
+ // sched_load balance flag |
|
| 74 |
+ SchedLoadBalance uint64 `json:"sched_load_balance"` |
|
| 75 |
+ // sched_relax_domain_level |
|
| 76 |
+ SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"` |
|
| 77 |
+} |
|
| 78 |
+ |
|
| 79 |
+type MemoryData struct {
|
|
| 80 |
+ Usage uint64 `json:"usage,omitempty"` |
|
| 81 |
+ MaxUsage uint64 `json:"max_usage,omitempty"` |
|
| 82 |
+ Failcnt uint64 `json:"failcnt"` |
|
| 83 |
+ Limit uint64 `json:"limit"` |
|
| 84 |
+} |
|
| 85 |
+ |
|
| 86 |
+type MemoryStats struct {
|
|
| 87 |
+ // memory used for cache |
|
| 88 |
+ Cache uint64 `json:"cache,omitempty"` |
|
| 89 |
+ // usage of memory |
|
| 90 |
+ Usage MemoryData `json:"usage,omitempty"` |
|
| 91 |
+ // usage of memory + swap |
|
| 92 |
+ SwapUsage MemoryData `json:"swap_usage,omitempty"` |
|
| 93 |
+ // usage of swap only |
|
| 94 |
+ SwapOnlyUsage MemoryData `json:"swap_only_usage,omitempty"` |
|
| 95 |
+ // usage of kernel memory |
|
| 96 |
+ KernelUsage MemoryData `json:"kernel_usage,omitempty"` |
|
| 97 |
+ // usage of kernel TCP memory |
|
| 98 |
+ KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` |
|
| 99 |
+ // usage of memory pages by NUMA node |
|
| 100 |
+ // see chapter 5.6 of memory controller documentation |
|
| 101 |
+ PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"` |
|
| 102 |
+ // if true, memory usage is accounted for throughout a hierarchy of cgroups. |
|
| 103 |
+ UseHierarchy bool `json:"use_hierarchy"` |
|
| 104 |
+ |
|
| 105 |
+ Stats map[string]uint64 `json:"stats,omitempty"` |
|
| 106 |
+ PSI *PSIStats `json:"psi,omitempty"` |
|
| 107 |
+} |
|
| 108 |
+ |
|
| 109 |
+type PageUsageByNUMA struct {
|
|
| 110 |
+ // Embedding is used as types can't be recursive. |
|
| 111 |
+ PageUsageByNUMAInner |
|
| 112 |
+ Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"` |
|
| 113 |
+} |
|
| 114 |
+ |
|
| 115 |
+type PageUsageByNUMAInner struct {
|
|
| 116 |
+ Total PageStats `json:"total,omitempty"` |
|
| 117 |
+ File PageStats `json:"file,omitempty"` |
|
| 118 |
+ Anon PageStats `json:"anon,omitempty"` |
|
| 119 |
+ Unevictable PageStats `json:"unevictable,omitempty"` |
|
| 120 |
+} |
|
| 121 |
+ |
|
| 122 |
+type PageStats struct {
|
|
| 123 |
+ Total uint64 `json:"total,omitempty"` |
|
| 124 |
+ Nodes map[uint8]uint64 `json:"nodes,omitempty"` |
|
| 125 |
+} |
|
| 126 |
+ |
|
| 127 |
+type PidsStats struct {
|
|
| 128 |
+ // number of pids in the cgroup |
|
| 129 |
+ Current uint64 `json:"current,omitempty"` |
|
| 130 |
+ // active pids hard limit |
|
| 131 |
+ Limit uint64 `json:"limit,omitempty"` |
|
| 132 |
+} |
|
| 133 |
+ |
|
| 134 |
+type BlkioStatEntry struct {
|
|
| 135 |
+ Major uint64 `json:"major,omitempty"` |
|
| 136 |
+ Minor uint64 `json:"minor,omitempty"` |
|
| 137 |
+ Op string `json:"op,omitempty"` |
|
| 138 |
+ Value uint64 `json:"value,omitempty"` |
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
+type BlkioStats struct {
|
|
| 142 |
+ // number of bytes transferred to and from the block device |
|
| 143 |
+ IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` |
|
| 144 |
+ IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` |
|
| 145 |
+ IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` |
|
| 146 |
+ IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` |
|
| 147 |
+ IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` |
|
| 148 |
+ IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` |
|
| 149 |
+ IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` |
|
| 150 |
+ SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` |
|
| 151 |
+ PSI *PSIStats `json:"psi,omitempty"` |
|
| 152 |
+} |
|
| 153 |
+ |
|
| 154 |
+type HugetlbStats struct {
|
|
| 155 |
+ // current res_counter usage for hugetlb |
|
| 156 |
+ Usage uint64 `json:"usage,omitempty"` |
|
| 157 |
+ // maximum usage ever recorded. |
|
| 158 |
+ MaxUsage uint64 `json:"max_usage,omitempty"` |
|
| 159 |
+ // number of times hugetlb usage allocation failure. |
|
| 160 |
+ Failcnt uint64 `json:"failcnt"` |
|
| 161 |
+} |
|
| 162 |
+ |
|
| 163 |
+type RdmaEntry struct {
|
|
| 164 |
+ Device string `json:"device,omitempty"` |
|
| 165 |
+ HcaHandles uint32 `json:"hca_handles,omitempty"` |
|
| 166 |
+ HcaObjects uint32 `json:"hca_objects,omitempty"` |
|
| 167 |
+} |
|
| 168 |
+ |
|
| 169 |
+type RdmaStats struct {
|
|
| 170 |
+ RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"` |
|
| 171 |
+ RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` |
|
| 172 |
+} |
|
| 173 |
+ |
|
| 174 |
+type MiscStats struct {
|
|
| 175 |
+ // current resource usage for a key in misc |
|
| 176 |
+ Usage uint64 `json:"usage,omitempty"` |
|
| 177 |
+ // number of times the resource usage was about to go over the max boundary |
|
| 178 |
+ Events uint64 `json:"events,omitempty"` |
|
| 179 |
+} |
|
| 180 |
+ |
|
| 181 |
+type Stats struct {
|
|
| 182 |
+ CpuStats CpuStats `json:"cpu_stats,omitempty"` |
|
| 183 |
+ CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` |
|
| 184 |
+ MemoryStats MemoryStats `json:"memory_stats,omitempty"` |
|
| 185 |
+ PidsStats PidsStats `json:"pids_stats,omitempty"` |
|
| 186 |
+ BlkioStats BlkioStats `json:"blkio_stats,omitempty"` |
|
| 187 |
+ // the map is in the format "size of hugepage: stats of the hugepage" |
|
| 188 |
+ HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` |
|
| 189 |
+ RdmaStats RdmaStats `json:"rdma_stats,omitempty"` |
|
| 190 |
+ // the map is in the format "misc resource name: stats of the key" |
|
| 191 |
+ MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` |
|
| 192 |
+} |
|
| 193 |
+ |
|
| 194 |
+func NewStats() *Stats {
|
|
| 195 |
+ memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
|
| 196 |
+ hugetlbStats := make(map[string]HugetlbStats) |
|
| 197 |
+ miscStats := make(map[string]MiscStats) |
|
| 198 |
+ return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
|
|
| 199 |
+} |
| 0 | 200 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,468 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bufio" |
|
| 4 |
+ "errors" |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "io" |
|
| 7 |
+ "os" |
|
| 8 |
+ "path/filepath" |
|
| 9 |
+ "strconv" |
|
| 10 |
+ "strings" |
|
| 11 |
+ "sync" |
|
| 12 |
+ "time" |
|
| 13 |
+ |
|
| 14 |
+ "github.com/moby/sys/userns" |
|
| 15 |
+ "github.com/sirupsen/logrus" |
|
| 16 |
+ "golang.org/x/sys/unix" |
|
| 17 |
+) |
|
| 18 |
+ |
|
| 19 |
+const ( |
|
| 20 |
+ CgroupProcesses = "cgroup.procs" |
|
| 21 |
+ unifiedMountpoint = "/sys/fs/cgroup" |
|
| 22 |
+ hybridMountpoint = "/sys/fs/cgroup/unified" |
|
| 23 |
+) |
|
| 24 |
+ |
|
| 25 |
+var ( |
|
| 26 |
+ isUnifiedOnce sync.Once |
|
| 27 |
+ isUnified bool |
|
| 28 |
+ isHybridOnce sync.Once |
|
| 29 |
+ isHybrid bool |
|
| 30 |
+) |
|
| 31 |
+ |
|
| 32 |
+// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. |
|
| 33 |
+func IsCgroup2UnifiedMode() bool {
|
|
| 34 |
+ isUnifiedOnce.Do(func() {
|
|
| 35 |
+ var st unix.Statfs_t |
|
| 36 |
+ err := unix.Statfs(unifiedMountpoint, &st) |
|
| 37 |
+ if err != nil {
|
|
| 38 |
+ level := logrus.WarnLevel |
|
| 39 |
+ if os.IsNotExist(err) && userns.RunningInUserNS() {
|
|
| 40 |
+ // For rootless containers, sweep it under the rug. |
|
| 41 |
+ level = logrus.DebugLevel |
|
| 42 |
+ } |
|
| 43 |
+ logrus.StandardLogger().Logf(level, |
|
| 44 |
+ "statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err) |
|
| 45 |
+ } |
|
| 46 |
+ isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC |
|
| 47 |
+ }) |
|
| 48 |
+ return isUnified |
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode. |
|
| 52 |
+func IsCgroup2HybridMode() bool {
|
|
| 53 |
+ isHybridOnce.Do(func() {
|
|
| 54 |
+ var st unix.Statfs_t |
|
| 55 |
+ err := unix.Statfs(hybridMountpoint, &st) |
|
| 56 |
+ if err != nil {
|
|
| 57 |
+ isHybrid = false |
|
| 58 |
+ if !os.IsNotExist(err) {
|
|
| 59 |
+ // Report unexpected errors. |
|
| 60 |
+ logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
|
|
| 61 |
+ } |
|
| 62 |
+ return |
|
| 63 |
+ } |
|
| 64 |
+ isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC |
|
| 65 |
+ }) |
|
| 66 |
+ return isHybrid |
|
| 67 |
+} |
|
| 68 |
+ |
|
| 69 |
+type Mount struct {
|
|
| 70 |
+ Mountpoint string |
|
| 71 |
+ Root string |
|
| 72 |
+ Subsystems []string |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+// GetCgroupMounts returns the mounts for the cgroup subsystems. |
|
| 76 |
+// all indicates whether to return just the first instance or all the mounts. |
|
| 77 |
+// This function should not be used from cgroupv2 code, as in this case |
|
| 78 |
+// all the controllers are available under the constant unifiedMountpoint. |
|
| 79 |
+func GetCgroupMounts(all bool) ([]Mount, error) {
|
|
| 80 |
+ if IsCgroup2UnifiedMode() {
|
|
| 81 |
+ // TODO: remove cgroupv2 case once all external users are converted |
|
| 82 |
+ availableControllers, err := GetAllSubsystems() |
|
| 83 |
+ if err != nil {
|
|
| 84 |
+ return nil, err |
|
| 85 |
+ } |
|
| 86 |
+ m := Mount{
|
|
| 87 |
+ Mountpoint: unifiedMountpoint, |
|
| 88 |
+ Root: unifiedMountpoint, |
|
| 89 |
+ Subsystems: availableControllers, |
|
| 90 |
+ } |
|
| 91 |
+ return []Mount{m}, nil
|
|
| 92 |
+ } |
|
| 93 |
+ |
|
| 94 |
+ return getCgroupMountsV1(all) |
|
| 95 |
+} |
|
| 96 |
+ |
|
| 97 |
+// GetAllSubsystems returns all the cgroup subsystems supported by the kernel |
|
| 98 |
+func GetAllSubsystems() ([]string, error) {
|
|
| 99 |
+ // /proc/cgroups is meaningless for v2 |
|
| 100 |
+ // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features |
|
| 101 |
+ if IsCgroup2UnifiedMode() {
|
|
| 102 |
+ // "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers. |
|
| 103 |
+ // - devices: implemented in kernel 4.15 |
|
| 104 |
+ // - freezer: implemented in kernel 5.2 |
|
| 105 |
+ // We assume these are always available, as it is hard to detect availability. |
|
| 106 |
+ pseudo := []string{"devices", "freezer"}
|
|
| 107 |
+ data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
|
|
| 108 |
+ if err != nil {
|
|
| 109 |
+ return nil, err |
|
| 110 |
+ } |
|
| 111 |
+ subsystems := append(pseudo, strings.Fields(data)...) |
|
| 112 |
+ return subsystems, nil |
|
| 113 |
+ } |
|
| 114 |
+ f, err := os.Open("/proc/cgroups")
|
|
| 115 |
+ if err != nil {
|
|
| 116 |
+ return nil, err |
|
| 117 |
+ } |
|
| 118 |
+ defer f.Close() |
|
| 119 |
+ |
|
| 120 |
+ subsystems := []string{}
|
|
| 121 |
+ |
|
| 122 |
+ s := bufio.NewScanner(f) |
|
| 123 |
+ for s.Scan() {
|
|
| 124 |
+ text := s.Text() |
|
| 125 |
+ if text[0] != '#' {
|
|
| 126 |
+ parts := strings.Fields(text) |
|
| 127 |
+ if len(parts) >= 4 && parts[3] != "0" {
|
|
| 128 |
+ subsystems = append(subsystems, parts[0]) |
|
| 129 |
+ } |
|
| 130 |
+ } |
|
| 131 |
+ } |
|
| 132 |
+ if err := s.Err(); err != nil {
|
|
| 133 |
+ return nil, err |
|
| 134 |
+ } |
|
| 135 |
+ return subsystems, nil |
|
| 136 |
+} |
|
| 137 |
+ |
|
| 138 |
+func readProcsFile(dir string) (out []int, _ error) {
|
|
| 139 |
+ file := CgroupProcesses |
|
| 140 |
+ retry := true |
|
| 141 |
+ |
|
| 142 |
+again: |
|
| 143 |
+ f, err := OpenFile(dir, file, os.O_RDONLY) |
|
| 144 |
+ if err != nil {
|
|
| 145 |
+ return nil, err |
|
| 146 |
+ } |
|
| 147 |
+ defer f.Close() |
|
| 148 |
+ |
|
| 149 |
+ s := bufio.NewScanner(f) |
|
| 150 |
+ for s.Scan() {
|
|
| 151 |
+ if t := s.Text(); t != "" {
|
|
| 152 |
+ pid, err := strconv.Atoi(t) |
|
| 153 |
+ if err != nil {
|
|
| 154 |
+ return nil, err |
|
| 155 |
+ } |
|
| 156 |
+ out = append(out, pid) |
|
| 157 |
+ } |
|
| 158 |
+ } |
|
| 159 |
+ if errors.Is(s.Err(), unix.ENOTSUP) && retry {
|
|
| 160 |
+ // For a threaded cgroup, read returns ENOTSUP, and we should |
|
| 161 |
+ // read from cgroup.threads instead. |
|
| 162 |
+ file = "cgroup.threads" |
|
| 163 |
+ retry = false |
|
| 164 |
+ goto again |
|
| 165 |
+ } |
|
| 166 |
+ return out, s.Err() |
|
| 167 |
+} |
|
| 168 |
+ |
|
| 169 |
+// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup |
|
| 170 |
+// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g. |
|
| 171 |
+// |
|
| 172 |
+// "cpu": "/user.slice/user-1000.slice" |
|
| 173 |
+// "pids": "/user.slice/user-1000.slice" |
|
| 174 |
+// |
|
| 175 |
+// etc. |
|
| 176 |
+// |
|
| 177 |
+// Note that for cgroup v2 unified hierarchy, there are no per-controller |
|
| 178 |
+// cgroup paths, so the resulting map will have a single element where the key |
|
| 179 |
+// is empty string ("") and the value is the cgroup path the <pid> is in.
|
|
| 180 |
+func ParseCgroupFile(path string) (map[string]string, error) {
|
|
| 181 |
+ f, err := os.Open(path) |
|
| 182 |
+ if err != nil {
|
|
| 183 |
+ return nil, err |
|
| 184 |
+ } |
|
| 185 |
+ defer f.Close() |
|
| 186 |
+ |
|
| 187 |
+ return parseCgroupFromReader(f) |
|
| 188 |
+} |
|
| 189 |
+ |
|
| 190 |
// parseCgroupFromReader does the actual parsing for ParseCgroupFile; it takes
// an io.Reader to make testing easier.
//
// Every line must have the form described in cgroups(7):
//
//	hierarchy-ID:subsystem-list:cgroup-path
//
// The subsystem list is split on commas and each name is mapped to the
// cgroup path (which itself may contain further colons). A line with fewer
// than two colons makes the whole call fail.
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	result := make(map[string]string)

	sc := bufio.NewScanner(r)
	for sc.Scan() {
		line := sc.Text()

		// Peel off the hierarchy ID, then separate the subsystem list
		// from the cgroup path at the second colon.
		_, rest, ok := strings.Cut(line, ":")
		var subsysList, cgPath string
		if ok {
			subsysList, cgPath, ok = strings.Cut(rest, ":")
		}
		if !ok {
			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", line)
		}

		for _, name := range strings.Split(subsysList, ",") {
			result[name] = cgPath
		}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}

	return result, nil
}
|
| 218 |
+ |
|
| 219 |
// PathExists reports whether os.Stat succeeds for path. Any stat error,
// not only "does not exist", is reported as false.
func PathExists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
|
| 225 |
+ |
|
| 226 |
+// rmdir tries to remove a directory, optionally retrying on EBUSY. |
|
| 227 |
+func rmdir(path string, retry bool) error {
|
|
| 228 |
+ delay := time.Millisecond |
|
| 229 |
+ tries := 10 |
|
| 230 |
+ |
|
| 231 |
+again: |
|
| 232 |
+ err := unix.Rmdir(path) |
|
| 233 |
+ switch err { // nolint:errorlint // unix errors are bare
|
|
| 234 |
+ case nil, unix.ENOENT: |
|
| 235 |
+ return nil |
|
| 236 |
+ case unix.EINTR: |
|
| 237 |
+ goto again |
|
| 238 |
+ case unix.EBUSY: |
|
| 239 |
+ if retry && tries > 0 {
|
|
| 240 |
+ time.Sleep(delay) |
|
| 241 |
+ delay *= 2 |
|
| 242 |
+ tries-- |
|
| 243 |
+ goto again |
|
| 244 |
+ |
|
| 245 |
+ } |
|
| 246 |
+ } |
|
| 247 |
+ return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
|
| 248 |
+} |
|
| 249 |
+ |
|
| 250 |
+// RemovePath aims to remove cgroup path. It does so recursively, |
|
| 251 |
+// by removing any subdirectories (sub-cgroups) first. |
|
| 252 |
+func RemovePath(path string) error {
|
|
| 253 |
+ // Try the fast path first; don't retry on EBUSY yet. |
|
| 254 |
+ if err := rmdir(path, false); err == nil {
|
|
| 255 |
+ return nil |
|
| 256 |
+ } |
|
| 257 |
+ |
|
| 258 |
+ // There are many reasons why rmdir can fail, including: |
|
| 259 |
+ // 1. cgroup have existing sub-cgroups; |
|
| 260 |
+ // 2. cgroup (still) have some processes (that are about to vanish); |
|
| 261 |
+ // 3. lack of permission (one example is read-only /sys/fs/cgroup mount, |
|
| 262 |
+ // in which case rmdir returns EROFS even for for a non-existent path, |
|
| 263 |
+ // see issue 4518). |
|
| 264 |
+ // |
|
| 265 |
+ // Using os.ReadDir here kills two birds with one stone: check if |
|
| 266 |
+ // the directory exists (handling scenario 3 above), and use |
|
| 267 |
+ // directory contents to remove sub-cgroups (handling scenario 1). |
|
| 268 |
+ infos, err := os.ReadDir(path) |
|
| 269 |
+ if err != nil {
|
|
| 270 |
+ if os.IsNotExist(err) {
|
|
| 271 |
+ return nil |
|
| 272 |
+ } |
|
| 273 |
+ return err |
|
| 274 |
+ } |
|
| 275 |
+ // Let's remove sub-cgroups, if any. |
|
| 276 |
+ for _, info := range infos {
|
|
| 277 |
+ if info.IsDir() {
|
|
| 278 |
+ if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
|
| 279 |
+ return err |
|
| 280 |
+ } |
|
| 281 |
+ } |
|
| 282 |
+ } |
|
| 283 |
+ // Finally, try rmdir again, this time with retries on EBUSY, |
|
| 284 |
+ // which may help with scenario 2 above. |
|
| 285 |
+ return rmdir(path, true) |
|
| 286 |
+} |
|
| 287 |
+ |
|
| 288 |
+// RemovePaths iterates over the provided paths removing them. |
|
| 289 |
+func RemovePaths(paths map[string]string) (err error) {
|
|
| 290 |
+ for s, p := range paths {
|
|
| 291 |
+ if err := RemovePath(p); err == nil {
|
|
| 292 |
+ delete(paths, s) |
|
| 293 |
+ } |
|
| 294 |
+ } |
|
| 295 |
+ if len(paths) == 0 {
|
|
| 296 |
+ clear(paths) |
|
| 297 |
+ return nil |
|
| 298 |
+ } |
|
| 299 |
+ return fmt.Errorf("Failed to remove paths: %v", paths)
|
|
| 300 |
+} |
|
| 301 |
+ |
|
| 302 |
+var ( |
|
| 303 |
+ hugePageSizes []string |
|
| 304 |
+ initHPSOnce sync.Once |
|
| 305 |
+) |
|
| 306 |
+ |
|
| 307 |
+func HugePageSizes() []string {
|
|
| 308 |
+ initHPSOnce.Do(func() {
|
|
| 309 |
+ dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
|
| 310 |
+ if err != nil {
|
|
| 311 |
+ return |
|
| 312 |
+ } |
|
| 313 |
+ files, err := dir.Readdirnames(0) |
|
| 314 |
+ dir.Close() |
|
| 315 |
+ if err != nil {
|
|
| 316 |
+ return |
|
| 317 |
+ } |
|
| 318 |
+ |
|
| 319 |
+ hugePageSizes, err = getHugePageSizeFromFilenames(files) |
|
| 320 |
+ if err != nil {
|
|
| 321 |
+ logrus.Warn("HugePageSizes: ", err)
|
|
| 322 |
+ } |
|
| 323 |
+ }) |
|
| 324 |
+ |
|
| 325 |
+ return hugePageSizes |
|
| 326 |
+} |
|
| 327 |
+ |
|
| 328 |
// getHugePageSizeFromFilenames converts directory entry names of the form
// "hugepages-<N>kB" (example: hugepages-1048576kB) into size strings with a
// "KB", "MB" or "GB" unit.
//
// Names without the "hugepages-" prefix are silently ignored. Names with a
// suffix other than "kB" or with a non-numeric size are skipped too, but the
// first such problem is returned as the error, alongside the sizes that
// were parsed successfully.
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
	var firstProblem error
	// Only the first problem is remembered.
	note := func(err error) {
		if firstProblem == nil {
			firstProblem = err
		}
	}

	sizes := make([]string, 0, len(fileNames))
	for _, name := range fileNames {
		rest, hasPrefix := strings.CutPrefix(name, "hugepages-")
		if !hasPrefix {
			// Unexpected file name: no prefix found, ignore it.
			continue
		}
		// The suffix is always "kB" (as of Linux 5.13). If we find
		// something else, produce an error but keep going.
		digits, hasSuffix := strings.CutSuffix(rest, "kB")
		if !hasSuffix {
			// Highly unlikely.
			note(errors.New(name + `: invalid suffix (expected "kB")`))
			continue
		}
		kb, err := strconv.Atoi(digits)
		if err != nil {
			// Highly unlikely.
			note(fmt.Errorf("%s: %w", name, err))
			continue
		}

		// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
		// but in our case the size is in KB already.
		var formatted string
		switch {
		case kb >= 1<<20:
			formatted = strconv.Itoa(kb>>20) + "GB"
		case kb >= 1<<10:
			formatted = strconv.Itoa(kb>>10) + "MB"
		default:
			// Keep the digits exactly as they appeared in the name.
			formatted = digits + "KB"
		}
		sizes = append(sizes, formatted)
	}

	return sizes, firstProblem
}
|
| 372 |
+ |
|
| 373 |
+// GetPids returns all pids, that were added to cgroup at path. |
|
| 374 |
+func GetPids(dir string) ([]int, error) {
|
|
| 375 |
+ return readProcsFile(dir) |
|
| 376 |
+} |
|
| 377 |
+ |
|
| 378 |
+// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file |
|
| 379 |
+func WriteCgroupProc(dir string, pid int) error {
|
|
| 380 |
+ // Normally dir should not be empty, one case is that cgroup subsystem |
|
| 381 |
+ // is not mounted, we will get empty dir, and we want it fail here. |
|
| 382 |
+ if dir == "" {
|
|
| 383 |
+ return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
|
| 384 |
+ } |
|
| 385 |
+ |
|
| 386 |
+ // Dont attach any pid to the cgroup if -1 is specified as a pid |
|
| 387 |
+ if pid == -1 {
|
|
| 388 |
+ return nil |
|
| 389 |
+ } |
|
| 390 |
+ |
|
| 391 |
+ file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY) |
|
| 392 |
+ if err != nil {
|
|
| 393 |
+ return fmt.Errorf("failed to write %v: %w", pid, err)
|
|
| 394 |
+ } |
|
| 395 |
+ defer file.Close() |
|
| 396 |
+ |
|
| 397 |
+ for i := 0; i < 5; i++ {
|
|
| 398 |
+ _, err = file.WriteString(strconv.Itoa(pid)) |
|
| 399 |
+ if err == nil {
|
|
| 400 |
+ return nil |
|
| 401 |
+ } |
|
| 402 |
+ |
|
| 403 |
+ // EINVAL might mean that the task being added to cgroup.procs is in state |
|
| 404 |
+ // TASK_NEW. We should attempt to do so again. |
|
| 405 |
+ if errors.Is(err, unix.EINVAL) {
|
|
| 406 |
+ time.Sleep(30 * time.Millisecond) |
|
| 407 |
+ continue |
|
| 408 |
+ } |
|
| 409 |
+ |
|
| 410 |
+ return fmt.Errorf("failed to write %v: %w", pid, err)
|
|
| 411 |
+ } |
|
| 412 |
+ return err |
|
| 413 |
+} |
|
| 414 |
+ |
|
| 415 |
// ConvertCPUSharesToCgroupV2Value converts cgroup v1 CPU shares into a cgroup
// v2 CPU weight.
//
// Since the OCI spec is designed for cgroup v1, in some cases there is need
// to convert from the cgroup v1 configuration to cgroup v2. The formula is
//
//	y = 1 + ((x - 2) * 9999) / 262142
//
// which maps [2-262144] linearly onto [1-10000]. 262144 comes from the Linux
// kernel definition "#define MAX_SHARES (1UL << 18)".
//
// A value of 0 is returned unchanged. Inputs outside [2-262144] are
// clamped to that range first; without the clamp an input of 1 would
// underflow the unsigned subtraction and produce a meaningless huge weight.
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
	const (
		minShares = 2
		maxShares = 262144
		minWeight = 1
		maxWeight = 10000
	)

	switch {
	case cpuShares == 0:
		return 0
	case cpuShares <= minShares:
		return minWeight
	case cpuShares >= maxShares:
		return maxWeight
	}
	return minWeight + ((cpuShares-minShares)*(maxWeight-minWeight))/(maxShares-minShares)
}
|
| 426 |
+ |
|
| 427 |
// ConvertMemorySwapToCgroupV2Value converts the MemorySwap value from the
// OCI spec for use by cgroup v2 drivers. Resources.MemorySwap is defined as
// memory+swap combined, while in cgroup v2 swap is a separate value, so
// memory is subtracted from it where that makes sense.
//
// Both arguments use -1 for "unlimited" and 0 for "unset". An error is
// returned when swap is set but memory is unset, when memory is negative
// (other than -1), or when memorySwap is smaller than memory.
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
	// For compatibility with cgroup1 controller, set swap to unlimited in
	// case the memory is set to unlimited and the swap is not explicitly set,
	// treating the request as "set both memory and swap to unlimited".
	if memory == -1 && memorySwap == 0 {
		return -1, nil
	}
	// Treat -1 ("max") and 0 ("unset") swap as is.
	if memorySwap == -1 || memorySwap == 0 {
		return memorySwap, nil
	}
	// Unlimited memory, so treat swap as is.
	if memory == -1 {
		return memorySwap, nil
	}
	// Unset or unknown memory, can't calculate swap.
	if memory == 0 {
		return 0, errors.New("unable to set swap limit without memory limit")
	}
	// Does not make sense to subtract a negative value.
	if memory < 0 {
		return 0, fmt.Errorf("invalid memory value: %d", memory)
	}
	// Sanity check.
	if memorySwap < memory {
		return 0, errors.New("memory+swap limit should be >= memory limit")
	}

	return memorySwap - memory, nil
}
|
| 457 |
+ |
|
| 458 |
// ConvertBlkIOToIOWeightValue converts a cgroup v1 blkio weight into a
// cgroup v2 io weight.
//
// Since the OCI spec is designed for cgroup v1, in some cases there is need
// to convert from the cgroup v1 configuration to cgroup v2. The formula is
//
//	y = 1 + (x - 10) * 9999 / 990
//
// which maps [10-1000] linearly onto [1-10000].
//
// A value of 0 is returned unchanged. Inputs outside [10-1000] are
// clamped to that range first; without the clamp, inputs 1..9 would
// underflow the unsigned subtraction and produce a meaningless huge weight.
func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
	const (
		minBlkio  = 10
		maxBlkio  = 1000
		minWeight = 1
		maxWeight = 10000
	)

	switch {
	case blkIoWeight == 0:
		return 0
	case blkIoWeight <= minBlkio:
		return minWeight
	case blkIoWeight >= maxBlkio:
		return maxWeight
	}
	return minWeight + (uint64(blkIoWeight)-minBlkio)*(maxWeight-minWeight)/(maxBlkio-minBlkio)
}
| 0 | 468 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,277 @@ |
| 0 |
+package cgroups |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "errors" |
|
| 4 |
+ "fmt" |
|
| 5 |
+ "os" |
|
| 6 |
+ "path/filepath" |
|
| 7 |
+ "strings" |
|
| 8 |
+ "sync" |
|
| 9 |
+ "syscall" |
|
| 10 |
+ |
|
| 11 |
+ securejoin "github.com/cyphar/filepath-securejoin" |
|
| 12 |
+ "github.com/moby/sys/mountinfo" |
|
| 13 |
+ "golang.org/x/sys/unix" |
|
| 14 |
+) |
|
| 15 |
+ |
|
| 16 |
+// Code in this source file is specific to cgroup v1, |
|
| 17 |
+// and must not be used from any cgroup v2 code. |
|
| 18 |
+ |
|
| 19 |
const (
	// CgroupNamePrefix is the prefix that may precede a subsystem name;
	// helpers in this file strip it from, or prepend it to, subsystem names.
	// NOTE(review): presumably marks named cgroup v1 hierarchies — confirm.
	CgroupNamePrefix = "name="
	// defaultPrefix is the directory under which tryDefaultPath looks for
	// a subsystem mount before mountinfo is parsed.
	defaultPrefix = "/sys/fs/cgroup"
)
|
| 23 |
+ |
|
| 24 |
var (
	// errUnified is returned by the cgroup v1 helpers in this file when
	// IsCgroup2UnifiedMode reports true.
	errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
	// ErrV1NoUnified signals, per its message, a configuration that uses
	// "unified" settings on cgroup v1. Its callers are not in this file.
	ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")

	// The three variables below form the cache filled once by
	// readCgroupMountinfo: the guard, the stored error and the stored
	// list of mounts.
	readMountinfoOnce sync.Once
	readMountinfoErr  error
	cgroupMountinfo   []*mountinfo.Info
)
|
| 32 |
+ |
|
| 33 |
// NotFoundError reports that no mountpoint was found for a subsystem.
type NotFoundError struct {
	// Subsystem is the name of the subsystem that was looked up.
	Subsystem string
}

// Error implements the error interface.
func (e *NotFoundError) Error() string {
	const format = "mountpoint for %s not found"
	return fmt.Sprintf(format, e.Subsystem)
}

// NewNotFoundError returns a *NotFoundError for the given subsystem.
func NewNotFoundError(sub string) error {
	return &NotFoundError{Subsystem: sub}
}

// IsNotFound reports whether err is, or wraps, a *NotFoundError.
func IsNotFound(err error) bool {
	var target *NotFoundError
	matched := errors.As(err, &target)
	return matched
}
|
| 51 |
+ |
|
| 52 |
+func tryDefaultPath(cgroupPath, subsystem string) string {
|
|
| 53 |
+ if !strings.HasPrefix(defaultPrefix, cgroupPath) {
|
|
| 54 |
+ return "" |
|
| 55 |
+ } |
|
| 56 |
+ |
|
| 57 |
+ // remove possible prefix |
|
| 58 |
+ subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix) |
|
| 59 |
+ |
|
| 60 |
+ // Make sure we're still under defaultPrefix, and resolve |
|
| 61 |
+ // a possible symlink (like cpu -> cpu,cpuacct). |
|
| 62 |
+ path, err := securejoin.SecureJoin(defaultPrefix, subsystem) |
|
| 63 |
+ if err != nil {
|
|
| 64 |
+ return "" |
|
| 65 |
+ } |
|
| 66 |
+ |
|
| 67 |
+ // (1) path should be a directory. |
|
| 68 |
+ st, err := os.Lstat(path) |
|
| 69 |
+ if err != nil || !st.IsDir() {
|
|
| 70 |
+ return "" |
|
| 71 |
+ } |
|
| 72 |
+ |
|
| 73 |
+ // (2) path should be a mount point. |
|
| 74 |
+ pst, err := os.Lstat(filepath.Dir(path)) |
|
| 75 |
+ if err != nil {
|
|
| 76 |
+ return "" |
|
| 77 |
+ } |
|
| 78 |
+ |
|
| 79 |
+ if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
|
|
| 80 |
+ // parent dir has the same dev -- path is not a mount point |
|
| 81 |
+ return "" |
|
| 82 |
+ } |
|
| 83 |
+ |
|
| 84 |
+ // (3) path should have 'cgroup' fs type. |
|
| 85 |
+ fst := unix.Statfs_t{}
|
|
| 86 |
+ err = unix.Statfs(path, &fst) |
|
| 87 |
+ if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
|
| 88 |
+ return "" |
|
| 89 |
+ } |
|
| 90 |
+ |
|
| 91 |
+ return path |
|
| 92 |
+} |
|
| 93 |
+ |
|
| 94 |
+// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones |
|
| 95 |
+// with fstype of "cgroup") for the current running process. |
|
| 96 |
+// |
|
| 97 |
+// The results are cached (to avoid re-reading mountinfo which is relatively |
|
| 98 |
+// expensive), so it is assumed that cgroup mounts are not being changed. |
|
| 99 |
+func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
|
| 100 |
+ readMountinfoOnce.Do(func() {
|
|
| 101 |
+ // mountinfo.GetMounts uses /proc/thread-self, so we can use it without |
|
| 102 |
+ // issues. |
|
| 103 |
+ cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( |
|
| 104 |
+ mountinfo.FSTypeFilter("cgroup"),
|
|
| 105 |
+ ) |
|
| 106 |
+ }) |
|
| 107 |
+ return cgroupMountinfo, readMountinfoErr |
|
| 108 |
+} |
|
| 109 |
+ |
|
| 110 |
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt |
|
| 111 |
+func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
|
| 112 |
+ if IsCgroup2UnifiedMode() {
|
|
| 113 |
+ return "", errUnified |
|
| 114 |
+ } |
|
| 115 |
+ |
|
| 116 |
+ // If subsystem is empty, we look for the cgroupv2 hybrid path. |
|
| 117 |
+ if len(subsystem) == 0 {
|
|
| 118 |
+ return hybridMountpoint, nil |
|
| 119 |
+ } |
|
| 120 |
+ |
|
| 121 |
+ // Avoid parsing mountinfo by trying the default path first, if possible. |
|
| 122 |
+ if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
|
| 123 |
+ return path, nil |
|
| 124 |
+ } |
|
| 125 |
+ |
|
| 126 |
+ mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) |
|
| 127 |
+ return mnt, err |
|
| 128 |
+} |
|
| 129 |
+ |
|
| 130 |
+func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
|
| 131 |
+ if IsCgroup2UnifiedMode() {
|
|
| 132 |
+ return "", "", errUnified |
|
| 133 |
+ } |
|
| 134 |
+ |
|
| 135 |
+ mi, err := readCgroupMountinfo() |
|
| 136 |
+ if err != nil {
|
|
| 137 |
+ return "", "", err |
|
| 138 |
+ } |
|
| 139 |
+ |
|
| 140 |
+ return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem) |
|
| 141 |
+} |
|
| 142 |
+ |
|
| 143 |
+func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
|
|
| 144 |
+ for _, mi := range mounts {
|
|
| 145 |
+ if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
|
|
| 146 |
+ for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
|
| 147 |
+ if opt == subsystem {
|
|
| 148 |
+ return mi.Mountpoint, mi.Root, nil |
|
| 149 |
+ } |
|
| 150 |
+ } |
|
| 151 |
+ } |
|
| 152 |
+ } |
|
| 153 |
+ |
|
| 154 |
+ return "", "", NewNotFoundError(subsystem) |
|
| 155 |
+} |
|
| 156 |
+ |
|
| 157 |
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
|
| 158 |
+ if len(m.Subsystems) == 0 {
|
|
| 159 |
+ return "", errors.New("no subsystem for mount")
|
|
| 160 |
+ } |
|
| 161 |
+ |
|
| 162 |
+ return getControllerPath(m.Subsystems[0], cgroups) |
|
| 163 |
+} |
|
| 164 |
+ |
|
| 165 |
+func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
|
|
| 166 |
+ res := make([]Mount, 0, len(ss)) |
|
| 167 |
+ numFound := 0 |
|
| 168 |
+ for _, mi := range mounts {
|
|
| 169 |
+ m := Mount{
|
|
| 170 |
+ Mountpoint: mi.Mountpoint, |
|
| 171 |
+ Root: mi.Root, |
|
| 172 |
+ } |
|
| 173 |
+ for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
|
| 174 |
+ seen, known := ss[opt] |
|
| 175 |
+ if !known || (!all && seen) {
|
|
| 176 |
+ continue |
|
| 177 |
+ } |
|
| 178 |
+ ss[opt] = true |
|
| 179 |
+ opt = strings.TrimPrefix(opt, CgroupNamePrefix) |
|
| 180 |
+ m.Subsystems = append(m.Subsystems, opt) |
|
| 181 |
+ numFound++ |
|
| 182 |
+ } |
|
| 183 |
+ if len(m.Subsystems) > 0 || all {
|
|
| 184 |
+ res = append(res, m) |
|
| 185 |
+ } |
|
| 186 |
+ if !all && numFound >= len(ss) {
|
|
| 187 |
+ break |
|
| 188 |
+ } |
|
| 189 |
+ } |
|
| 190 |
+ return res, nil |
|
| 191 |
+} |
|
| 192 |
+ |
|
| 193 |
+func getCgroupMountsV1(all bool) ([]Mount, error) {
|
|
| 194 |
+ mi, err := readCgroupMountinfo() |
|
| 195 |
+ if err != nil {
|
|
| 196 |
+ return nil, err |
|
| 197 |
+ } |
|
| 198 |
+ |
|
| 199 |
+ // We don't need to use /proc/thread-self here because runc always runs |
|
| 200 |
+ // with every thread in the same cgroup. This lets us avoid having to do |
|
| 201 |
+ // runtime.LockOSThread. |
|
| 202 |
+ allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 203 |
+ if err != nil {
|
|
| 204 |
+ return nil, err |
|
| 205 |
+ } |
|
| 206 |
+ |
|
| 207 |
+ allMap := make(map[string]bool) |
|
| 208 |
+ for s := range allSubsystems {
|
|
| 209 |
+ allMap[s] = false |
|
| 210 |
+ } |
|
| 211 |
+ |
|
| 212 |
+ return getCgroupMountsHelper(allMap, mi, all) |
|
| 213 |
+} |
|
| 214 |
+ |
|
| 215 |
+// GetOwnCgroup returns the relative path to the cgroup docker is running in. |
|
| 216 |
+func GetOwnCgroup(subsystem string) (string, error) {
|
|
| 217 |
+ if IsCgroup2UnifiedMode() {
|
|
| 218 |
+ return "", errUnified |
|
| 219 |
+ } |
|
| 220 |
+ |
|
| 221 |
+ // We don't need to use /proc/thread-self here because runc always runs |
|
| 222 |
+ // with every thread in the same cgroup. This lets us avoid having to do |
|
| 223 |
+ // runtime.LockOSThread. |
|
| 224 |
+ cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 225 |
+ if err != nil {
|
|
| 226 |
+ return "", err |
|
| 227 |
+ } |
|
| 228 |
+ |
|
| 229 |
+ return getControllerPath(subsystem, cgroups) |
|
| 230 |
+} |
|
| 231 |
+ |
|
| 232 |
+func GetOwnCgroupPath(subsystem string) (string, error) {
|
|
| 233 |
+ cgroup, err := GetOwnCgroup(subsystem) |
|
| 234 |
+ if err != nil {
|
|
| 235 |
+ return "", err |
|
| 236 |
+ } |
|
| 237 |
+ |
|
| 238 |
+ // If subsystem is empty, we look for the cgroupv2 hybrid path. |
|
| 239 |
+ if len(subsystem) == 0 {
|
|
| 240 |
+ return hybridMountpoint, nil |
|
| 241 |
+ } |
|
| 242 |
+ |
|
| 243 |
+ return getCgroupPathHelper(subsystem, cgroup) |
|
| 244 |
+} |
|
| 245 |
+ |
|
| 246 |
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
|
| 247 |
+ mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
|
| 248 |
+ if err != nil {
|
|
| 249 |
+ return "", err |
|
| 250 |
+ } |
|
| 251 |
+ |
|
| 252 |
+ // This is needed for nested containers, because in /proc/self/cgroup we |
|
| 253 |
+ // see paths from host, which don't exist in container. |
|
| 254 |
+ relCgroup, err := filepath.Rel(root, cgroup) |
|
| 255 |
+ if err != nil {
|
|
| 256 |
+ return "", err |
|
| 257 |
+ } |
|
| 258 |
+ |
|
| 259 |
+ return filepath.Join(mnt, relCgroup), nil |
|
| 260 |
+} |
|
| 261 |
+ |
|
| 262 |
+func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
|
| 263 |
+ if IsCgroup2UnifiedMode() {
|
|
| 264 |
+ return "", errUnified |
|
| 265 |
+ } |
|
| 266 |
+ |
|
| 267 |
+ if p, ok := cgroups[subsystem]; ok {
|
|
| 268 |
+ return p, nil |
|
| 269 |
+ } |
|
| 270 |
+ |
|
| 271 |
+ if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
|
| 272 |
+ return p, nil |
|
| 273 |
+ } |
|
| 274 |
+ |
|
| 275 |
+ return "", NewNotFoundError(subsystem) |
|
| 276 |
+} |
| 0 | 277 |
deleted file mode 100644 |
| ... | ... |
@@ -1,191 +0,0 @@ |
| 1 |
- |
|
| 2 |
- Apache License |
|
| 3 |
- Version 2.0, January 2004 |
|
| 4 |
- http://www.apache.org/licenses/ |
|
| 5 |
- |
|
| 6 |
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
|
| 7 |
- |
|
| 8 |
- 1. Definitions. |
|
| 9 |
- |
|
| 10 |
- "License" shall mean the terms and conditions for use, reproduction, |
|
| 11 |
- and distribution as defined by Sections 1 through 9 of this document. |
|
| 12 |
- |
|
| 13 |
- "Licensor" shall mean the copyright owner or entity authorized by |
|
| 14 |
- the copyright owner that is granting the License. |
|
| 15 |
- |
|
| 16 |
- "Legal Entity" shall mean the union of the acting entity and all |
|
| 17 |
- other entities that control, are controlled by, or are under common |
|
| 18 |
- control with that entity. For the purposes of this definition, |
|
| 19 |
- "control" means (i) the power, direct or indirect, to cause the |
|
| 20 |
- direction or management of such entity, whether by contract or |
|
| 21 |
- otherwise, or (ii) ownership of fifty percent (50%) or more of the |
|
| 22 |
- outstanding shares, or (iii) beneficial ownership of such entity. |
|
| 23 |
- |
|
| 24 |
- "You" (or "Your") shall mean an individual or Legal Entity |
|
| 25 |
- exercising permissions granted by this License. |
|
| 26 |
- |
|
| 27 |
- "Source" form shall mean the preferred form for making modifications, |
|
| 28 |
- including but not limited to software source code, documentation |
|
| 29 |
- source, and configuration files. |
|
| 30 |
- |
|
| 31 |
- "Object" form shall mean any form resulting from mechanical |
|
| 32 |
- transformation or translation of a Source form, including but |
|
| 33 |
- not limited to compiled object code, generated documentation, |
|
| 34 |
- and conversions to other media types. |
|
| 35 |
- |
|
| 36 |
- "Work" shall mean the work of authorship, whether in Source or |
|
| 37 |
- Object form, made available under the License, as indicated by a |
|
| 38 |
- copyright notice that is included in or attached to the work |
|
| 39 |
- (an example is provided in the Appendix below). |
|
| 40 |
- |
|
| 41 |
- "Derivative Works" shall mean any work, whether in Source or Object |
|
| 42 |
- form, that is based on (or derived from) the Work and for which the |
|
| 43 |
- editorial revisions, annotations, elaborations, or other modifications |
|
| 44 |
- represent, as a whole, an original work of authorship. For the purposes |
|
| 45 |
- of this License, Derivative Works shall not include works that remain |
|
| 46 |
- separable from, or merely link (or bind by name) to the interfaces of, |
|
| 47 |
- the Work and Derivative Works thereof. |
|
| 48 |
- |
|
| 49 |
- "Contribution" shall mean any work of authorship, including |
|
| 50 |
- the original version of the Work and any modifications or additions |
|
| 51 |
- to that Work or Derivative Works thereof, that is intentionally |
|
| 52 |
- submitted to Licensor for inclusion in the Work by the copyright owner |
|
| 53 |
- or by an individual or Legal Entity authorized to submit on behalf of |
|
| 54 |
- the copyright owner. For the purposes of this definition, "submitted" |
|
| 55 |
- means any form of electronic, verbal, or written communication sent |
|
| 56 |
- to the Licensor or its representatives, including but not limited to |
|
| 57 |
- communication on electronic mailing lists, source code control systems, |
|
| 58 |
- and issue tracking systems that are managed by, or on behalf of, the |
|
| 59 |
- Licensor for the purpose of discussing and improving the Work, but |
|
| 60 |
- excluding communication that is conspicuously marked or otherwise |
|
| 61 |
- designated in writing by the copyright owner as "Not a Contribution." |
|
| 62 |
- |
|
| 63 |
- "Contributor" shall mean Licensor and any individual or Legal Entity |
|
| 64 |
- on behalf of whom a Contribution has been received by Licensor and |
|
| 65 |
- subsequently incorporated within the Work. |
|
| 66 |
- |
|
| 67 |
- 2. Grant of Copyright License. Subject to the terms and conditions of |
|
| 68 |
- this License, each Contributor hereby grants to You a perpetual, |
|
| 69 |
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
|
| 70 |
- copyright license to reproduce, prepare Derivative Works of, |
|
| 71 |
- publicly display, publicly perform, sublicense, and distribute the |
|
| 72 |
- Work and such Derivative Works in Source or Object form. |
|
| 73 |
- |
|
| 74 |
- 3. Grant of Patent License. Subject to the terms and conditions of |
|
| 75 |
- this License, each Contributor hereby grants to You a perpetual, |
|
| 76 |
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
|
| 77 |
- (except as stated in this section) patent license to make, have made, |
|
| 78 |
- use, offer to sell, sell, import, and otherwise transfer the Work, |
|
| 79 |
- where such license applies only to those patent claims licensable |
|
| 80 |
- by such Contributor that are necessarily infringed by their |
|
| 81 |
- Contribution(s) alone or by combination of their Contribution(s) |
|
| 82 |
- with the Work to which such Contribution(s) was submitted. If You |
|
| 83 |
- institute patent litigation against any entity (including a |
|
| 84 |
- cross-claim or counterclaim in a lawsuit) alleging that the Work |
|
| 85 |
- or a Contribution incorporated within the Work constitutes direct |
|
| 86 |
- or contributory patent infringement, then any patent licenses |
|
| 87 |
- granted to You under this License for that Work shall terminate |
|
| 88 |
- as of the date such litigation is filed. |
|
| 89 |
- |
|
| 90 |
- 4. Redistribution. You may reproduce and distribute copies of the |
|
| 91 |
- Work or Derivative Works thereof in any medium, with or without |
|
| 92 |
- modifications, and in Source or Object form, provided that You |
|
| 93 |
- meet the following conditions: |
|
| 94 |
- |
|
| 95 |
- (a) You must give any other recipients of the Work or |
|
| 96 |
- Derivative Works a copy of this License; and |
|
| 97 |
- |
|
| 98 |
- (b) You must cause any modified files to carry prominent notices |
|
| 99 |
- stating that You changed the files; and |
|
| 100 |
- |
|
| 101 |
- (c) You must retain, in the Source form of any Derivative Works |
|
| 102 |
- that You distribute, all copyright, patent, trademark, and |
|
| 103 |
- attribution notices from the Source form of the Work, |
|
| 104 |
- excluding those notices that do not pertain to any part of |
|
| 105 |
- the Derivative Works; and |
|
| 106 |
- |
|
| 107 |
- (d) If the Work includes a "NOTICE" text file as part of its |
|
| 108 |
- distribution, then any Derivative Works that You distribute must |
|
| 109 |
- include a readable copy of the attribution notices contained |
|
| 110 |
- within such NOTICE file, excluding those notices that do not |
|
| 111 |
- pertain to any part of the Derivative Works, in at least one |
|
| 112 |
- of the following places: within a NOTICE text file distributed |
|
| 113 |
- as part of the Derivative Works; within the Source form or |
|
| 114 |
- documentation, if provided along with the Derivative Works; or, |
|
| 115 |
- within a display generated by the Derivative Works, if and |
|
| 116 |
- wherever such third-party notices normally appear. The contents |
|
| 117 |
- of the NOTICE file are for informational purposes only and |
|
| 118 |
- do not modify the License. You may add Your own attribution |
|
| 119 |
- notices within Derivative Works that You distribute, alongside |
|
| 120 |
- or as an addendum to the NOTICE text from the Work, provided |
|
| 121 |
- that such additional attribution notices cannot be construed |
|
| 122 |
- as modifying the License. |
|
| 123 |
- |
|
| 124 |
- You may add Your own copyright statement to Your modifications and |
|
| 125 |
- may provide additional or different license terms and conditions |
|
| 126 |
- for use, reproduction, or distribution of Your modifications, or |
|
| 127 |
- for any such Derivative Works as a whole, provided Your use, |
|
| 128 |
- reproduction, and distribution of the Work otherwise complies with |
|
| 129 |
- the conditions stated in this License. |
|
| 130 |
- |
|
| 131 |
- 5. Submission of Contributions. Unless You explicitly state otherwise, |
|
| 132 |
- any Contribution intentionally submitted for inclusion in the Work |
|
| 133 |
- by You to the Licensor shall be under the terms and conditions of |
|
| 134 |
- this License, without any additional terms or conditions. |
|
| 135 |
- Notwithstanding the above, nothing herein shall supersede or modify |
|
| 136 |
- the terms of any separate license agreement you may have executed |
|
| 137 |
- with Licensor regarding such Contributions. |
|
| 138 |
- |
|
| 139 |
- 6. Trademarks. This License does not grant permission to use the trade |
|
| 140 |
- names, trademarks, service marks, or product names of the Licensor, |
|
| 141 |
- except as required for reasonable and customary use in describing the |
|
| 142 |
- origin of the Work and reproducing the content of the NOTICE file. |
|
| 143 |
- |
|
| 144 |
- 7. Disclaimer of Warranty. Unless required by applicable law or |
|
| 145 |
- agreed to in writing, Licensor provides the Work (and each |
|
| 146 |
- Contributor provides its Contributions) on an "AS IS" BASIS, |
|
| 147 |
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
|
| 148 |
- implied, including, without limitation, any warranties or conditions |
|
| 149 |
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
|
| 150 |
- PARTICULAR PURPOSE. You are solely responsible for determining the |
|
| 151 |
- appropriateness of using or redistributing the Work and assume any |
|
| 152 |
- risks associated with Your exercise of permissions under this License. |
|
| 153 |
- |
|
| 154 |
- 8. Limitation of Liability. In no event and under no legal theory, |
|
| 155 |
- whether in tort (including negligence), contract, or otherwise, |
|
| 156 |
- unless required by applicable law (such as deliberate and grossly |
|
| 157 |
- negligent acts) or agreed to in writing, shall any Contributor be |
|
| 158 |
- liable to You for damages, including any direct, indirect, special, |
|
| 159 |
- incidental, or consequential damages of any character arising as a |
|
| 160 |
- result of this License or out of the use or inability to use the |
|
| 161 |
- Work (including but not limited to damages for loss of goodwill, |
|
| 162 |
- work stoppage, computer failure or malfunction, or any and all |
|
| 163 |
- other commercial damages or losses), even if such Contributor |
|
| 164 |
- has been advised of the possibility of such damages. |
|
| 165 |
- |
|
| 166 |
- 9. Accepting Warranty or Additional Liability. While redistributing |
|
| 167 |
- the Work or Derivative Works thereof, You may choose to offer, |
|
| 168 |
- and charge a fee for, acceptance of support, warranty, indemnity, |
|
| 169 |
- or other liability obligations and/or rights consistent with this |
|
| 170 |
- License. However, in accepting such obligations, You may act only |
|
| 171 |
- on Your own behalf and on Your sole responsibility, not on behalf |
|
| 172 |
- of any other Contributor, and only if You agree to indemnify, |
|
| 173 |
- defend, and hold each Contributor harmless for any liability |
|
| 174 |
- incurred by, or claims asserted against, such Contributor by reason |
|
| 175 |
- of your accepting any such warranty or additional liability. |
|
| 176 |
- |
|
| 177 |
- END OF TERMS AND CONDITIONS |
|
| 178 |
- |
|
| 179 |
- Copyright 2014 Docker, Inc. |
|
| 180 |
- |
|
| 181 |
- Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 182 |
- you may not use this file except in compliance with the License. |
|
| 183 |
- You may obtain a copy of the License at |
|
| 184 |
- |
|
| 185 |
- http://www.apache.org/licenses/LICENSE-2.0 |
|
| 186 |
- |
|
| 187 |
- Unless required by applicable law or agreed to in writing, software |
|
| 188 |
- distributed under the License is distributed on an "AS IS" BASIS, |
|
| 189 |
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 190 |
- See the License for the specific language governing permissions and |
|
| 191 |
- limitations under the License. |
| 192 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,17 +0,0 @@ |
| 1 |
-runc |
|
| 2 |
- |
|
| 3 |
-Copyright 2012-2015 Docker, Inc. |
|
| 4 |
- |
|
| 5 |
-This product includes software developed at Docker, Inc. (http://www.docker.com). |
|
| 6 |
- |
|
| 7 |
-The following is courtesy of our legal counsel: |
|
| 8 |
- |
|
| 9 |
- |
|
| 10 |
-Use and transfer of Docker may be subject to certain restrictions by the |
|
| 11 |
-United States and other governments. |
|
| 12 |
-It is your responsibility to ensure that your use and/or transfer does not |
|
| 13 |
-violate applicable laws. |
|
| 14 |
- |
|
| 15 |
-For more information, please see http://www.bis.doc.gov |
|
| 16 |
- |
|
| 17 |
-See also http://www.apache.org/dev/crypto.html and/or seek legal counsel. |
| 18 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,80 +0,0 @@ |
| 1 |
-package cgroups |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "errors" |
|
| 5 |
- |
|
| 6 |
- "github.com/opencontainers/runc/libcontainer/configs" |
|
| 7 |
-) |
|
| 8 |
- |
|
| 9 |
var (
	// ErrDevicesUnsupported is an error returned when a cgroup manager
	// is not configured to set device rules.
	ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")

	// ErrRootless is returned by [Manager.Apply] when there is an error
	// creating cgroup directory, and cgroup.Rootless is set. In general,
	// this error is to be ignored.
	ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")

	// DevicesSetV1 and DevicesSetV2 are functions to set devices for
	// cgroup v1 and v2, respectively. Unless the
	// [github.com/opencontainers/runc/libcontainer/cgroups/devices]
	// package is imported, they are nil, so cgroup managers can't
	// manage devices.
	DevicesSetV1 func(path string, r *configs.Resources) error
	DevicesSetV2 func(path string, r *configs.Resources) error
)
|
| 27 |
- |
|
| 28 |
// Manager is the set of operations a cgroup manager provides: creating and
// destroying a cgroup, adding processes to it, setting its resources,
// freezing it, and querying its state and statistics.
type Manager interface {
	// Apply creates a cgroup, if not yet created, and adds a process
	// with the specified pid into that cgroup. A special value of -1
	// can be used to merely create a cgroup.
	Apply(pid int) error

	// GetPids returns the PIDs of all processes inside the cgroup.
	GetPids() ([]int, error)

	// GetAllPids returns the PIDs of all processes inside the cgroup
	// and all its sub-cgroups.
	GetAllPids() ([]int, error)

	// GetStats returns cgroups statistics.
	GetStats() (*Stats, error)

	// Freeze sets the freezer cgroup to the specified state.
	Freeze(state configs.FreezerState) error

	// Destroy removes cgroup.
	Destroy() error

	// Path returns a cgroup path to the specified controller/subsystem.
	// For cgroupv2, the argument is unused and can be empty.
	Path(string) string

	// Set sets cgroup resources parameters/limits. If the argument is nil,
	// the resources specified during Manager creation (or the previous call
	// to Set) are used.
	Set(r *configs.Resources) error

	// GetPaths returns cgroup path(s) to save in a state file in order to
	// restore later.
	//
	// For cgroup v1, a key is cgroup subsystem name, and the value is the
	// path to the cgroup for this subsystem.
	//
	// For cgroup v2 unified hierarchy, a key is "", and the value is the
	// unified path.
	GetPaths() map[string]string

	// GetCgroups returns the cgroup data as configured.
	GetCgroups() (*configs.Cgroup, error)

	// GetFreezerState retrieves the current FreezerState of the cgroup.
	GetFreezerState() (configs.FreezerState, error)

	// Exists returns whether the cgroup path exists or not.
	Exists() bool

	// OOMKillCount reports OOM kill count for the cgroup.
	OOMKillCount() (uint64, error)
}
| 81 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,216 +0,0 @@ |
| 1 |
-package cgroups |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "bytes" |
|
| 5 |
- "errors" |
|
| 6 |
- "fmt" |
|
| 7 |
- "os" |
|
| 8 |
- "path" |
|
| 9 |
- "strconv" |
|
| 10 |
- "strings" |
|
| 11 |
- "sync" |
|
| 12 |
- |
|
| 13 |
- "github.com/opencontainers/runc/libcontainer/utils" |
|
| 14 |
- "github.com/sirupsen/logrus" |
|
| 15 |
- "golang.org/x/sys/unix" |
|
| 16 |
-) |
|
| 17 |
- |
|
| 18 |
-// OpenFile opens a cgroup file in a given dir with given flags. |
|
| 19 |
-// It is supposed to be used for cgroup files only, and returns |
|
| 20 |
-// an error if the file is not a cgroup file. |
|
| 21 |
-// |
|
| 22 |
-// Arguments dir and file are joined together to form an absolute path |
|
| 23 |
-// to a file being opened. |
|
| 24 |
-func OpenFile(dir, file string, flags int) (*os.File, error) {
|
|
| 25 |
- if dir == "" {
|
|
| 26 |
- return nil, fmt.Errorf("no directory specified for %s", file)
|
|
| 27 |
- } |
|
| 28 |
- return openFile(dir, file, flags) |
|
| 29 |
-} |
|
| 30 |
- |
|
| 31 |
-// ReadFile reads data from a cgroup file in dir. |
|
| 32 |
-// It is supposed to be used for cgroup files only. |
|
| 33 |
-func ReadFile(dir, file string) (string, error) {
|
|
| 34 |
- fd, err := OpenFile(dir, file, unix.O_RDONLY) |
|
| 35 |
- if err != nil {
|
|
| 36 |
- return "", err |
|
| 37 |
- } |
|
| 38 |
- defer fd.Close() |
|
| 39 |
- var buf bytes.Buffer |
|
| 40 |
- |
|
| 41 |
- _, err = buf.ReadFrom(fd) |
|
| 42 |
- return buf.String(), err |
|
| 43 |
-} |
|
| 44 |
- |
|
| 45 |
-// WriteFile writes data to a cgroup file in dir. |
|
| 46 |
-// It is supposed to be used for cgroup files only. |
|
| 47 |
-func WriteFile(dir, file, data string) error {
|
|
| 48 |
- fd, err := OpenFile(dir, file, unix.O_WRONLY) |
|
| 49 |
- if err != nil {
|
|
| 50 |
- return err |
|
| 51 |
- } |
|
| 52 |
- defer fd.Close() |
|
| 53 |
- if _, err := fd.WriteString(data); err != nil {
|
|
| 54 |
- // Having data in the error message helps in debugging. |
|
| 55 |
- return fmt.Errorf("failed to write %q: %w", data, err)
|
|
| 56 |
- } |
|
| 57 |
- return nil |
|
| 58 |
-} |
|
| 59 |
- |
|
| 60 |
-// WriteFileByLine is the same as WriteFile, except if data contains newlines, |
|
| 61 |
-// it is written line by line. |
|
| 62 |
-func WriteFileByLine(dir, file, data string) error {
|
|
| 63 |
- i := strings.Index(data, "\n") |
|
| 64 |
- if i == -1 {
|
|
| 65 |
- return WriteFile(dir, file, data) |
|
| 66 |
- } |
|
| 67 |
- |
|
| 68 |
- fd, err := OpenFile(dir, file, unix.O_WRONLY) |
|
| 69 |
- if err != nil {
|
|
| 70 |
- return err |
|
| 71 |
- } |
|
| 72 |
- defer fd.Close() |
|
| 73 |
- start := 0 |
|
| 74 |
- for {
|
|
| 75 |
- var line string |
|
| 76 |
- if i == -1 {
|
|
| 77 |
- line = data[start:] |
|
| 78 |
- } else {
|
|
| 79 |
- line = data[start : start+i+1] |
|
| 80 |
- } |
|
| 81 |
- _, err := fd.WriteString(line) |
|
| 82 |
- if err != nil {
|
|
| 83 |
- return fmt.Errorf("failed to write %q: %w", line, err)
|
|
| 84 |
- } |
|
| 85 |
- if i == -1 {
|
|
| 86 |
- break |
|
| 87 |
- } |
|
| 88 |
- start += i + 1 |
|
| 89 |
- i = strings.Index(data[start:], "\n") |
|
| 90 |
- } |
|
| 91 |
- return nil |
|
| 92 |
-} |
|
| 93 |
- |
|
| 94 |
const (
	// cgroupfsDir is the directory prepareOpenat2 opens as the root
	// handle for openat2-based access.
	cgroupfsDir = "/sys/fs/cgroup"
	// cgroupfsPrefix is cgroupfsDir with a trailing slash; openFile trims
	// it from a path to obtain the path relative to the root handle.
	cgroupfsPrefix = cgroupfsDir + "/"
)
|
| 98 |
- |
|
| 99 |
var (
	// TestMode is set to true by unit tests that need "fake" cgroupfs.
	TestMode bool

	// State initialized once by prepareOpenat2: the handle opened on
	// cgroupfsDir, the guard and error of that initialization, and the
	// resolve flags openFile passes to openat2.
	cgroupRootHandle *os.File
	prepOnce         sync.Once
	prepErr          error
	resolveFlags     uint64
)
|
| 108 |
- |
|
| 109 |
-func prepareOpenat2() error {
|
|
| 110 |
- prepOnce.Do(func() {
|
|
| 111 |
- fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
|
| 112 |
- Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC, |
|
| 113 |
- }) |
|
| 114 |
- if err != nil {
|
|
| 115 |
- prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
|
| 116 |
- if err != unix.ENOSYS {
|
|
| 117 |
- logrus.Warnf("falling back to securejoin: %s", prepErr)
|
|
| 118 |
- } else {
|
|
| 119 |
- logrus.Debug("openat2 not available, falling back to securejoin")
|
|
| 120 |
- } |
|
| 121 |
- return |
|
| 122 |
- } |
|
| 123 |
- file := os.NewFile(uintptr(fd), cgroupfsDir) |
|
| 124 |
- |
|
| 125 |
- var st unix.Statfs_t |
|
| 126 |
- if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
|
|
| 127 |
- prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
|
| 128 |
- logrus.Warnf("falling back to securejoin: %s", prepErr)
|
|
| 129 |
- return |
|
| 130 |
- } |
|
| 131 |
- |
|
| 132 |
- cgroupRootHandle = file |
|
| 133 |
- resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS |
|
| 134 |
- if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
|
| 135 |
- // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks |
|
| 136 |
- resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS |
|
| 137 |
- } |
|
| 138 |
- }) |
|
| 139 |
- |
|
| 140 |
- return prepErr |
|
| 141 |
-} |
|
| 142 |
- |
|
| 143 |
// openFile opens dir/file (file is cleaned with utils.CleanPath first).
// When prepareOpenat2 succeeded and the path lies under cgroupfsPrefix, the
// file is opened with openat2 relative to cgroupRootHandle using
// resolveFlags; otherwise openFallback is used.
func openFile(dir, file string, flags int) (*os.File, error) {
	mode := os.FileMode(0)
	if TestMode && flags&os.O_WRONLY != 0 {
		// "emulate" cgroup fs for unit tests
		flags |= os.O_TRUNC | os.O_CREATE
		mode = 0o600
	}
	path := path.Join(dir, utils.CleanPath(file))
	if prepareOpenat2() != nil {
		return openFallback(path, flags, mode)
	}
	// If trimming the prefix changed nothing, the path is not under
	// cgroupfsDir and cannot be opened relative to the root handle.
	relPath := strings.TrimPrefix(path, cgroupfsPrefix)
	if len(relPath) == len(path) { // non-standard path, old system?
		return openFallback(path, flags, mode)
	}

	fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
		&unix.OpenHow{
			Resolve: resolveFlags,
			Flags:   uint64(flags) | unix.O_CLOEXEC,
			Mode:    uint64(mode),
		})
	if err != nil {
		err = &os.PathError{Op: "openat2", Path: path, Err: err}
		// Check if cgroupRootHandle is still opened to cgroupfsDir
		// (happens when this package is incorrectly used
		// across the chroot/pivot_root/mntns boundary, or
		// when /sys/fs/cgroup is remounted).
		//
		// TODO: if such usage will ever be common, amend this
		// to reopen cgroupRootHandle and retry openat2.
		fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
		defer closer()
		// A Readlink failure leaves fdDest empty, which also counts as
		// a mismatch below.
		fdDest, _ := os.Readlink(fdPath)
		if fdDest != cgroupfsDir {
			// Wrap the error so it is clear that cgroupRootHandle
			// is opened to an unexpected/wrong directory.
			err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
				cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
		}
		return nil, err
	}

	return os.NewFile(uintptr(fd), path), nil
}
|
| 188 |
- |
|
| 189 |
// errNotCgroupfs is the error openAndCheck wraps when the opened file is
// on neither a cgroup nor a cgroup2 filesystem.
var errNotCgroupfs = errors.New("not a cgroup file")

// openFallback is the open function openFile uses when openat2 cannot be
// used. Can be changed by unit tests.
var openFallback = openAndCheck
|
| 193 |
- |
|
| 194 |
-// openAndCheck is used when openat2(2) is not available. It checks the opened |
|
| 195 |
-// file is on cgroupfs, returning an error otherwise. |
|
| 196 |
-func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
|
| 197 |
- fd, err := os.OpenFile(path, flags, mode) |
|
| 198 |
- if err != nil {
|
|
| 199 |
- return nil, err |
|
| 200 |
- } |
|
| 201 |
- if TestMode {
|
|
| 202 |
- return fd, nil |
|
| 203 |
- } |
|
| 204 |
- // Check this is a cgroupfs file. |
|
| 205 |
- var st unix.Statfs_t |
|
| 206 |
- if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
|
| 207 |
- _ = fd.Close() |
|
| 208 |
- return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
|
| 209 |
- } |
|
| 210 |
- if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
|
| 211 |
- _ = fd.Close() |
|
| 212 |
- return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
|
| 213 |
- } |
|
| 214 |
- |
|
| 215 |
- return fd, nil |
|
| 216 |
-} |
| 217 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,27 +0,0 @@ |
| 1 |
-package cgroups |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "io/fs" |
|
| 5 |
- "path/filepath" |
|
| 6 |
-) |
|
| 7 |
- |
|
| 8 |
-// GetAllPids returns all pids from the cgroup identified by path, and all its |
|
| 9 |
-// sub-cgroups. |
|
| 10 |
-func GetAllPids(path string) ([]int, error) {
|
|
| 11 |
- var pids []int |
|
| 12 |
- err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
|
| 13 |
- if iErr != nil {
|
|
| 14 |
- return iErr |
|
| 15 |
- } |
|
| 16 |
- if !d.IsDir() {
|
|
| 17 |
- return nil |
|
| 18 |
- } |
|
| 19 |
- cPids, err := readProcsFile(p) |
|
| 20 |
- if err != nil {
|
|
| 21 |
- return err |
|
| 22 |
- } |
|
| 23 |
- pids = append(pids, cPids...) |
|
| 24 |
- return nil |
|
| 25 |
- }) |
|
| 26 |
- return pids, err |
|
| 27 |
-} |
| 28 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,200 +0,0 @@ |
| 1 |
-package cgroups |
|
| 2 |
- |
|
| 3 |
-type ThrottlingData struct {
|
|
| 4 |
- // Number of periods with throttling active |
|
| 5 |
- Periods uint64 `json:"periods,omitempty"` |
|
| 6 |
- // Number of periods when the container hit its throttling limit. |
|
| 7 |
- ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` |
|
| 8 |
- // Aggregate time the container was throttled for in nanoseconds. |
|
| 9 |
- ThrottledTime uint64 `json:"throttled_time,omitempty"` |
|
| 10 |
-} |
|
| 11 |
- |
|
| 12 |
-// CpuUsage denotes the usage of a CPU. |
|
| 13 |
-// All CPU stats are aggregate since container inception. |
|
| 14 |
-type CpuUsage struct {
|
|
| 15 |
- // Total CPU time consumed. |
|
| 16 |
- // Units: nanoseconds. |
|
| 17 |
- TotalUsage uint64 `json:"total_usage,omitempty"` |
|
| 18 |
- // Total CPU time consumed per core. |
|
| 19 |
- // Units: nanoseconds. |
|
| 20 |
- PercpuUsage []uint64 `json:"percpu_usage,omitempty"` |
|
| 21 |
- // CPU time consumed per core in kernel mode |
|
| 22 |
- // Units: nanoseconds. |
|
| 23 |
- PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"` |
|
| 24 |
- // CPU time consumed per core in user mode |
|
| 25 |
- // Units: nanoseconds. |
|
| 26 |
- PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"` |
|
| 27 |
- // Time spent by tasks of the cgroup in kernel mode. |
|
| 28 |
- // Units: nanoseconds. |
|
| 29 |
- UsageInKernelmode uint64 `json:"usage_in_kernelmode"` |
|
| 30 |
- // Time spent by tasks of the cgroup in user mode. |
|
| 31 |
- // Units: nanoseconds. |
|
| 32 |
- UsageInUsermode uint64 `json:"usage_in_usermode"` |
|
| 33 |
-} |
|
| 34 |
- |
|
| 35 |
-type PSIData struct {
|
|
| 36 |
- Avg10 float64 `json:"avg10"` |
|
| 37 |
- Avg60 float64 `json:"avg60"` |
|
| 38 |
- Avg300 float64 `json:"avg300"` |
|
| 39 |
- Total uint64 `json:"total"` |
|
| 40 |
-} |
|
| 41 |
- |
|
| 42 |
-type PSIStats struct {
|
|
| 43 |
- Some PSIData `json:"some,omitempty"` |
|
| 44 |
- Full PSIData `json:"full,omitempty"` |
|
| 45 |
-} |
|
| 46 |
- |
|
| 47 |
-type CpuStats struct {
|
|
| 48 |
- CpuUsage CpuUsage `json:"cpu_usage,omitempty"` |
|
| 49 |
- ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` |
|
| 50 |
- PSI *PSIStats `json:"psi,omitempty"` |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-type CPUSetStats struct {
|
|
| 54 |
- // List of the physical numbers of the CPUs on which processes |
|
| 55 |
- // in that cpuset are allowed to execute |
|
| 56 |
- CPUs []uint16 `json:"cpus,omitempty"` |
|
| 57 |
- // cpu_exclusive flag |
|
| 58 |
- CPUExclusive uint64 `json:"cpu_exclusive"` |
|
| 59 |
- // List of memory nodes on which processes in that cpuset |
|
| 60 |
- // are allowed to allocate memory |
|
| 61 |
- Mems []uint16 `json:"mems,omitempty"` |
|
| 62 |
- // mem_hardwall flag |
|
| 63 |
- MemHardwall uint64 `json:"mem_hardwall"` |
|
| 64 |
- // mem_exclusive flag |
|
| 65 |
- MemExclusive uint64 `json:"mem_exclusive"` |
|
| 66 |
- // memory_migrate flag |
|
| 67 |
- MemoryMigrate uint64 `json:"memory_migrate"` |
|
| 68 |
- // memory_spread page flag |
|
| 69 |
- MemorySpreadPage uint64 `json:"memory_spread_page"` |
|
| 70 |
- // memory_spread slab flag |
|
| 71 |
- MemorySpreadSlab uint64 `json:"memory_spread_slab"` |
|
| 72 |
- // memory_pressure |
|
| 73 |
- MemoryPressure uint64 `json:"memory_pressure"` |
|
| 74 |
- // sched_load balance flag |
|
| 75 |
- SchedLoadBalance uint64 `json:"sched_load_balance"` |
|
| 76 |
- // sched_relax_domain_level |
|
| 77 |
- SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"` |
|
| 78 |
-} |
|
| 79 |
- |
|
| 80 |
-type MemoryData struct {
|
|
| 81 |
- Usage uint64 `json:"usage,omitempty"` |
|
| 82 |
- MaxUsage uint64 `json:"max_usage,omitempty"` |
|
| 83 |
- Failcnt uint64 `json:"failcnt"` |
|
| 84 |
- Limit uint64 `json:"limit"` |
|
| 85 |
-} |
|
| 86 |
- |
|
| 87 |
-type MemoryStats struct {
|
|
| 88 |
- // memory used for cache |
|
| 89 |
- Cache uint64 `json:"cache,omitempty"` |
|
| 90 |
- // usage of memory |
|
| 91 |
- Usage MemoryData `json:"usage,omitempty"` |
|
| 92 |
- // usage of memory + swap |
|
| 93 |
- SwapUsage MemoryData `json:"swap_usage,omitempty"` |
|
| 94 |
- // usage of swap only |
|
| 95 |
- SwapOnlyUsage MemoryData `json:"swap_only_usage,omitempty"` |
|
| 96 |
- // usage of kernel memory |
|
| 97 |
- KernelUsage MemoryData `json:"kernel_usage,omitempty"` |
|
| 98 |
- // usage of kernel TCP memory |
|
| 99 |
- KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` |
|
| 100 |
- // usage of memory pages by NUMA node |
|
| 101 |
- // see chapter 5.6 of memory controller documentation |
|
| 102 |
- PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"` |
|
| 103 |
- // if true, memory usage is accounted for throughout a hierarchy of cgroups. |
|
| 104 |
- UseHierarchy bool `json:"use_hierarchy"` |
|
| 105 |
- |
|
| 106 |
- Stats map[string]uint64 `json:"stats,omitempty"` |
|
| 107 |
- PSI *PSIStats `json:"psi,omitempty"` |
|
| 108 |
-} |
|
| 109 |
- |
|
| 110 |
-type PageUsageByNUMA struct {
|
|
| 111 |
- // Embedding is used as types can't be recursive. |
|
| 112 |
- PageUsageByNUMAInner |
|
| 113 |
- Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"` |
|
| 114 |
-} |
|
| 115 |
- |
|
| 116 |
-type PageUsageByNUMAInner struct {
|
|
| 117 |
- Total PageStats `json:"total,omitempty"` |
|
| 118 |
- File PageStats `json:"file,omitempty"` |
|
| 119 |
- Anon PageStats `json:"anon,omitempty"` |
|
| 120 |
- Unevictable PageStats `json:"unevictable,omitempty"` |
|
| 121 |
-} |
|
| 122 |
- |
|
| 123 |
-type PageStats struct {
|
|
| 124 |
- Total uint64 `json:"total,omitempty"` |
|
| 125 |
- Nodes map[uint8]uint64 `json:"nodes,omitempty"` |
|
| 126 |
-} |
|
| 127 |
- |
|
| 128 |
-type PidsStats struct {
|
|
| 129 |
- // number of pids in the cgroup |
|
| 130 |
- Current uint64 `json:"current,omitempty"` |
|
| 131 |
- // active pids hard limit |
|
| 132 |
- Limit uint64 `json:"limit,omitempty"` |
|
| 133 |
-} |
|
| 134 |
- |
|
| 135 |
-type BlkioStatEntry struct {
|
|
| 136 |
- Major uint64 `json:"major,omitempty"` |
|
| 137 |
- Minor uint64 `json:"minor,omitempty"` |
|
| 138 |
- Op string `json:"op,omitempty"` |
|
| 139 |
- Value uint64 `json:"value,omitempty"` |
|
| 140 |
-} |
|
| 141 |
- |
|
| 142 |
-type BlkioStats struct {
|
|
| 143 |
- // number of bytes transferred to and from the block device |
|
| 144 |
- IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` |
|
| 145 |
- IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` |
|
| 146 |
- IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` |
|
| 147 |
- IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` |
|
| 148 |
- IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` |
|
| 149 |
- IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` |
|
| 150 |
- IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` |
|
| 151 |
- SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` |
|
| 152 |
- PSI *PSIStats `json:"psi,omitempty"` |
|
| 153 |
-} |
|
| 154 |
- |
|
| 155 |
-type HugetlbStats struct {
|
|
| 156 |
- // current res_counter usage for hugetlb |
|
| 157 |
- Usage uint64 `json:"usage,omitempty"` |
|
| 158 |
- // maximum usage ever recorded. |
|
| 159 |
- MaxUsage uint64 `json:"max_usage,omitempty"` |
|
| 160 |
- // number of times hugetlb usage allocation failure. |
|
| 161 |
- Failcnt uint64 `json:"failcnt"` |
|
| 162 |
-} |
|
| 163 |
- |
|
| 164 |
-type RdmaEntry struct {
|
|
| 165 |
- Device string `json:"device,omitempty"` |
|
| 166 |
- HcaHandles uint32 `json:"hca_handles,omitempty"` |
|
| 167 |
- HcaObjects uint32 `json:"hca_objects,omitempty"` |
|
| 168 |
-} |
|
| 169 |
- |
|
| 170 |
-type RdmaStats struct {
|
|
| 171 |
- RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"` |
|
| 172 |
- RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"` |
|
| 173 |
-} |
|
| 174 |
- |
|
| 175 |
-type MiscStats struct {
|
|
| 176 |
- // current resource usage for a key in misc |
|
| 177 |
- Usage uint64 `json:"usage,omitempty"` |
|
| 178 |
- // number of times the resource usage was about to go over the max boundary |
|
| 179 |
- Events uint64 `json:"events,omitempty"` |
|
| 180 |
-} |
|
| 181 |
- |
|
| 182 |
-type Stats struct {
|
|
| 183 |
- CpuStats CpuStats `json:"cpu_stats,omitempty"` |
|
| 184 |
- CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` |
|
| 185 |
- MemoryStats MemoryStats `json:"memory_stats,omitempty"` |
|
| 186 |
- PidsStats PidsStats `json:"pids_stats,omitempty"` |
|
| 187 |
- BlkioStats BlkioStats `json:"blkio_stats,omitempty"` |
|
| 188 |
- // the map is in the format "size of hugepage: stats of the hugepage" |
|
| 189 |
- HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` |
|
| 190 |
- RdmaStats RdmaStats `json:"rdma_stats,omitempty"` |
|
| 191 |
- // the map is in the format "misc resource name: stats of the key" |
|
| 192 |
- MiscStats map[string]MiscStats `json:"misc_stats,omitempty"` |
|
| 193 |
-} |
|
| 194 |
- |
|
| 195 |
-func NewStats() *Stats {
|
|
| 196 |
- memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
|
| 197 |
- hugetlbStats := make(map[string]HugetlbStats) |
|
| 198 |
- miscStats := make(map[string]MiscStats) |
|
| 199 |
- return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
|
|
| 200 |
-} |
| 201 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,468 +0,0 @@ |
| 1 |
-package cgroups |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "bufio" |
|
| 5 |
- "errors" |
|
| 6 |
- "fmt" |
|
| 7 |
- "io" |
|
| 8 |
- "os" |
|
| 9 |
- "path/filepath" |
|
| 10 |
- "strconv" |
|
| 11 |
- "strings" |
|
| 12 |
- "sync" |
|
| 13 |
- "time" |
|
| 14 |
- |
|
| 15 |
- "github.com/moby/sys/userns" |
|
| 16 |
- "github.com/sirupsen/logrus" |
|
| 17 |
- "golang.org/x/sys/unix" |
|
| 18 |
-) |
|
| 19 |
- |
|
| 20 |
-const ( |
|
| 21 |
- CgroupProcesses = "cgroup.procs" |
|
| 22 |
- unifiedMountpoint = "/sys/fs/cgroup" |
|
| 23 |
- hybridMountpoint = "/sys/fs/cgroup/unified" |
|
| 24 |
-) |
|
| 25 |
- |
|
| 26 |
-var ( |
|
| 27 |
- isUnifiedOnce sync.Once |
|
| 28 |
- isUnified bool |
|
| 29 |
- isHybridOnce sync.Once |
|
| 30 |
- isHybrid bool |
|
| 31 |
-) |
|
| 32 |
- |
|
| 33 |
-// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. |
|
| 34 |
-func IsCgroup2UnifiedMode() bool {
|
|
| 35 |
- isUnifiedOnce.Do(func() {
|
|
| 36 |
- var st unix.Statfs_t |
|
| 37 |
- err := unix.Statfs(unifiedMountpoint, &st) |
|
| 38 |
- if err != nil {
|
|
| 39 |
- level := logrus.WarnLevel |
|
| 40 |
- if os.IsNotExist(err) && userns.RunningInUserNS() {
|
|
| 41 |
- // For rootless containers, sweep it under the rug. |
|
| 42 |
- level = logrus.DebugLevel |
|
| 43 |
- } |
|
| 44 |
- logrus.StandardLogger().Logf(level, |
|
| 45 |
- "statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err) |
|
| 46 |
- } |
|
| 47 |
- isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC |
|
| 48 |
- }) |
|
| 49 |
- return isUnified |
|
| 50 |
-} |
|
| 51 |
- |
|
| 52 |
-// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode. |
|
| 53 |
-func IsCgroup2HybridMode() bool {
|
|
| 54 |
- isHybridOnce.Do(func() {
|
|
| 55 |
- var st unix.Statfs_t |
|
| 56 |
- err := unix.Statfs(hybridMountpoint, &st) |
|
| 57 |
- if err != nil {
|
|
| 58 |
- isHybrid = false |
|
| 59 |
- if !os.IsNotExist(err) {
|
|
| 60 |
- // Report unexpected errors. |
|
| 61 |
- logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
|
|
| 62 |
- } |
|
| 63 |
- return |
|
| 64 |
- } |
|
| 65 |
- isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC |
|
| 66 |
- }) |
|
| 67 |
- return isHybrid |
|
| 68 |
-} |
|
| 69 |
- |
|
| 70 |
-type Mount struct {
|
|
| 71 |
- Mountpoint string |
|
| 72 |
- Root string |
|
| 73 |
- Subsystems []string |
|
| 74 |
-} |
|
| 75 |
- |
|
| 76 |
-// GetCgroupMounts returns the mounts for the cgroup subsystems. |
|
| 77 |
-// all indicates whether to return just the first instance or all the mounts. |
|
| 78 |
-// This function should not be used from cgroupv2 code, as in this case |
|
| 79 |
-// all the controllers are available under the constant unifiedMountpoint. |
|
| 80 |
-func GetCgroupMounts(all bool) ([]Mount, error) {
|
|
| 81 |
- if IsCgroup2UnifiedMode() {
|
|
| 82 |
- // TODO: remove cgroupv2 case once all external users are converted |
|
| 83 |
- availableControllers, err := GetAllSubsystems() |
|
| 84 |
- if err != nil {
|
|
| 85 |
- return nil, err |
|
| 86 |
- } |
|
| 87 |
- m := Mount{
|
|
| 88 |
- Mountpoint: unifiedMountpoint, |
|
| 89 |
- Root: unifiedMountpoint, |
|
| 90 |
- Subsystems: availableControllers, |
|
| 91 |
- } |
|
| 92 |
- return []Mount{m}, nil
|
|
| 93 |
- } |
|
| 94 |
- |
|
| 95 |
- return getCgroupMountsV1(all) |
|
| 96 |
-} |
|
| 97 |
- |
|
| 98 |
-// GetAllSubsystems returns all the cgroup subsystems supported by the kernel |
|
| 99 |
-func GetAllSubsystems() ([]string, error) {
|
|
| 100 |
- // /proc/cgroups is meaningless for v2 |
|
| 101 |
- // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features |
|
| 102 |
- if IsCgroup2UnifiedMode() {
|
|
| 103 |
- // "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers. |
|
| 104 |
- // - devices: implemented in kernel 4.15 |
|
| 105 |
- // - freezer: implemented in kernel 5.2 |
|
| 106 |
- // We assume these are always available, as it is hard to detect availability. |
|
| 107 |
- pseudo := []string{"devices", "freezer"}
|
|
| 108 |
- data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
|
|
| 109 |
- if err != nil {
|
|
| 110 |
- return nil, err |
|
| 111 |
- } |
|
| 112 |
- subsystems := append(pseudo, strings.Fields(data)...) |
|
| 113 |
- return subsystems, nil |
|
| 114 |
- } |
|
| 115 |
- f, err := os.Open("/proc/cgroups")
|
|
| 116 |
- if err != nil {
|
|
| 117 |
- return nil, err |
|
| 118 |
- } |
|
| 119 |
- defer f.Close() |
|
| 120 |
- |
|
| 121 |
- subsystems := []string{}
|
|
| 122 |
- |
|
| 123 |
- s := bufio.NewScanner(f) |
|
| 124 |
- for s.Scan() {
|
|
| 125 |
- text := s.Text() |
|
| 126 |
- if text[0] != '#' {
|
|
| 127 |
- parts := strings.Fields(text) |
|
| 128 |
- if len(parts) >= 4 && parts[3] != "0" {
|
|
| 129 |
- subsystems = append(subsystems, parts[0]) |
|
| 130 |
- } |
|
| 131 |
- } |
|
| 132 |
- } |
|
| 133 |
- if err := s.Err(); err != nil {
|
|
| 134 |
- return nil, err |
|
| 135 |
- } |
|
| 136 |
- return subsystems, nil |
|
| 137 |
-} |
|
| 138 |
- |
|
| 139 |
-func readProcsFile(dir string) (out []int, _ error) {
|
|
| 140 |
- file := CgroupProcesses |
|
| 141 |
- retry := true |
|
| 142 |
- |
|
| 143 |
-again: |
|
| 144 |
- f, err := OpenFile(dir, file, os.O_RDONLY) |
|
| 145 |
- if err != nil {
|
|
| 146 |
- return nil, err |
|
| 147 |
- } |
|
| 148 |
- defer f.Close() |
|
| 149 |
- |
|
| 150 |
- s := bufio.NewScanner(f) |
|
| 151 |
- for s.Scan() {
|
|
| 152 |
- if t := s.Text(); t != "" {
|
|
| 153 |
- pid, err := strconv.Atoi(t) |
|
| 154 |
- if err != nil {
|
|
| 155 |
- return nil, err |
|
| 156 |
- } |
|
| 157 |
- out = append(out, pid) |
|
| 158 |
- } |
|
| 159 |
- } |
|
| 160 |
- if errors.Is(s.Err(), unix.ENOTSUP) && retry {
|
|
| 161 |
- // For a threaded cgroup, read returns ENOTSUP, and we should |
|
| 162 |
- // read from cgroup.threads instead. |
|
| 163 |
- file = "cgroup.threads" |
|
| 164 |
- retry = false |
|
| 165 |
- goto again |
|
| 166 |
- } |
|
| 167 |
- return out, s.Err() |
|
| 168 |
-} |
|
| 169 |
- |
|
| 170 |
-// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup |
|
| 171 |
-// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g. |
|
| 172 |
-// |
|
| 173 |
-// "cpu": "/user.slice/user-1000.slice" |
|
| 174 |
-// "pids": "/user.slice/user-1000.slice" |
|
| 175 |
-// |
|
| 176 |
-// etc. |
|
| 177 |
-// |
|
| 178 |
-// Note that for cgroup v2 unified hierarchy, there are no per-controller |
|
| 179 |
-// cgroup paths, so the resulting map will have a single element where the key |
|
| 180 |
-// is empty string ("") and the value is the cgroup path the <pid> is in.
|
|
| 181 |
-func ParseCgroupFile(path string) (map[string]string, error) {
|
|
| 182 |
- f, err := os.Open(path) |
|
| 183 |
- if err != nil {
|
|
| 184 |
- return nil, err |
|
| 185 |
- } |
|
| 186 |
- defer f.Close() |
|
| 187 |
- |
|
| 188 |
- return parseCgroupFromReader(f) |
|
| 189 |
-} |
|
| 190 |
- |
|
| 191 |
-// helper function for ParseCgroupFile to make testing easier |
|
| 192 |
-func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
|
| 193 |
- s := bufio.NewScanner(r) |
|
| 194 |
- cgroups := make(map[string]string) |
|
| 195 |
- |
|
| 196 |
- for s.Scan() {
|
|
| 197 |
- text := s.Text() |
|
| 198 |
- // from cgroups(7): |
|
| 199 |
- // /proc/[pid]/cgroup |
|
| 200 |
- // ... |
|
| 201 |
- // For each cgroup hierarchy ... there is one entry |
|
| 202 |
- // containing three colon-separated fields of the form: |
|
| 203 |
- // hierarchy-ID:subsystem-list:cgroup-path |
|
| 204 |
- parts := strings.SplitN(text, ":", 3) |
|
| 205 |
- if len(parts) < 3 {
|
|
| 206 |
- return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
|
|
| 207 |
- } |
|
| 208 |
- |
|
| 209 |
- for _, subs := range strings.Split(parts[1], ",") {
|
|
| 210 |
- cgroups[subs] = parts[2] |
|
| 211 |
- } |
|
| 212 |
- } |
|
| 213 |
- if err := s.Err(); err != nil {
|
|
| 214 |
- return nil, err |
|
| 215 |
- } |
|
| 216 |
- |
|
| 217 |
- return cgroups, nil |
|
| 218 |
-} |
|
| 219 |
- |
|
| 220 |
-func PathExists(path string) bool {
|
|
| 221 |
- if _, err := os.Stat(path); err != nil {
|
|
| 222 |
- return false |
|
| 223 |
- } |
|
| 224 |
- return true |
|
| 225 |
-} |
|
| 226 |
- |
|
| 227 |
-// rmdir tries to remove a directory, optionally retrying on EBUSY. |
|
| 228 |
-func rmdir(path string, retry bool) error {
|
|
| 229 |
- delay := time.Millisecond |
|
| 230 |
- tries := 10 |
|
| 231 |
- |
|
| 232 |
-again: |
|
| 233 |
- err := unix.Rmdir(path) |
|
| 234 |
- switch err { // nolint:errorlint // unix errors are bare
|
|
| 235 |
- case nil, unix.ENOENT: |
|
| 236 |
- return nil |
|
| 237 |
- case unix.EINTR: |
|
| 238 |
- goto again |
|
| 239 |
- case unix.EBUSY: |
|
| 240 |
- if retry && tries > 0 {
|
|
| 241 |
- time.Sleep(delay) |
|
| 242 |
- delay *= 2 |
|
| 243 |
- tries-- |
|
| 244 |
- goto again |
|
| 245 |
- |
|
| 246 |
- } |
|
| 247 |
- } |
|
| 248 |
- return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
|
| 249 |
-} |
|
| 250 |
- |
|
| 251 |
-// RemovePath aims to remove cgroup path. It does so recursively, |
|
| 252 |
-// by removing any subdirectories (sub-cgroups) first. |
|
| 253 |
-func RemovePath(path string) error {
|
|
| 254 |
- // Try the fast path first; don't retry on EBUSY yet. |
|
| 255 |
- if err := rmdir(path, false); err == nil {
|
|
| 256 |
- return nil |
|
| 257 |
- } |
|
| 258 |
- |
|
| 259 |
- // There are many reasons why rmdir can fail, including: |
|
| 260 |
- // 1. cgroup have existing sub-cgroups; |
|
| 261 |
- // 2. cgroup (still) have some processes (that are about to vanish); |
|
| 262 |
- // 3. lack of permission (one example is read-only /sys/fs/cgroup mount, |
|
| 263 |
- // in which case rmdir returns EROFS even for for a non-existent path, |
|
| 264 |
- // see issue 4518). |
|
| 265 |
- // |
|
| 266 |
- // Using os.ReadDir here kills two birds with one stone: check if |
|
| 267 |
- // the directory exists (handling scenario 3 above), and use |
|
| 268 |
- // directory contents to remove sub-cgroups (handling scenario 1). |
|
| 269 |
- infos, err := os.ReadDir(path) |
|
| 270 |
- if err != nil {
|
|
| 271 |
- if os.IsNotExist(err) {
|
|
| 272 |
- return nil |
|
| 273 |
- } |
|
| 274 |
- return err |
|
| 275 |
- } |
|
| 276 |
- // Let's remove sub-cgroups, if any. |
|
| 277 |
- for _, info := range infos {
|
|
| 278 |
- if info.IsDir() {
|
|
| 279 |
- if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
|
| 280 |
- return err |
|
| 281 |
- } |
|
| 282 |
- } |
|
| 283 |
- } |
|
| 284 |
- // Finally, try rmdir again, this time with retries on EBUSY, |
|
| 285 |
- // which may help with scenario 2 above. |
|
| 286 |
- return rmdir(path, true) |
|
| 287 |
-} |
|
| 288 |
- |
|
| 289 |
-// RemovePaths iterates over the provided paths removing them. |
|
| 290 |
-func RemovePaths(paths map[string]string) (err error) {
|
|
| 291 |
- for s, p := range paths {
|
|
| 292 |
- if err := RemovePath(p); err == nil {
|
|
| 293 |
- delete(paths, s) |
|
| 294 |
- } |
|
| 295 |
- } |
|
| 296 |
- if len(paths) == 0 {
|
|
| 297 |
- clear(paths) |
|
| 298 |
- return nil |
|
| 299 |
- } |
|
| 300 |
- return fmt.Errorf("Failed to remove paths: %v", paths)
|
|
| 301 |
-} |
|
| 302 |
- |
|
| 303 |
-var ( |
|
| 304 |
- hugePageSizes []string |
|
| 305 |
- initHPSOnce sync.Once |
|
| 306 |
-) |
|
| 307 |
- |
|
| 308 |
-func HugePageSizes() []string {
|
|
| 309 |
- initHPSOnce.Do(func() {
|
|
| 310 |
- dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
|
| 311 |
- if err != nil {
|
|
| 312 |
- return |
|
| 313 |
- } |
|
| 314 |
- files, err := dir.Readdirnames(0) |
|
| 315 |
- dir.Close() |
|
| 316 |
- if err != nil {
|
|
| 317 |
- return |
|
| 318 |
- } |
|
| 319 |
- |
|
| 320 |
- hugePageSizes, err = getHugePageSizeFromFilenames(files) |
|
| 321 |
- if err != nil {
|
|
| 322 |
- logrus.Warn("HugePageSizes: ", err)
|
|
| 323 |
- } |
|
| 324 |
- }) |
|
| 325 |
- |
|
| 326 |
- return hugePageSizes |
|
| 327 |
-} |
|
| 328 |
- |
|
| 329 |
-func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
|
| 330 |
- pageSizes := make([]string, 0, len(fileNames)) |
|
| 331 |
- var warn error |
|
| 332 |
- |
|
| 333 |
- for _, file := range fileNames {
|
|
| 334 |
- // example: hugepages-1048576kB |
|
| 335 |
- val := strings.TrimPrefix(file, "hugepages-") |
|
| 336 |
- if len(val) == len(file) {
|
|
| 337 |
- // Unexpected file name: no prefix found, ignore it. |
|
| 338 |
- continue |
|
| 339 |
- } |
|
| 340 |
- // The suffix is always "kB" (as of Linux 5.13). If we find |
|
| 341 |
- // something else, produce an error but keep going. |
|
| 342 |
- eLen := len(val) - 2 |
|
| 343 |
- val = strings.TrimSuffix(val, "kB") |
|
| 344 |
- if len(val) != eLen {
|
|
| 345 |
- // Highly unlikely. |
|
| 346 |
- if warn == nil {
|
|
| 347 |
- warn = errors.New(file + `: invalid suffix (expected "kB")`) |
|
| 348 |
- } |
|
| 349 |
- continue |
|
| 350 |
- } |
|
| 351 |
- size, err := strconv.Atoi(val) |
|
| 352 |
- if err != nil {
|
|
| 353 |
- // Highly unlikely. |
|
| 354 |
- if warn == nil {
|
|
| 355 |
- warn = fmt.Errorf("%s: %w", file, err)
|
|
| 356 |
- } |
|
| 357 |
- continue |
|
| 358 |
- } |
|
| 359 |
- // Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574 |
|
| 360 |
- // but in our case the size is in KB already. |
|
| 361 |
- if size >= (1 << 20) {
|
|
| 362 |
- val = strconv.Itoa(size>>20) + "GB" |
|
| 363 |
- } else if size >= (1 << 10) {
|
|
| 364 |
- val = strconv.Itoa(size>>10) + "MB" |
|
| 365 |
- } else {
|
|
| 366 |
- val += "KB" |
|
| 367 |
- } |
|
| 368 |
- pageSizes = append(pageSizes, val) |
|
| 369 |
- } |
|
| 370 |
- |
|
| 371 |
- return pageSizes, warn |
|
| 372 |
-} |
|
| 373 |
- |
|
| 374 |
-// GetPids returns all pids, that were added to cgroup at path. |
|
| 375 |
-func GetPids(dir string) ([]int, error) {
|
|
| 376 |
- return readProcsFile(dir) |
|
| 377 |
-} |
|
| 378 |
- |
|
| 379 |
-// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file |
|
| 380 |
-func WriteCgroupProc(dir string, pid int) error {
|
|
| 381 |
- // Normally dir should not be empty, one case is that cgroup subsystem |
|
| 382 |
- // is not mounted, we will get empty dir, and we want it fail here. |
|
| 383 |
- if dir == "" {
|
|
| 384 |
- return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
|
| 385 |
- } |
|
| 386 |
- |
|
| 387 |
- // Dont attach any pid to the cgroup if -1 is specified as a pid |
|
| 388 |
- if pid == -1 {
|
|
| 389 |
- return nil |
|
| 390 |
- } |
|
| 391 |
- |
|
| 392 |
- file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY) |
|
| 393 |
- if err != nil {
|
|
| 394 |
- return fmt.Errorf("failed to write %v: %w", pid, err)
|
|
| 395 |
- } |
|
| 396 |
- defer file.Close() |
|
| 397 |
- |
|
| 398 |
- for i := 0; i < 5; i++ {
|
|
| 399 |
- _, err = file.WriteString(strconv.Itoa(pid)) |
|
| 400 |
- if err == nil {
|
|
| 401 |
- return nil |
|
| 402 |
- } |
|
| 403 |
- |
|
| 404 |
- // EINVAL might mean that the task being added to cgroup.procs is in state |
|
| 405 |
- // TASK_NEW. We should attempt to do so again. |
|
| 406 |
- if errors.Is(err, unix.EINVAL) {
|
|
| 407 |
- time.Sleep(30 * time.Millisecond) |
|
| 408 |
- continue |
|
| 409 |
- } |
|
| 410 |
- |
|
| 411 |
- return fmt.Errorf("failed to write %v: %w", pid, err)
|
|
| 412 |
- } |
|
| 413 |
- return err |
|
| 414 |
-} |
|
| 415 |
- |
|
| 416 |
-// Since the OCI spec is designed for cgroup v1, in some cases |
|
| 417 |
-// there is need to convert from the cgroup v1 configuration to cgroup v2 |
|
| 418 |
-// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142) |
|
| 419 |
-// convert from [2-262144] to [1-10000] |
|
| 420 |
-// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)" |
|
| 421 |
-func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
|
|
| 422 |
- if cpuShares == 0 {
|
|
| 423 |
- return 0 |
|
| 424 |
- } |
|
| 425 |
- return (1 + ((cpuShares-2)*9999)/262142) |
|
| 426 |
-} |
|
| 427 |
- |
|
| 428 |
-// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec |
|
| 429 |
-// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap |
|
| 430 |
-// is defined as memory+swap combined, while in cgroup v2 swap is a separate value, |
|
| 431 |
-// so we need to subtract memory from it where it makes sense. |
|
| 432 |
-func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
|
|
| 433 |
- switch {
|
|
| 434 |
- case memory == -1 && memorySwap == 0: |
|
| 435 |
- // For compatibility with cgroup1 controller, set swap to unlimited in |
|
| 436 |
- // case the memory is set to unlimited and the swap is not explicitly set, |
|
| 437 |
- // treating the request as "set both memory and swap to unlimited". |
|
| 438 |
- return -1, nil |
|
| 439 |
- case memorySwap == -1, memorySwap == 0: |
|
| 440 |
- // Treat -1 ("max") and 0 ("unset") swap as is.
|
|
| 441 |
- return memorySwap, nil |
|
| 442 |
- case memory == -1: |
|
| 443 |
- // Unlimited memory, so treat swap as is. |
|
| 444 |
- return memorySwap, nil |
|
| 445 |
- case memory == 0: |
|
| 446 |
- // Unset or unknown memory, can't calculate swap. |
|
| 447 |
- return 0, errors.New("unable to set swap limit without memory limit")
|
|
| 448 |
- case memory < 0: |
|
| 449 |
- // Does not make sense to subtract a negative value. |
|
| 450 |
- return 0, fmt.Errorf("invalid memory value: %d", memory)
|
|
| 451 |
- case memorySwap < memory: |
|
| 452 |
- // Sanity check. |
|
| 453 |
- return 0, errors.New("memory+swap limit should be >= memory limit")
|
|
| 454 |
- } |
|
| 455 |
- |
|
| 456 |
- return memorySwap - memory, nil |
|
| 457 |
-} |
|
| 458 |
- |
|
| 459 |
-// Since the OCI spec is designed for cgroup v1, in some cases |
|
| 460 |
-// there is need to convert from the cgroup v1 configuration to cgroup v2 |
|
| 461 |
-// the formula for BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990) |
|
| 462 |
-// convert linearly from [10-1000] to [1-10000] |
|
| 463 |
-func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
|
|
| 464 |
- if blkIoWeight == 0 {
|
|
| 465 |
- return 0 |
|
| 466 |
- } |
|
| 467 |
- return 1 + (uint64(blkIoWeight)-10)*9999/990 |
|
| 468 |
-} |
| 469 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,277 +0,0 @@ |
| 1 |
-package cgroups |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "errors" |
|
| 5 |
- "fmt" |
|
| 6 |
- "os" |
|
| 7 |
- "path/filepath" |
|
| 8 |
- "strings" |
|
| 9 |
- "sync" |
|
| 10 |
- "syscall" |
|
| 11 |
- |
|
| 12 |
- securejoin "github.com/cyphar/filepath-securejoin" |
|
| 13 |
- "github.com/moby/sys/mountinfo" |
|
| 14 |
- "golang.org/x/sys/unix" |
|
| 15 |
-) |
|
| 16 |
- |
|
| 17 |
-// Code in this source file are specific to cgroup v1, |
|
| 18 |
-// and must not be used from any cgroup v2 code. |
|
| 19 |
- |
|
| 20 |
-const ( |
|
| 21 |
- CgroupNamePrefix = "name=" |
|
| 22 |
- defaultPrefix = "/sys/fs/cgroup" |
|
| 23 |
-) |
|
| 24 |
- |
|
| 25 |
-var ( |
|
| 26 |
- errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
|
| 27 |
- ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")
|
|
| 28 |
- |
|
| 29 |
- readMountinfoOnce sync.Once |
|
| 30 |
- readMountinfoErr error |
|
| 31 |
- cgroupMountinfo []*mountinfo.Info |
|
| 32 |
-) |
|
| 33 |
- |
|
| 34 |
-type NotFoundError struct {
|
|
| 35 |
- Subsystem string |
|
| 36 |
-} |
|
| 37 |
- |
|
| 38 |
-func (e *NotFoundError) Error() string {
|
|
| 39 |
- return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
|
| 40 |
-} |
|
| 41 |
- |
|
| 42 |
-func NewNotFoundError(sub string) error {
|
|
| 43 |
- return &NotFoundError{
|
|
| 44 |
- Subsystem: sub, |
|
| 45 |
- } |
|
| 46 |
-} |
|
| 47 |
- |
|
| 48 |
-func IsNotFound(err error) bool {
|
|
| 49 |
- var nfErr *NotFoundError |
|
| 50 |
- return errors.As(err, &nfErr) |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-func tryDefaultPath(cgroupPath, subsystem string) string {
|
|
| 54 |
- if !strings.HasPrefix(defaultPrefix, cgroupPath) {
|
|
| 55 |
- return "" |
|
| 56 |
- } |
|
| 57 |
- |
|
| 58 |
- // remove possible prefix |
|
| 59 |
- subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix) |
|
| 60 |
- |
|
| 61 |
- // Make sure we're still under defaultPrefix, and resolve |
|
| 62 |
- // a possible symlink (like cpu -> cpu,cpuacct). |
|
| 63 |
- path, err := securejoin.SecureJoin(defaultPrefix, subsystem) |
|
| 64 |
- if err != nil {
|
|
| 65 |
- return "" |
|
| 66 |
- } |
|
| 67 |
- |
|
| 68 |
- // (1) path should be a directory. |
|
| 69 |
- st, err := os.Lstat(path) |
|
| 70 |
- if err != nil || !st.IsDir() {
|
|
| 71 |
- return "" |
|
| 72 |
- } |
|
| 73 |
- |
|
| 74 |
- // (2) path should be a mount point. |
|
| 75 |
- pst, err := os.Lstat(filepath.Dir(path)) |
|
| 76 |
- if err != nil {
|
|
| 77 |
- return "" |
|
| 78 |
- } |
|
| 79 |
- |
|
| 80 |
- if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
|
|
| 81 |
- // parent dir has the same dev -- path is not a mount point |
|
| 82 |
- return "" |
|
| 83 |
- } |
|
| 84 |
- |
|
| 85 |
- // (3) path should have 'cgroup' fs type. |
|
| 86 |
- fst := unix.Statfs_t{}
|
|
| 87 |
- err = unix.Statfs(path, &fst) |
|
| 88 |
- if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
|
| 89 |
- return "" |
|
| 90 |
- } |
|
| 91 |
- |
|
| 92 |
- return path |
|
| 93 |
-} |
|
| 94 |
- |
|
| 95 |
-// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones |
|
| 96 |
-// with fstype of "cgroup") for the current running process. |
|
| 97 |
-// |
|
| 98 |
-// The results are cached (to avoid re-reading mountinfo which is relatively |
|
| 99 |
-// expensive), so it is assumed that cgroup mounts are not being changed. |
|
| 100 |
-func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
|
| 101 |
- readMountinfoOnce.Do(func() {
|
|
| 102 |
- // mountinfo.GetMounts uses /proc/thread-self, so we can use it without |
|
| 103 |
- // issues. |
|
| 104 |
- cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( |
|
| 105 |
- mountinfo.FSTypeFilter("cgroup"),
|
|
| 106 |
- ) |
|
| 107 |
- }) |
|
| 108 |
- return cgroupMountinfo, readMountinfoErr |
|
| 109 |
-} |
|
| 110 |
- |
|
| 111 |
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt |
|
| 112 |
-func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
|
| 113 |
- if IsCgroup2UnifiedMode() {
|
|
| 114 |
- return "", errUnified |
|
| 115 |
- } |
|
| 116 |
- |
|
| 117 |
- // If subsystem is empty, we look for the cgroupv2 hybrid path. |
|
| 118 |
- if len(subsystem) == 0 {
|
|
| 119 |
- return hybridMountpoint, nil |
|
| 120 |
- } |
|
| 121 |
- |
|
| 122 |
- // Avoid parsing mountinfo by trying the default path first, if possible. |
|
| 123 |
- if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
|
| 124 |
- return path, nil |
|
| 125 |
- } |
|
| 126 |
- |
|
| 127 |
- mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) |
|
| 128 |
- return mnt, err |
|
| 129 |
-} |
|
| 130 |
- |
|
| 131 |
-func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
|
| 132 |
- if IsCgroup2UnifiedMode() {
|
|
| 133 |
- return "", "", errUnified |
|
| 134 |
- } |
|
| 135 |
- |
|
| 136 |
- mi, err := readCgroupMountinfo() |
|
| 137 |
- if err != nil {
|
|
| 138 |
- return "", "", err |
|
| 139 |
- } |
|
| 140 |
- |
|
| 141 |
- return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem) |
|
| 142 |
-} |
|
| 143 |
- |
|
| 144 |
-func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
|
|
| 145 |
- for _, mi := range mounts {
|
|
| 146 |
- if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
|
|
| 147 |
- for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
|
| 148 |
- if opt == subsystem {
|
|
| 149 |
- return mi.Mountpoint, mi.Root, nil |
|
| 150 |
- } |
|
| 151 |
- } |
|
| 152 |
- } |
|
| 153 |
- } |
|
| 154 |
- |
|
| 155 |
- return "", "", NewNotFoundError(subsystem) |
|
| 156 |
-} |
|
| 157 |
- |
|
| 158 |
-func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
|
| 159 |
- if len(m.Subsystems) == 0 {
|
|
| 160 |
- return "", errors.New("no subsystem for mount")
|
|
| 161 |
- } |
|
| 162 |
- |
|
| 163 |
- return getControllerPath(m.Subsystems[0], cgroups) |
|
| 164 |
-} |
|
| 165 |
- |
|
| 166 |
-func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
|
|
| 167 |
- res := make([]Mount, 0, len(ss)) |
|
| 168 |
- numFound := 0 |
|
| 169 |
- for _, mi := range mounts {
|
|
| 170 |
- m := Mount{
|
|
| 171 |
- Mountpoint: mi.Mountpoint, |
|
| 172 |
- Root: mi.Root, |
|
| 173 |
- } |
|
| 174 |
- for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
|
| 175 |
- seen, known := ss[opt] |
|
| 176 |
- if !known || (!all && seen) {
|
|
| 177 |
- continue |
|
| 178 |
- } |
|
| 179 |
- ss[opt] = true |
|
| 180 |
- opt = strings.TrimPrefix(opt, CgroupNamePrefix) |
|
| 181 |
- m.Subsystems = append(m.Subsystems, opt) |
|
| 182 |
- numFound++ |
|
| 183 |
- } |
|
| 184 |
- if len(m.Subsystems) > 0 || all {
|
|
| 185 |
- res = append(res, m) |
|
| 186 |
- } |
|
| 187 |
- if !all && numFound >= len(ss) {
|
|
| 188 |
- break |
|
| 189 |
- } |
|
| 190 |
- } |
|
| 191 |
- return res, nil |
|
| 192 |
-} |
|
| 193 |
- |
|
| 194 |
-func getCgroupMountsV1(all bool) ([]Mount, error) {
|
|
| 195 |
- mi, err := readCgroupMountinfo() |
|
| 196 |
- if err != nil {
|
|
| 197 |
- return nil, err |
|
| 198 |
- } |
|
| 199 |
- |
|
| 200 |
- // We don't need to use /proc/thread-self here because runc always runs |
|
| 201 |
- // with every thread in the same cgroup. This lets us avoid having to do |
|
| 202 |
- // runtime.LockOSThread. |
|
| 203 |
- allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 204 |
- if err != nil {
|
|
| 205 |
- return nil, err |
|
| 206 |
- } |
|
| 207 |
- |
|
| 208 |
- allMap := make(map[string]bool) |
|
| 209 |
- for s := range allSubsystems {
|
|
| 210 |
- allMap[s] = false |
|
| 211 |
- } |
|
| 212 |
- |
|
| 213 |
- return getCgroupMountsHelper(allMap, mi, all) |
|
| 214 |
-} |
|
| 215 |
- |
|
| 216 |
-// GetOwnCgroup returns the relative path to the cgroup docker is running in. |
|
| 217 |
-func GetOwnCgroup(subsystem string) (string, error) {
|
|
| 218 |
- if IsCgroup2UnifiedMode() {
|
|
| 219 |
- return "", errUnified |
|
| 220 |
- } |
|
| 221 |
- |
|
| 222 |
- // We don't need to use /proc/thread-self here because runc always runs |
|
| 223 |
- // with every thread in the same cgroup. This lets us avoid having to do |
|
| 224 |
- // runtime.LockOSThread. |
|
| 225 |
- cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
|
| 226 |
- if err != nil {
|
|
| 227 |
- return "", err |
|
| 228 |
- } |
|
| 229 |
- |
|
| 230 |
- return getControllerPath(subsystem, cgroups) |
|
| 231 |
-} |
|
| 232 |
- |
|
| 233 |
-func GetOwnCgroupPath(subsystem string) (string, error) {
|
|
| 234 |
- cgroup, err := GetOwnCgroup(subsystem) |
|
| 235 |
- if err != nil {
|
|
| 236 |
- return "", err |
|
| 237 |
- } |
|
| 238 |
- |
|
| 239 |
- // If subsystem is empty, we look for the cgroupv2 hybrid path. |
|
| 240 |
- if len(subsystem) == 0 {
|
|
| 241 |
- return hybridMountpoint, nil |
|
| 242 |
- } |
|
| 243 |
- |
|
| 244 |
- return getCgroupPathHelper(subsystem, cgroup) |
|
| 245 |
-} |
|
| 246 |
- |
|
| 247 |
-func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
|
| 248 |
- mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
|
| 249 |
- if err != nil {
|
|
| 250 |
- return "", err |
|
| 251 |
- } |
|
| 252 |
- |
|
| 253 |
- // This is needed for nested containers, because in /proc/self/cgroup we |
|
| 254 |
- // see paths from host, which don't exist in container. |
|
| 255 |
- relCgroup, err := filepath.Rel(root, cgroup) |
|
| 256 |
- if err != nil {
|
|
| 257 |
- return "", err |
|
| 258 |
- } |
|
| 259 |
- |
|
| 260 |
- return filepath.Join(mnt, relCgroup), nil |
|
| 261 |
-} |
|
| 262 |
- |
|
| 263 |
-func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
|
| 264 |
- if IsCgroup2UnifiedMode() {
|
|
| 265 |
- return "", errUnified |
|
| 266 |
- } |
|
| 267 |
- |
|
| 268 |
- if p, ok := cgroups[subsystem]; ok {
|
|
| 269 |
- return p, nil |
|
| 270 |
- } |
|
| 271 |
- |
|
| 272 |
- if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
|
| 273 |
- return p, nil |
|
| 274 |
- } |
|
| 275 |
- |
|
| 276 |
- return "", NewNotFoundError(subsystem) |
|
| 277 |
-} |
| 278 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,66 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import "fmt" |
|
| 4 |
- |
|
| 5 |
-// BlockIODevice holds major:minor format supported in blkio cgroup. |
|
| 6 |
-type BlockIODevice struct {
|
|
| 7 |
- // Major is the device's major number |
|
| 8 |
- Major int64 `json:"major"` |
|
| 9 |
- // Minor is the device's minor number |
|
| 10 |
- Minor int64 `json:"minor"` |
|
| 11 |
-} |
|
| 12 |
- |
|
| 13 |
-// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair |
|
| 14 |
-type WeightDevice struct {
|
|
| 15 |
- BlockIODevice |
|
| 16 |
- // Weight is the bandwidth rate for the device, range is from 10 to 1000 |
|
| 17 |
- Weight uint16 `json:"weight"` |
|
| 18 |
- // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only |
|
| 19 |
- LeafWeight uint16 `json:"leafWeight"` |
|
| 20 |
-} |
|
| 21 |
- |
|
| 22 |
-// NewWeightDevice returns a configured WeightDevice pointer |
|
| 23 |
-func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
|
|
| 24 |
- wd := &WeightDevice{}
|
|
| 25 |
- wd.Major = major |
|
| 26 |
- wd.Minor = minor |
|
| 27 |
- wd.Weight = weight |
|
| 28 |
- wd.LeafWeight = leafWeight |
|
| 29 |
- return wd |
|
| 30 |
-} |
|
| 31 |
- |
|
| 32 |
-// WeightString formats the struct to be writable to the cgroup specific file |
|
| 33 |
-func (wd *WeightDevice) WeightString() string {
|
|
| 34 |
- return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
|
|
| 35 |
-} |
|
| 36 |
- |
|
| 37 |
-// LeafWeightString formats the struct to be writable to the cgroup specific file |
|
| 38 |
-func (wd *WeightDevice) LeafWeightString() string {
|
|
| 39 |
- return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
|
|
| 40 |
-} |
|
| 41 |
- |
|
| 42 |
-// ThrottleDevice struct holds a `major:minor rate_per_second` pair |
|
| 43 |
-type ThrottleDevice struct {
|
|
| 44 |
- BlockIODevice |
|
| 45 |
- // Rate is the IO rate limit per cgroup per device |
|
| 46 |
- Rate uint64 `json:"rate"` |
|
| 47 |
-} |
|
| 48 |
- |
|
| 49 |
-// NewThrottleDevice returns a configured ThrottleDevice pointer |
|
| 50 |
-func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
|
|
| 51 |
- td := &ThrottleDevice{}
|
|
| 52 |
- td.Major = major |
|
| 53 |
- td.Minor = minor |
|
| 54 |
- td.Rate = rate |
|
| 55 |
- return td |
|
| 56 |
-} |
|
| 57 |
- |
|
| 58 |
-// String formats the struct to be writable to the cgroup specific file |
|
| 59 |
-func (td *ThrottleDevice) String() string {
|
|
| 60 |
- return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
|
|
| 61 |
-} |
|
| 62 |
- |
|
| 63 |
-// StringName formats the struct to be writable to the cgroup specific file |
|
| 64 |
-func (td *ThrottleDevice) StringName(name string) string {
|
|
| 65 |
- return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
|
|
| 66 |
-} |
| 67 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,169 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- systemdDbus "github.com/coreos/go-systemd/v22/dbus" |
|
| 5 |
- "github.com/opencontainers/runc/libcontainer/devices" |
|
| 6 |
-) |
|
| 7 |
- |
|
| 8 |
-type FreezerState string |
|
| 9 |
- |
|
| 10 |
-const ( |
|
| 11 |
- Undefined FreezerState = "" |
|
| 12 |
- Frozen FreezerState = "FROZEN" |
|
| 13 |
- Thawed FreezerState = "THAWED" |
|
| 14 |
-) |
|
| 15 |
- |
|
| 16 |
-// Cgroup holds properties of a cgroup on Linux. |
|
| 17 |
-type Cgroup struct {
|
|
| 18 |
- // Name specifies the name of the cgroup |
|
| 19 |
- Name string `json:"name,omitempty"` |
|
| 20 |
- |
|
| 21 |
- // Parent specifies the name of parent of cgroup or slice |
|
| 22 |
- Parent string `json:"parent,omitempty"` |
|
| 23 |
- |
|
| 24 |
- // Path specifies the path to cgroups that are created and/or joined by the container. |
|
| 25 |
- // The path is assumed to be relative to the host system cgroup mountpoint. |
|
| 26 |
- Path string `json:"path"` |
|
| 27 |
- |
|
| 28 |
- // ScopePrefix describes prefix for the scope name |
|
| 29 |
- ScopePrefix string `json:"scope_prefix"` |
|
| 30 |
- |
|
| 31 |
- // Resources contains various cgroups settings to apply |
|
| 32 |
- *Resources |
|
| 33 |
- |
|
| 34 |
- // Systemd tells if systemd should be used to manage cgroups. |
|
| 35 |
- Systemd bool |
|
| 36 |
- |
|
| 37 |
- // SystemdProps are any additional properties for systemd, |
|
| 38 |
- // derived from org.systemd.property.xxx annotations. |
|
| 39 |
- // Ignored unless systemd is used for managing cgroups. |
|
| 40 |
- SystemdProps []systemdDbus.Property `json:"-"` |
|
| 41 |
- |
|
| 42 |
- // Rootless tells if rootless cgroups should be used. |
|
| 43 |
- Rootless bool |
|
| 44 |
- |
|
| 45 |
- // The host UID that should own the cgroup, or nil to accept |
|
| 46 |
- // the default ownership. This should only be set when the |
|
| 47 |
- // cgroupfs is to be mounted read/write. |
|
| 48 |
- // Not all cgroup manager implementations support changing |
|
| 49 |
- // the ownership. |
|
| 50 |
- OwnerUID *int `json:"owner_uid,omitempty"` |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-type Resources struct {
|
|
| 54 |
- // Devices is the set of access rules for devices in the container. |
|
| 55 |
- Devices []*devices.Rule `json:"devices"` |
|
| 56 |
- |
|
| 57 |
- // Memory limit (in bytes) |
|
| 58 |
- Memory int64 `json:"memory"` |
|
| 59 |
- |
|
| 60 |
- // Memory reservation or soft_limit (in bytes) |
|
| 61 |
- MemoryReservation int64 `json:"memory_reservation"` |
|
| 62 |
- |
|
| 63 |
- // Total memory usage (memory + swap); set `-1` to enable unlimited swap |
|
| 64 |
- MemorySwap int64 `json:"memory_swap"` |
|
| 65 |
- |
|
| 66 |
- // CPU shares (relative weight vs. other containers) |
|
| 67 |
- CpuShares uint64 `json:"cpu_shares"` |
|
| 68 |
- |
|
| 69 |
- // CPU hardcap limit (in usecs). Allowed cpu time in a given period. |
|
| 70 |
- CpuQuota int64 `json:"cpu_quota"` |
|
| 71 |
- |
|
| 72 |
- // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. |
|
| 73 |
- CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive |
|
| 74 |
- |
|
| 75 |
- // CPU period to be used for hardcapping (in usecs). 0 to use system default. |
|
| 76 |
- CpuPeriod uint64 `json:"cpu_period"` |
|
| 77 |
- |
|
| 78 |
- // How many time CPU will use in realtime scheduling (in usecs). |
|
| 79 |
- CpuRtRuntime int64 `json:"cpu_rt_quota"` |
|
| 80 |
- |
|
| 81 |
- // CPU period to be used for realtime scheduling (in usecs). |
|
| 82 |
- CpuRtPeriod uint64 `json:"cpu_rt_period"` |
|
| 83 |
- |
|
| 84 |
- // CPU to use |
|
| 85 |
- CpusetCpus string `json:"cpuset_cpus"` |
|
| 86 |
- |
|
| 87 |
- // MEM to use |
|
| 88 |
- CpusetMems string `json:"cpuset_mems"` |
|
| 89 |
- |
|
| 90 |
- // cgroup SCHED_IDLE |
|
| 91 |
- CPUIdle *int64 `json:"cpu_idle,omitempty"` |
|
| 92 |
- |
|
| 93 |
- // Process limit; set <= `0' to disable limit. |
|
| 94 |
- PidsLimit int64 `json:"pids_limit"` |
|
| 95 |
- |
|
| 96 |
- // Specifies per cgroup weight, range is from 10 to 1000. |
|
| 97 |
- BlkioWeight uint16 `json:"blkio_weight"` |
|
| 98 |
- |
|
| 99 |
- // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only |
|
| 100 |
- BlkioLeafWeight uint16 `json:"blkio_leaf_weight"` |
|
| 101 |
- |
|
| 102 |
- // Weight per cgroup per device, can override BlkioWeight. |
|
| 103 |
- BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"` |
|
| 104 |
- |
|
| 105 |
- // IO read rate limit per cgroup per device, bytes per second. |
|
| 106 |
- BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"` |
|
| 107 |
- |
|
| 108 |
- // IO write rate limit per cgroup per device, bytes per second. |
|
| 109 |
- BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"` |
|
| 110 |
- |
|
| 111 |
- // IO read rate limit per cgroup per device, IO per second. |
|
| 112 |
- BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"` |
|
| 113 |
- |
|
| 114 |
- // IO write rate limit per cgroup per device, IO per second. |
|
| 115 |
- BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"` |
|
| 116 |
- |
|
| 117 |
- // set the freeze value for the process |
|
| 118 |
- Freezer FreezerState `json:"freezer"` |
|
| 119 |
- |
|
| 120 |
- // Hugetlb limit (in bytes) |
|
| 121 |
- HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` |
|
| 122 |
- |
|
| 123 |
- // Whether to disable OOM Killer |
|
| 124 |
- OomKillDisable bool `json:"oom_kill_disable"` |
|
| 125 |
- |
|
| 126 |
- // Tuning swappiness behaviour per cgroup |
|
| 127 |
- MemorySwappiness *uint64 `json:"memory_swappiness"` |
|
| 128 |
- |
|
| 129 |
- // Set priority of network traffic for container |
|
| 130 |
- NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` |
|
| 131 |
- |
|
| 132 |
- // Set class identifier for container's network packets |
|
| 133 |
- NetClsClassid uint32 `json:"net_cls_classid_u"` |
|
| 134 |
- |
|
| 135 |
- // Rdma resource restriction configuration |
|
| 136 |
- Rdma map[string]LinuxRdma `json:"rdma"` |
|
| 137 |
- |
|
| 138 |
- // Used on cgroups v2: |
|
| 139 |
- |
|
| 140 |
- // CpuWeight sets a proportional bandwidth limit. |
|
| 141 |
- CpuWeight uint64 `json:"cpu_weight"` |
|
| 142 |
- |
|
| 143 |
- // Unified is cgroupv2-only key-value map. |
|
| 144 |
- Unified map[string]string `json:"unified"` |
|
| 145 |
- |
|
| 146 |
- // SkipDevices allows to skip configuring device permissions. |
|
| 147 |
- // Used by e.g. kubelet while creating a parent cgroup (kubepods) |
|
| 148 |
- // common for many containers, and by runc update. |
|
| 149 |
- // |
|
| 150 |
- // NOTE it is impossible to start a container which has this flag set. |
|
| 151 |
- SkipDevices bool `json:"-"` |
|
| 152 |
- |
|
| 153 |
- // SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup |
|
| 154 |
- // freeze when setting resources. Only applicable to systemd legacy |
|
| 155 |
- // (i.e. cgroup v1) manager (which uses freeze by default to avoid |
|
| 156 |
- // spurious permission errors caused by systemd inability to update |
|
| 157 |
- // device rules in a non-disruptive manner). |
|
| 158 |
- // |
|
| 159 |
- // If not set, a few methods (such as looking into cgroup's |
|
| 160 |
- // devices.list and querying the systemd unit properties) are used |
|
| 161 |
- // during Set() to figure out whether the freeze is required. Those |
|
| 162 |
- // methods may be relatively slow, thus this flag. |
|
| 163 |
- SkipFreezeOnSet bool `json:"-"` |
|
| 164 |
- |
|
| 165 |
- // MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check |
|
| 166 |
- // if the new memory limits (Memory and MemorySwap) being set are lower |
|
| 167 |
- // than the current memory usage, and reject if so. |
|
| 168 |
- MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"` |
|
| 169 |
-} |
| 170 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,8 +0,0 @@ |
| 1 |
-//go:build !linux |
|
| 2 |
- |
|
| 3 |
-package configs |
|
| 4 |
- |
|
| 5 |
-// Cgroup holds properties of a cgroup on Linux |
|
| 6 |
-// TODO Windows: This can ultimately be entirely factored out on Windows as |
|
| 7 |
-// cgroups are a Unix-specific construct. |
|
| 8 |
-type Cgroup struct{}
|
| 9 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,508 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "bytes" |
|
| 5 |
- "encoding/json" |
|
| 6 |
- "fmt" |
|
| 7 |
- "os/exec" |
|
| 8 |
- "time" |
|
| 9 |
- |
|
| 10 |
- "github.com/sirupsen/logrus" |
|
| 11 |
- "golang.org/x/sys/unix" |
|
| 12 |
- |
|
| 13 |
- "github.com/opencontainers/runc/libcontainer/devices" |
|
| 14 |
- "github.com/opencontainers/runtime-spec/specs-go" |
|
| 15 |
-) |
|
| 16 |
- |
|
| 17 |
-type Rlimit struct {
|
|
| 18 |
- Type int `json:"type"` |
|
| 19 |
- Hard uint64 `json:"hard"` |
|
| 20 |
- Soft uint64 `json:"soft"` |
|
| 21 |
-} |
|
| 22 |
- |
|
| 23 |
-// IDMap represents UID/GID Mappings for User Namespaces. |
|
| 24 |
-type IDMap struct {
|
|
| 25 |
- ContainerID int64 `json:"container_id"` |
|
| 26 |
- HostID int64 `json:"host_id"` |
|
| 27 |
- Size int64 `json:"size"` |
|
| 28 |
-} |
|
| 29 |
- |
|
| 30 |
-// Seccomp represents syscall restrictions |
|
| 31 |
-// By default, only the native architecture of the kernel is allowed to be used |
|
| 32 |
-// for syscalls. Additional architectures can be added by specifying them in |
|
| 33 |
-// Architectures. |
|
| 34 |
-type Seccomp struct {
|
|
| 35 |
- DefaultAction Action `json:"default_action"` |
|
| 36 |
- Architectures []string `json:"architectures"` |
|
| 37 |
- Flags []specs.LinuxSeccompFlag `json:"flags"` |
|
| 38 |
- Syscalls []*Syscall `json:"syscalls"` |
|
| 39 |
- DefaultErrnoRet *uint `json:"default_errno_ret"` |
|
| 40 |
- ListenerPath string `json:"listener_path,omitempty"` |
|
| 41 |
- ListenerMetadata string `json:"listener_metadata,omitempty"` |
|
| 42 |
-} |
|
| 43 |
- |
|
| 44 |
-// Action is taken upon rule match in Seccomp |
|
| 45 |
-type Action int |
|
| 46 |
- |
|
| 47 |
-const ( |
|
| 48 |
- Kill Action = iota + 1 |
|
| 49 |
- Errno |
|
| 50 |
- Trap |
|
| 51 |
- Allow |
|
| 52 |
- Trace |
|
| 53 |
- Log |
|
| 54 |
- Notify |
|
| 55 |
- KillThread |
|
| 56 |
- KillProcess |
|
| 57 |
-) |
|
| 58 |
- |
|
| 59 |
-// Operator is a comparison operator to be used when matching syscall arguments in Seccomp |
|
| 60 |
-type Operator int |
|
| 61 |
- |
|
| 62 |
-const ( |
|
| 63 |
- EqualTo Operator = iota + 1 |
|
| 64 |
- NotEqualTo |
|
| 65 |
- GreaterThan |
|
| 66 |
- GreaterThanOrEqualTo |
|
| 67 |
- LessThan |
|
| 68 |
- LessThanOrEqualTo |
|
| 69 |
- MaskEqualTo |
|
| 70 |
-) |
|
| 71 |
- |
|
| 72 |
-// Arg is a rule to match a specific syscall argument in Seccomp |
|
| 73 |
-type Arg struct {
|
|
| 74 |
- Index uint `json:"index"` |
|
| 75 |
- Value uint64 `json:"value"` |
|
| 76 |
- ValueTwo uint64 `json:"value_two"` |
|
| 77 |
- Op Operator `json:"op"` |
|
| 78 |
-} |
|
| 79 |
- |
|
| 80 |
-// Syscall is a rule to match a syscall in Seccomp |
|
| 81 |
-type Syscall struct {
|
|
| 82 |
- Name string `json:"name"` |
|
| 83 |
- Action Action `json:"action"` |
|
| 84 |
- ErrnoRet *uint `json:"errnoRet"` |
|
| 85 |
- Args []*Arg `json:"args"` |
|
| 86 |
-} |
|
| 87 |
- |
|
| 88 |
-// Config defines configuration options for executing a process inside a contained environment. |
|
| 89 |
-type Config struct {
|
|
| 90 |
- // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs |
|
| 91 |
- // This is a common option when the container is running in ramdisk |
|
| 92 |
- NoPivotRoot bool `json:"no_pivot_root"` |
|
| 93 |
- |
|
| 94 |
- // ParentDeathSignal specifies the signal that is sent to the container's process in the case |
|
| 95 |
- // that the parent process dies. |
|
| 96 |
- ParentDeathSignal int `json:"parent_death_signal"` |
|
| 97 |
- |
|
| 98 |
- // Path to a directory containing the container's root filesystem. |
|
| 99 |
- Rootfs string `json:"rootfs"` |
|
| 100 |
- |
|
| 101 |
- // Umask is the umask to use inside of the container. |
|
| 102 |
- Umask *uint32 `json:"umask"` |
|
| 103 |
- |
|
| 104 |
- // Readonlyfs will remount the container's rootfs as readonly where only externally mounted |
|
| 105 |
- // bind mounts are writtable. |
|
| 106 |
- Readonlyfs bool `json:"readonlyfs"` |
|
| 107 |
- |
|
| 108 |
- // Specifies the mount propagation flags to be applied to /. |
|
| 109 |
- RootPropagation int `json:"rootPropagation"` |
|
| 110 |
- |
|
| 111 |
- // Mounts specify additional source and destination paths that will be mounted inside the container's |
|
| 112 |
- // rootfs and mount namespace if specified |
|
| 113 |
- Mounts []*Mount `json:"mounts"` |
|
| 114 |
- |
|
| 115 |
- // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! |
|
| 116 |
- Devices []*devices.Device `json:"devices"` |
|
| 117 |
- |
|
| 118 |
- MountLabel string `json:"mount_label"` |
|
| 119 |
- |
|
| 120 |
- // Hostname optionally sets the container's hostname if provided |
|
| 121 |
- Hostname string `json:"hostname"` |
|
| 122 |
- |
|
| 123 |
- // Domainname optionally sets the container's domainname if provided |
|
| 124 |
- Domainname string `json:"domainname"` |
|
| 125 |
- |
|
| 126 |
- // Namespaces specifies the container's namespaces that it should setup when cloning the init process |
|
| 127 |
- // If a namespace is not provided that namespace is shared from the container's parent process |
|
| 128 |
- Namespaces Namespaces `json:"namespaces"` |
|
| 129 |
- |
|
| 130 |
- // Capabilities specify the capabilities to keep when executing the process inside the container |
|
| 131 |
- // All capabilities not specified will be dropped from the processes capability mask |
|
| 132 |
- Capabilities *Capabilities `json:"capabilities"` |
|
| 133 |
- |
|
| 134 |
- // Networks specifies the container's network setup to be created |
|
| 135 |
- Networks []*Network `json:"networks"` |
|
| 136 |
- |
|
| 137 |
- // Routes can be specified to create entries in the route table as the container is started |
|
| 138 |
- Routes []*Route `json:"routes"` |
|
| 139 |
- |
|
| 140 |
- // Cgroups specifies specific cgroup settings for the various subsystems that the container is |
|
| 141 |
- // placed into to limit the resources the container has available |
|
| 142 |
- Cgroups *Cgroup `json:"cgroups"` |
|
| 143 |
- |
|
| 144 |
- // AppArmorProfile specifies the profile to apply to the process running in the container and is |
|
| 145 |
- // change at the time the process is execed |
|
| 146 |
- AppArmorProfile string `json:"apparmor_profile,omitempty"` |
|
| 147 |
- |
|
| 148 |
- // ProcessLabel specifies the label to apply to the process running in the container. It is |
|
| 149 |
- // commonly used by selinux |
|
| 150 |
- ProcessLabel string `json:"process_label,omitempty"` |
|
| 151 |
- |
|
| 152 |
- // Rlimits specifies the resource limits, such as max open files, to set in the container |
|
| 153 |
- // If Rlimits are not set, the container will inherit rlimits from the parent process |
|
| 154 |
- Rlimits []Rlimit `json:"rlimits,omitempty"` |
|
| 155 |
- |
|
| 156 |
- // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores |
|
| 157 |
- // for a process. Valid values are between the range [-1000, '1000'], where processes with |
|
| 158 |
- // higher scores are preferred for being killed. If it is unset then we don't touch the current |
|
| 159 |
- // value. |
|
| 160 |
- // More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ |
|
| 161 |
- OomScoreAdj *int `json:"oom_score_adj,omitempty"` |
|
| 162 |
- |
|
| 163 |
- // UIDMappings is an array of User ID mappings for User Namespaces |
|
| 164 |
- UIDMappings []IDMap `json:"uid_mappings"` |
|
| 165 |
- |
|
| 166 |
- // GIDMappings is an array of Group ID mappings for User Namespaces |
|
| 167 |
- GIDMappings []IDMap `json:"gid_mappings"` |
|
| 168 |
- |
|
| 169 |
- // MaskPaths specifies paths within the container's rootfs to mask over with a bind |
|
| 170 |
- // mount pointing to /dev/null as to prevent reads of the file. |
|
| 171 |
- MaskPaths []string `json:"mask_paths"` |
|
| 172 |
- |
|
| 173 |
- // ReadonlyPaths specifies paths within the container's rootfs to remount as read-only |
|
| 174 |
- // so that these files prevent any writes. |
|
| 175 |
- ReadonlyPaths []string `json:"readonly_paths"` |
|
| 176 |
- |
|
| 177 |
- // Sysctl is a map of properties and their values. It is the equivalent of using |
|
| 178 |
- // sysctl -w my.property.name value in Linux. |
|
| 179 |
- Sysctl map[string]string `json:"sysctl"` |
|
| 180 |
- |
|
| 181 |
- // Seccomp allows actions to be taken whenever a syscall is made within the container. |
|
| 182 |
- // A number of rules are given, each having an action to be taken if a syscall matches it. |
|
| 183 |
- // A default action to be taken if no rules match is also given. |
|
| 184 |
- Seccomp *Seccomp `json:"seccomp"` |
|
| 185 |
- |
|
| 186 |
- // NoNewPrivileges controls whether processes in the container can gain additional privileges. |
|
| 187 |
- NoNewPrivileges bool `json:"no_new_privileges,omitempty"` |
|
| 188 |
- |
|
| 189 |
- // Hooks are a collection of actions to perform at various container lifecycle events. |
|
| 190 |
- // CommandHooks are serialized to JSON, but other hooks are not. |
|
| 191 |
- Hooks Hooks |
|
| 192 |
- |
|
| 193 |
- // Version is the version of opencontainer specification that is supported. |
|
| 194 |
- Version string `json:"version"` |
|
| 195 |
- |
|
| 196 |
- // Labels are user defined metadata that is stored in the config and populated on the state |
|
| 197 |
- Labels []string `json:"labels"` |
|
| 198 |
- |
|
| 199 |
- // NoNewKeyring will not allocated a new session keyring for the container. It will use the |
|
| 200 |
- // callers keyring in this case. |
|
| 201 |
- NoNewKeyring bool `json:"no_new_keyring"` |
|
| 202 |
- |
|
| 203 |
- // IntelRdt specifies settings for Intel RDT group that the container is placed into |
|
| 204 |
- // to limit the resources (e.g., L3 cache, memory bandwidth) the container has available |
|
| 205 |
- IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` |
|
| 206 |
- |
|
| 207 |
- // RootlessEUID is set when the runc was launched with non-zero EUID. |
|
| 208 |
- // Note that RootlessEUID is set to false when launched with EUID=0 in userns. |
|
| 209 |
- // When RootlessEUID is set, runc creates a new userns for the container. |
|
| 210 |
- // (config.json needs to contain userns settings) |
|
| 211 |
- RootlessEUID bool `json:"rootless_euid,omitempty"` |
|
| 212 |
- |
|
| 213 |
- // RootlessCgroups is set when unlikely to have the full access to cgroups. |
|
| 214 |
- // When RootlessCgroups is set, cgroups errors are ignored. |
|
| 215 |
- RootlessCgroups bool `json:"rootless_cgroups,omitempty"` |
|
| 216 |
- |
|
| 217 |
- // TimeOffsets specifies the offset for supporting time namespaces. |
|
| 218 |
- TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` |
|
| 219 |
- |
|
| 220 |
- // Scheduler represents the scheduling attributes for a process. |
|
| 221 |
- Scheduler *Scheduler `json:"scheduler,omitempty"` |
|
| 222 |
- |
|
| 223 |
- // Personality contains configuration for the Linux personality syscall. |
|
| 224 |
- Personality *LinuxPersonality `json:"personality,omitempty"` |
|
| 225 |
- |
|
| 226 |
- // IOPriority is the container's I/O priority. |
|
| 227 |
- IOPriority *IOPriority `json:"io_priority,omitempty"` |
|
| 228 |
-} |
|
| 229 |
- |
|
| 230 |
-// Scheduler is based on the Linux sched_setattr(2) syscall. |
|
| 231 |
-type Scheduler = specs.Scheduler |
|
| 232 |
- |
|
| 233 |
-// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr. |
|
| 234 |
-func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
|
|
| 235 |
- var policy uint32 |
|
| 236 |
- switch scheduler.Policy {
|
|
| 237 |
- case specs.SchedOther: |
|
| 238 |
- policy = 0 |
|
| 239 |
- case specs.SchedFIFO: |
|
| 240 |
- policy = 1 |
|
| 241 |
- case specs.SchedRR: |
|
| 242 |
- policy = 2 |
|
| 243 |
- case specs.SchedBatch: |
|
| 244 |
- policy = 3 |
|
| 245 |
- case specs.SchedISO: |
|
| 246 |
- policy = 4 |
|
| 247 |
- case specs.SchedIdle: |
|
| 248 |
- policy = 5 |
|
| 249 |
- case specs.SchedDeadline: |
|
| 250 |
- policy = 6 |
|
| 251 |
- default: |
|
| 252 |
- return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
|
|
| 253 |
- } |
|
| 254 |
- |
|
| 255 |
- var flags uint64 |
|
| 256 |
- for _, flag := range scheduler.Flags {
|
|
| 257 |
- switch flag {
|
|
| 258 |
- case specs.SchedFlagResetOnFork: |
|
| 259 |
- flags |= 0x01 |
|
| 260 |
- case specs.SchedFlagReclaim: |
|
| 261 |
- flags |= 0x02 |
|
| 262 |
- case specs.SchedFlagDLOverrun: |
|
| 263 |
- flags |= 0x04 |
|
| 264 |
- case specs.SchedFlagKeepPolicy: |
|
| 265 |
- flags |= 0x08 |
|
| 266 |
- case specs.SchedFlagKeepParams: |
|
| 267 |
- flags |= 0x10 |
|
| 268 |
- case specs.SchedFlagUtilClampMin: |
|
| 269 |
- flags |= 0x20 |
|
| 270 |
- case specs.SchedFlagUtilClampMax: |
|
| 271 |
- flags |= 0x40 |
|
| 272 |
- default: |
|
| 273 |
- return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
|
|
| 274 |
- } |
|
| 275 |
- } |
|
| 276 |
- |
|
| 277 |
- return &unix.SchedAttr{
|
|
| 278 |
- Size: unix.SizeofSchedAttr, |
|
| 279 |
- Policy: policy, |
|
| 280 |
- Flags: flags, |
|
| 281 |
- Nice: scheduler.Nice, |
|
| 282 |
- Priority: uint32(scheduler.Priority), |
|
| 283 |
- Runtime: scheduler.Runtime, |
|
| 284 |
- Deadline: scheduler.Deadline, |
|
| 285 |
- Period: scheduler.Period, |
|
| 286 |
- }, nil |
|
| 287 |
-} |
|
| 288 |
- |
|
| 289 |
-var IOPrioClassMapping = map[specs.IOPriorityClass]int{
|
|
| 290 |
- specs.IOPRIO_CLASS_RT: 1, |
|
| 291 |
- specs.IOPRIO_CLASS_BE: 2, |
|
| 292 |
- specs.IOPRIO_CLASS_IDLE: 3, |
|
| 293 |
-} |
|
| 294 |
- |
|
| 295 |
-type IOPriority = specs.LinuxIOPriority |
|
| 296 |
- |
|
| 297 |
-type ( |
|
| 298 |
- HookName string |
|
| 299 |
- HookList []Hook |
|
| 300 |
- Hooks map[HookName]HookList |
|
| 301 |
-) |
|
| 302 |
- |
|
| 303 |
-const ( |
|
| 304 |
- // Prestart commands are executed after the container namespaces are created, |
|
| 305 |
- // but before the user supplied command is executed from init. |
|
| 306 |
- // Note: This hook is now deprecated |
|
| 307 |
- // Prestart commands are called in the Runtime namespace. |
|
| 308 |
- Prestart HookName = "prestart" |
|
| 309 |
- |
|
| 310 |
- // CreateRuntime commands MUST be called as part of the create operation after |
|
| 311 |
- // the runtime environment has been created but before the pivot_root has been executed. |
|
| 312 |
- // CreateRuntime is called immediately after the deprecated Prestart hook. |
|
| 313 |
- // CreateRuntime commands are called in the Runtime Namespace. |
|
| 314 |
- CreateRuntime HookName = "createRuntime" |
|
| 315 |
- |
|
| 316 |
- // CreateContainer commands MUST be called as part of the create operation after |
|
| 317 |
- // the runtime environment has been created but before the pivot_root has been executed. |
|
| 318 |
- // CreateContainer commands are called in the Container namespace. |
|
| 319 |
- CreateContainer HookName = "createContainer" |
|
| 320 |
- |
|
| 321 |
- // StartContainer commands MUST be called as part of the start operation and before |
|
| 322 |
- // the container process is started. |
|
| 323 |
- // StartContainer commands are called in the Container namespace. |
|
| 324 |
- StartContainer HookName = "startContainer" |
|
| 325 |
- |
|
| 326 |
- // Poststart commands are executed after the container init process starts. |
|
| 327 |
- // Poststart commands are called in the Runtime Namespace. |
|
| 328 |
- Poststart HookName = "poststart" |
|
| 329 |
- |
|
| 330 |
- // Poststop commands are executed after the container init process exits. |
|
| 331 |
- // Poststop commands are called in the Runtime Namespace. |
|
| 332 |
- Poststop HookName = "poststop" |
|
| 333 |
-) |
|
| 334 |
- |
|
| 335 |
-// KnownHookNames returns the known hook names. |
|
| 336 |
-// Used by `runc features`. |
|
| 337 |
-func KnownHookNames() []string {
|
|
| 338 |
- return []string{
|
|
| 339 |
- string(Prestart), // deprecated |
|
| 340 |
- string(CreateRuntime), |
|
| 341 |
- string(CreateContainer), |
|
| 342 |
- string(StartContainer), |
|
| 343 |
- string(Poststart), |
|
| 344 |
- string(Poststop), |
|
| 345 |
- } |
|
| 346 |
-} |
|
| 347 |
- |
|
| 348 |
-type Capabilities struct {
|
|
| 349 |
- // Bounding is the set of capabilities checked by the kernel. |
|
| 350 |
- Bounding []string |
|
| 351 |
- // Effective is the set of capabilities checked by the kernel. |
|
| 352 |
- Effective []string |
|
| 353 |
- // Inheritable is the capabilities preserved across execve. |
|
| 354 |
- Inheritable []string |
|
| 355 |
- // Permitted is the limiting superset for effective capabilities. |
|
| 356 |
- Permitted []string |
|
| 357 |
- // Ambient is the ambient set of capabilities that are kept. |
|
| 358 |
- Ambient []string |
|
| 359 |
-} |
|
| 360 |
- |
|
| 361 |
-// Deprecated: use (Hooks).Run instead. |
|
| 362 |
-func (hooks HookList) RunHooks(state *specs.State) error {
|
|
| 363 |
- for i, h := range hooks {
|
|
| 364 |
- if err := h.Run(state); err != nil {
|
|
| 365 |
- return fmt.Errorf("error running hook #%d: %w", i, err)
|
|
| 366 |
- } |
|
| 367 |
- } |
|
| 368 |
- |
|
| 369 |
- return nil |
|
| 370 |
-} |
|
| 371 |
- |
|
| 372 |
-func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
|
| 373 |
- var state map[HookName][]CommandHook |
|
| 374 |
- |
|
| 375 |
- if err := json.Unmarshal(b, &state); err != nil {
|
|
| 376 |
- return err |
|
| 377 |
- } |
|
| 378 |
- |
|
| 379 |
- *hooks = Hooks{}
|
|
| 380 |
- for n, commandHooks := range state {
|
|
| 381 |
- if len(commandHooks) == 0 {
|
|
| 382 |
- continue |
|
| 383 |
- } |
|
| 384 |
- |
|
| 385 |
- (*hooks)[n] = HookList{}
|
|
| 386 |
- for _, h := range commandHooks {
|
|
| 387 |
- (*hooks)[n] = append((*hooks)[n], h) |
|
| 388 |
- } |
|
| 389 |
- } |
|
| 390 |
- |
|
| 391 |
- return nil |
|
| 392 |
-} |
|
| 393 |
- |
|
| 394 |
-func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
|
| 395 |
- serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
|
| 396 |
- for _, hook := range hooks {
|
|
| 397 |
- switch chook := hook.(type) {
|
|
| 398 |
- case CommandHook: |
|
| 399 |
- serializableHooks = append(serializableHooks, chook) |
|
| 400 |
- default: |
|
| 401 |
- logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
|
|
| 402 |
- } |
|
| 403 |
- } |
|
| 404 |
- |
|
| 405 |
- return serializableHooks |
|
| 406 |
- } |
|
| 407 |
- |
|
| 408 |
- return json.Marshal(map[string]interface{}{
|
|
| 409 |
- "prestart": serialize((*hooks)[Prestart]), |
|
| 410 |
- "createRuntime": serialize((*hooks)[CreateRuntime]), |
|
| 411 |
- "createContainer": serialize((*hooks)[CreateContainer]), |
|
| 412 |
- "startContainer": serialize((*hooks)[StartContainer]), |
|
| 413 |
- "poststart": serialize((*hooks)[Poststart]), |
|
| 414 |
- "poststop": serialize((*hooks)[Poststop]), |
|
| 415 |
- }) |
|
| 416 |
-} |
|
| 417 |
- |
|
| 418 |
-// Run executes all hooks for the given hook name. |
|
| 419 |
-func (hooks Hooks) Run(name HookName, state *specs.State) error {
|
|
| 420 |
- list := hooks[name] |
|
| 421 |
- for i, h := range list {
|
|
| 422 |
- if err := h.Run(state); err != nil {
|
|
| 423 |
- return fmt.Errorf("error running %s hook #%d: %w", name, i, err)
|
|
| 424 |
- } |
|
| 425 |
- } |
|
| 426 |
- |
|
| 427 |
- return nil |
|
| 428 |
-} |
|
| 429 |
- |
|
| 430 |
-type Hook interface {
|
|
| 431 |
- // Run executes the hook with the provided state. |
|
| 432 |
- Run(*specs.State) error |
|
| 433 |
-} |
|
| 434 |
- |
|
| 435 |
-// NewFunctionHook will call the provided function when the hook is run. |
|
| 436 |
-func NewFunctionHook(f func(*specs.State) error) FuncHook {
|
|
| 437 |
- return FuncHook{
|
|
| 438 |
- run: f, |
|
| 439 |
- } |
|
| 440 |
-} |
|
| 441 |
- |
|
| 442 |
-type FuncHook struct {
|
|
| 443 |
- run func(*specs.State) error |
|
| 444 |
-} |
|
| 445 |
- |
|
| 446 |
-func (f FuncHook) Run(s *specs.State) error {
|
|
| 447 |
- return f.run(s) |
|
| 448 |
-} |
|
| 449 |
- |
|
| 450 |
-type Command struct {
|
|
| 451 |
- Path string `json:"path"` |
|
| 452 |
- Args []string `json:"args"` |
|
| 453 |
- Env []string `json:"env"` |
|
| 454 |
- Dir string `json:"dir"` |
|
| 455 |
- Timeout *time.Duration `json:"timeout"` |
|
| 456 |
-} |
|
| 457 |
- |
|
| 458 |
-// NewCommandHook will execute the provided command when the hook is run. |
|
| 459 |
-func NewCommandHook(cmd Command) CommandHook {
|
|
| 460 |
- return CommandHook{
|
|
| 461 |
- Command: cmd, |
|
| 462 |
- } |
|
| 463 |
-} |
|
| 464 |
- |
|
| 465 |
-type CommandHook struct {
|
|
| 466 |
- Command |
|
| 467 |
-} |
|
| 468 |
- |
|
| 469 |
-func (c Command) Run(s *specs.State) error {
|
|
| 470 |
- b, err := json.Marshal(s) |
|
| 471 |
- if err != nil {
|
|
| 472 |
- return err |
|
| 473 |
- } |
|
| 474 |
- var stdout, stderr bytes.Buffer |
|
| 475 |
- cmd := exec.Cmd{
|
|
| 476 |
- Path: c.Path, |
|
| 477 |
- Args: c.Args, |
|
| 478 |
- Env: c.Env, |
|
| 479 |
- Stdin: bytes.NewReader(b), |
|
| 480 |
- Stdout: &stdout, |
|
| 481 |
- Stderr: &stderr, |
|
| 482 |
- } |
|
| 483 |
- if err := cmd.Start(); err != nil {
|
|
| 484 |
- return err |
|
| 485 |
- } |
|
| 486 |
- errC := make(chan error, 1) |
|
| 487 |
- go func() {
|
|
| 488 |
- err := cmd.Wait() |
|
| 489 |
- if err != nil {
|
|
| 490 |
- err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
|
| 491 |
- } |
|
| 492 |
- errC <- err |
|
| 493 |
- }() |
|
| 494 |
- var timerCh <-chan time.Time |
|
| 495 |
- if c.Timeout != nil {
|
|
| 496 |
- timer := time.NewTimer(*c.Timeout) |
|
| 497 |
- defer timer.Stop() |
|
| 498 |
- timerCh = timer.C |
|
| 499 |
- } |
|
| 500 |
- select {
|
|
| 501 |
- case err := <-errC: |
|
| 502 |
- return err |
|
| 503 |
- case <-timerCh: |
|
| 504 |
- _ = cmd.Process.Kill() |
|
| 505 |
- <-errC |
|
| 506 |
- return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
|
| 507 |
- } |
|
| 508 |
-} |
| 509 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,97 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "errors" |
|
| 5 |
- "fmt" |
|
| 6 |
- "math" |
|
| 7 |
-) |
|
| 8 |
- |
|
| 9 |
-var ( |
|
| 10 |
- errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found")
|
|
| 11 |
- errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found")
|
|
| 12 |
-) |
|
| 13 |
- |
|
| 14 |
-// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details. |
|
| 15 |
-// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h |
|
| 16 |
-const ( |
|
| 17 |
- PerLinux = 0x0000 |
|
| 18 |
- PerLinux32 = 0x0008 |
|
| 19 |
-) |
|
| 20 |
- |
|
| 21 |
-type LinuxPersonality struct {
|
|
| 22 |
- // Domain for the personality |
|
| 23 |
- // can only contain values "LINUX" and "LINUX32" |
|
| 24 |
- Domain int `json:"domain"` |
|
| 25 |
-} |
|
| 26 |
- |
|
| 27 |
-// HostUID gets the translated uid for the process on host which could be |
|
| 28 |
-// different when user namespaces are enabled. |
|
| 29 |
-func (c Config) HostUID(containerId int) (int, error) {
|
|
| 30 |
- if c.Namespaces.Contains(NEWUSER) {
|
|
| 31 |
- if len(c.UIDMappings) == 0 {
|
|
| 32 |
- return -1, errNoUIDMap |
|
| 33 |
- } |
|
| 34 |
- id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings) |
|
| 35 |
- if !found {
|
|
| 36 |
- return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId)
|
|
| 37 |
- } |
|
| 38 |
- // If we are a 32-bit binary running on a 64-bit system, it's possible |
|
| 39 |
- // the mapped user is too large to store in an int, which means we |
|
| 40 |
- // cannot do the mapping. We can't just return an int64, because |
|
| 41 |
- // os.Setuid() takes an int. |
|
| 42 |
- if id > math.MaxInt {
|
|
| 43 |
- return -1, fmt.Errorf("mapping for uid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt)
|
|
| 44 |
- } |
|
| 45 |
- return int(id), nil |
|
| 46 |
- } |
|
| 47 |
- // Return unchanged id. |
|
| 48 |
- return containerId, nil |
|
| 49 |
-} |
|
| 50 |
- |
|
| 51 |
-// HostRootUID gets the root uid for the process on host which could be non-zero |
|
| 52 |
-// when user namespaces are enabled. |
|
| 53 |
-func (c Config) HostRootUID() (int, error) {
|
|
| 54 |
- return c.HostUID(0) |
|
| 55 |
-} |
|
| 56 |
- |
|
| 57 |
-// HostGID gets the translated gid for the process on host which could be |
|
| 58 |
-// different when user namespaces are enabled. |
|
| 59 |
-func (c Config) HostGID(containerId int) (int, error) {
|
|
| 60 |
- if c.Namespaces.Contains(NEWUSER) {
|
|
| 61 |
- if len(c.GIDMappings) == 0 {
|
|
| 62 |
- return -1, errNoGIDMap |
|
| 63 |
- } |
|
| 64 |
- id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings) |
|
| 65 |
- if !found {
|
|
| 66 |
- return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId)
|
|
| 67 |
- } |
|
| 68 |
- // If we are a 32-bit binary running on a 64-bit system, it's possible |
|
| 69 |
- // the mapped user is too large to store in an int, which means we |
|
| 70 |
- // cannot do the mapping. We can't just return an int64, because |
|
| 71 |
- // os.Setgid() takes an int. |
|
| 72 |
- if id > math.MaxInt {
|
|
| 73 |
- return -1, fmt.Errorf("mapping for gid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt)
|
|
| 74 |
- } |
|
| 75 |
- return int(id), nil |
|
| 76 |
- } |
|
| 77 |
- // Return unchanged id. |
|
| 78 |
- return containerId, nil |
|
| 79 |
-} |
|
| 80 |
- |
|
| 81 |
-// HostRootGID gets the root gid for the process on host which could be non-zero |
|
| 82 |
-// when user namespaces are enabled. |
|
| 83 |
-func (c Config) HostRootGID() (int, error) {
|
|
| 84 |
- return c.HostGID(0) |
|
| 85 |
-} |
|
| 86 |
- |
|
| 87 |
-// Utility function that gets a host ID for a container ID from user namespace map |
|
| 88 |
-// if that ID is present in the map. |
|
| 89 |
-func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool) {
|
|
| 90 |
- for _, m := range uMap {
|
|
| 91 |
- if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
|
| 92 |
- hostID := m.HostID + (containerID - m.ContainerID) |
|
| 93 |
- return hostID, true |
|
| 94 |
- } |
|
| 95 |
- } |
|
| 96 |
- return -1, false |
|
| 97 |
-} |
| 10 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,16 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-type IntelRdt struct {
|
|
| 4 |
- // The identity for RDT Class of Service |
|
| 5 |
- ClosID string `json:"closID,omitempty"` |
|
| 6 |
- |
|
| 7 |
- // The schema for L3 cache id and capacity bitmask (CBM) |
|
| 8 |
- // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." |
|
| 9 |
- L3CacheSchema string `json:"l3_cache_schema,omitempty"` |
|
| 10 |
- |
|
| 11 |
- // The schema of memory bandwidth per L3 cache id |
|
| 12 |
- // Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..." |
|
| 13 |
- // The unit of memory bandwidth is specified in "percentages" by |
|
| 14 |
- // default, and in "MBps" if MBA Software Controller is enabled. |
|
| 15 |
- MemBwSchema string `json:"memBwSchema,omitempty"` |
|
| 16 |
-} |
| 17 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,14 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
-) |
|
| 6 |
- |
|
| 7 |
-type IfPrioMap struct {
|
|
| 8 |
- Interface string `json:"interface"` |
|
| 9 |
- Priority int64 `json:"priority"` |
|
| 10 |
-} |
|
| 11 |
- |
|
| 12 |
-func (i *IfPrioMap) CgroupString() string {
|
|
| 13 |
- return fmt.Sprintf("%s %d", i.Interface, i.Priority)
|
|
| 14 |
-} |
| 8 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,66 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import "golang.org/x/sys/unix" |
|
| 4 |
- |
|
| 5 |
-type MountIDMapping struct {
|
|
| 6 |
- // Recursive indicates if the mapping needs to be recursive. |
|
| 7 |
- Recursive bool `json:"recursive"` |
|
| 8 |
- |
|
| 9 |
- // UserNSPath is a path to a user namespace that indicates the necessary |
|
| 10 |
- // id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and |
|
| 11 |
- // GIDMappings must be set to nil. |
|
| 12 |
- UserNSPath string `json:"userns_path,omitempty"` |
|
| 13 |
- |
|
| 14 |
- // UIDMappings is the uid mapping set for this mount, to be used with |
|
| 15 |
- // MOUNT_ATTR_IDMAP. |
|
| 16 |
- UIDMappings []IDMap `json:"uid_mappings,omitempty"` |
|
| 17 |
- |
|
| 18 |
- // GIDMappings is the gid mapping set for this mount, to be used with |
|
| 19 |
- // MOUNT_ATTR_IDMAP. |
|
| 20 |
- GIDMappings []IDMap `json:"gid_mappings,omitempty"` |
|
| 21 |
-} |
|
| 22 |
- |
|
| 23 |
-type Mount struct {
|
|
| 24 |
- // Source path for the mount. |
|
| 25 |
- Source string `json:"source"` |
|
| 26 |
- |
|
| 27 |
- // Destination path for the mount inside the container. |
|
| 28 |
- Destination string `json:"destination"` |
|
| 29 |
- |
|
| 30 |
- // Device the mount is for. |
|
| 31 |
- Device string `json:"device"` |
|
| 32 |
- |
|
| 33 |
- // Mount flags. |
|
| 34 |
- Flags int `json:"flags"` |
|
| 35 |
- |
|
| 36 |
- // Mount flags that were explicitly cleared in the configuration (meaning |
|
| 37 |
- // the user explicitly requested that these flags *not* be set). |
|
| 38 |
- ClearedFlags int `json:"cleared_flags"` |
|
| 39 |
- |
|
| 40 |
- // Propagation Flags |
|
| 41 |
- PropagationFlags []int `json:"propagation_flags"` |
|
| 42 |
- |
|
| 43 |
- // Mount data applied to the mount. |
|
| 44 |
- Data string `json:"data"` |
|
| 45 |
- |
|
| 46 |
- // Relabel source if set, "z" indicates shared, "Z" indicates unshared. |
|
| 47 |
- Relabel string `json:"relabel"` |
|
| 48 |
- |
|
| 49 |
- // RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2). |
|
| 50 |
- RecAttr *unix.MountAttr `json:"rec_attr"` |
|
| 51 |
- |
|
| 52 |
- // Extensions are additional flags that are specific to runc. |
|
| 53 |
- Extensions int `json:"extensions"` |
|
| 54 |
- |
|
| 55 |
- // Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil, |
|
| 56 |
- // the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings. |
|
| 57 |
- IDMapping *MountIDMapping `json:"id_mapping,omitempty"` |
|
| 58 |
-} |
|
| 59 |
- |
|
| 60 |
-func (m *Mount) IsBind() bool {
|
|
| 61 |
- return m.Flags&unix.MS_BIND != 0 |
|
| 62 |
-} |
|
| 63 |
- |
|
| 64 |
-func (m *Mount) IsIDMapped() bool {
|
|
| 65 |
- return m.IDMapping != nil |
|
| 66 |
-} |
| 6 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,133 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- "os" |
|
| 6 |
- "sync" |
|
| 7 |
-) |
|
| 8 |
- |
|
| 9 |
-const ( |
|
| 10 |
- NEWNET NamespaceType = "NEWNET" |
|
| 11 |
- NEWPID NamespaceType = "NEWPID" |
|
| 12 |
- NEWNS NamespaceType = "NEWNS" |
|
| 13 |
- NEWUTS NamespaceType = "NEWUTS" |
|
| 14 |
- NEWIPC NamespaceType = "NEWIPC" |
|
| 15 |
- NEWUSER NamespaceType = "NEWUSER" |
|
| 16 |
- NEWCGROUP NamespaceType = "NEWCGROUP" |
|
| 17 |
- NEWTIME NamespaceType = "NEWTIME" |
|
| 18 |
-) |
|
| 19 |
- |
|
| 20 |
-var ( |
|
| 21 |
- nsLock sync.Mutex |
|
| 22 |
- supportedNamespaces = make(map[NamespaceType]bool) |
|
| 23 |
-) |
|
| 24 |
- |
|
| 25 |
-// NsName converts the namespace type to its filename |
|
| 26 |
-func NsName(ns NamespaceType) string {
|
|
| 27 |
- switch ns {
|
|
| 28 |
- case NEWNET: |
|
| 29 |
- return "net" |
|
| 30 |
- case NEWNS: |
|
| 31 |
- return "mnt" |
|
| 32 |
- case NEWPID: |
|
| 33 |
- return "pid" |
|
| 34 |
- case NEWIPC: |
|
| 35 |
- return "ipc" |
|
| 36 |
- case NEWUSER: |
|
| 37 |
- return "user" |
|
| 38 |
- case NEWUTS: |
|
| 39 |
- return "uts" |
|
| 40 |
- case NEWCGROUP: |
|
| 41 |
- return "cgroup" |
|
| 42 |
- case NEWTIME: |
|
| 43 |
- return "time" |
|
| 44 |
- } |
|
| 45 |
- return "" |
|
| 46 |
-} |
|
| 47 |
- |
|
| 48 |
-// IsNamespaceSupported returns whether a namespace is available or |
|
| 49 |
-// not |
|
| 50 |
-func IsNamespaceSupported(ns NamespaceType) bool {
|
|
| 51 |
- nsLock.Lock() |
|
| 52 |
- defer nsLock.Unlock() |
|
| 53 |
- supported, ok := supportedNamespaces[ns] |
|
| 54 |
- if ok {
|
|
| 55 |
- return supported |
|
| 56 |
- } |
|
| 57 |
- nsFile := NsName(ns) |
|
| 58 |
- // if the namespace type is unknown, just return false |
|
| 59 |
- if nsFile == "" {
|
|
| 60 |
- return false |
|
| 61 |
- } |
|
| 62 |
- // We don't need to use /proc/thread-self here because the list of |
|
| 63 |
- // namespace types is unrelated to the thread. This lets us avoid having to |
|
| 64 |
- // do runtime.LockOSThread. |
|
| 65 |
- _, err := os.Stat("/proc/self/ns/" + nsFile)
|
|
| 66 |
- // a namespace is supported if it exists and we have permissions to read it |
|
| 67 |
- supported = err == nil |
|
| 68 |
- supportedNamespaces[ns] = supported |
|
| 69 |
- return supported |
|
| 70 |
-} |
|
| 71 |
- |
|
| 72 |
-func NamespaceTypes() []NamespaceType {
|
|
| 73 |
- return []NamespaceType{
|
|
| 74 |
- NEWUSER, // Keep user NS always first, don't move it. |
|
| 75 |
- NEWIPC, |
|
| 76 |
- NEWUTS, |
|
| 77 |
- NEWNET, |
|
| 78 |
- NEWPID, |
|
| 79 |
- NEWNS, |
|
| 80 |
- NEWCGROUP, |
|
| 81 |
- NEWTIME, |
|
| 82 |
- } |
|
| 83 |
-} |
|
| 84 |
- |
|
| 85 |
-// Namespace defines configuration for each namespace. It specifies an |
|
| 86 |
-// alternate path that is able to be joined via setns. |
|
| 87 |
-type Namespace struct {
|
|
| 88 |
- Type NamespaceType `json:"type"` |
|
| 89 |
- Path string `json:"path"` |
|
| 90 |
-} |
|
| 91 |
- |
|
| 92 |
-func (n *Namespace) GetPath(pid int) string {
|
|
| 93 |
- return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
|
|
| 94 |
-} |
|
| 95 |
- |
|
| 96 |
-func (n *Namespaces) Remove(t NamespaceType) bool {
|
|
| 97 |
- i := n.index(t) |
|
| 98 |
- if i == -1 {
|
|
| 99 |
- return false |
|
| 100 |
- } |
|
| 101 |
- *n = append((*n)[:i], (*n)[i+1:]...) |
|
| 102 |
- return true |
|
| 103 |
-} |
|
| 104 |
- |
|
| 105 |
-func (n *Namespaces) Add(t NamespaceType, path string) {
|
|
| 106 |
- i := n.index(t) |
|
| 107 |
- if i == -1 {
|
|
| 108 |
- *n = append(*n, Namespace{Type: t, Path: path})
|
|
| 109 |
- return |
|
| 110 |
- } |
|
| 111 |
- (*n)[i].Path = path |
|
| 112 |
-} |
|
| 113 |
- |
|
| 114 |
-func (n *Namespaces) index(t NamespaceType) int {
|
|
| 115 |
- for i, ns := range *n {
|
|
| 116 |
- if ns.Type == t {
|
|
| 117 |
- return i |
|
| 118 |
- } |
|
| 119 |
- } |
|
| 120 |
- return -1 |
|
| 121 |
-} |
|
| 122 |
- |
|
| 123 |
-func (n *Namespaces) Contains(t NamespaceType) bool {
|
|
| 124 |
- return n.index(t) != -1 |
|
| 125 |
-} |
|
| 126 |
- |
|
| 127 |
-func (n *Namespaces) PathOf(t NamespaceType) string {
|
|
| 128 |
- i := n.index(t) |
|
| 129 |
- if i == -1 {
|
|
| 130 |
- return "" |
|
| 131 |
- } |
|
| 132 |
- return (*n)[i].Path |
|
| 133 |
-} |
| 134 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,45 +0,0 @@ |
| 1 |
-//go:build linux |
|
| 2 |
- |
|
| 3 |
-package configs |
|
| 4 |
- |
|
| 5 |
-import "golang.org/x/sys/unix" |
|
| 6 |
- |
|
| 7 |
-func (n *Namespace) Syscall() int {
|
|
| 8 |
- return namespaceInfo[n.Type] |
|
| 9 |
-} |
|
| 10 |
- |
|
| 11 |
-var namespaceInfo = map[NamespaceType]int{
|
|
| 12 |
- NEWNET: unix.CLONE_NEWNET, |
|
| 13 |
- NEWNS: unix.CLONE_NEWNS, |
|
| 14 |
- NEWUSER: unix.CLONE_NEWUSER, |
|
| 15 |
- NEWIPC: unix.CLONE_NEWIPC, |
|
| 16 |
- NEWUTS: unix.CLONE_NEWUTS, |
|
| 17 |
- NEWPID: unix.CLONE_NEWPID, |
|
| 18 |
- NEWCGROUP: unix.CLONE_NEWCGROUP, |
|
| 19 |
- NEWTIME: unix.CLONE_NEWTIME, |
|
| 20 |
-} |
|
| 21 |
- |
|
| 22 |
-// CloneFlags parses the container's Namespaces options to set the correct |
|
| 23 |
-// flags on clone, unshare. This function returns flags only for new namespaces. |
|
| 24 |
-func (n *Namespaces) CloneFlags() uintptr {
|
|
| 25 |
- var flag int |
|
| 26 |
- for _, v := range *n {
|
|
| 27 |
- if v.Path != "" {
|
|
| 28 |
- continue |
|
| 29 |
- } |
|
| 30 |
- flag |= namespaceInfo[v.Type] |
|
| 31 |
- } |
|
| 32 |
- return uintptr(flag) |
|
| 33 |
-} |
|
| 34 |
- |
|
| 35 |
-// IsPrivate tells whether the namespace of type t is configured as private |
|
| 36 |
-// (i.e. it exists and is not shared). |
|
| 37 |
-func (n Namespaces) IsPrivate(t NamespaceType) bool {
|
|
| 38 |
- for _, v := range n {
|
|
| 39 |
- if v.Type == t {
|
|
| 40 |
- return v.Path == "" |
|
| 41 |
- } |
|
| 42 |
- } |
|
| 43 |
- // Not found, so implicitly sharing a parent namespace. |
|
| 44 |
- return false |
|
| 45 |
-} |
| 46 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,13 +0,0 @@ |
| 1 |
-//go:build !linux && !windows |
|
| 2 |
- |
|
| 3 |
-package configs |
|
| 4 |
- |
|
| 5 |
-func (n *Namespace) Syscall() int {
|
|
| 6 |
- panic("No namespace syscall support")
|
|
| 7 |
-} |
|
| 8 |
- |
|
| 9 |
-// CloneFlags parses the container's Namespaces options to set the correct |
|
| 10 |
-// flags on clone, unshare. This function returns flags only for new namespaces. |
|
| 11 |
-func (n *Namespaces) CloneFlags() uintptr {
|
|
| 12 |
- panic("No namespace syscall support")
|
|
| 13 |
-} |
| 8 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,75 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-// Network defines configuration for a container's networking stack |
|
| 4 |
-// |
|
| 5 |
-// The network configuration can be omitted from a container causing the |
|
| 6 |
-// container to be setup with the host's networking stack |
|
| 7 |
-type Network struct {
|
|
| 8 |
- // Type sets the networks type, commonly veth and loopback |
|
| 9 |
- Type string `json:"type"` |
|
| 10 |
- |
|
| 11 |
- // Name of the network interface |
|
| 12 |
- Name string `json:"name"` |
|
| 13 |
- |
|
| 14 |
- // The bridge to use. |
|
| 15 |
- Bridge string `json:"bridge"` |
|
| 16 |
- |
|
| 17 |
- // MacAddress contains the MAC address to set on the network interface |
|
| 18 |
- MacAddress string `json:"mac_address"` |
|
| 19 |
- |
|
| 20 |
- // Address contains the IPv4 and mask to set on the network interface |
|
| 21 |
- Address string `json:"address"` |
|
| 22 |
- |
|
| 23 |
- // Gateway sets the gateway address that is used as the default for the interface |
|
| 24 |
- Gateway string `json:"gateway"` |
|
| 25 |
- |
|
| 26 |
- // IPv6Address contains the IPv6 and mask to set on the network interface |
|
| 27 |
- IPv6Address string `json:"ipv6_address"` |
|
| 28 |
- |
|
| 29 |
- // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface |
|
| 30 |
- IPv6Gateway string `json:"ipv6_gateway"` |
|
| 31 |
- |
|
| 32 |
- // Mtu sets the mtu value for the interface and will be mirrored on both the host and |
|
| 33 |
- // container's interfaces if a pair is created, specifically in the case of type veth |
|
| 34 |
- // Note: This does not apply to loopback interfaces. |
|
| 35 |
- Mtu int `json:"mtu"` |
|
| 36 |
- |
|
| 37 |
- // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and |
|
| 38 |
- // container's interfaces if a pair is created, specifically in the case of type veth |
|
| 39 |
- // Note: This does not apply to loopback interfaces. |
|
| 40 |
- TxQueueLen int `json:"txqueuelen"` |
|
| 41 |
- |
|
| 42 |
- // HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the |
|
| 43 |
- // container. |
|
| 44 |
- HostInterfaceName string `json:"host_interface_name"` |
|
| 45 |
- |
|
| 46 |
- // HairpinMode specifies if hairpin NAT should be enabled on the virtual interface |
|
| 47 |
- // bridge port in the case of type veth |
|
| 48 |
- // Note: This is unsupported on some systems. |
|
| 49 |
- // Note: This does not apply to loopback interfaces. |
|
| 50 |
- HairpinMode bool `json:"hairpin_mode"` |
|
| 51 |
-} |
|
| 52 |
- |
|
| 53 |
-// Route defines a routing table entry. |
|
| 54 |
-// |
|
| 55 |
-// Routes can be specified to create entries in the routing table as the container |
|
| 56 |
-// is started. |
|
| 57 |
-// |
|
| 58 |
-// All of destination, source, and gateway should be either IPv4 or IPv6. |
|
| 59 |
-// One of the three options must be present, and omitted entries will use their |
|
| 60 |
-// IP family default for the route table. For IPv4 for example, setting the |
|
| 61 |
-// gateway to 1.2.3.4 and the interface to eth0 will set up a standard |
|
| 62 |
-// destination of 0.0.0.0(or *) when viewed in the route table. |
|
| 63 |
-type Route struct {
|
|
| 64 |
- // Destination specifies the destination IP address and mask in the CIDR form. |
|
| 65 |
- Destination string `json:"destination"` |
|
| 66 |
- |
|
| 67 |
- // Source specifies the source IP address and mask in the CIDR form. |
|
| 68 |
- Source string `json:"source"` |
|
| 69 |
- |
|
| 70 |
- // Gateway specifies the gateway IP address. |
|
| 71 |
- Gateway string `json:"gateway"` |
|
| 72 |
- |
|
| 73 |
- // InterfaceName specifies the device to set this route up for, for example eth0. |
|
| 74 |
- InterfaceName string `json:"interface_name"` |
|
| 75 |
-} |
| 76 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,9 +0,0 @@ |
| 1 |
-package configs |
|
| 2 |
- |
|
| 3 |
-// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11) |
|
| 4 |
-type LinuxRdma struct {
|
|
| 5 |
- // Maximum number of HCA handles that can be opened. Default is "no limit". |
|
| 6 |
- HcaHandles *uint32 `json:"hca_handles,omitempty"` |
|
| 7 |
- // Maximum number of HCA objects that can be created. Default is "no limit". |
|
| 8 |
- HcaObjects *uint32 `json:"hca_objects,omitempty"` |
|
| 9 |
-} |
| 10 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,174 +0,0 @@ |
| 1 |
-package devices |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "fmt" |
|
| 5 |
- "os" |
|
| 6 |
- "strconv" |
|
| 7 |
-) |
|
| 8 |
- |
|
| 9 |
-const ( |
|
| 10 |
- Wildcard = -1 |
|
| 11 |
-) |
|
| 12 |
- |
|
| 13 |
-type Device struct {
|
|
| 14 |
- Rule |
|
| 15 |
- |
|
| 16 |
- // Path to the device. |
|
| 17 |
- Path string `json:"path"` |
|
| 18 |
- |
|
| 19 |
- // FileMode permission bits for the device. |
|
| 20 |
- FileMode os.FileMode `json:"file_mode"` |
|
| 21 |
- |
|
| 22 |
- // Uid of the device. |
|
| 23 |
- Uid uint32 `json:"uid"` |
|
| 24 |
- |
|
| 25 |
- // Gid of the device. |
|
| 26 |
- Gid uint32 `json:"gid"` |
|
| 27 |
-} |
|
| 28 |
- |
|
| 29 |
-// Permissions is a cgroupv1-style string to represent device access. It |
|
| 30 |
-// has to be a string for backward compatibility reasons, hence why it has |
|
| 31 |
-// methods to do set operations. |
|
| 32 |
-type Permissions string |
|
| 33 |
- |
|
| 34 |
-const ( |
|
| 35 |
- deviceRead uint = (1 << iota) |
|
| 36 |
- deviceWrite |
|
| 37 |
- deviceMknod |
|
| 38 |
-) |
|
| 39 |
- |
|
| 40 |
-func (p Permissions) toSet() uint {
|
|
| 41 |
- var set uint |
|
| 42 |
- for _, perm := range p {
|
|
| 43 |
- switch perm {
|
|
| 44 |
- case 'r': |
|
| 45 |
- set |= deviceRead |
|
| 46 |
- case 'w': |
|
| 47 |
- set |= deviceWrite |
|
| 48 |
- case 'm': |
|
| 49 |
- set |= deviceMknod |
|
| 50 |
- } |
|
| 51 |
- } |
|
| 52 |
- return set |
|
| 53 |
-} |
|
| 54 |
- |
|
| 55 |
-func fromSet(set uint) Permissions {
|
|
| 56 |
- var perm string |
|
| 57 |
- if set&deviceRead == deviceRead {
|
|
| 58 |
- perm += "r" |
|
| 59 |
- } |
|
| 60 |
- if set&deviceWrite == deviceWrite {
|
|
| 61 |
- perm += "w" |
|
| 62 |
- } |
|
| 63 |
- if set&deviceMknod == deviceMknod {
|
|
| 64 |
- perm += "m" |
|
| 65 |
- } |
|
| 66 |
- return Permissions(perm) |
|
| 67 |
-} |
|
| 68 |
- |
|
| 69 |
-// Union returns the union of the two sets of Permissions. |
|
| 70 |
-func (p Permissions) Union(o Permissions) Permissions {
|
|
| 71 |
- lhs := p.toSet() |
|
| 72 |
- rhs := o.toSet() |
|
| 73 |
- return fromSet(lhs | rhs) |
|
| 74 |
-} |
|
| 75 |
- |
|
| 76 |
-// Difference returns the set difference of the two sets of Permissions. |
|
| 77 |
-// In set notation, A.Difference(B) gives you A\B. |
|
| 78 |
-func (p Permissions) Difference(o Permissions) Permissions {
|
|
| 79 |
- lhs := p.toSet() |
|
| 80 |
- rhs := o.toSet() |
|
| 81 |
- return fromSet(lhs &^ rhs) |
|
| 82 |
-} |
|
| 83 |
- |
|
| 84 |
-// Intersection computes the intersection of the two sets of Permissions. |
|
| 85 |
-func (p Permissions) Intersection(o Permissions) Permissions {
|
|
| 86 |
- lhs := p.toSet() |
|
| 87 |
- rhs := o.toSet() |
|
| 88 |
- return fromSet(lhs & rhs) |
|
| 89 |
-} |
|
| 90 |
- |
|
| 91 |
-// IsEmpty returns whether the set of permissions in a Permissions is |
|
| 92 |
-// empty. |
|
| 93 |
-func (p Permissions) IsEmpty() bool {
|
|
| 94 |
- return p == Permissions("")
|
|
| 95 |
-} |
|
| 96 |
- |
|
| 97 |
-// IsValid returns whether the set of permissions is a subset of valid |
|
| 98 |
-// permissions (namely, {r,w,m}).
|
|
| 99 |
-func (p Permissions) IsValid() bool {
|
|
| 100 |
- return p == fromSet(p.toSet()) |
|
| 101 |
-} |
|
| 102 |
- |
|
| 103 |
-type Type rune |
|
| 104 |
- |
|
| 105 |
-const ( |
|
| 106 |
- WildcardDevice Type = 'a' |
|
| 107 |
- BlockDevice Type = 'b' |
|
| 108 |
- CharDevice Type = 'c' // or 'u' |
|
| 109 |
- FifoDevice Type = 'p' |
|
| 110 |
-) |
|
| 111 |
- |
|
| 112 |
-func (t Type) IsValid() bool {
|
|
| 113 |
- switch t {
|
|
| 114 |
- case WildcardDevice, BlockDevice, CharDevice, FifoDevice: |
|
| 115 |
- return true |
|
| 116 |
- default: |
|
| 117 |
- return false |
|
| 118 |
- } |
|
| 119 |
-} |
|
| 120 |
- |
|
| 121 |
-func (t Type) CanMknod() bool {
|
|
| 122 |
- switch t {
|
|
| 123 |
- case BlockDevice, CharDevice, FifoDevice: |
|
| 124 |
- return true |
|
| 125 |
- default: |
|
| 126 |
- return false |
|
| 127 |
- } |
|
| 128 |
-} |
|
| 129 |
- |
|
| 130 |
-func (t Type) CanCgroup() bool {
|
|
| 131 |
- switch t {
|
|
| 132 |
- case WildcardDevice, BlockDevice, CharDevice: |
|
| 133 |
- return true |
|
| 134 |
- default: |
|
| 135 |
- return false |
|
| 136 |
- } |
|
| 137 |
-} |
|
| 138 |
- |
|
| 139 |
-type Rule struct {
|
|
| 140 |
- // Type of device ('c' for char, 'b' for block). If set to 'a', this rule
|
|
| 141 |
- // acts as a wildcard and all fields other than Allow are ignored. |
|
| 142 |
- Type Type `json:"type"` |
|
| 143 |
- |
|
| 144 |
- // Major is the device's major number. |
|
| 145 |
- Major int64 `json:"major"` |
|
| 146 |
- |
|
| 147 |
- // Minor is the device's minor number. |
|
| 148 |
- Minor int64 `json:"minor"` |
|
| 149 |
- |
|
| 150 |
- // Permissions is the set of permissions that this rule applies to (in the |
|
| 151 |
- // cgroupv1 format -- any combination of "rwm"). |
|
| 152 |
- Permissions Permissions `json:"permissions"` |
|
| 153 |
- |
|
| 154 |
- // Allow specifies whether this rule is allowed. |
|
| 155 |
- Allow bool `json:"allow"` |
|
| 156 |
-} |
|
| 157 |
- |
|
| 158 |
-func (d *Rule) CgroupString() string {
|
|
| 159 |
- var ( |
|
| 160 |
- major = strconv.FormatInt(d.Major, 10) |
|
| 161 |
- minor = strconv.FormatInt(d.Minor, 10) |
|
| 162 |
- ) |
|
| 163 |
- if d.Major == Wildcard {
|
|
| 164 |
- major = "*" |
|
| 165 |
- } |
|
| 166 |
- if d.Minor == Wildcard {
|
|
| 167 |
- minor = "*" |
|
| 168 |
- } |
|
| 169 |
- return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
|
| 170 |
-} |
|
| 171 |
- |
|
| 172 |
-func (d *Rule) Mkdev() (uint64, error) {
|
|
| 173 |
- return mkDev(d) |
|
| 174 |
-} |
| 175 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,119 +0,0 @@ |
| 1 |
-//go:build !windows |
|
| 2 |
- |
|
| 3 |
-package devices |
|
| 4 |
- |
|
| 5 |
-import ( |
|
| 6 |
- "errors" |
|
| 7 |
- "os" |
|
| 8 |
- "path/filepath" |
|
| 9 |
- |
|
| 10 |
- "golang.org/x/sys/unix" |
|
| 11 |
-) |
|
| 12 |
- |
|
| 13 |
-// ErrNotADevice denotes that a file is not a valid linux device. |
|
| 14 |
-var ErrNotADevice = errors.New("not a device node")
|
|
| 15 |
- |
|
| 16 |
-// Testing dependencies |
|
| 17 |
-var ( |
|
| 18 |
- unixLstat = unix.Lstat |
|
| 19 |
- osReadDir = os.ReadDir |
|
| 20 |
-) |
|
| 21 |
- |
|
| 22 |
-func mkDev(d *Rule) (uint64, error) {
|
|
| 23 |
- if d.Major == Wildcard || d.Minor == Wildcard {
|
|
| 24 |
- return 0, errors.New("cannot mkdev() device with wildcards")
|
|
| 25 |
- } |
|
| 26 |
- return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil |
|
| 27 |
-} |
|
| 28 |
- |
|
| 29 |
-// DeviceFromPath takes the path to a device and its cgroup_permissions (which |
|
| 30 |
-// cannot be easily queried) to look up the information about a linux device |
|
| 31 |
-// and returns that information as a Device struct. |
|
| 32 |
-func DeviceFromPath(path, permissions string) (*Device, error) {
|
|
| 33 |
- var stat unix.Stat_t |
|
| 34 |
- err := unixLstat(path, &stat) |
|
| 35 |
- if err != nil {
|
|
| 36 |
- return nil, err |
|
| 37 |
- } |
|
| 38 |
- |
|
| 39 |
- var ( |
|
| 40 |
- devType Type |
|
| 41 |
- mode = stat.Mode |
|
| 42 |
- devNumber = uint64(stat.Rdev) //nolint:unconvert // Rdev is uint32 on e.g. MIPS. |
|
| 43 |
- major = unix.Major(devNumber) |
|
| 44 |
- minor = unix.Minor(devNumber) |
|
| 45 |
- ) |
|
| 46 |
- switch mode & unix.S_IFMT {
|
|
| 47 |
- case unix.S_IFBLK: |
|
| 48 |
- devType = BlockDevice |
|
| 49 |
- case unix.S_IFCHR: |
|
| 50 |
- devType = CharDevice |
|
| 51 |
- case unix.S_IFIFO: |
|
| 52 |
- devType = FifoDevice |
|
| 53 |
- default: |
|
| 54 |
- return nil, ErrNotADevice |
|
| 55 |
- } |
|
| 56 |
- return &Device{
|
|
| 57 |
- Rule: Rule{
|
|
| 58 |
- Type: devType, |
|
| 59 |
- Major: int64(major), |
|
| 60 |
- Minor: int64(minor), |
|
| 61 |
- Permissions: Permissions(permissions), |
|
| 62 |
- }, |
|
| 63 |
- Path: path, |
|
| 64 |
- FileMode: os.FileMode(mode &^ unix.S_IFMT), |
|
| 65 |
- Uid: stat.Uid, |
|
| 66 |
- Gid: stat.Gid, |
|
| 67 |
- }, nil |
|
| 68 |
-} |
|
| 69 |
- |
|
| 70 |
-// HostDevices returns all devices that can be found under /dev directory. |
|
| 71 |
-func HostDevices() ([]*Device, error) {
|
|
| 72 |
- return GetDevices("/dev")
|
|
| 73 |
-} |
|
| 74 |
- |
|
| 75 |
-// GetDevices recursively traverses a directory specified by path |
|
| 76 |
-// and returns all devices found there. |
|
| 77 |
-func GetDevices(path string) ([]*Device, error) {
|
|
| 78 |
- files, err := osReadDir(path) |
|
| 79 |
- if err != nil {
|
|
| 80 |
- return nil, err |
|
| 81 |
- } |
|
| 82 |
- var out []*Device |
|
| 83 |
- for _, f := range files {
|
|
| 84 |
- switch {
|
|
| 85 |
- case f.IsDir(): |
|
| 86 |
- switch f.Name() {
|
|
| 87 |
- // ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825 |
|
| 88 |
- // ".udev" added to address https://github.com/opencontainers/runc/issues/2093 |
|
| 89 |
- case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev": |
|
| 90 |
- continue |
|
| 91 |
- default: |
|
| 92 |
- sub, err := GetDevices(filepath.Join(path, f.Name())) |
|
| 93 |
- if err != nil {
|
|
| 94 |
- return nil, err |
|
| 95 |
- } |
|
| 96 |
- |
|
| 97 |
- out = append(out, sub...) |
|
| 98 |
- continue |
|
| 99 |
- } |
|
| 100 |
- case f.Name() == "console": |
|
| 101 |
- continue |
|
| 102 |
- } |
|
| 103 |
- device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") |
|
| 104 |
- if err != nil {
|
|
| 105 |
- if errors.Is(err, ErrNotADevice) {
|
|
| 106 |
- continue |
|
| 107 |
- } |
|
| 108 |
- if os.IsNotExist(err) {
|
|
| 109 |
- continue |
|
| 110 |
- } |
|
| 111 |
- return nil, err |
|
| 112 |
- } |
|
| 113 |
- if device.Type == FifoDevice {
|
|
| 114 |
- continue |
|
| 115 |
- } |
|
| 116 |
- out = append(out, device) |
|
| 117 |
- } |
|
| 118 |
- return out, nil |
|
| 119 |
-} |
| 120 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,135 +0,0 @@ |
| 1 |
-package utils |
|
| 2 |
- |
|
| 3 |
-/* |
|
| 4 |
- * Copyright 2016, 2017 SUSE LLC |
|
| 5 |
- * |
|
| 6 |
- * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 7 |
- * you may not use this file except in compliance with the License. |
|
| 8 |
- * You may obtain a copy of the License at |
|
| 9 |
- * |
|
| 10 |
- * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 11 |
- * |
|
| 12 |
- * Unless required by applicable law or agreed to in writing, software |
|
| 13 |
- * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 14 |
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 15 |
- * See the License for the specific language governing permissions and |
|
| 16 |
- * limitations under the License. |
|
| 17 |
- */ |
|
| 18 |
- |
|
| 19 |
-import ( |
|
| 20 |
- "fmt" |
|
| 21 |
- "os" |
|
| 22 |
- "runtime" |
|
| 23 |
- |
|
| 24 |
- "golang.org/x/sys/unix" |
|
| 25 |
-) |
|
| 26 |
- |
|
| 27 |
-// MaxNameLen is the maximum length of the name of a file descriptor being sent |
|
| 28 |
-// using SendFile. The name of the file handle returned by RecvFile will never be |
|
| 29 |
-// larger than this value. |
|
| 30 |
-const MaxNameLen = 4096 |
|
| 31 |
- |
|
| 32 |
-// oobSpace is the size of the oob slice required to store a single FD. Note |
|
| 33 |
-// that unix.UnixRights appears to make the assumption that fd is always int32, |
|
| 34 |
-// so sizeof(fd) = 4. |
|
| 35 |
-var oobSpace = unix.CmsgSpace(4) |
|
| 36 |
- |
|
| 37 |
-// RecvFile waits for a file descriptor to be sent over the given AF_UNIX |
|
| 38 |
-// socket. The file name of the remote file descriptor will be recreated |
|
| 39 |
-// locally (it is sent as non-auxiliary data in the same payload). |
|
| 40 |
-func RecvFile(socket *os.File) (_ *os.File, Err error) {
|
|
| 41 |
- name := make([]byte, MaxNameLen) |
|
| 42 |
- oob := make([]byte, oobSpace) |
|
| 43 |
- |
|
| 44 |
- sockfd := socket.Fd() |
|
| 45 |
- var ( |
|
| 46 |
- n, oobn int |
|
| 47 |
- err error |
|
| 48 |
- ) |
|
| 49 |
- |
|
| 50 |
- for {
|
|
| 51 |
- n, oobn, _, _, err = unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC) |
|
| 52 |
- if err != unix.EINTR { //nolint:errorlint // unix errors are bare
|
|
| 53 |
- break |
|
| 54 |
- } |
|
| 55 |
- } |
|
| 56 |
- |
|
| 57 |
- if err != nil {
|
|
| 58 |
- return nil, os.NewSyscallError("recvmsg", err)
|
|
| 59 |
- } |
|
| 60 |
- if n >= MaxNameLen || oobn != oobSpace {
|
|
| 61 |
- return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
|
| 62 |
- } |
|
| 63 |
- // Truncate. |
|
| 64 |
- name = name[:n] |
|
| 65 |
- oob = oob[:oobn] |
|
| 66 |
- |
|
| 67 |
- scms, err := unix.ParseSocketControlMessage(oob) |
|
| 68 |
- if err != nil {
|
|
| 69 |
- return nil, err |
|
| 70 |
- } |
|
| 71 |
- |
|
| 72 |
- // We cannot control how many SCM_RIGHTS we receive, and upon receiving |
|
| 73 |
- // them all of the descriptors are installed in our fd table, so we need to |
|
| 74 |
- // parse all of the SCM_RIGHTS we received in order to close all of the |
|
| 75 |
- // descriptors on error. |
|
| 76 |
- var fds []int |
|
| 77 |
- defer func() {
|
|
| 78 |
- for i, fd := range fds {
|
|
| 79 |
- if i == 0 && Err == nil {
|
|
| 80 |
- // Only close the first one on error. |
|
| 81 |
- continue |
|
| 82 |
- } |
|
| 83 |
- // Always close extra ones. |
|
| 84 |
- _ = unix.Close(fd) |
|
| 85 |
- } |
|
| 86 |
- }() |
|
| 87 |
- var lastErr error |
|
| 88 |
- for _, scm := range scms {
|
|
| 89 |
- if scm.Header.Type == unix.SCM_RIGHTS {
|
|
| 90 |
- scmFds, err := unix.ParseUnixRights(&scm) |
|
| 91 |
- if err != nil {
|
|
| 92 |
- lastErr = err |
|
| 93 |
- } else {
|
|
| 94 |
- fds = append(fds, scmFds...) |
|
| 95 |
- } |
|
| 96 |
- } |
|
| 97 |
- } |
|
| 98 |
- if lastErr != nil {
|
|
| 99 |
- return nil, lastErr |
|
| 100 |
- } |
|
| 101 |
- |
|
| 102 |
- // We do this after collecting the fds to make sure we close them all when |
|
| 103 |
- // returning an error here. |
|
| 104 |
- if len(scms) != 1 {
|
|
| 105 |
- return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
|
| 106 |
- } |
|
| 107 |
- if len(fds) != 1 {
|
|
| 108 |
- return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
|
| 109 |
- } |
|
| 110 |
- return os.NewFile(uintptr(fds[0]), string(name)), nil |
|
| 111 |
-} |
|
| 112 |
- |
|
| 113 |
-// SendFile sends a file over the given AF_UNIX socket. file.Name() is also |
|
| 114 |
-// included so that if the other end uses RecvFile, the file will have the same |
|
| 115 |
-// name information. |
|
| 116 |
-func SendFile(socket *os.File, file *os.File) error {
|
|
| 117 |
- name := file.Name() |
|
| 118 |
- if len(name) >= MaxNameLen {
|
|
| 119 |
- return fmt.Errorf("sendfd: filename too long: %s", name)
|
|
| 120 |
- } |
|
| 121 |
- err := SendRawFd(socket, name, file.Fd()) |
|
| 122 |
- runtime.KeepAlive(file) |
|
| 123 |
- return err |
|
| 124 |
-} |
|
| 125 |
- |
|
| 126 |
-// SendRawFd sends a specific file descriptor over the given AF_UNIX socket. |
|
| 127 |
-func SendRawFd(socket *os.File, msg string, fd uintptr) error {
|
|
| 128 |
- oob := unix.UnixRights(int(fd)) |
|
| 129 |
- for {
|
|
| 130 |
- err := unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0) |
|
| 131 |
- if err != unix.EINTR { //nolint:errorlint // unix errors are bare
|
|
| 132 |
- return os.NewSyscallError("sendmsg", err)
|
|
| 133 |
- } |
|
| 134 |
- } |
|
| 135 |
-} |
| 136 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,115 +0,0 @@ |
| 1 |
-package utils |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "encoding/json" |
|
| 5 |
- "io" |
|
| 6 |
- "os" |
|
| 7 |
- "path/filepath" |
|
| 8 |
- "strings" |
|
| 9 |
- |
|
| 10 |
- "golang.org/x/sys/unix" |
|
| 11 |
-) |
|
| 12 |
- |
|
| 13 |
-const ( |
|
| 14 |
- exitSignalOffset = 128 |
|
| 15 |
-) |
|
| 16 |
- |
|
| 17 |
-// ExitStatus returns the correct exit status for a process based on if it |
|
| 18 |
-// was signaled or exited cleanly |
|
| 19 |
-func ExitStatus(status unix.WaitStatus) int {
|
|
| 20 |
- if status.Signaled() {
|
|
| 21 |
- return exitSignalOffset + int(status.Signal()) |
|
| 22 |
- } |
|
| 23 |
- return status.ExitStatus() |
|
| 24 |
-} |
|
| 25 |
- |
|
| 26 |
-// WriteJSON writes the provided struct v to w using standard json marshaling |
|
| 27 |
-// without a trailing newline. This is used instead of json.Encoder because |
|
| 28 |
-// there might be a problem in json decoder in some cases, see: |
|
| 29 |
-// https://github.com/docker/docker/issues/14203#issuecomment-174177790 |
|
| 30 |
-func WriteJSON(w io.Writer, v interface{}) error {
|
|
| 31 |
- data, err := json.Marshal(v) |
|
| 32 |
- if err != nil {
|
|
| 33 |
- return err |
|
| 34 |
- } |
|
| 35 |
- _, err = w.Write(data) |
|
| 36 |
- return err |
|
| 37 |
-} |
|
| 38 |
- |
|
| 39 |
-// CleanPath makes a path safe for use with filepath.Join. This is done by not |
|
| 40 |
-// only cleaning the path, but also (if the path is relative) adding a leading |
|
| 41 |
-// '/' and cleaning it (then removing the leading '/'). This ensures that a |
|
| 42 |
-// path resulting from prepending another path will always resolve to lexically |
|
| 43 |
-// be a subdirectory of the prefixed path. This is all done lexically, so paths |
|
| 44 |
-// that include symlinks won't be safe as a result of using CleanPath. |
|
| 45 |
-func CleanPath(path string) string {
|
|
| 46 |
- // Deal with empty strings nicely. |
|
| 47 |
- if path == "" {
|
|
| 48 |
- return "" |
|
| 49 |
- } |
|
| 50 |
- |
|
| 51 |
- // Ensure that all paths are cleaned (especially problematic ones like |
|
| 52 |
- // "/../../../../../" which can cause lots of issues). |
|
| 53 |
- path = filepath.Clean(path) |
|
| 54 |
- |
|
| 55 |
- // If the path isn't absolute, we need to do more processing to fix paths |
|
| 56 |
- // such as "../../../../<etc>/some/path". We also shouldn't convert absolute |
|
| 57 |
- // paths to relative ones. |
|
| 58 |
- if !filepath.IsAbs(path) {
|
|
| 59 |
- path = filepath.Clean(string(os.PathSeparator) + path) |
|
| 60 |
- // This can't fail, as (by definition) all paths are relative to root. |
|
| 61 |
- path, _ = filepath.Rel(string(os.PathSeparator), path) |
|
| 62 |
- } |
|
| 63 |
- |
|
| 64 |
- // Clean the path again for good measure. |
|
| 65 |
- return filepath.Clean(path) |
|
| 66 |
-} |
|
| 67 |
- |
|
| 68 |
-// stripRoot returns the passed path, stripping the root path if it was |
|
| 69 |
-// (lexicially) inside it. Note that both passed paths will always be treated |
|
| 70 |
-// as absolute, and the returned path will also always be absolute. In |
|
| 71 |
-// addition, the paths are cleaned before stripping the root. |
|
| 72 |
-func stripRoot(root, path string) string {
|
|
| 73 |
- // Make the paths clean and absolute. |
|
| 74 |
- root, path = CleanPath("/"+root), CleanPath("/"+path)
|
|
| 75 |
- switch {
|
|
| 76 |
- case path == root: |
|
| 77 |
- path = "/" |
|
| 78 |
- case root == "/": |
|
| 79 |
- // do nothing |
|
| 80 |
- case strings.HasPrefix(path, root+"/"): |
|
| 81 |
- path = strings.TrimPrefix(path, root+"/") |
|
| 82 |
- } |
|
| 83 |
- return CleanPath("/" + path)
|
|
| 84 |
-} |
|
| 85 |
- |
|
| 86 |
-// SearchLabels searches through a list of key=value pairs for a given key, |
|
| 87 |
-// returning its value, and the binary flag telling whether the key exist. |
|
| 88 |
-func SearchLabels(labels []string, key string) (string, bool) {
|
|
| 89 |
- key += "=" |
|
| 90 |
- for _, s := range labels {
|
|
| 91 |
- if strings.HasPrefix(s, key) {
|
|
| 92 |
- return s[len(key):], true |
|
| 93 |
- } |
|
| 94 |
- } |
|
| 95 |
- return "", false |
|
| 96 |
-} |
|
| 97 |
- |
|
| 98 |
-// Annotations returns the bundle path and user defined annotations from the |
|
| 99 |
-// libcontainer state. We need to remove the bundle because that is a label |
|
| 100 |
-// added by libcontainer. |
|
| 101 |
-func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
|
|
| 102 |
- userAnnotations = make(map[string]string) |
|
| 103 |
- for _, l := range labels {
|
|
| 104 |
- name, value, ok := strings.Cut(l, "=") |
|
| 105 |
- if !ok {
|
|
| 106 |
- continue |
|
| 107 |
- } |
|
| 108 |
- if name == "bundle" {
|
|
| 109 |
- bundle = value |
|
| 110 |
- } else {
|
|
| 111 |
- userAnnotations[name] = value |
|
| 112 |
- } |
|
| 113 |
- } |
|
| 114 |
- return |
|
| 115 |
-} |
| 116 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,360 +0,0 @@ |
| 1 |
-//go:build !windows |
|
| 2 |
- |
|
| 3 |
-package utils |
|
| 4 |
- |
|
| 5 |
-import ( |
|
| 6 |
- "fmt" |
|
| 7 |
- "math" |
|
| 8 |
- "os" |
|
| 9 |
- "path/filepath" |
|
| 10 |
- "runtime" |
|
| 11 |
- "strconv" |
|
| 12 |
- "strings" |
|
| 13 |
- "sync" |
|
| 14 |
- _ "unsafe" // for go:linkname |
|
| 15 |
- |
|
| 16 |
- securejoin "github.com/cyphar/filepath-securejoin" |
|
| 17 |
- "github.com/sirupsen/logrus" |
|
| 18 |
- "golang.org/x/sys/unix" |
|
| 19 |
-) |
|
| 20 |
- |
|
| 21 |
-// EnsureProcHandle returns whether or not the given file handle is on procfs. |
|
| 22 |
-func EnsureProcHandle(fh *os.File) error {
|
|
| 23 |
- var buf unix.Statfs_t |
|
| 24 |
- if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
|
|
| 25 |
- return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
|
|
| 26 |
- } |
|
| 27 |
- if buf.Type != unix.PROC_SUPER_MAGIC {
|
|
| 28 |
- return fmt.Errorf("%s is not on procfs", fh.Name())
|
|
| 29 |
- } |
|
| 30 |
- return nil |
|
| 31 |
-} |
|
| 32 |
- |
|
| 33 |
-var ( |
|
| 34 |
- haveCloseRangeCloexecBool bool |
|
| 35 |
- haveCloseRangeCloexecOnce sync.Once |
|
| 36 |
-) |
|
| 37 |
- |
|
| 38 |
-func haveCloseRangeCloexec() bool {
|
|
| 39 |
- haveCloseRangeCloexecOnce.Do(func() {
|
|
| 40 |
- // Make sure we're not closing a random file descriptor. |
|
| 41 |
- tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0) |
|
| 42 |
- if err != nil {
|
|
| 43 |
- return |
|
| 44 |
- } |
|
| 45 |
- defer unix.Close(tmpFd) |
|
| 46 |
- |
|
| 47 |
- err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC) |
|
| 48 |
- // Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC). |
|
| 49 |
- // -ENOSYS and -EINVAL ultimately mean we don't have support, but any |
|
| 50 |
- // other potential error would imply that even the most basic close |
|
| 51 |
- // operation wouldn't work. |
|
| 52 |
- haveCloseRangeCloexecBool = err == nil |
|
| 53 |
- }) |
|
| 54 |
- return haveCloseRangeCloexecBool |
|
| 55 |
-} |
|
| 56 |
- |
|
| 57 |
-type fdFunc func(fd int) |
|
| 58 |
- |
|
| 59 |
-// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in |
|
| 60 |
-// the current process. |
|
| 61 |
-func fdRangeFrom(minFd int, fn fdFunc) error {
|
|
| 62 |
- procSelfFd, closer := ProcThreadSelf("fd")
|
|
| 63 |
- defer closer() |
|
| 64 |
- |
|
| 65 |
- fdDir, err := os.Open(procSelfFd) |
|
| 66 |
- if err != nil {
|
|
| 67 |
- return err |
|
| 68 |
- } |
|
| 69 |
- defer fdDir.Close() |
|
| 70 |
- |
|
| 71 |
- if err := EnsureProcHandle(fdDir); err != nil {
|
|
| 72 |
- return err |
|
| 73 |
- } |
|
| 74 |
- |
|
| 75 |
- fdList, err := fdDir.Readdirnames(-1) |
|
| 76 |
- if err != nil {
|
|
| 77 |
- return err |
|
| 78 |
- } |
|
| 79 |
- for _, fdStr := range fdList {
|
|
| 80 |
- fd, err := strconv.Atoi(fdStr) |
|
| 81 |
- // Ignore non-numeric file names. |
|
| 82 |
- if err != nil {
|
|
| 83 |
- continue |
|
| 84 |
- } |
|
| 85 |
- // Ignore descriptors lower than our specified minimum. |
|
| 86 |
- if fd < minFd {
|
|
| 87 |
- continue |
|
| 88 |
- } |
|
| 89 |
- // Ignore the file descriptor we used for readdir, as it will be closed |
|
| 90 |
- // when we return. |
|
| 91 |
- if uintptr(fd) == fdDir.Fd() {
|
|
| 92 |
- continue |
|
| 93 |
- } |
|
| 94 |
- // Run the closure. |
|
| 95 |
- fn(fd) |
|
| 96 |
- } |
|
| 97 |
- return nil |
|
| 98 |
-} |
|
| 99 |
- |
|
| 100 |
-// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or |
|
| 101 |
-// equal to minFd in the current process. |
|
| 102 |
-func CloseExecFrom(minFd int) error {
|
|
| 103 |
- // Use close_range(CLOSE_RANGE_CLOEXEC) if possible. |
|
| 104 |
- if haveCloseRangeCloexec() {
|
|
| 105 |
- err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC) |
|
| 106 |
- return os.NewSyscallError("close_range", err)
|
|
| 107 |
- } |
|
| 108 |
- // Otherwise, fall back to the standard loop. |
|
| 109 |
- return fdRangeFrom(minFd, unix.CloseOnExec) |
|
| 110 |
-} |
|
| 111 |
- |
|
| 112 |
-//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor |
|
| 113 |
- |
|
| 114 |
-// In order to make sure we do not close the internal epoll descriptors the Go |
|
| 115 |
-// runtime uses, we need to ensure that we skip descriptors that match |
|
| 116 |
-// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing, |
|
| 117 |
-// unfortunately there's no other way to be sure we're only keeping the file |
|
| 118 |
-// descriptors the Go runtime needs. Hopefully nothing blows up doing this... |
|
| 119 |
-func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive |
|
| 120 |
- |
|
| 121 |
-// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the |
|
| 122 |
-// current process, except for those critical to Go's runtime (such as the |
|
| 123 |
-// netpoll management descriptors). |
|
| 124 |
-// |
|
| 125 |
-// NOTE: That this function is incredibly dangerous to use in most Go code, as |
|
| 126 |
-// closing file descriptors from underneath *os.File handles can lead to very |
|
| 127 |
-// bad behaviour (the closed file descriptor can be re-used and then any |
|
| 128 |
-// *os.File operations would apply to the wrong file). This function is only |
|
| 129 |
-// intended to be called from the last stage of runc init. |
|
| 130 |
-func UnsafeCloseFrom(minFd int) error {
|
|
| 131 |
- // We cannot use close_range(2) even if it is available, because we must |
|
| 132 |
- // not close some file descriptors. |
|
| 133 |
- return fdRangeFrom(minFd, func(fd int) {
|
|
| 134 |
- if runtime_IsPollDescriptor(uintptr(fd)) {
|
|
| 135 |
- // These are the Go runtimes internal netpoll file descriptors. |
|
| 136 |
- // These file descriptors are operated on deep in the Go scheduler, |
|
| 137 |
- // and closing those files from underneath Go can result in panics. |
|
| 138 |
- // There is no issue with keeping them because they are not |
|
| 139 |
- // executable and are not useful to an attacker anyway. Also we |
|
| 140 |
- // don't have any choice. |
|
| 141 |
- return |
|
| 142 |
- } |
|
| 143 |
- // There's nothing we can do about errors from close(2), and the |
|
| 144 |
- // only likely error to be seen is EBADF which indicates the fd was |
|
| 145 |
- // already closed (in which case, we got what we wanted). |
|
| 146 |
- _ = unix.Close(fd) |
|
| 147 |
- }) |
|
| 148 |
-} |
|
| 149 |
- |
|
| 150 |
-// NewSockPair returns a new SOCK_STREAM unix socket pair. |
|
| 151 |
-func NewSockPair(name string) (parent, child *os.File, err error) {
|
|
| 152 |
- fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) |
|
| 153 |
- if err != nil {
|
|
| 154 |
- return nil, nil, err |
|
| 155 |
- } |
|
| 156 |
- return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil |
|
| 157 |
-} |
|
| 158 |
- |
|
| 159 |
-// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) |
|
| 160 |
-// corresponding to the unsafePath resolved within the root. Before passing the |
|
| 161 |
-// fd, this path is verified to have been inside the root -- so operating on it |
|
| 162 |
-// through the passed fdpath should be safe. Do not access this path through |
|
| 163 |
-// the original path strings, and do not attempt to use the pathname outside of |
|
| 164 |
-// the passed closure (the file handle will be freed once the closure returns). |
|
| 165 |
-func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
|
| 166 |
- // Remove the root then forcefully resolve inside the root. |
|
| 167 |
- unsafePath = stripRoot(root, unsafePath) |
|
| 168 |
- path, err := securejoin.SecureJoin(root, unsafePath) |
|
| 169 |
- if err != nil {
|
|
| 170 |
- return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
|
| 171 |
- } |
|
| 172 |
- |
|
| 173 |
- procSelfFd, closer := ProcThreadSelf("fd/")
|
|
| 174 |
- defer closer() |
|
| 175 |
- |
|
| 176 |
- // Open the target path. |
|
| 177 |
- fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) |
|
| 178 |
- if err != nil {
|
|
| 179 |
- return fmt.Errorf("open o_path procfd: %w", err)
|
|
| 180 |
- } |
|
| 181 |
- defer fh.Close() |
|
| 182 |
- |
|
| 183 |
- procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd()))) |
|
| 184 |
- // Double-check the path is the one we expected. |
|
| 185 |
- if realpath, err := os.Readlink(procfd); err != nil {
|
|
| 186 |
- return fmt.Errorf("procfd verification failed: %w", err)
|
|
| 187 |
- } else if realpath != path {
|
|
| 188 |
- return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
|
| 189 |
- } |
|
| 190 |
- |
|
| 191 |
- return fn(procfd) |
|
| 192 |
-} |
|
| 193 |
- |
|
| 194 |
-type ProcThreadSelfCloser func() |
|
| 195 |
- |
|
| 196 |
-var ( |
|
| 197 |
- haveProcThreadSelf bool |
|
| 198 |
- haveProcThreadSelfOnce sync.Once |
|
| 199 |
-) |
|
| 200 |
- |
|
| 201 |
-// ProcThreadSelf returns a string that is equivalent to |
|
| 202 |
-// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where |
|
| 203 |
-// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin, |
|
| 204 |
-// meaning that the passed string needs to be trusted. The caller _must_ call |
|
| 205 |
-// the returned procThreadSelfCloser function (which is runtime.UnlockOSThread) |
|
| 206 |
-// *only once* after it has finished using the returned path string. |
|
| 207 |
-func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
|
|
| 208 |
- haveProcThreadSelfOnce.Do(func() {
|
|
| 209 |
- if _, err := os.Stat("/proc/thread-self/"); err == nil {
|
|
| 210 |
- haveProcThreadSelf = true |
|
| 211 |
- } else {
|
|
| 212 |
- logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
|
|
| 213 |
- } |
|
| 214 |
- }) |
|
| 215 |
- |
|
| 216 |
- // We need to lock our thread until the caller is done with the path string |
|
| 217 |
- // because any non-atomic operation on the path (such as opening a file, |
|
| 218 |
- // then reading it) could be interrupted by the Go runtime where the |
|
| 219 |
- // underlying thread is swapped out and the original thread is killed, |
|
| 220 |
- // resulting in pull-your-hair-out-hard-to-debug issues in the caller. In |
|
| 221 |
- // addition, the pre-3.17 fallback makes everything non-atomic because the |
|
| 222 |
- // same thing could happen between unix.Gettid() and the path operations. |
|
| 223 |
- // |
|
| 224 |
- // In theory, we don't need to lock in the atomic user case when using |
|
| 225 |
- // /proc/thread-self/, but it's better to be safe than sorry (and there are |
|
| 226 |
- // only one or two truly atomic users of /proc/thread-self/). |
|
| 227 |
- runtime.LockOSThread() |
|
| 228 |
- |
|
| 229 |
- threadSelf := "/proc/thread-self/" |
|
| 230 |
- if !haveProcThreadSelf {
|
|
| 231 |
- // Pre-3.17 kernels did not have /proc/thread-self, so do it manually. |
|
| 232 |
- threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/" |
|
| 233 |
- if _, err := os.Stat(threadSelf); err != nil {
|
|
| 234 |
- // Unfortunately, this code is called from rootfs_linux.go where we |
|
| 235 |
- // are running inside the pid namespace of the container but /proc |
|
| 236 |
- // is the host's procfs. Unfortunately there is no real way to get |
|
| 237 |
- // the correct tid to use here (the kernel age means we cannot do |
|
| 238 |
- // things like set up a private fsopen("proc") -- even scanning
|
|
| 239 |
- // NSpid in all of the tasks in /proc/self/task/*/status requires |
|
| 240 |
- // Linux 4.1). |
|
| 241 |
- // |
|
| 242 |
- // So, we just have to assume that /proc/self is acceptable in this |
|
| 243 |
- // one specific case. |
|
| 244 |
- if os.Getpid() == 1 {
|
|
| 245 |
- logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
|
|
| 246 |
- } else {
|
|
| 247 |
- // This should never happen, but the fallback should work in most cases... |
|
| 248 |
- logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
|
|
| 249 |
- } |
|
| 250 |
- threadSelf = "/proc/self/" |
|
| 251 |
- } |
|
| 252 |
- } |
|
| 253 |
- return threadSelf + subpath, runtime.UnlockOSThread |
|
| 254 |
-} |
|
| 255 |
- |
|
| 256 |
-// ProcThreadSelfFd is small wrapper around ProcThreadSelf to make it easier to |
|
| 257 |
-// create a /proc/thread-self handle for given file descriptor. |
|
| 258 |
-// |
|
| 259 |
-// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
|
|
| 260 |
-// without using fmt.Sprintf to avoid unneeded overhead. |
|
| 261 |
-func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
|
|
| 262 |
- return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
|
|
| 263 |
-} |
|
| 264 |
- |
|
| 265 |
-// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), |
|
| 266 |
-// but properly handling the case where path or root are "/". |
|
| 267 |
-// |
|
| 268 |
-// NOTE: The return value only make sense if the path doesn't contain "..". |
|
| 269 |
-func IsLexicallyInRoot(root, path string) bool {
|
|
| 270 |
- if root != "/" {
|
|
| 271 |
- root += "/" |
|
| 272 |
- } |
|
| 273 |
- if path != "/" {
|
|
| 274 |
- path += "/" |
|
| 275 |
- } |
|
| 276 |
- return strings.HasPrefix(path, root) |
|
| 277 |
-} |
|
| 278 |
- |
|
| 279 |
-// MkdirAllInRootOpen attempts to make |
|
| 280 |
-// |
|
| 281 |
-// path, _ := securejoin.SecureJoin(root, unsafePath) |
|
| 282 |
-// os.MkdirAll(path, mode) |
|
| 283 |
-// os.Open(path) |
|
| 284 |
-// |
|
| 285 |
-// safer against attacks where components in the path are changed between |
|
| 286 |
-// SecureJoin returning and MkdirAll (or Open) being called. In particular, we |
|
| 287 |
-// try to detect any symlink components in the path while we are doing the |
|
| 288 |
-// MkdirAll. |
|
| 289 |
-// |
|
| 290 |
-// NOTE: If unsafePath is a subpath of root, we assume that you have already |
|
| 291 |
-// called SecureJoin and so we use the provided path verbatim without resolving |
|
| 292 |
-// any symlinks (this is done in a way that avoids symlink-exchange races). |
|
| 293 |
-// This means that the path also must not contain ".." elements, otherwise an |
|
| 294 |
-// error will occur. |
|
| 295 |
-// |
|
| 296 |
-// This uses securejoin.MkdirAllHandle under the hood, but it has special |
|
| 297 |
-// handling if unsafePath has already been scoped within the rootfs (this is |
|
| 298 |
-// needed for a lot of runc callers and fixing this would require reworking a |
|
| 299 |
-// lot of path logic). |
|
| 300 |
-func MkdirAllInRootOpen(root, unsafePath string, mode os.FileMode) (_ *os.File, Err error) {
|
|
| 301 |
- // If the path is already "within" the root, get the path relative to the |
|
| 302 |
- // root and use that as the unsafe path. This is necessary because a lot of |
|
| 303 |
- // MkdirAllInRootOpen callers have already done SecureJoin, and refactoring |
|
| 304 |
- // all of them to stop using these SecureJoin'd paths would require a fair |
|
| 305 |
- // amount of work. |
|
| 306 |
- // TODO(cyphar): Do the refactor to libpathrs once it's ready. |
|
| 307 |
- if IsLexicallyInRoot(root, unsafePath) {
|
|
| 308 |
- subPath, err := filepath.Rel(root, unsafePath) |
|
| 309 |
- if err != nil {
|
|
| 310 |
- return nil, err |
|
| 311 |
- } |
|
| 312 |
- unsafePath = subPath |
|
| 313 |
- } |
|
| 314 |
- |
|
| 315 |
- // Check for any silly mode bits. |
|
| 316 |
- if mode&^0o7777 != 0 {
|
|
| 317 |
- return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
|
|
| 318 |
- } |
|
| 319 |
- // Linux (and thus os.MkdirAll) silently ignores the suid and sgid bits if |
|
| 320 |
- // passed. While it would make sense to return an error in that case (since |
|
| 321 |
- // the user has asked for a mode that won't be applied), for compatibility |
|
| 322 |
- // reasons we have to ignore these bits. |
|
| 323 |
- if ignoredBits := mode &^ 0o1777; ignoredBits != 0 {
|
|
| 324 |
- logrus.Warnf("MkdirAll called with no-op mode bits that are ignored by Linux: 0o%.3o", ignoredBits)
|
|
| 325 |
- mode &= 0o1777 |
|
| 326 |
- } |
|
| 327 |
- |
|
| 328 |
- rootDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) |
|
| 329 |
- if err != nil {
|
|
| 330 |
- return nil, fmt.Errorf("open root handle: %w", err)
|
|
| 331 |
- } |
|
| 332 |
- defer rootDir.Close() |
|
| 333 |
- |
|
| 334 |
- return securejoin.MkdirAllHandle(rootDir, unsafePath, mode) |
|
| 335 |
-} |
|
| 336 |
- |
|
| 337 |
-// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the |
|
| 338 |
-// returned handle, for callers that don't need to use it. |
|
| 339 |
-func MkdirAllInRoot(root, unsafePath string, mode os.FileMode) error {
|
|
| 340 |
- f, err := MkdirAllInRootOpen(root, unsafePath, mode) |
|
| 341 |
- if err == nil {
|
|
| 342 |
- _ = f.Close() |
|
| 343 |
- } |
|
| 344 |
- return err |
|
| 345 |
-} |
|
| 346 |
- |
|
| 347 |
-// Openat is a Go-friendly openat(2) wrapper. |
|
| 348 |
-func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
|
|
| 349 |
- dirFd := unix.AT_FDCWD |
|
| 350 |
- if dir != nil {
|
|
| 351 |
- dirFd = int(dir.Fd()) |
|
| 352 |
- } |
|
| 353 |
- flags |= unix.O_CLOEXEC |
|
| 354 |
- |
|
| 355 |
- fd, err := unix.Openat(dirFd, path, flags, mode) |
|
| 356 |
- if err != nil {
|
|
| 357 |
- return nil, &os.PathError{Op: "openat", Path: path, Err: err}
|
|
| 358 |
- } |
|
| 359 |
- return os.NewFile(uintptr(fd), dir.Name()+"/"+path), nil |
|
| 360 |
-} |
| ... | ... |
@@ -1041,6 +1041,10 @@ github.com/morikuni/aec |
| 1041 | 1041 |
# github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 |
| 1042 | 1042 |
## explicit |
| 1043 | 1043 |
github.com/munnerz/goautoneg |
| 1044 |
+# github.com/opencontainers/cgroups v0.0.1 |
|
| 1045 |
+## explicit; go 1.23.0 |
|
| 1046 |
+github.com/opencontainers/cgroups |
|
| 1047 |
+github.com/opencontainers/cgroups/devices/config |
|
| 1044 | 1048 |
# github.com/opencontainers/go-digest v1.0.0 |
| 1045 | 1049 |
## explicit; go 1.13 |
| 1046 | 1050 |
github.com/opencontainers/go-digest |
| ... | ... |
@@ -1050,12 +1054,6 @@ github.com/opencontainers/go-digest/digestset |
| 1050 | 1050 |
github.com/opencontainers/image-spec/identity |
| 1051 | 1051 |
github.com/opencontainers/image-spec/specs-go |
| 1052 | 1052 |
github.com/opencontainers/image-spec/specs-go/v1 |
| 1053 |
-# github.com/opencontainers/runc v1.2.6 |
|
| 1054 |
-## explicit; go 1.22 |
|
| 1055 |
-github.com/opencontainers/runc/libcontainer/cgroups |
|
| 1056 |
-github.com/opencontainers/runc/libcontainer/configs |
|
| 1057 |
-github.com/opencontainers/runc/libcontainer/devices |
|
| 1058 |
-github.com/opencontainers/runc/libcontainer/utils |
|
| 1059 | 1053 |
# github.com/opencontainers/runtime-spec v1.2.0 |
| 1060 | 1054 |
## explicit |
| 1061 | 1055 |
github.com/opencontainers/runtime-spec/specs-go |