cgroup2: implement `docker info`
| ... | ... |
@@ -4047,6 +4047,13 @@ definitions: |
| 4047 | 4047 |
enum: ["cgroupfs", "systemd", "none"] |
| 4048 | 4048 |
default: "cgroupfs" |
| 4049 | 4049 |
example: "cgroupfs" |
| 4050 |
+ CgroupVersion: |
|
| 4051 |
+ description: | |
|
| 4052 |
+ The version of the cgroup. |
|
| 4053 |
+ type: "string" |
|
| 4054 |
+ enum: ["1", "2"] |
|
| 4055 |
+ default: "1" |
|
| 4056 |
+ example: "1" |
|
| 4050 | 4057 |
NEventsListener: |
| 4051 | 4058 |
description: "Number of event listeners subscribed." |
| 4052 | 4059 |
type: "integer" |
| ... | ... |
@@ -44,6 +44,7 @@ import ( |
| 44 | 44 |
"github.com/docker/docker/pkg/pidfile" |
| 45 | 45 |
"github.com/docker/docker/pkg/plugingetter" |
| 46 | 46 |
"github.com/docker/docker/pkg/signal" |
| 47 |
+ "github.com/docker/docker/pkg/sysinfo" |
|
| 47 | 48 |
"github.com/docker/docker/pkg/system" |
| 48 | 49 |
"github.com/docker/docker/plugin" |
| 49 | 50 |
"github.com/docker/docker/rootless" |
| ... | ... |
@@ -452,7 +453,11 @@ func warnOnDeprecatedConfigOptions(config *config.Config) {
|
| 452 | 452 |
} |
| 453 | 453 |
|
| 454 | 454 |
func initRouter(opts routerOptions) {
|
| 455 |
- decoder := runconfig.ContainerDecoder{}
|
|
| 455 |
+ decoder := runconfig.ContainerDecoder{
|
|
| 456 |
+ GetSysInfo: func() *sysinfo.SysInfo {
|
|
| 457 |
+ return opts.daemon.RawSysInfo(true) |
|
| 458 |
+ }, |
|
| 459 |
+ } |
|
| 456 | 460 |
|
| 457 | 461 |
routers := []router.Router{
|
| 458 | 462 |
// we need to add the checkpoint router before the container router or the DELETE gets masked |
| ... | ... |
@@ -42,6 +42,7 @@ import ( |
| 42 | 42 |
"github.com/docker/docker/errdefs" |
| 43 | 43 |
bkconfig "github.com/moby/buildkit/cmd/buildkitd/config" |
| 44 | 44 |
"github.com/moby/buildkit/util/resolver" |
| 45 |
+ rsystem "github.com/opencontainers/runc/libcontainer/system" |
|
| 45 | 46 |
"github.com/sirupsen/logrus" |
| 46 | 47 |
|
| 47 | 48 |
// register graph drivers |
| ... | ... |
@@ -56,7 +57,6 @@ import ( |
| 56 | 56 |
"github.com/docker/docker/pkg/idtools" |
| 57 | 57 |
"github.com/docker/docker/pkg/locker" |
| 58 | 58 |
"github.com/docker/docker/pkg/plugingetter" |
| 59 |
- "github.com/docker/docker/pkg/sysinfo" |
|
| 60 | 59 |
"github.com/docker/docker/pkg/system" |
| 61 | 60 |
"github.com/docker/docker/pkg/truncindex" |
| 62 | 61 |
"github.com/docker/docker/plugin" |
| ... | ... |
@@ -1026,10 +1026,10 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S |
| 1026 | 1026 |
return nil, err |
| 1027 | 1027 |
} |
| 1028 | 1028 |
|
| 1029 |
- sysInfo := sysinfo.New(false) |
|
| 1029 |
+ sysInfo := d.RawSysInfo(false) |
|
| 1030 | 1030 |
// Check if Devices cgroup is mounted, it is hard requirement for container security, |
| 1031 | 1031 |
// on Linux. |
| 1032 |
- if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled {
|
|
| 1032 |
+ if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled && !rsystem.RunningInUserNS() {
|
|
| 1033 | 1033 |
return nil, errors.New("Devices cgroup isn't mounted")
|
| 1034 | 1034 |
} |
| 1035 | 1035 |
|
| ... | ... |
@@ -644,7 +644,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes. |
| 644 | 644 |
if hostConfig == nil {
|
| 645 | 645 |
return nil, nil |
| 646 | 646 |
} |
| 647 |
- sysInfo := sysinfo.New(true) |
|
| 647 |
+ sysInfo := daemon.RawSysInfo(true) |
|
| 648 | 648 |
|
| 649 | 649 |
w, err := verifyPlatformContainerResources(&hostConfig.Resources, sysInfo, update) |
| 650 | 650 |
|
| ... | ... |
@@ -1745,7 +1745,7 @@ func (daemon *Daemon) initCgroupsPath(path string) error {
|
| 1745 | 1745 |
} |
| 1746 | 1746 |
|
| 1747 | 1747 |
path = filepath.Join(mnt, root, path) |
| 1748 |
- sysInfo := sysinfo.New(true) |
|
| 1748 |
+ sysInfo := daemon.RawSysInfo(true) |
|
| 1749 | 1749 |
if err := maybeCreateCPURealTimeFile(sysInfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
|
| 1750 | 1750 |
return err |
| 1751 | 1751 |
} |
| ... | ... |
@@ -1779,3 +1779,16 @@ func (daemon *Daemon) setupSeccompProfile() error {
|
| 1779 | 1779 |
func (daemon *Daemon) useShimV2() bool {
|
| 1780 | 1780 |
return cgroups.IsCgroup2UnifiedMode() |
| 1781 | 1781 |
} |
| 1782 |
+ |
|
| 1783 |
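+// RawSysInfo returns a *sysinfo.SysInfo. With the systemd cgroup driver and ROOTLESSKIT_PARENT_EUID set, cgroup v2 controllers are inspected under /user.slice/user-<EUID>.slice. |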
|
| 1784 |
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
|
|
| 1785 |
+ var opts []sysinfo.Opt |
|
| 1786 |
+ if daemon.getCgroupDriver() == cgroupSystemdDriver {
|
|
| 1787 |
+ rootlesskitParentEUID := os.Getenv("ROOTLESSKIT_PARENT_EUID")
|
|
| 1788 |
+ if rootlesskitParentEUID != "" {
|
|
| 1789 |
+ groupPath := fmt.Sprintf("/user.slice/user-%s.slice", rootlesskitParentEUID)
|
|
| 1790 |
+ opts = append(opts, sysinfo.WithCgroup2GroupPath(groupPath)) |
|
| 1791 |
+ } |
|
| 1792 |
+ } |
|
| 1793 |
+ return sysinfo.New(quiet, opts...) |
|
| 1794 |
+} |
| ... | ... |
@@ -1,9 +1,18 @@ |
| 1 | 1 |
// +build !linux,!freebsd,!windows |
| 2 | 2 |
|
| 3 | 3 |
package daemon // import "github.com/docker/docker/daemon" |
| 4 |
-import "github.com/docker/docker/daemon/config" |
|
| 4 |
+ |
|
| 5 |
+import ( |
|
| 6 |
+ "github.com/docker/docker/daemon/config" |
|
| 7 |
+ "github.com/docker/docker/pkg/sysinfo" |
|
| 8 |
+) |
|
| 5 | 9 |
|
| 6 | 10 |
const platformSupported = false |
| 7 | 11 |
|
| 8 | 12 |
func setupResolvConf(config *config.Config) {
|
| 9 | 13 |
} |
| 14 |
+ |
|
| 15 |
+// RawSysInfo returns a *sysinfo.SysInfo obtained from sysinfo.New(quiet). |
|
| 16 |
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
|
|
| 17 |
+ return sysinfo.New(quiet) |
|
| 18 |
+} |
| ... | ... |
@@ -657,3 +657,8 @@ func setupResolvConf(config *config.Config) {
|
| 657 | 657 |
func (daemon *Daemon) useShimV2() bool {
|
| 658 | 658 |
return true |
| 659 | 659 |
} |
| 660 |
+ |
|
| 661 |
+// RawSysInfo returns a *sysinfo.SysInfo obtained from sysinfo.New(quiet). |
|
| 662 |
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
|
|
| 663 |
+ return sysinfo.New(quiet) |
|
| 664 |
+} |
| ... | ... |
@@ -28,7 +28,7 @@ import ( |
| 28 | 28 |
func (daemon *Daemon) SystemInfo() *types.Info {
|
| 29 | 29 |
defer metrics.StartTimer(hostInfoFunctions.WithValues("system_info"))()
|
| 30 | 30 |
|
| 31 |
- sysInfo := sysinfo.New(true) |
|
| 31 |
+ sysInfo := daemon.RawSysInfo(true) |
|
| 32 | 32 |
cRunning, cPaused, cStopped := stateCtr.get() |
| 33 | 33 |
|
| 34 | 34 |
v := &types.Info{
|
| ... | ... |
@@ -47,7 +47,6 @@ func (daemon *Daemon) SystemInfo() *types.Info {
|
| 47 | 47 |
NGoroutines: runtime.NumGoroutine(), |
| 48 | 48 |
SystemTime: time.Now().Format(time.RFC3339Nano), |
| 49 | 49 |
LoggingDriver: daemon.defaultLogConfig.Type, |
| 50 |
- CgroupDriver: daemon.getCgroupDriver(), |
|
| 51 | 50 |
NEventsListener: daemon.EventsService.SubscribersCount(), |
| 52 | 51 |
KernelVersion: kernelVersion(), |
| 53 | 52 |
OperatingSystem: operatingSystem(), |
| ... | ... |
@@ -19,6 +19,12 @@ import ( |
| 19 | 19 |
|
| 20 | 20 |
// fillPlatformInfo fills the platform related info. |
| 21 | 21 |
func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo) {
|
| 22 |
+ v.CgroupDriver = daemon.getCgroupDriver() |
|
| 23 |
+ v.CgroupVersion = "1" |
|
| 24 |
+ if sysInfo.CgroupUnified {
|
|
| 25 |
+ v.CgroupVersion = "2" |
|
| 26 |
+ } |
|
| 27 |
+ |
|
| 22 | 28 |
v.MemoryLimit = sysInfo.MemoryLimit |
| 23 | 29 |
v.SwapLimit = sysInfo.SwapLimit |
| 24 | 30 |
v.KernelMemory = sysInfo.KernelMemory |
| ... | ... |
@@ -81,32 +87,43 @@ func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo) |
| 81 | 81 |
v.InitCommit.ID = "N/A" |
| 82 | 82 |
} |
| 83 | 83 |
|
| 84 |
- if !v.MemoryLimit {
|
|
| 85 |
- v.Warnings = append(v.Warnings, "WARNING: No memory limit support") |
|
| 86 |
- } |
|
| 87 |
- if !v.SwapLimit {
|
|
| 88 |
- v.Warnings = append(v.Warnings, "WARNING: No swap limit support") |
|
| 89 |
- } |
|
| 90 |
- if !v.KernelMemory {
|
|
| 91 |
- v.Warnings = append(v.Warnings, "WARNING: No kernel memory limit support") |
|
| 92 |
- } |
|
| 93 |
- if !v.KernelMemoryTCP {
|
|
| 94 |
- v.Warnings = append(v.Warnings, "WARNING: No kernel memory TCP limit support") |
|
| 95 |
- } |
|
| 96 |
- if !v.OomKillDisable {
|
|
| 97 |
- v.Warnings = append(v.Warnings, "WARNING: No oom kill disable support") |
|
| 98 |
- } |
|
| 99 |
- if !v.CPUCfsQuota {
|
|
| 100 |
- v.Warnings = append(v.Warnings, "WARNING: No cpu cfs quota support") |
|
| 101 |
- } |
|
| 102 |
- if !v.CPUCfsPeriod {
|
|
| 103 |
- v.Warnings = append(v.Warnings, "WARNING: No cpu cfs period support") |
|
| 104 |
- } |
|
| 105 |
- if !v.CPUShares {
|
|
| 106 |
- v.Warnings = append(v.Warnings, "WARNING: No cpu shares support") |
|
| 107 |
- } |
|
| 108 |
- if !v.CPUSet {
|
|
| 109 |
- v.Warnings = append(v.Warnings, "WARNING: No cpuset support") |
|
| 84 |
+ if v.CgroupDriver == cgroupNoneDriver {
|
|
| 85 |
+ if v.CgroupVersion == "2" {
|
|
| 86 |
+ v.Warnings = append(v.Warnings, "WARNING: Running in rootless-mode without cgroup. To enable cgroup in rootless-mode, you need to set exec-opt \"native.cgroupdriver=systemd\".") |
|
| 87 |
+ } else {
|
|
| 88 |
+ v.Warnings = append(v.Warnings, "WARNING: Running in rootless-mode without cgroup. To enable cgroup in rootless-mode, you need to boot the system in cgroup v2 mode and set exec-opt \"native.cgroupdriver=systemd\".") |
|
| 89 |
+ } |
|
| 90 |
+ } else {
|
|
| 91 |
+ if !v.MemoryLimit {
|
|
| 92 |
+ v.Warnings = append(v.Warnings, "WARNING: No memory limit support") |
|
| 93 |
+ } |
|
| 94 |
+ if !v.SwapLimit {
|
|
| 95 |
+ v.Warnings = append(v.Warnings, "WARNING: No swap limit support") |
|
| 96 |
+ } |
|
| 97 |
+ if !v.KernelMemory {
|
|
| 98 |
+ v.Warnings = append(v.Warnings, "WARNING: No kernel memory limit support") |
|
| 99 |
+ } |
|
| 100 |
+ if !v.KernelMemoryTCP {
|
|
| 101 |
+ v.Warnings = append(v.Warnings, "WARNING: No kernel memory TCP limit support") |
|
| 102 |
+ } |
|
| 103 |
+ if !v.OomKillDisable {
|
|
| 104 |
+ v.Warnings = append(v.Warnings, "WARNING: No oom kill disable support") |
|
| 105 |
+ } |
|
| 106 |
+ if !v.CPUCfsQuota {
|
|
| 107 |
+ v.Warnings = append(v.Warnings, "WARNING: No cpu cfs quota support") |
|
| 108 |
+ } |
|
| 109 |
+ if !v.CPUCfsPeriod {
|
|
| 110 |
+ v.Warnings = append(v.Warnings, "WARNING: No cpu cfs period support") |
|
| 111 |
+ } |
|
| 112 |
+ if !v.CPUShares {
|
|
| 113 |
+ v.Warnings = append(v.Warnings, "WARNING: No cpu shares support") |
|
| 114 |
+ } |
|
| 115 |
+ if !v.CPUSet {
|
|
| 116 |
+ v.Warnings = append(v.Warnings, "WARNING: No cpuset support") |
|
| 117 |
+ } |
|
| 118 |
+ if v.CgroupVersion == "2" {
|
|
| 119 |
+ v.Warnings = append(v.Warnings, "WARNING: Support for cgroup v2 is experimental") |
|
| 120 |
+ } |
|
| 110 | 121 |
} |
| 111 | 122 |
if !v.IPv4Forwarding {
|
| 112 | 123 |
v.Warnings = append(v.Warnings, "WARNING: IPv4 forwarding is disabled") |
| ... | ... |
@@ -17,6 +17,7 @@ keywords: "API, Docker, rcli, REST, documentation" |
| 17 | 17 |
|
| 18 | 18 |
[Docker Engine API v1.41](https://docs.docker.com/engine/api/v1.41/) documentation |
| 19 | 19 |
|
| 20 |
+* `GET /info` now returns a `CgroupVersion` field, containing the cgroup version. |
|
| 20 | 21 |
* `POST /services/create` and `POST /services/{id}/update` now supports `BindOptions.NonRecursive`.
|
| 21 | 22 |
* The `ClusterStore` and `ClusterAdvertise` fields in `GET /info` are deprecated |
| 22 | 23 |
and are now omitted if they contain an empty value. This change is not versioned, |
| 23 | 24 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,151 @@ |
| 0 |
+package sysinfo // import "github.com/docker/docker/pkg/sysinfo" |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "io/ioutil" |
|
| 4 |
+ "path" |
|
| 5 |
+ "strings" |
|
| 6 |
+ |
|
| 7 |
+ cgroupsV2 "github.com/containerd/cgroups/v2" |
|
| 8 |
+ rsystem "github.com/opencontainers/runc/libcontainer/system" |
|
| 9 |
+ "github.com/sirupsen/logrus" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+type infoCollectorV2 func(info *SysInfo, controllers map[string]struct{}, dirPath string) (warnings []string)
|
|
| 13 |
+ |
|
| 14 |
+func newV2(quiet bool, opts *opts) *SysInfo {
|
|
| 15 |
+ var warnings []string |
|
| 16 |
+ sysInfo := &SysInfo{
|
|
| 17 |
+ CgroupUnified: true, |
|
| 18 |
+ } |
|
| 19 |
+ g := opts.cg2GroupPath |
|
| 20 |
+ if g == "" {
|
|
| 21 |
+ g = "/" |
|
| 22 |
+ } |
|
| 23 |
+ m, err := cgroupsV2.LoadManager("/sys/fs/cgroup", g)
|
|
| 24 |
+ if err != nil {
|
|
| 25 |
+ logrus.Warn(err) |
|
| 26 |
+ } else {
|
|
| 27 |
+ controllersM := make(map[string]struct{})
|
|
| 28 |
+ controllers, err := m.Controllers() |
|
| 29 |
+ if err != nil {
|
|
| 30 |
+ logrus.Warn(err) |
|
| 31 |
+ } |
|
| 32 |
+ for _, c := range controllers {
|
|
| 33 |
+ controllersM[c] = struct{}{}
|
|
| 34 |
+ } |
|
| 35 |
+ opsV2 := []infoCollectorV2{
|
|
| 36 |
+ applyMemoryCgroupInfoV2, |
|
| 37 |
+ applyCPUCgroupInfoV2, |
|
| 38 |
+ applyIOCgroupInfoV2, |
|
| 39 |
+ applyCPUSetCgroupInfoV2, |
|
| 40 |
+ applyPIDSCgroupInfoV2, |
|
| 41 |
+ applyDevicesCgroupInfoV2, |
|
| 42 |
+ } |
|
| 43 |
+ dirPath := path.Join("/sys/fs/cgroup", path.Clean(g))
|
|
| 44 |
+ for _, o := range opsV2 {
|
|
| 45 |
+ w := o(sysInfo, controllersM, dirPath) |
|
| 46 |
+ warnings = append(warnings, w...) |
|
| 47 |
+ } |
|
| 48 |
+ } |
|
| 49 |
+ |
|
| 50 |
+ ops := []infoCollector{
|
|
| 51 |
+ applyNetworkingInfo, |
|
| 52 |
+ applyAppArmorInfo, |
|
| 53 |
+ applySeccompInfo, |
|
| 54 |
+ applyCgroupNsInfo, |
|
| 55 |
+ } |
|
| 56 |
+ for _, o := range ops {
|
|
| 57 |
+ w := o(sysInfo, nil) |
|
| 58 |
+ warnings = append(warnings, w...) |
|
| 59 |
+ } |
|
| 60 |
+ if !quiet {
|
|
| 61 |
+ for _, w := range warnings {
|
|
| 62 |
+ logrus.Warn(w) |
|
| 63 |
+ } |
|
| 64 |
+ } |
|
| 65 |
+ return sysInfo |
|
| 66 |
+} |
|
| 67 |
+ |
|
| 68 |
+func applyMemoryCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
|
|
| 69 |
+ var warnings []string |
|
| 70 |
+ if _, ok := controllers["memory"]; !ok {
|
|
| 71 |
+ warnings = append(warnings, "Unable to find memory controller") |
|
| 72 |
+ return warnings |
|
| 73 |
+ } |
|
| 74 |
+ |
|
| 75 |
+ info.MemoryLimit = true |
|
| 76 |
+ info.SwapLimit = true |
|
| 77 |
+ info.MemoryReservation = true |
|
| 78 |
+ info.OomKillDisable = false |
|
| 79 |
+ info.MemorySwappiness = false |
|
| 80 |
+ info.KernelMemory = false |
|
| 81 |
+ info.KernelMemoryTCP = false |
|
| 82 |
+ return warnings |
|
| 83 |
+} |
|
| 84 |
+ |
|
| 85 |
+func applyCPUCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
|
|
| 86 |
+ var warnings []string |
|
| 87 |
+ if _, ok := controllers["cpu"]; !ok {
|
|
| 88 |
+ warnings = append(warnings, "Unable to find cpu controller") |
|
| 89 |
+ return warnings |
|
| 90 |
+ } |
|
| 91 |
+ info.CPUShares = true |
|
| 92 |
+ info.CPUCfsPeriod = true |
|
| 93 |
+ info.CPUCfsQuota = true |
|
| 94 |
+ info.CPURealtimePeriod = false |
|
| 95 |
+ info.CPURealtimeRuntime = false |
|
| 96 |
+ return warnings |
|
| 97 |
+} |
|
| 98 |
+ |
|
| 99 |
+func applyIOCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
|
|
| 100 |
+ var warnings []string |
|
| 101 |
+ if _, ok := controllers["io"]; !ok {
|
|
| 102 |
+ warnings = append(warnings, "Unable to find io controller") |
|
| 103 |
+ return warnings |
|
| 104 |
+ } |
|
| 105 |
+ |
|
| 106 |
+ info.BlkioWeight = true |
|
| 107 |
+ info.BlkioWeightDevice = true |
|
| 108 |
+ info.BlkioReadBpsDevice = true |
|
| 109 |
+ info.BlkioWriteBpsDevice = true |
|
| 110 |
+ info.BlkioReadIOpsDevice = true |
|
| 111 |
+ info.BlkioWriteIOpsDevice = true |
|
| 112 |
+ return warnings |
|
| 113 |
+} |
|
| 114 |
+ |
|
| 115 |
+func applyCPUSetCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, dirPath string) []string {
|
|
| 116 |
+ var warnings []string |
|
| 117 |
+ if _, ok := controllers["cpuset"]; !ok {
|
|
| 118 |
+ warnings = append(warnings, "Unable to find cpuset controller") |
|
| 119 |
+ return warnings |
|
| 120 |
+ } |
|
| 121 |
+ info.Cpuset = true |
|
| 122 |
+ |
|
| 123 |
+ cpus, err := ioutil.ReadFile(path.Join(dirPath, "cpuset.cpus.effective")) |
|
| 124 |
+ if err != nil {
|
|
| 125 |
+ return warnings |
|
| 126 |
+ } |
|
| 127 |
+ info.Cpus = strings.TrimSpace(string(cpus)) |
|
| 128 |
+ |
|
| 129 |
+ mems, err := ioutil.ReadFile(path.Join(dirPath, "cpuset.mems.effective")) |
|
| 130 |
+ if err != nil {
|
|
| 131 |
+ return warnings |
|
| 132 |
+ } |
|
| 133 |
+ info.Mems = strings.TrimSpace(string(mems)) |
|
| 134 |
+ return warnings |
|
| 135 |
+} |
|
| 136 |
+ |
|
| 137 |
+func applyPIDSCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
|
|
| 138 |
+ var warnings []string |
|
| 139 |
+ if _, ok := controllers["pids"]; !ok {
|
|
| 140 |
+ warnings = append(warnings, "Unable to find pids controller") |
|
| 141 |
+ return warnings |
|
| 142 |
+ } |
|
| 143 |
+ info.PidsLimit = true |
|
| 144 |
+ return warnings |
|
| 145 |
+} |
|
| 146 |
+ |
|
| 147 |
+func applyDevicesCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
|
|
| 148 |
+ info.CgroupDevicesEnabled = !rsystem.RunningInUserNS() |
|
| 149 |
+ return nil |
|
| 150 |
+} |
| ... | ... |
@@ -28,10 +28,37 @@ func findCgroupMountpoints() (map[string]string, error) {
|
| 28 | 28 |
|
| 29 | 29 |
type infoCollector func(info *SysInfo, cgMounts map[string]string) (warnings []string) |
| 30 | 30 |
|
| 31 |
+type opts struct {
|
|
| 32 |
+ cg2GroupPath string |
|
| 33 |
+} |
|
| 34 |
+ |
|
| 35 |
+// Opt is a functional option for New(). |
|
| 36 |
+type Opt func(*opts) |
|
| 37 |
+ |
|
| 38 |
+// WithCgroup2GroupPath specifies the cgroup v2 group path to inspect availability |
|
| 39 |
+// of the controllers. |
|
| 40 |
+// |
|
| 41 |
+// WithCgroup2GroupPath is expected to be used for rootless mode with systemd driver. |
|
| 42 |
+// |
|
| 43 |
+// e.g. g = "/user.slice/user-1000.slice/user@1000.service" |
|
| 44 |
+func WithCgroup2GroupPath(g string) Opt {
|
|
| 45 |
+ return func(o *opts) {
|
|
| 46 |
+ o.cg2GroupPath = path.Clean(g) |
|
| 47 |
+ } |
|
| 48 |
+} |
|
| 49 |
+ |
|
| 31 | 50 |
// New returns a new SysInfo, using the filesystem to detect which features |
| 32 | 51 |
// the kernel supports. If `quiet` is `false` warnings are printed in logs |
| 33 | 52 |
// whenever an error occurs or misconfigurations are present. |
| 34 |
-func New(quiet bool) *SysInfo {
|
|
| 53 |
+func New(quiet bool, options ...Opt) *SysInfo {
|
|
| 54 |
+ var opts opts |
|
| 55 |
+ for _, o := range options {
|
|
| 56 |
+ o(&opts) |
|
| 57 |
+ } |
|
| 58 |
+ if cgroups.IsCgroup2UnifiedMode() {
|
|
| 59 |
+ return newV2(quiet, &opts) |
|
| 60 |
+ } |
|
| 61 |
+ |
|
| 35 | 62 |
var ops []infoCollector |
| 36 | 63 |
var warnings []string |
| 37 | 64 |
sysInfo := &SysInfo{}
|
| ... | ... |
@@ -60,9 +87,6 @@ func New(quiet bool) *SysInfo {
|
| 60 | 60 |
w := o(sysInfo, cgMounts) |
| 61 | 61 |
warnings = append(warnings, w...) |
| 62 | 62 |
} |
| 63 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 64 |
- warnings = append(warnings, "Your system is running cgroup v2 (unsupported)") |
|
| 65 |
- } |
|
| 66 | 63 |
if !quiet {
|
| 67 | 64 |
for _, w := range warnings {
|
| 68 | 65 |
logrus.Warn(w) |
| ... | ... |
@@ -73,15 +97,6 @@ func New(quiet bool) *SysInfo {
|
| 73 | 73 |
|
| 74 | 74 |
// applyMemoryCgroupInfo reads the memory information from the memory cgroup mount point. |
| 75 | 75 |
func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 76 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 77 |
- // TODO: check cgroup2 info correctly |
|
| 78 |
- info.MemoryLimit = true |
|
| 79 |
- info.SwapLimit = true |
|
| 80 |
- info.MemoryReservation = true |
|
| 81 |
- info.OomKillDisable = true |
|
| 82 |
- info.MemorySwappiness = true |
|
| 83 |
- return nil |
|
| 84 |
- } |
|
| 85 | 76 |
var warnings []string |
| 86 | 77 |
mountPoint, ok := cgMounts["memory"] |
| 87 | 78 |
if !ok {
|
| ... | ... |
@@ -120,15 +135,6 @@ func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 120 | 120 |
|
| 121 | 121 |
// applyCPUCgroupInfo reads the cpu information from the cpu cgroup mount point. |
| 122 | 122 |
func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 123 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 124 |
- // TODO: check cgroup2 info correctly |
|
| 125 |
- info.CPUShares = true |
|
| 126 |
- info.CPUCfsPeriod = true |
|
| 127 |
- info.CPUCfsQuota = true |
|
| 128 |
- info.CPURealtimePeriod = true |
|
| 129 |
- info.CPURealtimeRuntime = true |
|
| 130 |
- return nil |
|
| 131 |
- } |
|
| 132 | 123 |
var warnings []string |
| 133 | 124 |
mountPoint, ok := cgMounts["cpu"] |
| 134 | 125 |
if !ok {
|
| ... | ... |
@@ -166,15 +172,6 @@ func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 166 | 166 |
|
| 167 | 167 |
// applyBlkioCgroupInfo reads the blkio information from the blkio cgroup mount point. |
| 168 | 168 |
func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 169 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 170 |
- // TODO: check cgroup2 info correctly |
|
| 171 |
- info.BlkioWeight = true |
|
| 172 |
- info.BlkioReadBpsDevice = true |
|
| 173 |
- info.BlkioWriteBpsDevice = true |
|
| 174 |
- info.BlkioReadIOpsDevice = true |
|
| 175 |
- info.BlkioWriteIOpsDevice = true |
|
| 176 |
- return nil |
|
| 177 |
- } |
|
| 178 | 169 |
var warnings []string |
| 179 | 170 |
mountPoint, ok := cgMounts["blkio"] |
| 180 | 171 |
if !ok {
|
| ... | ... |
@@ -216,11 +213,6 @@ func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 216 | 216 |
|
| 217 | 217 |
// applyCPUSetCgroupInfo reads the cpuset information from the cpuset cgroup mount point. |
| 218 | 218 |
func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 219 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 220 |
- // TODO: check cgroup2 info correctly |
|
| 221 |
- info.Cpuset = true |
|
| 222 |
- return nil |
|
| 223 |
- } |
|
| 224 | 219 |
var warnings []string |
| 225 | 220 |
mountPoint, ok := cgMounts["cpuset"] |
| 226 | 221 |
if !ok {
|
| ... | ... |
@@ -248,11 +240,6 @@ func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 248 | 248 |
|
| 249 | 249 |
// applyPIDSCgroupInfo reads the pids information from the pids cgroup mount point. |
| 250 | 250 |
func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
|
| 251 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 252 |
- // TODO: check cgroup2 info correctly |
|
| 253 |
- info.PidsLimit = true |
|
| 254 |
- return nil |
|
| 255 |
- } |
|
| 256 | 251 |
var warnings []string |
| 257 | 252 |
_, err := cgroups.FindCgroupMountpoint("", "pids")
|
| 258 | 253 |
if err != nil {
|
| ... | ... |
@@ -265,11 +252,6 @@ func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
|
| 265 | 265 |
|
| 266 | 266 |
// applyDevicesCgroupInfo reads the devices information from the devices cgroup mount point. |
| 267 | 267 |
func applyDevicesCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
|
| 268 |
- if cgroups.IsCgroup2UnifiedMode() {
|
|
| 269 |
- // TODO: check cgroup2 info correctly |
|
| 270 |
- info.CgroupDevicesEnabled = true |
|
| 271 |
- return nil |
|
| 272 |
- } |
|
| 273 | 268 |
var warnings []string |
| 274 | 269 |
_, ok := cgMounts["devices"] |
| 275 | 270 |
info.CgroupDevicesEnabled = ok |
| ... | ... |
@@ -2,8 +2,13 @@ |
| 2 | 2 |
|
| 3 | 3 |
package sysinfo // import "github.com/docker/docker/pkg/sysinfo" |
| 4 | 4 |
|
| 5 |
+type opts struct{}
|
|
| 6 |
+ |
|
| 7 |
+// Opt is a functional option for New(). |
|
| 8 |
+type Opt func(*opts) |
|
| 9 |
+ |
|
| 5 | 10 |
// New returns an empty SysInfo for non linux for now. |
| 6 |
-func New(quiet bool) *SysInfo {
|
|
| 11 |
+func New(quiet bool, options ...Opt) *SysInfo {
|
|
| 7 | 12 |
sysInfo := &SysInfo{}
|
| 8 | 13 |
return sysInfo |
| 9 | 14 |
} |
| ... | ... |
@@ -1,7 +1,12 @@ |
| 1 | 1 |
package sysinfo // import "github.com/docker/docker/pkg/sysinfo" |
| 2 | 2 |
|
| 3 |
+type opts struct{}
|
|
| 4 |
+ |
|
| 5 |
+// Opt is a functional option for New(). |
|
| 6 |
+type Opt func(*opts) |
|
| 7 |
+ |
|
| 3 | 8 |
// New returns an empty SysInfo for windows for now. |
| 4 |
-func New(quiet bool) *SysInfo {
|
|
| 9 |
+func New(quiet bool, options ...Opt) *SysInfo {
|
|
| 5 | 10 |
sysInfo := &SysInfo{}
|
| 6 | 11 |
return sysInfo |
| 7 | 12 |
} |
| ... | ... |
@@ -11,11 +11,20 @@ import ( |
| 11 | 11 |
|
| 12 | 12 |
// ContainerDecoder implements httputils.ContainerDecoder |
| 13 | 13 |
// calling DecodeContainerConfig. |
| 14 |
-type ContainerDecoder struct{}
|
|
| 14 |
+type ContainerDecoder struct {
|
|
| 15 |
+ GetSysInfo func() *sysinfo.SysInfo |
|
| 16 |
+} |
|
| 15 | 17 |
|
| 16 | 18 |
// DecodeConfig makes ContainerDecoder to implement httputils.ContainerDecoder |
| 17 | 19 |
func (r ContainerDecoder) DecodeConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
|
| 18 |
- return decodeContainerConfig(src) |
|
| 20 |
+ var si *sysinfo.SysInfo |
|
| 21 |
+ if r.GetSysInfo != nil {
|
|
| 22 |
+ si = r.GetSysInfo() |
|
| 23 |
+ } else {
|
|
| 24 |
+ si = sysinfo.New(true) |
|
| 25 |
+ } |
|
| 26 |
+ |
|
| 27 |
+ return decodeContainerConfig(src, si) |
|
| 19 | 28 |
} |
| 20 | 29 |
|
| 21 | 30 |
// DecodeHostConfig makes ContainerDecoder implement httputils.ContainerDecoder |
| ... | ... |
@@ -27,7 +36,7 @@ func (r ContainerDecoder) DecodeHostConfig(src io.Reader) (*container.HostConfig |
| 27 | 27 |
// struct and returns both a Config and a HostConfig struct |
| 28 | 28 |
// Be aware that this function does not check whether the resulting structs are nil, |
| 29 | 29 |
// it's your business to do so |
| 30 |
-func decodeContainerConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
|
|
| 30 |
+func decodeContainerConfig(src io.Reader, si *sysinfo.SysInfo) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
|
|
| 31 | 31 |
var w ContainerConfigWrapper |
| 32 | 32 |
|
| 33 | 33 |
decoder := json.NewDecoder(src) |
| ... | ... |
@@ -63,7 +72,7 @@ func decodeContainerConfig(src io.Reader) (*container.Config, *container.HostCon |
| 63 | 63 |
} |
| 64 | 64 |
|
| 65 | 65 |
// Validate Resources |
| 66 |
- if err := validateResources(hc, sysinfo.New(true)); err != nil {
|
|
| 66 |
+ if err := validateResources(hc, si); err != nil {
|
|
| 67 | 67 |
return nil, nil, nil, err |
| 68 | 68 |
} |
| 69 | 69 |
|
| ... | ... |
@@ -12,6 +12,7 @@ import ( |
| 12 | 12 |
"github.com/docker/docker/api/types/container" |
| 13 | 13 |
networktypes "github.com/docker/docker/api/types/network" |
| 14 | 14 |
"github.com/docker/docker/api/types/strslice" |
| 15 |
+ "github.com/docker/docker/pkg/sysinfo" |
|
| 15 | 16 |
) |
| 16 | 17 |
|
| 17 | 18 |
type f struct {
|
| ... | ... |
@@ -46,7 +47,7 @@ func TestDecodeContainerConfig(t *testing.T) {
|
| 46 | 46 |
t.Fatal(err) |
| 47 | 47 |
} |
| 48 | 48 |
|
| 49 |
- c, h, _, err := decodeContainerConfig(bytes.NewReader(b)) |
|
| 49 |
+ c, h, _, err := decodeContainerConfig(bytes.NewReader(b), sysinfo.New(true)) |
|
| 50 | 50 |
if err != nil {
|
| 51 | 51 |
t.Fatal(fmt.Errorf("Error parsing %s: %v", f, err))
|
| 52 | 52 |
} |
| ... | ... |
@@ -130,5 +131,5 @@ func callDecodeContainerConfigIsolation(isolation string) (*container.Config, *c |
| 130 | 130 |
if b, err = json.Marshal(w); err != nil {
|
| 131 | 131 |
return nil, nil, nil, fmt.Errorf("Error on marshal %s", err.Error())
|
| 132 | 132 |
} |
| 133 |
- return decodeContainerConfig(bytes.NewReader(b)) |
|
| 133 |
+ return decodeContainerConfig(bytes.NewReader(b), sysinfo.New(true)) |
|
| 134 | 134 |
} |
| ... | ... |
@@ -130,6 +130,7 @@ github.com/containerd/go-runc 7016d3ce2328dd2cb1192b2076eb |
| 130 | 130 |
github.com/containerd/typeurl b45ef1f1f737e10bd45b25b669df25f0da8b9ba0 |
| 131 | 131 |
github.com/containerd/ttrpc 0be804eadb152bc3b3c20c5edc314c4633833398 |
| 132 | 132 |
github.com/gogo/googleapis 01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2 |
| 133 |
+github.com/cilium/ebpf 60c3aa43f488292fe2ee50fb8b833b383ca8ebbb |
|
| 133 | 134 |
|
| 134 | 135 |
# cluster |
| 135 | 136 |
github.com/docker/swarmkit ebe39a32e3ed4c3a3783a02c11cccf388818694c |
| 136 | 137 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,23 @@ |
| 0 |
+MIT License |
|
| 1 |
+ |
|
| 2 |
+Copyright (c) 2017 Nathan Sweet |
|
| 3 |
+Copyright (c) 2018, 2019 Cloudflare |
|
| 4 |
+Copyright (c) 2019 Authors of Cilium |
|
| 5 |
+ |
|
| 6 |
+Permission is hereby granted, free of charge, to any person obtaining a copy |
|
| 7 |
+of this software and associated documentation files (the "Software"), to deal |
|
| 8 |
+in the Software without restriction, including without limitation the rights |
|
| 9 |
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
| 10 |
+copies of the Software, and to permit persons to whom the Software is |
|
| 11 |
+furnished to do so, subject to the following conditions: |
|
| 12 |
+ |
|
| 13 |
+The above copyright notice and this permission notice shall be included in all |
|
| 14 |
+copies or substantial portions of the Software. |
|
| 15 |
+ |
|
| 16 |
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
| 17 |
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
| 18 |
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
| 19 |
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
| 20 |
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
| 21 |
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
| 22 |
+SOFTWARE. |
| 0 | 23 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,203 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bufio" |
|
| 4 |
+ "bytes" |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "io" |
|
| 7 |
+ "os" |
|
| 8 |
+ "syscall" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/cilium/ebpf/internal" |
|
| 11 |
+ "github.com/pkg/errors" |
|
| 12 |
+) |
|
| 13 |
+ |
|
| 14 |
+// MapABI are the attributes of a Map which are available across all supported kernels. |
|
| 15 |
+type MapABI struct {
|
|
| 16 |
+ Type MapType |
|
| 17 |
+ KeySize uint32 |
|
| 18 |
+ ValueSize uint32 |
|
| 19 |
+ MaxEntries uint32 |
|
| 20 |
+ Flags uint32 |
|
| 21 |
+} |
|
| 22 |
+ |
|
| 23 |
+func newMapABIFromSpec(spec *MapSpec) *MapABI {
|
|
| 24 |
+ return &MapABI{
|
|
| 25 |
+ spec.Type, |
|
| 26 |
+ spec.KeySize, |
|
| 27 |
+ spec.ValueSize, |
|
| 28 |
+ spec.MaxEntries, |
|
| 29 |
+ spec.Flags, |
|
| 30 |
+ } |
|
| 31 |
+} |
|
| 32 |
+ |
|
| 33 |
+func newMapABIFromFd(fd *bpfFD) (string, *MapABI, error) {
|
|
| 34 |
+ info, err := bpfGetMapInfoByFD(fd) |
|
| 35 |
+ if err != nil {
|
|
| 36 |
+ if errors.Cause(err) == syscall.EINVAL {
|
|
| 37 |
+ abi, err := newMapABIFromProc(fd) |
|
| 38 |
+ return "", abi, err |
|
| 39 |
+ } |
|
| 40 |
+ return "", nil, err |
|
| 41 |
+ } |
|
| 42 |
+ |
|
| 43 |
+ return "", &MapABI{
|
|
| 44 |
+ MapType(info.mapType), |
|
| 45 |
+ info.keySize, |
|
| 46 |
+ info.valueSize, |
|
| 47 |
+ info.maxEntries, |
|
| 48 |
+ info.flags, |
|
| 49 |
+ }, nil |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 52 |
+func newMapABIFromProc(fd *bpfFD) (*MapABI, error) {
|
|
| 53 |
+ var abi MapABI |
|
| 54 |
+ err := scanFdInfo(fd, map[string]interface{}{
|
|
| 55 |
+ "map_type": &abi.Type, |
|
| 56 |
+ "key_size": &abi.KeySize, |
|
| 57 |
+ "value_size": &abi.ValueSize, |
|
| 58 |
+ "max_entries": &abi.MaxEntries, |
|
| 59 |
+ "map_flags": &abi.Flags, |
|
| 60 |
+ }) |
|
| 61 |
+ if err != nil {
|
|
| 62 |
+ return nil, err |
|
| 63 |
+ } |
|
| 64 |
+ return &abi, nil |
|
| 65 |
+} |
|
| 66 |
+ |
|
| 67 |
+// Equal returns true if two ABIs have the same values. |
|
| 68 |
+func (abi *MapABI) Equal(other *MapABI) bool {
|
|
| 69 |
+ switch {
|
|
| 70 |
+ case abi.Type != other.Type: |
|
| 71 |
+ return false |
|
| 72 |
+ case abi.KeySize != other.KeySize: |
|
| 73 |
+ return false |
|
| 74 |
+ case abi.ValueSize != other.ValueSize: |
|
| 75 |
+ return false |
|
| 76 |
+ case abi.MaxEntries != other.MaxEntries: |
|
| 77 |
+ return false |
|
| 78 |
+ case abi.Flags != other.Flags: |
|
| 79 |
+ return false |
|
| 80 |
+ default: |
|
| 81 |
+ return true |
|
| 82 |
+ } |
|
| 83 |
+} |
|
| 84 |
+ |
|
| 85 |
+// ProgramABI are the attributes of a Program which are available across all supported kernels. |
|
| 86 |
+type ProgramABI struct {
|
|
| 87 |
+ Type ProgramType |
|
| 88 |
+} |
|
| 89 |
+ |
|
| 90 |
+func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI {
|
|
| 91 |
+ return &ProgramABI{
|
|
| 92 |
+ spec.Type, |
|
| 93 |
+ } |
|
| 94 |
+} |
|
| 95 |
+ |
|
| 96 |
+func newProgramABIFromFd(fd *bpfFD) (string, *ProgramABI, error) {
|
|
| 97 |
+ info, err := bpfGetProgInfoByFD(fd) |
|
| 98 |
+ if err != nil {
|
|
| 99 |
+ if errors.Cause(err) == syscall.EINVAL {
|
|
| 100 |
+ return newProgramABIFromProc(fd) |
|
| 101 |
+ } |
|
| 102 |
+ |
|
| 103 |
+ return "", nil, err |
|
| 104 |
+ } |
|
| 105 |
+ |
|
| 106 |
+ var name string |
|
| 107 |
+ if bpfName := convertCString(info.name[:]); bpfName != "" {
|
|
| 108 |
+ name = bpfName |
|
| 109 |
+ } else {
|
|
| 110 |
+ name = convertCString(info.tag[:]) |
|
| 111 |
+ } |
|
| 112 |
+ |
|
| 113 |
+ return name, &ProgramABI{
|
|
| 114 |
+ Type: ProgramType(info.progType), |
|
| 115 |
+ }, nil |
|
| 116 |
+} |
|
| 117 |
+ |
|
| 118 |
+func newProgramABIFromProc(fd *bpfFD) (string, *ProgramABI, error) {
|
|
| 119 |
+ var ( |
|
| 120 |
+ abi ProgramABI |
|
| 121 |
+ name string |
|
| 122 |
+ ) |
|
| 123 |
+ |
|
| 124 |
+ err := scanFdInfo(fd, map[string]interface{}{
|
|
| 125 |
+ "prog_type": &abi.Type, |
|
| 126 |
+ "prog_tag": &name, |
|
| 127 |
+ }) |
|
| 128 |
+ if errors.Cause(err) == errMissingFields {
|
|
| 129 |
+ return "", nil, &internal.UnsupportedFeatureError{
|
|
| 130 |
+ Name: "reading ABI from /proc/self/fdinfo", |
|
| 131 |
+ MinimumVersion: internal.Version{4, 11, 0},
|
|
| 132 |
+ } |
|
| 133 |
+ } |
|
| 134 |
+ if err != nil {
|
|
| 135 |
+ return "", nil, err |
|
| 136 |
+ } |
|
| 137 |
+ |
|
| 138 |
+ return name, &abi, nil |
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
+func scanFdInfo(fd *bpfFD, fields map[string]interface{}) error {
|
|
| 142 |
+ raw, err := fd.value() |
|
| 143 |
+ if err != nil {
|
|
| 144 |
+ return err |
|
| 145 |
+ } |
|
| 146 |
+ |
|
| 147 |
+ fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", raw))
|
|
| 148 |
+ if err != nil {
|
|
| 149 |
+ return err |
|
| 150 |
+ } |
|
| 151 |
+ defer fh.Close() |
|
| 152 |
+ |
|
| 153 |
+ return errors.Wrap(scanFdInfoReader(fh, fields), fh.Name()) |
|
| 154 |
+} |
|
| 155 |
+ |
|
| 156 |
+var errMissingFields = errors.New("missing fields")
|
|
| 157 |
+ |
|
| 158 |
+func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
|
|
| 159 |
+ var ( |
|
| 160 |
+ scanner = bufio.NewScanner(r) |
|
| 161 |
+ scanned int |
|
| 162 |
+ ) |
|
| 163 |
+ |
|
| 164 |
+ for scanner.Scan() {
|
|
| 165 |
+ parts := bytes.SplitN(scanner.Bytes(), []byte("\t"), 2)
|
|
| 166 |
+ if len(parts) != 2 {
|
|
| 167 |
+ continue |
|
| 168 |
+ } |
|
| 169 |
+ |
|
| 170 |
+ name := bytes.TrimSuffix(parts[0], []byte(":"))
|
|
| 171 |
+ field, ok := fields[string(name)] |
|
| 172 |
+ if !ok {
|
|
| 173 |
+ continue |
|
| 174 |
+ } |
|
| 175 |
+ |
|
| 176 |
+ if n, err := fmt.Fscanln(bytes.NewReader(parts[1]), field); err != nil || n != 1 {
|
|
| 177 |
+ return errors.Wrapf(err, "can't parse field %s", name) |
|
| 178 |
+ } |
|
| 179 |
+ |
|
| 180 |
+ scanned++ |
|
| 181 |
+ } |
|
| 182 |
+ |
|
| 183 |
+ if err := scanner.Err(); err != nil {
|
|
| 184 |
+ return err |
|
| 185 |
+ } |
|
| 186 |
+ |
|
| 187 |
+ if scanned != len(fields) {
|
|
| 188 |
+ return errMissingFields |
|
| 189 |
+ } |
|
| 190 |
+ |
|
| 191 |
+ return nil |
|
| 192 |
+} |
|
| 193 |
+ |
|
| 194 |
+// Equal returns true if two ABIs have the same values. |
|
| 195 |
+func (abi *ProgramABI) Equal(other *ProgramABI) bool {
|
|
| 196 |
+ switch {
|
|
| 197 |
+ case abi.Type != other.Type: |
|
| 198 |
+ return false |
|
| 199 |
+ default: |
|
| 200 |
+ return true |
|
| 201 |
+ } |
|
| 202 |
+} |
| 0 | 203 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,149 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp |
|
| 3 |
+ |
|
| 4 |
+// Source of ALU / ALU64 / Branch operations |
|
| 5 |
+// |
|
| 6 |
+// msb lsb |
|
| 7 |
+// +----+-+---+ |
|
| 8 |
+// |op |S|cls| |
|
| 9 |
+// +----+-+---+ |
|
| 10 |
+type Source uint8 |
|
| 11 |
+ |
|
| 12 |
+const sourceMask OpCode = 0x08 |
|
| 13 |
+ |
|
| 14 |
+// Source bitmask |
|
| 15 |
+const ( |
|
| 16 |
+ // InvalidSource is returned by getters when invoked |
|
| 17 |
+ // on non ALU / branch OpCodes. |
|
| 18 |
+ InvalidSource Source = 0xff |
|
| 19 |
+ // ImmSource src is from constant |
|
| 20 |
+ ImmSource Source = 0x00 |
|
| 21 |
+ // RegSource src is from register |
|
| 22 |
+ RegSource Source = 0x08 |
|
| 23 |
+) |
|
| 24 |
+ |
|
| 25 |
+// The Endianness of a byte swap instruction. |
|
| 26 |
+type Endianness uint8 |
|
| 27 |
+ |
|
| 28 |
+const endianMask = sourceMask |
|
| 29 |
+ |
|
| 30 |
+// Endian flags |
|
| 31 |
+const ( |
|
| 32 |
+ InvalidEndian Endianness = 0xff |
|
| 33 |
+ // Convert to little endian |
|
| 34 |
+ LE Endianness = 0x00 |
|
| 35 |
+ // Convert to big endian |
|
| 36 |
+ BE Endianness = 0x08 |
|
| 37 |
+) |
|
| 38 |
+ |
|
| 39 |
+// ALUOp are ALU / ALU64 operations |
|
| 40 |
+// |
|
| 41 |
+// msb lsb |
|
| 42 |
+// +----+-+---+ |
|
| 43 |
+// |OP |s|cls| |
|
| 44 |
+// +----+-+---+ |
|
| 45 |
+type ALUOp uint8 |
|
| 46 |
+ |
|
| 47 |
+const aluMask OpCode = 0xf0 |
|
| 48 |
+ |
|
| 49 |
+const ( |
|
| 50 |
+ // InvalidALUOp is returned by getters when invoked |
|
| 51 |
+ // on non ALU OpCodes |
|
| 52 |
+ InvalidALUOp ALUOp = 0xff |
|
| 53 |
+ // Add - addition |
|
| 54 |
+ Add ALUOp = 0x00 |
|
| 55 |
+ // Sub - subtraction |
|
| 56 |
+ Sub ALUOp = 0x10 |
|
| 57 |
+ // Mul - multiplication |
|
| 58 |
+ Mul ALUOp = 0x20 |
|
| 59 |
+ // Div - division |
|
| 60 |
+ Div ALUOp = 0x30 |
|
| 61 |
+ // Or - bitwise or |
|
| 62 |
+ Or ALUOp = 0x40 |
|
| 63 |
+ // And - bitwise and |
|
| 64 |
+ And ALUOp = 0x50 |
|
| 65 |
+ // LSh - bitwise shift left |
|
| 66 |
+ LSh ALUOp = 0x60 |
|
| 67 |
+ // RSh - bitwise shift right |
|
| 68 |
+ RSh ALUOp = 0x70 |
|
| 69 |
+ // Neg - arithmetic negation (flips the sign of dst) |
|
| 70 |
+ Neg ALUOp = 0x80 |
|
| 71 |
+ // Mod - modulo |
|
| 72 |
+ Mod ALUOp = 0x90 |
|
| 73 |
+ // Xor - bitwise xor |
|
| 74 |
+ Xor ALUOp = 0xa0 |
|
| 75 |
+ // Mov - move value from one place to another |
|
| 76 |
+ Mov ALUOp = 0xb0 |
|
| 77 |
+ // ArSh - arithmetic shift |
|
| 78 |
+ ArSh ALUOp = 0xc0 |
|
| 79 |
+ // Swap - endian conversions |
|
| 80 |
+ Swap ALUOp = 0xd0 |
|
| 81 |
+) |
|
| 82 |
+ |
|
| 83 |
+// HostTo converts from host to another endianness. |
|
| 84 |
+func HostTo(endian Endianness, dst Register, size Size) Instruction {
|
|
| 85 |
+ var imm int64 |
|
| 86 |
+ switch size {
|
|
| 87 |
+ case Half: |
|
| 88 |
+ imm = 16 |
|
| 89 |
+ case Word: |
|
| 90 |
+ imm = 32 |
|
| 91 |
+ case DWord: |
|
| 92 |
+ imm = 64 |
|
| 93 |
+ default: |
|
| 94 |
+ return Instruction{OpCode: InvalidOpCode}
|
|
| 95 |
+ } |
|
| 96 |
+ |
|
| 97 |
+ return Instruction{
|
|
| 98 |
+ OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)), |
|
| 99 |
+ Dst: dst, |
|
| 100 |
+ Constant: imm, |
|
| 101 |
+ } |
|
| 102 |
+} |
|
| 103 |
+ |
|
| 104 |
+// Op returns the OpCode for an ALU operation with a given source. |
|
| 105 |
+func (op ALUOp) Op(source Source) OpCode {
|
|
| 106 |
+ return OpCode(ALU64Class).SetALUOp(op).SetSource(source) |
|
| 107 |
+} |
|
| 108 |
+ |
|
| 109 |
+// Reg emits `dst (op) src`. |
|
| 110 |
+func (op ALUOp) Reg(dst, src Register) Instruction {
|
|
| 111 |
+ return Instruction{
|
|
| 112 |
+ OpCode: op.Op(RegSource), |
|
| 113 |
+ Dst: dst, |
|
| 114 |
+ Src: src, |
|
| 115 |
+ } |
|
| 116 |
+} |
|
| 117 |
+ |
|
| 118 |
+// Imm emits `dst (op) value`. |
|
| 119 |
+func (op ALUOp) Imm(dst Register, value int32) Instruction {
|
|
| 120 |
+ return Instruction{
|
|
| 121 |
+ OpCode: op.Op(ImmSource), |
|
| 122 |
+ Dst: dst, |
|
| 123 |
+ Constant: int64(value), |
|
| 124 |
+ } |
|
| 125 |
+} |
|
| 126 |
+ |
|
| 127 |
+// Op32 returns the OpCode for a 32-bit ALU operation with a given source. |
|
| 128 |
+func (op ALUOp) Op32(source Source) OpCode {
|
|
| 129 |
+ return OpCode(ALUClass).SetALUOp(op).SetSource(source) |
|
| 130 |
+} |
|
| 131 |
+ |
|
| 132 |
+// Reg32 emits `dst (op) src`, zeroing the upper 32 bits of dst. |
|
| 133 |
+func (op ALUOp) Reg32(dst, src Register) Instruction {
|
|
| 134 |
+ return Instruction{
|
|
| 135 |
+ OpCode: op.Op32(RegSource), |
|
| 136 |
+ Dst: dst, |
|
| 137 |
+ Src: src, |
|
| 138 |
+ } |
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
+// Imm32 emits `dst (op) value`, zeroing the upper 32 bits of dst. |
|
| 142 |
+func (op ALUOp) Imm32(dst Register, value int32) Instruction {
|
|
| 143 |
+ return Instruction{
|
|
| 144 |
+ OpCode: op.Op32(ImmSource), |
|
| 145 |
+ Dst: dst, |
|
| 146 |
+ Constant: int64(value), |
|
| 147 |
+ } |
|
| 148 |
+} |
| 0 | 149 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,107 @@ |
| 0 |
+// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT. |
|
| 1 |
+ |
|
| 2 |
+package asm |
|
| 3 |
+ |
|
| 4 |
+import "strconv" |
|
| 5 |
+ |
|
| 6 |
+func _() {
|
|
| 7 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 8 |
+ // Re-run the stringer command to generate them again. |
|
| 9 |
+ var x [1]struct{}
|
|
| 10 |
+ _ = x[InvalidSource-255] |
|
| 11 |
+ _ = x[ImmSource-0] |
|
| 12 |
+ _ = x[RegSource-8] |
|
| 13 |
+} |
|
| 14 |
+ |
|
| 15 |
+const ( |
|
| 16 |
+ _Source_name_0 = "ImmSource" |
|
| 17 |
+ _Source_name_1 = "RegSource" |
|
| 18 |
+ _Source_name_2 = "InvalidSource" |
|
| 19 |
+) |
|
| 20 |
+ |
|
| 21 |
+func (i Source) String() string {
|
|
| 22 |
+ switch {
|
|
| 23 |
+ case i == 0: |
|
| 24 |
+ return _Source_name_0 |
|
| 25 |
+ case i == 8: |
|
| 26 |
+ return _Source_name_1 |
|
| 27 |
+ case i == 255: |
|
| 28 |
+ return _Source_name_2 |
|
| 29 |
+ default: |
|
| 30 |
+ return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 31 |
+ } |
|
| 32 |
+} |
|
| 33 |
+func _() {
|
|
| 34 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 35 |
+ // Re-run the stringer command to generate them again. |
|
| 36 |
+ var x [1]struct{}
|
|
| 37 |
+ _ = x[InvalidEndian-255] |
|
| 38 |
+ _ = x[LE-0] |
|
| 39 |
+ _ = x[BE-8] |
|
| 40 |
+} |
|
| 41 |
+ |
|
| 42 |
+const ( |
|
| 43 |
+ _Endianness_name_0 = "LE" |
|
| 44 |
+ _Endianness_name_1 = "BE" |
|
| 45 |
+ _Endianness_name_2 = "InvalidEndian" |
|
| 46 |
+) |
|
| 47 |
+ |
|
| 48 |
+func (i Endianness) String() string {
|
|
| 49 |
+ switch {
|
|
| 50 |
+ case i == 0: |
|
| 51 |
+ return _Endianness_name_0 |
|
| 52 |
+ case i == 8: |
|
| 53 |
+ return _Endianness_name_1 |
|
| 54 |
+ case i == 255: |
|
| 55 |
+ return _Endianness_name_2 |
|
| 56 |
+ default: |
|
| 57 |
+ return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 58 |
+ } |
|
| 59 |
+} |
|
| 60 |
+func _() {
|
|
| 61 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 62 |
+ // Re-run the stringer command to generate them again. |
|
| 63 |
+ var x [1]struct{}
|
|
| 64 |
+ _ = x[InvalidALUOp-255] |
|
| 65 |
+ _ = x[Add-0] |
|
| 66 |
+ _ = x[Sub-16] |
|
| 67 |
+ _ = x[Mul-32] |
|
| 68 |
+ _ = x[Div-48] |
|
| 69 |
+ _ = x[Or-64] |
|
| 70 |
+ _ = x[And-80] |
|
| 71 |
+ _ = x[LSh-96] |
|
| 72 |
+ _ = x[RSh-112] |
|
| 73 |
+ _ = x[Neg-128] |
|
| 74 |
+ _ = x[Mod-144] |
|
| 75 |
+ _ = x[Xor-160] |
|
| 76 |
+ _ = x[Mov-176] |
|
| 77 |
+ _ = x[ArSh-192] |
|
| 78 |
+ _ = x[Swap-208] |
|
| 79 |
+} |
|
| 80 |
+ |
|
| 81 |
+const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp" |
|
| 82 |
+ |
|
| 83 |
+var _ALUOp_map = map[ALUOp]string{
|
|
| 84 |
+ 0: _ALUOp_name[0:3], |
|
| 85 |
+ 16: _ALUOp_name[3:6], |
|
| 86 |
+ 32: _ALUOp_name[6:9], |
|
| 87 |
+ 48: _ALUOp_name[9:12], |
|
| 88 |
+ 64: _ALUOp_name[12:14], |
|
| 89 |
+ 80: _ALUOp_name[14:17], |
|
| 90 |
+ 96: _ALUOp_name[17:20], |
|
| 91 |
+ 112: _ALUOp_name[20:23], |
|
| 92 |
+ 128: _ALUOp_name[23:26], |
|
| 93 |
+ 144: _ALUOp_name[26:29], |
|
| 94 |
+ 160: _ALUOp_name[29:32], |
|
| 95 |
+ 176: _ALUOp_name[32:35], |
|
| 96 |
+ 192: _ALUOp_name[35:39], |
|
| 97 |
+ 208: _ALUOp_name[39:43], |
|
| 98 |
+ 255: _ALUOp_name[43:55], |
|
| 99 |
+} |
|
| 100 |
+ |
|
| 101 |
+func (i ALUOp) String() string {
|
|
| 102 |
+ if str, ok := _ALUOp_map[i]; ok {
|
|
| 103 |
+ return str |
|
| 104 |
+ } |
|
| 105 |
+ return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 106 |
+} |
| 0 | 2 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,143 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+//go:generate stringer -output func_string.go -type=BuiltinFunc |
|
| 3 |
+ |
|
| 4 |
+// BuiltinFunc is a built-in eBPF function. |
|
| 5 |
+type BuiltinFunc int32 |
|
| 6 |
+ |
|
| 7 |
+// eBPF built-in functions |
|
| 8 |
+// |
|
| 9 |
+// You can regenerate this list using the following gawk script: |
|
| 10 |
+// |
|
| 11 |
+// /FN\(.+\),/ {
|
|
| 12 |
+// match($1, /\((.+)\)/, r) |
|
| 13 |
+// split(r[1], p, "_") |
|
| 14 |
+// printf "Fn" |
|
| 15 |
+// for (i in p) {
|
|
| 16 |
+// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2) |
|
| 17 |
+// } |
|
| 18 |
+// print "" |
|
| 19 |
+// } |
|
| 20 |
+// |
|
| 21 |
+// The script expects include/uapi/linux/bpf.h as its input. |
|
| 22 |
+const ( |
|
| 23 |
+ FnUnspec BuiltinFunc = iota |
|
| 24 |
+ FnMapLookupElem |
|
| 25 |
+ FnMapUpdateElem |
|
| 26 |
+ FnMapDeleteElem |
|
| 27 |
+ FnProbeRead |
|
| 28 |
+ FnKtimeGetNs |
|
| 29 |
+ FnTracePrintk |
|
| 30 |
+ FnGetPrandomU32 |
|
| 31 |
+ FnGetSmpProcessorId |
|
| 32 |
+ FnSkbStoreBytes |
|
| 33 |
+ FnL3CsumReplace |
|
| 34 |
+ FnL4CsumReplace |
|
| 35 |
+ FnTailCall |
|
| 36 |
+ FnCloneRedirect |
|
| 37 |
+ FnGetCurrentPidTgid |
|
| 38 |
+ FnGetCurrentUidGid |
|
| 39 |
+ FnGetCurrentComm |
|
| 40 |
+ FnGetCgroupClassid |
|
| 41 |
+ FnSkbVlanPush |
|
| 42 |
+ FnSkbVlanPop |
|
| 43 |
+ FnSkbGetTunnelKey |
|
| 44 |
+ FnSkbSetTunnelKey |
|
| 45 |
+ FnPerfEventRead |
|
| 46 |
+ FnRedirect |
|
| 47 |
+ FnGetRouteRealm |
|
| 48 |
+ FnPerfEventOutput |
|
| 49 |
+ FnSkbLoadBytes |
|
| 50 |
+ FnGetStackid |
|
| 51 |
+ FnCsumDiff |
|
| 52 |
+ FnSkbGetTunnelOpt |
|
| 53 |
+ FnSkbSetTunnelOpt |
|
| 54 |
+ FnSkbChangeProto |
|
| 55 |
+ FnSkbChangeType |
|
| 56 |
+ FnSkbUnderCgroup |
|
| 57 |
+ FnGetHashRecalc |
|
| 58 |
+ FnGetCurrentTask |
|
| 59 |
+ FnProbeWriteUser |
|
| 60 |
+ FnCurrentTaskUnderCgroup |
|
| 61 |
+ FnSkbChangeTail |
|
| 62 |
+ FnSkbPullData |
|
| 63 |
+ FnCsumUpdate |
|
| 64 |
+ FnSetHashInvalid |
|
| 65 |
+ FnGetNumaNodeId |
|
| 66 |
+ FnSkbChangeHead |
|
| 67 |
+ FnXdpAdjustHead |
|
| 68 |
+ FnProbeReadStr |
|
| 69 |
+ FnGetSocketCookie |
|
| 70 |
+ FnGetSocketUid |
|
| 71 |
+ FnSetHash |
|
| 72 |
+ FnSetsockopt |
|
| 73 |
+ FnSkbAdjustRoom |
|
| 74 |
+ FnRedirectMap |
|
| 75 |
+ FnSkRedirectMap |
|
| 76 |
+ FnSockMapUpdate |
|
| 77 |
+ FnXdpAdjustMeta |
|
| 78 |
+ FnPerfEventReadValue |
|
| 79 |
+ FnPerfProgReadValue |
|
| 80 |
+ FnGetsockopt |
|
| 81 |
+ FnOverrideReturn |
|
| 82 |
+ FnSockOpsCbFlagsSet |
|
| 83 |
+ FnMsgRedirectMap |
|
| 84 |
+ FnMsgApplyBytes |
|
| 85 |
+ FnMsgCorkBytes |
|
| 86 |
+ FnMsgPullData |
|
| 87 |
+ FnBind |
|
| 88 |
+ FnXdpAdjustTail |
|
| 89 |
+ FnSkbGetXfrmState |
|
| 90 |
+ FnGetStack |
|
| 91 |
+ FnSkbLoadBytesRelative |
|
| 92 |
+ FnFibLookup |
|
| 93 |
+ FnSockHashUpdate |
|
| 94 |
+ FnMsgRedirectHash |
|
| 95 |
+ FnSkRedirectHash |
|
| 96 |
+ FnLwtPushEncap |
|
| 97 |
+ FnLwtSeg6StoreBytes |
|
| 98 |
+ FnLwtSeg6AdjustSrh |
|
| 99 |
+ FnLwtSeg6Action |
|
| 100 |
+ FnRcRepeat |
|
| 101 |
+ FnRcKeydown |
|
| 102 |
+ FnSkbCgroupId |
|
| 103 |
+ FnGetCurrentCgroupId |
|
| 104 |
+ FnGetLocalStorage |
|
| 105 |
+ FnSkSelectReuseport |
|
| 106 |
+ FnSkbAncestorCgroupId |
|
| 107 |
+ FnSkLookupTcp |
|
| 108 |
+ FnSkLookupUdp |
|
| 109 |
+ FnSkRelease |
|
| 110 |
+ FnMapPushElem |
|
| 111 |
+ FnMapPopElem |
|
| 112 |
+ FnMapPeekElem |
|
| 113 |
+ FnMsgPushData |
|
| 114 |
+ FnMsgPopData |
|
| 115 |
+ FnRcPointerRel |
|
| 116 |
+ FnSpinLock |
|
| 117 |
+ FnSpinUnlock |
|
| 118 |
+ FnSkFullsock |
|
| 119 |
+ FnTcpSock |
|
| 120 |
+ FnSkbEcnSetCe |
|
| 121 |
+ FnGetListenerSock |
|
| 122 |
+ FnSkcLookupTcp |
|
| 123 |
+ FnTcpCheckSyncookie |
|
| 124 |
+ FnSysctlGetName |
|
| 125 |
+ FnSysctlGetCurrentValue |
|
| 126 |
+ FnSysctlGetNewValue |
|
| 127 |
+ FnSysctlSetNewValue |
|
| 128 |
+ FnStrtol |
|
| 129 |
+ FnStrtoul |
|
| 130 |
+ FnSkStorageGet |
|
| 131 |
+ FnSkStorageDelete |
|
| 132 |
+ FnSendSignal |
|
| 133 |
+ FnTcpGenSyncookie |
|
| 134 |
+) |
|
| 135 |
+ |
|
| 136 |
+// Call emits a function call. |
|
| 137 |
+func (fn BuiltinFunc) Call() Instruction {
|
|
| 138 |
+ return Instruction{
|
|
| 139 |
+ OpCode: OpCode(JumpClass).SetJumpOp(Call), |
|
| 140 |
+ Constant: int64(fn), |
|
| 141 |
+ } |
|
| 142 |
+} |
| 0 | 143 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,133 @@ |
| 0 |
+// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT. |
|
| 1 |
+ |
|
| 2 |
+package asm |
|
| 3 |
+ |
|
| 4 |
+import "strconv" |
|
| 5 |
+ |
|
| 6 |
+func _() {
|
|
| 7 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 8 |
+ // Re-run the stringer command to generate them again. |
|
| 9 |
+ var x [1]struct{}
|
|
| 10 |
+ _ = x[FnUnspec-0] |
|
| 11 |
+ _ = x[FnMapLookupElem-1] |
|
| 12 |
+ _ = x[FnMapUpdateElem-2] |
|
| 13 |
+ _ = x[FnMapDeleteElem-3] |
|
| 14 |
+ _ = x[FnProbeRead-4] |
|
| 15 |
+ _ = x[FnKtimeGetNs-5] |
|
| 16 |
+ _ = x[FnTracePrintk-6] |
|
| 17 |
+ _ = x[FnGetPrandomU32-7] |
|
| 18 |
+ _ = x[FnGetSmpProcessorId-8] |
|
| 19 |
+ _ = x[FnSkbStoreBytes-9] |
|
| 20 |
+ _ = x[FnL3CsumReplace-10] |
|
| 21 |
+ _ = x[FnL4CsumReplace-11] |
|
| 22 |
+ _ = x[FnTailCall-12] |
|
| 23 |
+ _ = x[FnCloneRedirect-13] |
|
| 24 |
+ _ = x[FnGetCurrentPidTgid-14] |
|
| 25 |
+ _ = x[FnGetCurrentUidGid-15] |
|
| 26 |
+ _ = x[FnGetCurrentComm-16] |
|
| 27 |
+ _ = x[FnGetCgroupClassid-17] |
|
| 28 |
+ _ = x[FnSkbVlanPush-18] |
|
| 29 |
+ _ = x[FnSkbVlanPop-19] |
|
| 30 |
+ _ = x[FnSkbGetTunnelKey-20] |
|
| 31 |
+ _ = x[FnSkbSetTunnelKey-21] |
|
| 32 |
+ _ = x[FnPerfEventRead-22] |
|
| 33 |
+ _ = x[FnRedirect-23] |
|
| 34 |
+ _ = x[FnGetRouteRealm-24] |
|
| 35 |
+ _ = x[FnPerfEventOutput-25] |
|
| 36 |
+ _ = x[FnSkbLoadBytes-26] |
|
| 37 |
+ _ = x[FnGetStackid-27] |
|
| 38 |
+ _ = x[FnCsumDiff-28] |
|
| 39 |
+ _ = x[FnSkbGetTunnelOpt-29] |
|
| 40 |
+ _ = x[FnSkbSetTunnelOpt-30] |
|
| 41 |
+ _ = x[FnSkbChangeProto-31] |
|
| 42 |
+ _ = x[FnSkbChangeType-32] |
|
| 43 |
+ _ = x[FnSkbUnderCgroup-33] |
|
| 44 |
+ _ = x[FnGetHashRecalc-34] |
|
| 45 |
+ _ = x[FnGetCurrentTask-35] |
|
| 46 |
+ _ = x[FnProbeWriteUser-36] |
|
| 47 |
+ _ = x[FnCurrentTaskUnderCgroup-37] |
|
| 48 |
+ _ = x[FnSkbChangeTail-38] |
|
| 49 |
+ _ = x[FnSkbPullData-39] |
|
| 50 |
+ _ = x[FnCsumUpdate-40] |
|
| 51 |
+ _ = x[FnSetHashInvalid-41] |
|
| 52 |
+ _ = x[FnGetNumaNodeId-42] |
|
| 53 |
+ _ = x[FnSkbChangeHead-43] |
|
| 54 |
+ _ = x[FnXdpAdjustHead-44] |
|
| 55 |
+ _ = x[FnProbeReadStr-45] |
|
| 56 |
+ _ = x[FnGetSocketCookie-46] |
|
| 57 |
+ _ = x[FnGetSocketUid-47] |
|
| 58 |
+ _ = x[FnSetHash-48] |
|
| 59 |
+ _ = x[FnSetsockopt-49] |
|
| 60 |
+ _ = x[FnSkbAdjustRoom-50] |
|
| 61 |
+ _ = x[FnRedirectMap-51] |
|
| 62 |
+ _ = x[FnSkRedirectMap-52] |
|
| 63 |
+ _ = x[FnSockMapUpdate-53] |
|
| 64 |
+ _ = x[FnXdpAdjustMeta-54] |
|
| 65 |
+ _ = x[FnPerfEventReadValue-55] |
|
| 66 |
+ _ = x[FnPerfProgReadValue-56] |
|
| 67 |
+ _ = x[FnGetsockopt-57] |
|
| 68 |
+ _ = x[FnOverrideReturn-58] |
|
| 69 |
+ _ = x[FnSockOpsCbFlagsSet-59] |
|
| 70 |
+ _ = x[FnMsgRedirectMap-60] |
|
| 71 |
+ _ = x[FnMsgApplyBytes-61] |
|
| 72 |
+ _ = x[FnMsgCorkBytes-62] |
|
| 73 |
+ _ = x[FnMsgPullData-63] |
|
| 74 |
+ _ = x[FnBind-64] |
|
| 75 |
+ _ = x[FnXdpAdjustTail-65] |
|
| 76 |
+ _ = x[FnSkbGetXfrmState-66] |
|
| 77 |
+ _ = x[FnGetStack-67] |
|
| 78 |
+ _ = x[FnSkbLoadBytesRelative-68] |
|
| 79 |
+ _ = x[FnFibLookup-69] |
|
| 80 |
+ _ = x[FnSockHashUpdate-70] |
|
| 81 |
+ _ = x[FnMsgRedirectHash-71] |
|
| 82 |
+ _ = x[FnSkRedirectHash-72] |
|
| 83 |
+ _ = x[FnLwtPushEncap-73] |
|
| 84 |
+ _ = x[FnLwtSeg6StoreBytes-74] |
|
| 85 |
+ _ = x[FnLwtSeg6AdjustSrh-75] |
|
| 86 |
+ _ = x[FnLwtSeg6Action-76] |
|
| 87 |
+ _ = x[FnRcRepeat-77] |
|
| 88 |
+ _ = x[FnRcKeydown-78] |
|
| 89 |
+ _ = x[FnSkbCgroupId-79] |
|
| 90 |
+ _ = x[FnGetCurrentCgroupId-80] |
|
| 91 |
+ _ = x[FnGetLocalStorage-81] |
|
| 92 |
+ _ = x[FnSkSelectReuseport-82] |
|
| 93 |
+ _ = x[FnSkbAncestorCgroupId-83] |
|
| 94 |
+ _ = x[FnSkLookupTcp-84] |
|
| 95 |
+ _ = x[FnSkLookupUdp-85] |
|
| 96 |
+ _ = x[FnSkRelease-86] |
|
| 97 |
+ _ = x[FnMapPushElem-87] |
|
| 98 |
+ _ = x[FnMapPopElem-88] |
|
| 99 |
+ _ = x[FnMapPeekElem-89] |
|
| 100 |
+ _ = x[FnMsgPushData-90] |
|
| 101 |
+ _ = x[FnMsgPopData-91] |
|
| 102 |
+ _ = x[FnRcPointerRel-92] |
|
| 103 |
+ _ = x[FnSpinLock-93] |
|
| 104 |
+ _ = x[FnSpinUnlock-94] |
|
| 105 |
+ _ = x[FnSkFullsock-95] |
|
| 106 |
+ _ = x[FnTcpSock-96] |
|
| 107 |
+ _ = x[FnSkbEcnSetCe-97] |
|
| 108 |
+ _ = x[FnGetListenerSock-98] |
|
| 109 |
+ _ = x[FnSkcLookupTcp-99] |
|
| 110 |
+ _ = x[FnTcpCheckSyncookie-100] |
|
| 111 |
+ _ = x[FnSysctlGetName-101] |
|
| 112 |
+ _ = x[FnSysctlGetCurrentValue-102] |
|
| 113 |
+ _ = x[FnSysctlGetNewValue-103] |
|
| 114 |
+ _ = x[FnSysctlSetNewValue-104] |
|
| 115 |
+ _ = x[FnStrtol-105] |
|
| 116 |
+ _ = x[FnStrtoul-106] |
|
| 117 |
+ _ = x[FnSkStorageGet-107] |
|
| 118 |
+ _ = x[FnSkStorageDelete-108] |
|
| 119 |
+ _ = x[FnSendSignal-109] |
|
| 120 |
+ _ = x[FnTcpGenSyncookie-110] |
|
| 121 |
+} |
|
| 122 |
+ |
|
| 123 |
+const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookie" |
|
| 124 |
+ |
|
| 125 |
+var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632}
|
|
| 126 |
+ |
|
| 127 |
+func (i BuiltinFunc) String() string {
|
|
| 128 |
+ if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {
|
|
| 129 |
+ return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 130 |
+ } |
|
| 131 |
+ return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]] |
|
| 132 |
+} |
| 0 | 133 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,416 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "encoding/binary" |
|
| 4 |
+ "fmt" |
|
| 5 |
+ "io" |
|
| 6 |
+ "math" |
|
| 7 |
+ "strings" |
|
| 8 |
+ |
|
| 9 |
+ "github.com/pkg/errors" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
+// InstructionSize is the size of a BPF instruction in bytes |
|
| 13 |
+const InstructionSize = 8 |
|
| 14 |
+ |
|
| 15 |
+// Instruction is a single eBPF instruction. |
|
| 16 |
+type Instruction struct {
|
|
| 17 |
+ OpCode OpCode |
|
| 18 |
+ Dst Register |
|
| 19 |
+ Src Register |
|
| 20 |
+ Offset int16 |
|
| 21 |
+ Constant int64 |
|
| 22 |
+ Reference string |
|
| 23 |
+ Symbol string |
|
| 24 |
+} |
|
| 25 |
+ |
|
| 26 |
+// Sym creates a symbol. |
|
| 27 |
+func (ins Instruction) Sym(name string) Instruction {
|
|
| 28 |
+ ins.Symbol = name |
|
| 29 |
+ return ins |
|
| 30 |
+} |
|
| 31 |
+ |
|
| 32 |
+// Unmarshal decodes a BPF instruction. |
|
| 33 |
+func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
|
|
| 34 |
+ var bi bpfInstruction |
|
| 35 |
+ err := binary.Read(r, bo, &bi) |
|
| 36 |
+ if err != nil {
|
|
| 37 |
+ return 0, err |
|
| 38 |
+ } |
|
| 39 |
+ |
|
| 40 |
+ ins.OpCode = bi.OpCode |
|
| 41 |
+ ins.Dst = bi.Registers.Dst() |
|
| 42 |
+ ins.Src = bi.Registers.Src() |
|
| 43 |
+ ins.Offset = bi.Offset |
|
| 44 |
+ ins.Constant = int64(bi.Constant) |
|
| 45 |
+ |
|
| 46 |
+ if !bi.OpCode.isDWordLoad() {
|
|
| 47 |
+ return InstructionSize, nil |
|
| 48 |
+ } |
|
| 49 |
+ |
|
| 50 |
+ var bi2 bpfInstruction |
|
| 51 |
+ if err := binary.Read(r, bo, &bi2); err != nil {
|
|
| 52 |
+ // No Wrap, to avoid io.EOF clash |
|
| 53 |
+ return 0, errors.New("64bit immediate is missing second half")
|
|
| 54 |
+ } |
|
| 55 |
+ if bi2.OpCode != 0 || bi2.Offset != 0 || bi2.Registers != 0 {
|
|
| 56 |
+ return 0, errors.New("64bit immediate has non-zero fields")
|
|
| 57 |
+ } |
|
| 58 |
+ ins.Constant = int64(uint64(uint32(bi2.Constant))<<32 | uint64(uint32(bi.Constant))) |
|
| 59 |
+ |
|
| 60 |
+ return 2 * InstructionSize, nil |
|
| 61 |
+} |
|
| 62 |
+ |
|
| 63 |
+// Marshal encodes a BPF instruction. |
|
| 64 |
+func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) {
|
|
| 65 |
+ if ins.OpCode == InvalidOpCode {
|
|
| 66 |
+ return 0, errors.New("invalid opcode")
|
|
| 67 |
+ } |
|
| 68 |
+ |
|
| 69 |
+ isDWordLoad := ins.OpCode.isDWordLoad() |
|
| 70 |
+ |
|
| 71 |
+ cons := int32(ins.Constant) |
|
| 72 |
+ if isDWordLoad {
|
|
| 73 |
+ // Encode least significant 32bit first for 64bit operations. |
|
| 74 |
+ cons = int32(uint32(ins.Constant)) |
|
| 75 |
+ } |
|
| 76 |
+ |
|
| 77 |
+ bpfi := bpfInstruction{
|
|
| 78 |
+ ins.OpCode, |
|
| 79 |
+ newBPFRegisters(ins.Dst, ins.Src), |
|
| 80 |
+ ins.Offset, |
|
| 81 |
+ cons, |
|
| 82 |
+ } |
|
| 83 |
+ |
|
| 84 |
+ if err := binary.Write(w, bo, &bpfi); err != nil {
|
|
| 85 |
+ return 0, err |
|
| 86 |
+ } |
|
| 87 |
+ |
|
| 88 |
+ if !isDWordLoad {
|
|
| 89 |
+ return InstructionSize, nil |
|
| 90 |
+ } |
|
| 91 |
+ |
|
| 92 |
+ bpfi = bpfInstruction{
|
|
| 93 |
+ Constant: int32(ins.Constant >> 32), |
|
| 94 |
+ } |
|
| 95 |
+ |
|
| 96 |
+ if err := binary.Write(w, bo, &bpfi); err != nil {
|
|
| 97 |
+ return 0, err |
|
| 98 |
+ } |
|
| 99 |
+ |
|
| 100 |
+ return 2 * InstructionSize, nil |
|
| 101 |
+} |
|
| 102 |
+ |
|
| 103 |
+// RewriteMapPtr changes an instruction to use a new map fd. |
|
| 104 |
+// |
|
| 105 |
+// Returns an error if the fd is invalid, or the instruction |
|
| 106 |
+// is incorrect. |
|
| 107 |
+func (ins *Instruction) RewriteMapPtr(fd int) error {
|
|
| 108 |
+ if !ins.OpCode.isDWordLoad() {
|
|
| 109 |
+ return errors.Errorf("%s is not a 64 bit load", ins.OpCode)
|
|
| 110 |
+ } |
|
| 111 |
+ |
|
| 112 |
+ if fd < 0 {
|
|
| 113 |
+ return errors.New("invalid fd")
|
|
| 114 |
+ } |
|
| 115 |
+ |
|
| 116 |
+ ins.Src = R1 |
|
| 117 |
+ ins.Constant = int64(fd) |
|
| 118 |
+ return nil |
|
| 119 |
+} |
|
| 120 |
+ |
|
| 121 |
+// Format implements fmt.Formatter. |
|
| 122 |
+func (ins Instruction) Format(f fmt.State, c rune) {
|
|
| 123 |
+ if c != 'v' {
|
|
| 124 |
+ fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c)
|
|
| 125 |
+ return |
|
| 126 |
+ } |
|
| 127 |
+ |
|
| 128 |
+ op := ins.OpCode |
|
| 129 |
+ |
|
| 130 |
+ if op == InvalidOpCode {
|
|
| 131 |
+ fmt.Fprint(f, "INVALID") |
|
| 132 |
+ return |
|
| 133 |
+ } |
|
| 134 |
+ |
|
| 135 |
+ // Omit trailing space for Exit |
|
| 136 |
+ if op.JumpOp() == Exit {
|
|
| 137 |
+ fmt.Fprint(f, op) |
|
| 138 |
+ return |
|
| 139 |
+ } |
|
| 140 |
+ |
|
| 141 |
+ fmt.Fprintf(f, "%v ", op) |
|
| 142 |
+ switch cls := op.Class(); cls {
|
|
| 143 |
+ case LdClass, LdXClass, StClass, StXClass: |
|
| 144 |
+ switch op.Mode() {
|
|
| 145 |
+ case ImmMode: |
|
| 146 |
+ fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant) |
|
| 147 |
+ case AbsMode: |
|
| 148 |
+ fmt.Fprintf(f, "imm: %d", ins.Constant) |
|
| 149 |
+ case IndMode: |
|
| 150 |
+ fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant) |
|
| 151 |
+ case MemMode: |
|
| 152 |
+ fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant) |
|
| 153 |
+ case XAddMode: |
|
| 154 |
+ fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src) |
|
| 155 |
+ } |
|
| 156 |
+ |
|
| 157 |
+ case ALU64Class, ALUClass: |
|
| 158 |
+ fmt.Fprintf(f, "dst: %s ", ins.Dst) |
|
| 159 |
+ if op.ALUOp() == Swap || op.Source() == ImmSource {
|
|
| 160 |
+ fmt.Fprintf(f, "imm: %d", ins.Constant) |
|
| 161 |
+ } else {
|
|
| 162 |
+ fmt.Fprintf(f, "src: %s", ins.Src) |
|
| 163 |
+ } |
|
| 164 |
+ |
|
| 165 |
+ case JumpClass: |
|
| 166 |
+ switch jop := op.JumpOp(); jop {
|
|
| 167 |
+ case Call: |
|
| 168 |
+ if ins.Src == R1 {
|
|
| 169 |
+ // bpf-to-bpf call |
|
| 170 |
+ fmt.Fprint(f, ins.Constant) |
|
| 171 |
+ } else {
|
|
| 172 |
+ fmt.Fprint(f, BuiltinFunc(ins.Constant)) |
|
| 173 |
+ } |
|
| 174 |
+ |
|
| 175 |
+ default: |
|
| 176 |
+ fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset) |
|
| 177 |
+ if op.Source() == ImmSource {
|
|
| 178 |
+ fmt.Fprintf(f, "imm: %d", ins.Constant) |
|
| 179 |
+ } else {
|
|
| 180 |
+ fmt.Fprintf(f, "src: %s", ins.Src) |
|
| 181 |
+ } |
|
| 182 |
+ } |
|
| 183 |
+ } |
|
| 184 |
+ |
|
| 185 |
+ if ins.Reference != "" {
|
|
| 186 |
+ fmt.Fprintf(f, " <%s>", ins.Reference) |
|
| 187 |
+ } |
|
| 188 |
+} |
|
| 189 |
+ |
|
| 190 |
+// Instructions is an eBPF program. |
|
| 191 |
+type Instructions []Instruction |
|
| 192 |
+ |
|
| 193 |
+func (insns Instructions) String() string {
|
|
| 194 |
+ return fmt.Sprint(insns) |
|
| 195 |
+} |
|
| 196 |
+ |
|
| 197 |
+// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd. |
|
| 198 |
+// |
|
| 199 |
+// Returns an error if the symbol isn't used, see IsUnreferencedSymbol. |
|
| 200 |
+func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
|
|
| 201 |
+ if symbol == "" {
|
|
| 202 |
+ return errors.New("empty symbol")
|
|
| 203 |
+ } |
|
| 204 |
+ |
|
| 205 |
+ found := false |
|
| 206 |
+ for i := range insns {
|
|
| 207 |
+ ins := &insns[i] |
|
| 208 |
+ if ins.Reference != symbol {
|
|
| 209 |
+ continue |
|
| 210 |
+ } |
|
| 211 |
+ |
|
| 212 |
+ if err := ins.RewriteMapPtr(fd); err != nil {
|
|
| 213 |
+ return err |
|
| 214 |
+ } |
|
| 215 |
+ |
|
| 216 |
+ found = true |
|
| 217 |
+ } |
|
| 218 |
+ |
|
| 219 |
+ if !found {
|
|
| 220 |
+ return &unreferencedSymbolError{symbol}
|
|
| 221 |
+ } |
|
| 222 |
+ |
|
| 223 |
+ return nil |
|
| 224 |
+} |
|
| 225 |
+ |
|
| 226 |
+// SymbolOffsets returns the set of symbols and their offset in |
|
| 227 |
+// the instructions. |
|
| 228 |
+func (insns Instructions) SymbolOffsets() (map[string]int, error) {
|
|
| 229 |
+ offsets := make(map[string]int) |
|
| 230 |
+ |
|
| 231 |
+ for i, ins := range insns {
|
|
| 232 |
+ if ins.Symbol == "" {
|
|
| 233 |
+ continue |
|
| 234 |
+ } |
|
| 235 |
+ |
|
| 236 |
+ if _, ok := offsets[ins.Symbol]; ok {
|
|
| 237 |
+ return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
|
|
| 238 |
+ } |
|
| 239 |
+ |
|
| 240 |
+ offsets[ins.Symbol] = i |
|
| 241 |
+ } |
|
| 242 |
+ |
|
| 243 |
+ return offsets, nil |
|
| 244 |
+} |
|
| 245 |
+ |
|
| 246 |
+// ReferenceOffsets returns the set of references and their offset in |
|
| 247 |
+// the instructions. |
|
| 248 |
+func (insns Instructions) ReferenceOffsets() map[string][]int {
|
|
| 249 |
+ offsets := make(map[string][]int) |
|
| 250 |
+ |
|
| 251 |
+ for i, ins := range insns {
|
|
| 252 |
+ if ins.Reference == "" {
|
|
| 253 |
+ continue |
|
| 254 |
+ } |
|
| 255 |
+ |
|
| 256 |
+ offsets[ins.Reference] = append(offsets[ins.Reference], i) |
|
| 257 |
+ } |
|
| 258 |
+ |
|
| 259 |
+ return offsets |
|
| 260 |
+} |
|
| 261 |
+ |
|
| 262 |
+func (insns Instructions) marshalledOffsets() (map[string]int, error) {
|
|
| 263 |
+ symbols := make(map[string]int) |
|
| 264 |
+ |
|
| 265 |
+ marshalledPos := 0 |
|
| 266 |
+ for _, ins := range insns {
|
|
| 267 |
+ currentPos := marshalledPos |
|
| 268 |
+ marshalledPos += ins.OpCode.marshalledInstructions() |
|
| 269 |
+ |
|
| 270 |
+ if ins.Symbol == "" {
|
|
| 271 |
+ continue |
|
| 272 |
+ } |
|
| 273 |
+ |
|
| 274 |
+ if _, ok := symbols[ins.Symbol]; ok {
|
|
| 275 |
+ return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
|
|
| 276 |
+ } |
|
| 277 |
+ |
|
| 278 |
+ symbols[ins.Symbol] = currentPos |
|
| 279 |
+ } |
|
| 280 |
+ |
|
| 281 |
+ return symbols, nil |
|
| 282 |
+} |
|
| 283 |
+ |
|
| 284 |
+// Format implements fmt.Formatter. |
|
| 285 |
+// |
|
| 286 |
+// You can control indentation of symbols by |
|
| 287 |
+// specifying a width. Setting a precision controls the indentation of |
|
| 288 |
+// instructions. |
|
| 289 |
+// The default character is a tab, which can be overriden by specifying |
|
| 290 |
+// the ' ' space flag. |
|
| 291 |
+func (insns Instructions) Format(f fmt.State, c rune) {
|
|
| 292 |
+ if c != 's' && c != 'v' {
|
|
| 293 |
+ fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c)
|
|
| 294 |
+ return |
|
| 295 |
+ } |
|
| 296 |
+ |
|
| 297 |
+ // Precision is better in this case, because it allows |
|
| 298 |
+ // specifying 0 padding easily. |
|
| 299 |
+ padding, ok := f.Precision() |
|
| 300 |
+ if !ok {
|
|
| 301 |
+ padding = 1 |
|
| 302 |
+ } |
|
| 303 |
+ |
|
| 304 |
+ indent := strings.Repeat("\t", padding)
|
|
| 305 |
+ if f.Flag(' ') {
|
|
| 306 |
+ indent = strings.Repeat(" ", padding)
|
|
| 307 |
+ } |
|
| 308 |
+ |
|
| 309 |
+ symPadding, ok := f.Width() |
|
| 310 |
+ if !ok {
|
|
| 311 |
+ symPadding = padding - 1 |
|
| 312 |
+ } |
|
| 313 |
+ if symPadding < 0 {
|
|
| 314 |
+ symPadding = 0 |
|
| 315 |
+ } |
|
| 316 |
+ |
|
| 317 |
+ symIndent := strings.Repeat("\t", symPadding)
|
|
| 318 |
+ if f.Flag(' ') {
|
|
| 319 |
+ symIndent = strings.Repeat(" ", symPadding)
|
|
| 320 |
+ } |
|
| 321 |
+ |
|
| 322 |
+ // Figure out how many digits we need to represent the highest |
|
| 323 |
+ // offset. |
|
| 324 |
+ highestOffset := 0 |
|
| 325 |
+ for _, ins := range insns {
|
|
| 326 |
+ highestOffset += ins.OpCode.marshalledInstructions() |
|
| 327 |
+ } |
|
| 328 |
+ offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset)))) |
|
| 329 |
+ |
|
| 330 |
+ offset := 0 |
|
| 331 |
+ for _, ins := range insns {
|
|
| 332 |
+ if ins.Symbol != "" {
|
|
| 333 |
+ fmt.Fprintf(f, "%s%s:\n", symIndent, ins.Symbol) |
|
| 334 |
+ } |
|
| 335 |
+ fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, offset, ins) |
|
| 336 |
+ offset += ins.OpCode.marshalledInstructions() |
|
| 337 |
+ } |
|
| 338 |
+ |
|
| 339 |
+ return |
|
| 340 |
+} |
|
| 341 |
+ |
|
| 342 |
+// Marshal encodes a BPF program into the kernel format. |
|
| 343 |
+func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
|
|
| 344 |
+ absoluteOffsets, err := insns.marshalledOffsets() |
|
| 345 |
+ if err != nil {
|
|
| 346 |
+ return err |
|
| 347 |
+ } |
|
| 348 |
+ |
|
| 349 |
+ num := 0 |
|
| 350 |
+ for i, ins := range insns {
|
|
| 351 |
+ switch {
|
|
| 352 |
+ case ins.OpCode.JumpOp() == Call && ins.Constant == -1: |
|
| 353 |
+ // Rewrite bpf to bpf call |
|
| 354 |
+ offset, ok := absoluteOffsets[ins.Reference] |
|
| 355 |
+ if !ok {
|
|
| 356 |
+ return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
|
|
| 357 |
+ } |
|
| 358 |
+ |
|
| 359 |
+ ins.Constant = int64(offset - num - 1) |
|
| 360 |
+ |
|
| 361 |
+ case ins.OpCode.Class() == JumpClass && ins.Offset == -1: |
|
| 362 |
+ // Rewrite jump to label |
|
| 363 |
+ offset, ok := absoluteOffsets[ins.Reference] |
|
| 364 |
+ if !ok {
|
|
| 365 |
+ return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
|
|
| 366 |
+ } |
|
| 367 |
+ |
|
| 368 |
+ ins.Offset = int16(offset - num - 1) |
|
| 369 |
+ } |
|
| 370 |
+ |
|
| 371 |
+ n, err := ins.Marshal(w, bo) |
|
| 372 |
+ if err != nil {
|
|
| 373 |
+ return errors.Wrapf(err, "instruction %d", i) |
|
| 374 |
+ } |
|
| 375 |
+ |
|
| 376 |
+ num += int(n / InstructionSize) |
|
| 377 |
+ } |
|
| 378 |
+ return nil |
|
| 379 |
+} |
|
| 380 |
+ |
|
| 381 |
+type bpfInstruction struct {
|
|
| 382 |
+ OpCode OpCode |
|
| 383 |
+ Registers bpfRegisters |
|
| 384 |
+ Offset int16 |
|
| 385 |
+ Constant int32 |
|
| 386 |
+} |
|
| 387 |
+ |
|
| 388 |
+type bpfRegisters uint8 |
|
| 389 |
+ |
|
| 390 |
+func newBPFRegisters(dst, src Register) bpfRegisters {
|
|
| 391 |
+ return bpfRegisters((src << 4) | (dst & 0xF)) |
|
| 392 |
+} |
|
| 393 |
+ |
|
| 394 |
+func (r bpfRegisters) Dst() Register {
|
|
| 395 |
+ return Register(r & 0xF) |
|
| 396 |
+} |
|
| 397 |
+ |
|
| 398 |
+func (r bpfRegisters) Src() Register {
|
|
| 399 |
+ return Register(r >> 4) |
|
| 400 |
+} |
|
| 401 |
+ |
|
| 402 |
// unreferencedSymbolError reports that no instruction references symbol.
type unreferencedSymbolError struct {
	symbol string
}

// Error implements the error interface.
func (use *unreferencedSymbolError) Error() string {
	return fmt.Sprintf("unreferenced symbol %s", use.symbol)
}

// IsUnreferencedSymbol returns true if err was caused by
// an unreferenced symbol.
//
// Errors that expose their cause through a Cause() error method (as the
// wrappers of github.com/pkg/errors do) are unwrapped, so the result is the
// same whether or not a caller added context to the error. A nil err yields
// false.
func IsUnreferencedSymbol(err error) bool {
	for err != nil {
		if _, ok := err.(*unreferencedSymbolError); ok {
			return true
		}

		wrapper, ok := err.(interface{ Cause() error })
		if !ok {
			return false
		}
		err = wrapper.Cause()
	}
	return false
}
| 0 | 416 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,109 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+//go:generate stringer -output jump_string.go -type=JumpOp |
|
| 3 |
+ |
|
| 4 |
+// JumpOp affect control flow. |
|
| 5 |
+// |
|
| 6 |
+// msb lsb |
|
| 7 |
+// +----+-+---+ |
|
| 8 |
+// |OP |s|cls| |
|
| 9 |
+// +----+-+---+ |
|
| 10 |
+type JumpOp uint8 |
|
| 11 |
+ |
|
| 12 |
+const jumpMask OpCode = aluMask |
|
| 13 |
+ |
|
| 14 |
+const ( |
|
| 15 |
+ // InvalidJumpOp is returned by getters when invoked |
|
| 16 |
+ // on non branch OpCodes |
|
| 17 |
+ InvalidJumpOp JumpOp = 0xff |
|
| 18 |
+ // Ja jumps by offset unconditionally |
|
| 19 |
+ Ja JumpOp = 0x00 |
|
| 20 |
+ // JEq jumps by offset if r == imm |
|
| 21 |
+ JEq JumpOp = 0x10 |
|
| 22 |
+ // JGT jumps by offset if r > imm |
|
| 23 |
+ JGT JumpOp = 0x20 |
|
| 24 |
+ // JGE jumps by offset if r >= imm |
|
| 25 |
+ JGE JumpOp = 0x30 |
|
| 26 |
+ // JSet jumps by offset if r & imm |
|
| 27 |
+ JSet JumpOp = 0x40 |
|
| 28 |
+ // JNE jumps by offset if r != imm |
|
| 29 |
+ JNE JumpOp = 0x50 |
|
| 30 |
+ // JSGT jumps by offset if signed r > signed imm |
|
| 31 |
+ JSGT JumpOp = 0x60 |
|
| 32 |
+ // JSGE jumps by offset if signed r >= signed imm |
|
| 33 |
+ JSGE JumpOp = 0x70 |
|
| 34 |
+ // Call builtin or user defined function from imm |
|
| 35 |
+ Call JumpOp = 0x80 |
|
| 36 |
+ // Exit ends execution, with value in r0 |
|
| 37 |
+ Exit JumpOp = 0x90 |
|
| 38 |
+ // JLT jumps by offset if r < imm |
|
| 39 |
+ JLT JumpOp = 0xa0 |
|
| 40 |
+ // JLE jumps by offset if r <= imm |
|
| 41 |
+ JLE JumpOp = 0xb0 |
|
| 42 |
+ // JSLT jumps by offset if signed r < signed imm |
|
| 43 |
+ JSLT JumpOp = 0xc0 |
|
| 44 |
+ // JSLE jumps by offset if signed r <= signed imm |
|
| 45 |
+ JSLE JumpOp = 0xd0 |
|
| 46 |
+) |
|
| 47 |
+ |
|
| 48 |
+// Return emits an exit instruction. |
|
| 49 |
+// |
|
| 50 |
+// Requires a return value in R0. |
|
| 51 |
+func Return() Instruction {
|
|
| 52 |
+ return Instruction{
|
|
| 53 |
+ OpCode: OpCode(JumpClass).SetJumpOp(Exit), |
|
| 54 |
+ } |
|
| 55 |
+} |
|
| 56 |
+ |
|
| 57 |
+// Op returns the OpCode for a given jump source. |
|
| 58 |
+func (op JumpOp) Op(source Source) OpCode {
|
|
| 59 |
+ return OpCode(JumpClass).SetJumpOp(op).SetSource(source) |
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
+// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled. |
|
| 63 |
+func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
|
|
| 64 |
+ if op == Exit || op == Call || op == Ja {
|
|
| 65 |
+ return Instruction{OpCode: InvalidOpCode}
|
|
| 66 |
+ } |
|
| 67 |
+ |
|
| 68 |
+ return Instruction{
|
|
| 69 |
+ OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource), |
|
| 70 |
+ Dst: dst, |
|
| 71 |
+ Offset: -1, |
|
| 72 |
+ Constant: int64(value), |
|
| 73 |
+ Reference: label, |
|
| 74 |
+ } |
|
| 75 |
+} |
|
| 76 |
+ |
|
| 77 |
+// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled. |
|
| 78 |
+func (op JumpOp) Reg(dst, src Register, label string) Instruction {
|
|
| 79 |
+ if op == Exit || op == Call || op == Ja {
|
|
| 80 |
+ return Instruction{OpCode: InvalidOpCode}
|
|
| 81 |
+ } |
|
| 82 |
+ |
|
| 83 |
+ return Instruction{
|
|
| 84 |
+ OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource), |
|
| 85 |
+ Dst: dst, |
|
| 86 |
+ Src: src, |
|
| 87 |
+ Offset: -1, |
|
| 88 |
+ Reference: label, |
|
| 89 |
+ } |
|
| 90 |
+} |
|
| 91 |
+ |
|
| 92 |
+// Label adjusts PC to the address of the label. |
|
| 93 |
+func (op JumpOp) Label(label string) Instruction {
|
|
| 94 |
+ if op == Call {
|
|
| 95 |
+ return Instruction{
|
|
| 96 |
+ OpCode: OpCode(JumpClass).SetJumpOp(Call), |
|
| 97 |
+ Src: R1, |
|
| 98 |
+ Constant: -1, |
|
| 99 |
+ Reference: label, |
|
| 100 |
+ } |
|
| 101 |
+ } |
|
| 102 |
+ |
|
| 103 |
+ return Instruction{
|
|
| 104 |
+ OpCode: OpCode(JumpClass).SetJumpOp(op), |
|
| 105 |
+ Offset: -1, |
|
| 106 |
+ Reference: label, |
|
| 107 |
+ } |
|
| 108 |
+} |
| 0 | 109 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,53 @@ |
| 0 |
+// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT. |
|
| 1 |
+ |
|
| 2 |
+package asm |
|
| 3 |
+ |
|
| 4 |
+import "strconv" |
|
| 5 |
+ |
|
| 6 |
+func _() {
|
|
| 7 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 8 |
+ // Re-run the stringer command to generate them again. |
|
| 9 |
+ var x [1]struct{}
|
|
| 10 |
+ _ = x[InvalidJumpOp-255] |
|
| 11 |
+ _ = x[Ja-0] |
|
| 12 |
+ _ = x[JEq-16] |
|
| 13 |
+ _ = x[JGT-32] |
|
| 14 |
+ _ = x[JGE-48] |
|
| 15 |
+ _ = x[JSet-64] |
|
| 16 |
+ _ = x[JNE-80] |
|
| 17 |
+ _ = x[JSGT-96] |
|
| 18 |
+ _ = x[JSGE-112] |
|
| 19 |
+ _ = x[Call-128] |
|
| 20 |
+ _ = x[Exit-144] |
|
| 21 |
+ _ = x[JLT-160] |
|
| 22 |
+ _ = x[JLE-176] |
|
| 23 |
+ _ = x[JSLT-192] |
|
| 24 |
+ _ = x[JSLE-208] |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 27 |
+const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp" |
|
| 28 |
+ |
|
| 29 |
+var _JumpOp_map = map[JumpOp]string{
|
|
| 30 |
+ 0: _JumpOp_name[0:2], |
|
| 31 |
+ 16: _JumpOp_name[2:5], |
|
| 32 |
+ 32: _JumpOp_name[5:8], |
|
| 33 |
+ 48: _JumpOp_name[8:11], |
|
| 34 |
+ 64: _JumpOp_name[11:15], |
|
| 35 |
+ 80: _JumpOp_name[15:18], |
|
| 36 |
+ 96: _JumpOp_name[18:22], |
|
| 37 |
+ 112: _JumpOp_name[22:26], |
|
| 38 |
+ 128: _JumpOp_name[26:30], |
|
| 39 |
+ 144: _JumpOp_name[30:34], |
|
| 40 |
+ 160: _JumpOp_name[34:37], |
|
| 41 |
+ 176: _JumpOp_name[37:40], |
|
| 42 |
+ 192: _JumpOp_name[40:44], |
|
| 43 |
+ 208: _JumpOp_name[44:48], |
|
| 44 |
+ 255: _JumpOp_name[48:61], |
|
| 45 |
+} |
|
| 46 |
+ |
|
| 47 |
+func (i JumpOp) String() string {
|
|
| 48 |
+ if str, ok := _JumpOp_map[i]; ok {
|
|
| 49 |
+ return str |
|
| 50 |
+ } |
|
| 51 |
+ return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 52 |
+} |
| 0 | 53 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,189 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+//go:generate stringer -output load_store_string.go -type=Mode,Size |
|
| 3 |
+ |
|
| 4 |
+// Mode for load and store operations |
|
| 5 |
+// |
|
| 6 |
+// msb lsb |
|
| 7 |
+// +---+--+---+ |
|
| 8 |
+// |MDE|sz|cls| |
|
| 9 |
+// +---+--+---+ |
|
| 10 |
+type Mode uint8 |
|
| 11 |
+ |
|
| 12 |
+const modeMask OpCode = 0xe0 |
|
| 13 |
+ |
|
| 14 |
+const ( |
|
| 15 |
+ // InvalidMode is returned by getters when invoked |
|
| 16 |
+ // on non load / store OpCodes |
|
| 17 |
+ InvalidMode Mode = 0xff |
|
| 18 |
+ // ImmMode - immediate value |
|
| 19 |
+ ImmMode Mode = 0x00 |
|
| 20 |
+ // AbsMode - immediate value + offset |
|
| 21 |
+ AbsMode Mode = 0x20 |
|
| 22 |
+ // IndMode - indirect (imm+src) |
|
| 23 |
+ IndMode Mode = 0x40 |
|
| 24 |
+ // MemMode - load from memory |
|
| 25 |
+ MemMode Mode = 0x60 |
|
| 26 |
+ // XAddMode - add atomically across processors. |
|
| 27 |
+ XAddMode Mode = 0xc0 |
|
| 28 |
+) |
|
| 29 |
+ |
|
| 30 |
+// Size of load and store operations |
|
| 31 |
+// |
|
| 32 |
+// msb lsb |
|
| 33 |
+// +---+--+---+ |
|
| 34 |
+// |mde|SZ|cls| |
|
| 35 |
+// +---+--+---+ |
|
| 36 |
+type Size uint8 |
|
| 37 |
+ |
|
| 38 |
+const sizeMask OpCode = 0x18 |
|
| 39 |
+ |
|
| 40 |
+const ( |
|
| 41 |
+ // InvalidSize is returned by getters when invoked |
|
| 42 |
+ // on non load / store OpCodes |
|
| 43 |
+ InvalidSize Size = 0xff |
|
| 44 |
+ // DWord - double word; 64 bits |
|
| 45 |
+ DWord Size = 0x18 |
|
| 46 |
+ // Word - word; 32 bits |
|
| 47 |
+ Word Size = 0x00 |
|
| 48 |
+ // Half - half-word; 16 bits |
|
| 49 |
+ Half Size = 0x08 |
|
| 50 |
+ // Byte - byte; 8 bits |
|
| 51 |
+ Byte Size = 0x10 |
|
| 52 |
+) |
|
| 53 |
+ |
|
| 54 |
+// Sizeof returns the size in bytes. |
|
| 55 |
+func (s Size) Sizeof() int {
|
|
| 56 |
+ switch s {
|
|
| 57 |
+ case DWord: |
|
| 58 |
+ return 8 |
|
| 59 |
+ case Word: |
|
| 60 |
+ return 4 |
|
| 61 |
+ case Half: |
|
| 62 |
+ return 2 |
|
| 63 |
+ case Byte: |
|
| 64 |
+ return 1 |
|
| 65 |
+ default: |
|
| 66 |
+ return -1 |
|
| 67 |
+ } |
|
| 68 |
+} |
|
| 69 |
+ |
|
| 70 |
+// LoadMemOp returns the OpCode to load a value of given size from memory. |
|
| 71 |
+func LoadMemOp(size Size) OpCode {
|
|
| 72 |
+ return OpCode(LdXClass).SetMode(MemMode).SetSize(size) |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+// LoadMem emits `dst = *(size *)(src + offset)`. |
|
| 76 |
+func LoadMem(dst, src Register, offset int16, size Size) Instruction {
|
|
| 77 |
+ return Instruction{
|
|
| 78 |
+ OpCode: LoadMemOp(size), |
|
| 79 |
+ Dst: dst, |
|
| 80 |
+ Src: src, |
|
| 81 |
+ Offset: offset, |
|
| 82 |
+ } |
|
| 83 |
+} |
|
| 84 |
+ |
|
| 85 |
+// LoadImmOp returns the OpCode to load an immediate of given size. |
|
| 86 |
+// |
|
| 87 |
+// As of kernel 4.20, only DWord size is accepted. |
|
| 88 |
+func LoadImmOp(size Size) OpCode {
|
|
| 89 |
+ return OpCode(LdClass).SetMode(ImmMode).SetSize(size) |
|
| 90 |
+} |
|
| 91 |
+ |
|
| 92 |
+// LoadImm emits `dst = (size)value`. |
|
| 93 |
+// |
|
| 94 |
+// As of kernel 4.20, only DWord size is accepted. |
|
| 95 |
+func LoadImm(dst Register, value int64, size Size) Instruction {
|
|
| 96 |
+ return Instruction{
|
|
| 97 |
+ OpCode: LoadImmOp(size), |
|
| 98 |
+ Dst: dst, |
|
| 99 |
+ Constant: value, |
|
| 100 |
+ } |
|
| 101 |
+} |
|
| 102 |
+ |
|
| 103 |
+// LoadMapPtr stores a pointer to a map in dst. |
|
| 104 |
+func LoadMapPtr(dst Register, fd int) Instruction {
|
|
| 105 |
+ if fd < 0 {
|
|
| 106 |
+ return Instruction{OpCode: InvalidOpCode}
|
|
| 107 |
+ } |
|
| 108 |
+ |
|
| 109 |
+ return Instruction{
|
|
| 110 |
+ OpCode: LoadImmOp(DWord), |
|
| 111 |
+ Dst: dst, |
|
| 112 |
+ Src: R1, |
|
| 113 |
+ Constant: int64(fd), |
|
| 114 |
+ } |
|
| 115 |
+} |
|
| 116 |
+ |
|
| 117 |
+// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff. |
|
| 118 |
+func LoadIndOp(size Size) OpCode {
|
|
| 119 |
+ return OpCode(LdClass).SetMode(IndMode).SetSize(size) |
|
| 120 |
+} |
|
| 121 |
+ |
|
| 122 |
+// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`. |
|
| 123 |
+func LoadInd(dst, src Register, offset int32, size Size) Instruction {
|
|
| 124 |
+ return Instruction{
|
|
| 125 |
+ OpCode: LoadIndOp(size), |
|
| 126 |
+ Dst: dst, |
|
| 127 |
+ Src: src, |
|
| 128 |
+ Constant: int64(offset), |
|
| 129 |
+ } |
|
| 130 |
+} |
|
| 131 |
+ |
|
| 132 |
+// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff. |
|
| 133 |
+func LoadAbsOp(size Size) OpCode {
|
|
| 134 |
+ return OpCode(LdClass).SetMode(AbsMode).SetSize(size) |
|
| 135 |
+} |
|
| 136 |
+ |
|
| 137 |
+// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`. |
|
| 138 |
+func LoadAbs(offset int32, size Size) Instruction {
|
|
| 139 |
+ return Instruction{
|
|
| 140 |
+ OpCode: LoadAbsOp(size), |
|
| 141 |
+ Dst: R0, |
|
| 142 |
+ Constant: int64(offset), |
|
| 143 |
+ } |
|
| 144 |
+} |
|
| 145 |
+ |
|
| 146 |
+// StoreMemOp returns the OpCode for storing a register of given size in memory. |
|
| 147 |
+func StoreMemOp(size Size) OpCode {
|
|
| 148 |
+ return OpCode(StXClass).SetMode(MemMode).SetSize(size) |
|
| 149 |
+} |
|
| 150 |
+ |
|
| 151 |
+// StoreMem emits `*(size *)(dst + offset) = src` |
|
| 152 |
+func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
|
|
| 153 |
+ return Instruction{
|
|
| 154 |
+ OpCode: StoreMemOp(size), |
|
| 155 |
+ Dst: dst, |
|
| 156 |
+ Src: src, |
|
| 157 |
+ Offset: offset, |
|
| 158 |
+ } |
|
| 159 |
+} |
|
| 160 |
+ |
|
| 161 |
+// StoreImmOp returns the OpCode for storing an immediate of given size in memory. |
|
| 162 |
+func StoreImmOp(size Size) OpCode {
|
|
| 163 |
+ return OpCode(StClass).SetMode(MemMode).SetSize(size) |
|
| 164 |
+} |
|
| 165 |
+ |
|
| 166 |
+// StoreImm emits `*(size *)(dst + offset) = value`. |
|
| 167 |
+func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
|
|
| 168 |
+ return Instruction{
|
|
| 169 |
+ OpCode: StoreImmOp(size), |
|
| 170 |
+ Dst: dst, |
|
| 171 |
+ Offset: offset, |
|
| 172 |
+ Constant: value, |
|
| 173 |
+ } |
|
| 174 |
+} |
|
| 175 |
+ |
|
| 176 |
+// StoreXAddOp returns the OpCode to atomically add a register to a value in memory. |
|
| 177 |
+func StoreXAddOp(size Size) OpCode {
|
|
| 178 |
+ return OpCode(StXClass).SetMode(XAddMode).SetSize(size) |
|
| 179 |
+} |
|
| 180 |
+ |
|
| 181 |
+// StoreXAdd atomically adds src to *dst. |
|
| 182 |
+func StoreXAdd(dst, src Register, size Size) Instruction {
|
|
| 183 |
+ return Instruction{
|
|
| 184 |
+ OpCode: StoreXAddOp(size), |
|
| 185 |
+ Dst: dst, |
|
| 186 |
+ Src: src, |
|
| 187 |
+ } |
|
| 188 |
+} |
| 0 | 189 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,80 @@ |
| 0 |
+// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT. |
|
| 1 |
+ |
|
| 2 |
+package asm |
|
| 3 |
+ |
|
| 4 |
+import "strconv" |
|
| 5 |
+ |
|
| 6 |
+func _() {
|
|
| 7 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 8 |
+ // Re-run the stringer command to generate them again. |
|
| 9 |
+ var x [1]struct{}
|
|
| 10 |
+ _ = x[InvalidMode-255] |
|
| 11 |
+ _ = x[ImmMode-0] |
|
| 12 |
+ _ = x[AbsMode-32] |
|
| 13 |
+ _ = x[IndMode-64] |
|
| 14 |
+ _ = x[MemMode-96] |
|
| 15 |
+ _ = x[XAddMode-192] |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+const ( |
|
| 19 |
+ _Mode_name_0 = "ImmMode" |
|
| 20 |
+ _Mode_name_1 = "AbsMode" |
|
| 21 |
+ _Mode_name_2 = "IndMode" |
|
| 22 |
+ _Mode_name_3 = "MemMode" |
|
| 23 |
+ _Mode_name_4 = "XAddMode" |
|
| 24 |
+ _Mode_name_5 = "InvalidMode" |
|
| 25 |
+) |
|
| 26 |
+ |
|
| 27 |
+func (i Mode) String() string {
|
|
| 28 |
+ switch {
|
|
| 29 |
+ case i == 0: |
|
| 30 |
+ return _Mode_name_0 |
|
| 31 |
+ case i == 32: |
|
| 32 |
+ return _Mode_name_1 |
|
| 33 |
+ case i == 64: |
|
| 34 |
+ return _Mode_name_2 |
|
| 35 |
+ case i == 96: |
|
| 36 |
+ return _Mode_name_3 |
|
| 37 |
+ case i == 192: |
|
| 38 |
+ return _Mode_name_4 |
|
| 39 |
+ case i == 255: |
|
| 40 |
+ return _Mode_name_5 |
|
| 41 |
+ default: |
|
| 42 |
+ return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 43 |
+ } |
|
| 44 |
+} |
|
| 45 |
+func _() {
|
|
| 46 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 47 |
+ // Re-run the stringer command to generate them again. |
|
| 48 |
+ var x [1]struct{}
|
|
| 49 |
+ _ = x[InvalidSize-255] |
|
| 50 |
+ _ = x[DWord-24] |
|
| 51 |
+ _ = x[Word-0] |
|
| 52 |
+ _ = x[Half-8] |
|
| 53 |
+ _ = x[Byte-16] |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 56 |
+const ( |
|
| 57 |
+ _Size_name_0 = "Word" |
|
| 58 |
+ _Size_name_1 = "Half" |
|
| 59 |
+ _Size_name_2 = "Byte" |
|
| 60 |
+ _Size_name_3 = "DWord" |
|
| 61 |
+ _Size_name_4 = "InvalidSize" |
|
| 62 |
+) |
|
| 63 |
+ |
|
| 64 |
+func (i Size) String() string {
|
|
| 65 |
+ switch {
|
|
| 66 |
+ case i == 0: |
|
| 67 |
+ return _Size_name_0 |
|
| 68 |
+ case i == 8: |
|
| 69 |
+ return _Size_name_1 |
|
| 70 |
+ case i == 16: |
|
| 71 |
+ return _Size_name_2 |
|
| 72 |
+ case i == 24: |
|
| 73 |
+ return _Size_name_3 |
|
| 74 |
+ case i == 255: |
|
| 75 |
+ return _Size_name_4 |
|
| 76 |
+ default: |
|
| 77 |
+ return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 78 |
+ } |
|
| 79 |
+} |
| 0 | 80 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,237 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "strings" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
+//go:generate stringer -output opcode_string.go -type=Class |
|
| 8 |
+ |
|
| 9 |
+type encoding int |
|
| 10 |
+ |
|
| 11 |
+const ( |
|
| 12 |
+ unknownEncoding encoding = iota |
|
| 13 |
+ loadOrStore |
|
| 14 |
+ jumpOrALU |
|
| 15 |
+) |
|
| 16 |
+ |
|
| 17 |
+// Class of operations |
|
| 18 |
+// |
|
| 19 |
+// msb lsb |
|
| 20 |
+// +---+--+---+ |
|
| 21 |
+// | ?? |CLS| |
|
| 22 |
+// +---+--+---+ |
|
| 23 |
+type Class uint8 |
|
| 24 |
+ |
|
| 25 |
+const classMask OpCode = 0x07 |
|
| 26 |
+ |
|
| 27 |
+const ( |
|
| 28 |
+ // LdClass is the class of non-standard load operations (BPF_LD) |
|
| 29 |
+ LdClass Class = 0x00 |
|
| 30 |
+ // LdXClass is the class of load-into-register operations (BPF_LDX) |
|
| 31 |
+ LdXClass Class = 0x01 |
|
| 32 |
+ // StClass is the class of store-from-immediate operations (BPF_ST) |
|
| 33 |
+ StClass Class = 0x02 |
|
| 34 |
+ // StXClass is the class of store-from-register operations (BPF_STX) |
|
| 35 |
+ StXClass Class = 0x03 |
|
| 36 |
+ // ALUClass arithmetic operators |
|
| 37 |
+ ALUClass Class = 0x04 |
|
| 38 |
+ // JumpClass jump operators |
|
| 39 |
+ JumpClass Class = 0x05 |
|
| 40 |
+ // ALU64Class arithmetic in 64 bit mode |
|
| 41 |
+ ALU64Class Class = 0x07 |
|
| 42 |
+) |
|
| 43 |
+ |
|
| 44 |
+func (cls Class) encoding() encoding {
|
|
| 45 |
+ switch cls {
|
|
| 46 |
+ case LdClass, LdXClass, StClass, StXClass: |
|
| 47 |
+ return loadOrStore |
|
| 48 |
+ case ALU64Class, ALUClass, JumpClass: |
|
| 49 |
+ return jumpOrALU |
|
| 50 |
+ default: |
|
| 51 |
+ return unknownEncoding |
|
| 52 |
+ } |
|
| 53 |
+} |
|
| 54 |
+ |
|
| 55 |
+// OpCode is a packed eBPF opcode. |
|
| 56 |
+// |
|
| 57 |
+// Its encoding is defined by a Class value: |
|
| 58 |
+// |
|
| 59 |
+// msb lsb |
|
| 60 |
+// +----+-+---+ |
|
| 61 |
+// | ???? |CLS| |
|
| 62 |
+// +----+-+---+ |
|
| 63 |
+type OpCode uint8 |
|
| 64 |
+ |
|
| 65 |
+// InvalidOpCode is returned by setters on OpCode |
|
| 66 |
+const InvalidOpCode OpCode = 0xff |
|
| 67 |
+ |
|
| 68 |
+// marshalledInstructions returns the number of BPF instructions required |
|
| 69 |
+// to encode this opcode. |
|
| 70 |
+func (op OpCode) marshalledInstructions() int {
|
|
| 71 |
+ if op == LoadImmOp(DWord) {
|
|
| 72 |
+ return 2 |
|
| 73 |
+ } |
|
| 74 |
+ return 1 |
|
| 75 |
+} |
|
| 76 |
+ |
|
| 77 |
+func (op OpCode) isDWordLoad() bool {
|
|
| 78 |
+ return op == LoadImmOp(DWord) |
|
| 79 |
+} |
|
| 80 |
+ |
|
| 81 |
+// Class returns the class of operation. |
|
| 82 |
+func (op OpCode) Class() Class {
|
|
| 83 |
+ return Class(op & classMask) |
|
| 84 |
+} |
|
| 85 |
+ |
|
| 86 |
+// Mode returns the mode for load and store operations. |
|
| 87 |
+func (op OpCode) Mode() Mode {
|
|
| 88 |
+ if op.Class().encoding() != loadOrStore {
|
|
| 89 |
+ return InvalidMode |
|
| 90 |
+ } |
|
| 91 |
+ return Mode(op & modeMask) |
|
| 92 |
+} |
|
| 93 |
+ |
|
| 94 |
+// Size returns the size for load and store operations. |
|
| 95 |
+func (op OpCode) Size() Size {
|
|
| 96 |
+ if op.Class().encoding() != loadOrStore {
|
|
| 97 |
+ return InvalidSize |
|
| 98 |
+ } |
|
| 99 |
+ return Size(op & sizeMask) |
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+// Source returns the source for branch and ALU operations. |
|
| 103 |
+func (op OpCode) Source() Source {
|
|
| 104 |
+ if op.Class().encoding() != jumpOrALU || op.ALUOp() == Swap {
|
|
| 105 |
+ return InvalidSource |
|
| 106 |
+ } |
|
| 107 |
+ return Source(op & sourceMask) |
|
| 108 |
+} |
|
| 109 |
+ |
|
| 110 |
+// ALUOp returns the ALUOp. |
|
| 111 |
+func (op OpCode) ALUOp() ALUOp {
|
|
| 112 |
+ if op.Class().encoding() != jumpOrALU {
|
|
| 113 |
+ return InvalidALUOp |
|
| 114 |
+ } |
|
| 115 |
+ return ALUOp(op & aluMask) |
|
| 116 |
+} |
|
| 117 |
+ |
|
| 118 |
+// Endianness returns the Endianness for a byte swap instruction. |
|
| 119 |
+func (op OpCode) Endianness() Endianness {
|
|
| 120 |
+ if op.ALUOp() != Swap {
|
|
| 121 |
+ return InvalidEndian |
|
| 122 |
+ } |
|
| 123 |
+ return Endianness(op & endianMask) |
|
| 124 |
+} |
|
| 125 |
+ |
|
| 126 |
+// JumpOp returns the JumpOp. |
|
| 127 |
+func (op OpCode) JumpOp() JumpOp {
|
|
| 128 |
+ if op.Class().encoding() != jumpOrALU {
|
|
| 129 |
+ return InvalidJumpOp |
|
| 130 |
+ } |
|
| 131 |
+ return JumpOp(op & jumpMask) |
|
| 132 |
+} |
|
| 133 |
+ |
|
| 134 |
+// SetMode sets the mode on load and store operations. |
|
| 135 |
+// |
|
| 136 |
+// Returns InvalidOpCode if op is of the wrong class. |
|
| 137 |
+func (op OpCode) SetMode(mode Mode) OpCode {
|
|
| 138 |
+ if op.Class().encoding() != loadOrStore || !valid(OpCode(mode), modeMask) {
|
|
| 139 |
+ return InvalidOpCode |
|
| 140 |
+ } |
|
| 141 |
+ return (op & ^modeMask) | OpCode(mode) |
|
| 142 |
+} |
|
| 143 |
+ |
|
| 144 |
+// SetSize sets the size on load and store operations. |
|
| 145 |
+// |
|
| 146 |
+// Returns InvalidOpCode if op is of the wrong class. |
|
| 147 |
+func (op OpCode) SetSize(size Size) OpCode {
|
|
| 148 |
+ if op.Class().encoding() != loadOrStore || !valid(OpCode(size), sizeMask) {
|
|
| 149 |
+ return InvalidOpCode |
|
| 150 |
+ } |
|
| 151 |
+ return (op & ^sizeMask) | OpCode(size) |
|
| 152 |
+} |
|
| 153 |
+ |
|
| 154 |
+// SetSource sets the source on jump and ALU operations. |
|
| 155 |
+// |
|
| 156 |
+// Returns InvalidOpCode if op is of the wrong class. |
|
| 157 |
+func (op OpCode) SetSource(source Source) OpCode {
|
|
| 158 |
+ if op.Class().encoding() != jumpOrALU || !valid(OpCode(source), sourceMask) {
|
|
| 159 |
+ return InvalidOpCode |
|
| 160 |
+ } |
|
| 161 |
+ return (op & ^sourceMask) | OpCode(source) |
|
| 162 |
+} |
|
| 163 |
+ |
|
| 164 |
+// SetALUOp sets the ALUOp on ALU operations. |
|
| 165 |
+// |
|
| 166 |
+// Returns InvalidOpCode if op is of the wrong class. |
|
| 167 |
+func (op OpCode) SetALUOp(alu ALUOp) OpCode {
|
|
| 168 |
+ class := op.Class() |
|
| 169 |
+ if (class != ALUClass && class != ALU64Class) || !valid(OpCode(alu), aluMask) {
|
|
| 170 |
+ return InvalidOpCode |
|
| 171 |
+ } |
|
| 172 |
+ return (op & ^aluMask) | OpCode(alu) |
|
| 173 |
+} |
|
| 174 |
+ |
|
| 175 |
+// SetJumpOp sets the JumpOp on jump operations. |
|
| 176 |
+// |
|
| 177 |
+// Returns InvalidOpCode if op is of the wrong class. |
|
| 178 |
+func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
|
|
| 179 |
+ if op.Class() != JumpClass || !valid(OpCode(jump), jumpMask) {
|
|
| 180 |
+ return InvalidOpCode |
|
| 181 |
+ } |
|
| 182 |
+ return (op & ^jumpMask) | OpCode(jump) |
|
| 183 |
+} |
|
| 184 |
+ |
|
| 185 |
+func (op OpCode) String() string {
|
|
| 186 |
+ var f strings.Builder |
|
| 187 |
+ |
|
| 188 |
+ switch class := op.Class(); class {
|
|
| 189 |
+ case LdClass, LdXClass, StClass, StXClass: |
|
| 190 |
+ f.WriteString(strings.TrimSuffix(class.String(), "Class")) |
|
| 191 |
+ |
|
| 192 |
+ mode := op.Mode() |
|
| 193 |
+ f.WriteString(strings.TrimSuffix(mode.String(), "Mode")) |
|
| 194 |
+ |
|
| 195 |
+ switch op.Size() {
|
|
| 196 |
+ case DWord: |
|
| 197 |
+ f.WriteString("DW")
|
|
| 198 |
+ case Word: |
|
| 199 |
+ f.WriteString("W")
|
|
| 200 |
+ case Half: |
|
| 201 |
+ f.WriteString("H")
|
|
| 202 |
+ case Byte: |
|
| 203 |
+ f.WriteString("B")
|
|
| 204 |
+ } |
|
| 205 |
+ |
|
| 206 |
+ case ALU64Class, ALUClass: |
|
| 207 |
+ f.WriteString(op.ALUOp().String()) |
|
| 208 |
+ |
|
| 209 |
+ if op.ALUOp() == Swap {
|
|
| 210 |
+ // Width for Endian is controlled by Constant |
|
| 211 |
+ f.WriteString(op.Endianness().String()) |
|
| 212 |
+ } else {
|
|
| 213 |
+ if class == ALUClass {
|
|
| 214 |
+ f.WriteString("32")
|
|
| 215 |
+ } |
|
| 216 |
+ |
|
| 217 |
+ f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) |
|
| 218 |
+ } |
|
| 219 |
+ |
|
| 220 |
+ case JumpClass: |
|
| 221 |
+ f.WriteString(op.JumpOp().String()) |
|
| 222 |
+ if jop := op.JumpOp(); jop != Exit && jop != Call {
|
|
| 223 |
+ f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) |
|
| 224 |
+ } |
|
| 225 |
+ |
|
| 226 |
+ default: |
|
| 227 |
+ fmt.Fprintf(&f, "%#x", op) |
|
| 228 |
+ } |
|
| 229 |
+ |
|
| 230 |
+ return f.String() |
|
| 231 |
+} |
|
| 232 |
+ |
|
| 233 |
+// valid returns true if all bits in value are covered by mask. |
|
| 234 |
+func valid(value, mask OpCode) bool {
|
|
| 235 |
+ return value & ^mask == 0 |
|
| 236 |
+} |
| 0 | 237 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,38 @@ |
| 0 |
+// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT. |
|
| 1 |
+ |
|
| 2 |
+package asm |
|
| 3 |
+ |
|
| 4 |
+import "strconv" |
|
| 5 |
+ |
|
| 6 |
+func _() {
|
|
| 7 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 8 |
+ // Re-run the stringer command to generate them again. |
|
| 9 |
+ var x [1]struct{}
|
|
| 10 |
+ _ = x[LdClass-0] |
|
| 11 |
+ _ = x[LdXClass-1] |
|
| 12 |
+ _ = x[StClass-2] |
|
| 13 |
+ _ = x[StXClass-3] |
|
| 14 |
+ _ = x[ALUClass-4] |
|
| 15 |
+ _ = x[JumpClass-5] |
|
| 16 |
+ _ = x[ALU64Class-7] |
|
| 17 |
+} |
|
| 18 |
+ |
|
| 19 |
+const ( |
|
| 20 |
+ _Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass" |
|
| 21 |
+ _Class_name_1 = "ALU64Class" |
|
| 22 |
+) |
|
| 23 |
+ |
|
| 24 |
+var ( |
|
| 25 |
+ _Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47}
|
|
| 26 |
+) |
|
| 27 |
+ |
|
| 28 |
+func (i Class) String() string {
|
|
| 29 |
+ switch {
|
|
| 30 |
+ case 0 <= i && i <= 5: |
|
| 31 |
+ return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]] |
|
| 32 |
+ case i == 7: |
|
| 33 |
+ return _Class_name_1 |
|
| 34 |
+ default: |
|
| 35 |
+ return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 36 |
+ } |
|
| 37 |
+} |
| 0 | 38 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,42 @@ |
| 0 |
+package asm |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+) |
|
| 5 |
+ |
|
| 6 |
// Register is the source or destination of most operations.
type Register uint8

// The eleven registers, numbered consecutively from 0.
const (
	// R0 contains return values.
	R0 Register = iota

	// R1 through R5 are the registers for function arguments.
	R1
	R2
	R3
	R4
	R5

	// R6 through R9 are callee saved registers preserved by function calls.
	R6
	R7
	R8
	R9

	// R10 is the read-only frame pointer to access stack.
	R10

	// RFP is an alias for R10.
	RFP = R10
)

// String returns "rfp" for the frame pointer register and "r<N>" for every
// other register number N.
func (r Register) String() string {
	if r == RFP {
		return "rfp"
	}
	return fmt.Sprintf("r%d", uint8(r))
}
| 0 | 42 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,148 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "github.com/cilium/ebpf/asm" |
|
| 4 |
+ "github.com/pkg/errors" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
+// CollectionOptions control loading a collection into the kernel. |
|
| 8 |
+type CollectionOptions struct {
|
|
| 9 |
+ Programs ProgramOptions |
|
| 10 |
+} |
|
| 11 |
+ |
|
| 12 |
+// CollectionSpec describes a collection. |
|
| 13 |
+type CollectionSpec struct {
|
|
| 14 |
+ Maps map[string]*MapSpec |
|
| 15 |
+ Programs map[string]*ProgramSpec |
|
| 16 |
+} |
|
| 17 |
+ |
|
| 18 |
+// Copy returns a recursive copy of the spec. |
|
| 19 |
+func (cs *CollectionSpec) Copy() *CollectionSpec {
|
|
| 20 |
+ if cs == nil {
|
|
| 21 |
+ return nil |
|
| 22 |
+ } |
|
| 23 |
+ |
|
| 24 |
+ cpy := CollectionSpec{
|
|
| 25 |
+ Maps: make(map[string]*MapSpec, len(cs.Maps)), |
|
| 26 |
+ Programs: make(map[string]*ProgramSpec, len(cs.Programs)), |
|
| 27 |
+ } |
|
| 28 |
+ |
|
| 29 |
+ for name, spec := range cs.Maps {
|
|
| 30 |
+ cpy.Maps[name] = spec.Copy() |
|
| 31 |
+ } |
|
| 32 |
+ |
|
| 33 |
+ for name, spec := range cs.Programs {
|
|
| 34 |
+ cpy.Programs[name] = spec.Copy() |
|
| 35 |
+ } |
|
| 36 |
+ |
|
| 37 |
+ return &cpy |
|
| 38 |
+} |
|
| 39 |
+ |
|
| 40 |
+// Collection is a collection of Programs and Maps associated |
|
| 41 |
+// with their symbols |
|
| 42 |
+type Collection struct {
|
|
| 43 |
+ Programs map[string]*Program |
|
| 44 |
+ Maps map[string]*Map |
|
| 45 |
+} |
|
| 46 |
+ |
|
| 47 |
+// NewCollection creates a Collection from a specification. |
|
| 48 |
+// |
|
| 49 |
+// Only maps referenced by at least one of the programs are initialized. |
|
| 50 |
+func NewCollection(spec *CollectionSpec) (*Collection, error) {
|
|
| 51 |
+ return NewCollectionWithOptions(spec, CollectionOptions{})
|
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+// NewCollectionWithOptions creates a Collection from a specification. |
|
| 55 |
+// |
|
| 56 |
+// Only maps referenced by at least one of the programs are initialized. |
|
| 57 |
+func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) {
|
|
| 58 |
+ maps := make(map[string]*Map) |
|
| 59 |
+ for mapName, mapSpec := range spec.Maps {
|
|
| 60 |
+ m, err := NewMap(mapSpec) |
|
| 61 |
+ if err != nil {
|
|
| 62 |
+ return nil, errors.Wrapf(err, "map %s", mapName) |
|
| 63 |
+ } |
|
| 64 |
+ maps[mapName] = m |
|
| 65 |
+ } |
|
| 66 |
+ |
|
| 67 |
+ progs := make(map[string]*Program) |
|
| 68 |
+ for progName, origProgSpec := range spec.Programs {
|
|
| 69 |
+ progSpec := origProgSpec.Copy() |
|
| 70 |
+ |
|
| 71 |
+ // Rewrite any reference to a valid map. |
|
| 72 |
+ for i := range progSpec.Instructions {
|
|
| 73 |
+ var ( |
|
| 74 |
+ ins = &progSpec.Instructions[i] |
|
| 75 |
+ m = maps[ins.Reference] |
|
| 76 |
+ ) |
|
| 77 |
+ |
|
| 78 |
+ if ins.Reference == "" || m == nil {
|
|
| 79 |
+ continue |
|
| 80 |
+ } |
|
| 81 |
+ |
|
| 82 |
+ if ins.Src == asm.R1 {
|
|
| 83 |
+ // Don't overwrite maps already rewritten, users can |
|
| 84 |
+ // rewrite programs in the spec themselves |
|
| 85 |
+ continue |
|
| 86 |
+ } |
|
| 87 |
+ |
|
| 88 |
+ if err := ins.RewriteMapPtr(m.FD()); err != nil {
|
|
| 89 |
+ return nil, errors.Wrapf(err, "progam %s: map %s", progName, ins.Reference) |
|
| 90 |
+ } |
|
| 91 |
+ } |
|
| 92 |
+ |
|
| 93 |
+ prog, err := NewProgramWithOptions(progSpec, opts.Programs) |
|
| 94 |
+ if err != nil {
|
|
| 95 |
+ return nil, errors.Wrapf(err, "program %s", progName) |
|
| 96 |
+ } |
|
| 97 |
+ progs[progName] = prog |
|
| 98 |
+ } |
|
| 99 |
+ |
|
| 100 |
+ return &Collection{
|
|
| 101 |
+ progs, |
|
| 102 |
+ maps, |
|
| 103 |
+ }, nil |
|
| 104 |
+} |
|
| 105 |
+ |
|
| 106 |
+// LoadCollection parses an object file and converts it to a collection. |
|
| 107 |
+func LoadCollection(file string) (*Collection, error) {
|
|
| 108 |
+ spec, err := LoadCollectionSpec(file) |
|
| 109 |
+ if err != nil {
|
|
| 110 |
+ return nil, err |
|
| 111 |
+ } |
|
| 112 |
+ return NewCollection(spec) |
|
| 113 |
+} |
|
| 114 |
+ |
|
| 115 |
+// Close frees all maps and programs associated with the collection. |
|
| 116 |
+// |
|
| 117 |
+// The collection mustn't be used afterwards. |
|
| 118 |
+func (coll *Collection) Close() {
|
|
| 119 |
+ for _, prog := range coll.Programs {
|
|
| 120 |
+ prog.Close() |
|
| 121 |
+ } |
|
| 122 |
+ for _, m := range coll.Maps {
|
|
| 123 |
+ m.Close() |
|
| 124 |
+ } |
|
| 125 |
+} |
|
| 126 |
+ |
|
| 127 |
+// DetachMap removes the named map from the Collection. |
|
| 128 |
+// |
|
| 129 |
+// This means that a later call to Close() will not affect this map. |
|
| 130 |
+// |
|
| 131 |
+// Returns nil if no map of that name exists. |
|
| 132 |
+func (coll *Collection) DetachMap(name string) *Map {
|
|
| 133 |
+ m := coll.Maps[name] |
|
| 134 |
+ delete(coll.Maps, name) |
|
| 135 |
+ return m |
|
| 136 |
+} |
|
| 137 |
+ |
|
| 138 |
+// DetachProgram removes the named program from the Collection. |
|
| 139 |
+// |
|
| 140 |
+// This means that a later call to Close() will not affect this program. |
|
| 141 |
+// |
|
| 142 |
+// Returns nil if no program of that name exists. |
|
| 143 |
+func (coll *Collection) DetachProgram(name string) *Program {
|
|
| 144 |
+ p := coll.Programs[name] |
|
| 145 |
+ delete(coll.Programs, name) |
|
| 146 |
+ return p |
|
| 147 |
+} |
| 0 | 148 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,17 @@ |
| 0 |
+// Package ebpf is a toolkit for working with eBPF programs. |
|
| 1 |
+// |
|
| 2 |
+// eBPF programs are small snippets of code which are executed directly |
|
| 3 |
+// in a VM in the Linux kernel, which makes them very fast and flexible. |
|
| 4 |
+// Many Linux subsystems now accept eBPF programs. This makes it possible |
|
| 5 |
+// to implement highly application specific logic inside the kernel, |
|
| 6 |
+// without having to modify the actual kernel itself. |
|
| 7 |
+// |
|
| 8 |
+// This package is designed for long-running processes which |
|
| 9 |
+// want to use eBPF to implement part of their application logic. It has no |
|
| 10 |
+// run-time dependencies outside of the library and the Linux kernel itself. |
|
| 11 |
+// eBPF code should be compiled ahead of time using clang, and shipped with |
|
| 12 |
+// your application as any other resource. |
|
| 13 |
+// |
|
| 14 |
+// This package doesn't include code required to attach eBPF to Linux |
|
| 15 |
+// subsystems, since this varies per subsystem. |
|
| 16 |
+package ebpf |
| 0 | 17 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,392 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bytes" |
|
| 4 |
+ "debug/elf" |
|
| 5 |
+ "encoding/binary" |
|
| 6 |
+ "fmt" |
|
| 7 |
+ "io" |
|
| 8 |
+ "os" |
|
| 9 |
+ "strings" |
|
| 10 |
+ |
|
| 11 |
+ "github.com/cilium/ebpf/asm" |
|
| 12 |
+ |
|
| 13 |
+ "github.com/pkg/errors" |
|
| 14 |
+) |
|
| 15 |
+ |
|
| 16 |
+type elfCode struct {
|
|
| 17 |
+ *elf.File |
|
| 18 |
+ symbols []elf.Symbol |
|
| 19 |
+ symbolsPerSection map[elf.SectionIndex]map[uint64]string |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+// LoadCollectionSpec parses an ELF file into a CollectionSpec. |
|
| 23 |
+func LoadCollectionSpec(file string) (*CollectionSpec, error) {
|
|
| 24 |
+ f, err := os.Open(file) |
|
| 25 |
+ if err != nil {
|
|
| 26 |
+ return nil, err |
|
| 27 |
+ } |
|
| 28 |
+ defer f.Close() |
|
| 29 |
+ |
|
| 30 |
+ spec, err := LoadCollectionSpecFromReader(f) |
|
| 31 |
+ return spec, errors.Wrapf(err, "file %s", file) |
|
| 32 |
+} |
|
| 33 |
+ |
|
| 34 |
+// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec. |
|
| 35 |
+func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
|
|
| 36 |
+ f, err := elf.NewFile(code) |
|
| 37 |
+ if err != nil {
|
|
| 38 |
+ return nil, err |
|
| 39 |
+ } |
|
| 40 |
+ defer f.Close() |
|
| 41 |
+ |
|
| 42 |
+ symbols, err := f.Symbols() |
|
| 43 |
+ if err != nil {
|
|
| 44 |
+ return nil, errors.Wrap(err, "load symbols") |
|
| 45 |
+ } |
|
| 46 |
+ |
|
| 47 |
+ ec := &elfCode{f, symbols, symbolsPerSection(symbols)}
|
|
| 48 |
+ |
|
| 49 |
+ var licenseSection, versionSection *elf.Section |
|
| 50 |
+ progSections := make(map[elf.SectionIndex]*elf.Section) |
|
| 51 |
+ relSections := make(map[elf.SectionIndex]*elf.Section) |
|
| 52 |
+ mapSections := make(map[elf.SectionIndex]*elf.Section) |
|
| 53 |
+ for i, sec := range ec.Sections {
|
|
| 54 |
+ switch {
|
|
| 55 |
+ case strings.HasPrefix(sec.Name, "license"): |
|
| 56 |
+ licenseSection = sec |
|
| 57 |
+ case strings.HasPrefix(sec.Name, "version"): |
|
| 58 |
+ versionSection = sec |
|
| 59 |
+ case strings.HasPrefix(sec.Name, "maps"): |
|
| 60 |
+ mapSections[elf.SectionIndex(i)] = sec |
|
| 61 |
+ case sec.Type == elf.SHT_REL: |
|
| 62 |
+ if int(sec.Info) >= len(ec.Sections) {
|
|
| 63 |
+ return nil, errors.Errorf("found relocation section %v for missing section %v", i, sec.Info)
|
|
| 64 |
+ } |
|
| 65 |
+ |
|
| 66 |
+ // Store relocations under the section index of the target |
|
| 67 |
+ idx := elf.SectionIndex(sec.Info) |
|
| 68 |
+ if relSections[idx] != nil {
|
|
| 69 |
+ return nil, errors.Errorf("section %d has multiple relocation sections", idx)
|
|
| 70 |
+ } |
|
| 71 |
+ relSections[idx] = sec |
|
| 72 |
+ case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0: |
|
| 73 |
+ progSections[elf.SectionIndex(i)] = sec |
|
| 74 |
+ } |
|
| 75 |
+ } |
|
| 76 |
+ |
|
| 77 |
+ license, err := loadLicense(licenseSection) |
|
| 78 |
+ if err != nil {
|
|
| 79 |
+ return nil, errors.Wrap(err, "load license") |
|
| 80 |
+ } |
|
| 81 |
+ |
|
| 82 |
+ version, err := loadVersion(versionSection, ec.ByteOrder) |
|
| 83 |
+ if err != nil {
|
|
| 84 |
+ return nil, errors.Wrap(err, "load version") |
|
| 85 |
+ } |
|
| 86 |
+ |
|
| 87 |
+ maps, err := ec.loadMaps(mapSections) |
|
| 88 |
+ if err != nil {
|
|
| 89 |
+ return nil, errors.Wrap(err, "load maps") |
|
| 90 |
+ } |
|
| 91 |
+ |
|
| 92 |
+ progs, libs, err := ec.loadPrograms(progSections, relSections, license, version) |
|
| 93 |
+ if err != nil {
|
|
| 94 |
+ return nil, errors.Wrap(err, "load programs") |
|
| 95 |
+ } |
|
| 96 |
+ |
|
| 97 |
+ if len(libs) > 0 {
|
|
| 98 |
+ for name, prog := range progs {
|
|
| 99 |
+ prog.Instructions, err = link(prog.Instructions, libs...) |
|
| 100 |
+ if err != nil {
|
|
| 101 |
+ return nil, errors.Wrapf(err, "program %s", name) |
|
| 102 |
+ } |
|
| 103 |
+ } |
|
| 104 |
+ } |
|
| 105 |
+ |
|
| 106 |
+ return &CollectionSpec{maps, progs}, nil
|
|
| 107 |
+} |
|
| 108 |
+ |
|
| 109 |
+func loadLicense(sec *elf.Section) (string, error) {
|
|
| 110 |
+ if sec == nil {
|
|
| 111 |
+ return "", errors.Errorf("missing license section")
|
|
| 112 |
+ } |
|
| 113 |
+ data, err := sec.Data() |
|
| 114 |
+ if err != nil {
|
|
| 115 |
+ return "", errors.Wrapf(err, "section %s", sec.Name) |
|
| 116 |
+ } |
|
| 117 |
+ return string(bytes.TrimRight(data, "\000")), nil |
|
| 118 |
+} |
|
| 119 |
+ |
|
| 120 |
+func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
|
|
| 121 |
+ if sec == nil {
|
|
| 122 |
+ return 0, nil |
|
| 123 |
+ } |
|
| 124 |
+ |
|
| 125 |
+ var version uint32 |
|
| 126 |
+ err := binary.Read(sec.Open(), bo, &version) |
|
| 127 |
+ return version, errors.Wrapf(err, "section %s", sec.Name) |
|
| 128 |
+} |
|
| 129 |
+ |
|
| 130 |
+func (ec *elfCode) loadPrograms(progSections, relSections map[elf.SectionIndex]*elf.Section, license string, version uint32) (map[string]*ProgramSpec, []asm.Instructions, error) {
|
|
| 131 |
+ var ( |
|
| 132 |
+ progs = make(map[string]*ProgramSpec) |
|
| 133 |
+ libs []asm.Instructions |
|
| 134 |
+ ) |
|
| 135 |
+ for idx, prog := range progSections {
|
|
| 136 |
+ syms := ec.symbolsPerSection[idx] |
|
| 137 |
+ if len(syms) == 0 {
|
|
| 138 |
+ return nil, nil, errors.Errorf("section %v: missing symbols", prog.Name)
|
|
| 139 |
+ } |
|
| 140 |
+ |
|
| 141 |
+ funcSym := syms[0] |
|
| 142 |
+ if funcSym == "" {
|
|
| 143 |
+ return nil, nil, errors.Errorf("section %v: no label at start", prog.Name)
|
|
| 144 |
+ } |
|
| 145 |
+ |
|
| 146 |
+ rels, err := ec.loadRelocations(relSections[idx]) |
|
| 147 |
+ if err != nil {
|
|
| 148 |
+ return nil, nil, errors.Wrapf(err, "program %s: can't load relocations", funcSym) |
|
| 149 |
+ } |
|
| 150 |
+ |
|
| 151 |
+ insns, err := ec.loadInstructions(prog, syms, rels) |
|
| 152 |
+ if err != nil {
|
|
| 153 |
+ return nil, nil, errors.Wrapf(err, "program %s: can't unmarshal instructions", funcSym) |
|
| 154 |
+ } |
|
| 155 |
+ |
|
| 156 |
+ if progType, attachType := getProgType(prog.Name); progType == UnspecifiedProgram {
|
|
| 157 |
+ // There is no single name we can use for "library" sections, |
|
| 158 |
+ // since they may contain multiple functions. We'll decode the |
|
| 159 |
+ // labels they contain later on, and then link sections that way. |
|
| 160 |
+ libs = append(libs, insns) |
|
| 161 |
+ } else {
|
|
| 162 |
+ progs[funcSym] = &ProgramSpec{
|
|
| 163 |
+ Name: funcSym, |
|
| 164 |
+ Type: progType, |
|
| 165 |
+ AttachType: attachType, |
|
| 166 |
+ License: license, |
|
| 167 |
+ KernelVersion: version, |
|
| 168 |
+ Instructions: insns, |
|
| 169 |
+ } |
|
| 170 |
+ } |
|
| 171 |
+ } |
|
| 172 |
+ return progs, libs, nil |
|
| 173 |
+} |
|
| 174 |
+ |
|
| 175 |
+func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]string) (asm.Instructions, error) {
|
|
| 176 |
+ var ( |
|
| 177 |
+ r = section.Open() |
|
| 178 |
+ insns asm.Instructions |
|
| 179 |
+ ins asm.Instruction |
|
| 180 |
+ offset uint64 |
|
| 181 |
+ ) |
|
| 182 |
+ for {
|
|
| 183 |
+ n, err := ins.Unmarshal(r, ec.ByteOrder) |
|
| 184 |
+ if err == io.EOF {
|
|
| 185 |
+ return insns, nil |
|
| 186 |
+ } |
|
| 187 |
+ if err != nil {
|
|
| 188 |
+ return nil, errors.Wrapf(err, "offset %d", offset) |
|
| 189 |
+ } |
|
| 190 |
+ |
|
| 191 |
+ ins.Symbol = symbols[offset] |
|
| 192 |
+ ins.Reference = relocations[offset] |
|
| 193 |
+ |
|
| 194 |
+ insns = append(insns, ins) |
|
| 195 |
+ offset += n |
|
| 196 |
+ } |
|
| 197 |
+} |
|
| 198 |
+ |
|
| 199 |
+func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[string]*MapSpec, error) {
|
|
| 200 |
+ var ( |
|
| 201 |
+ maps = make(map[string]*MapSpec) |
|
| 202 |
+ b = make([]byte, 1) |
|
| 203 |
+ ) |
|
| 204 |
+ for idx, sec := range mapSections {
|
|
| 205 |
+ syms := ec.symbolsPerSection[idx] |
|
| 206 |
+ if len(syms) == 0 {
|
|
| 207 |
+ return nil, errors.Errorf("section %v: no symbols", sec.Name)
|
|
| 208 |
+ } |
|
| 209 |
+ |
|
| 210 |
+ if sec.Size%uint64(len(syms)) != 0 {
|
|
| 211 |
+ return nil, errors.Errorf("section %v: map descriptors are not of equal size", sec.Name)
|
|
| 212 |
+ } |
|
| 213 |
+ |
|
| 214 |
+ var ( |
|
| 215 |
+ r = sec.Open() |
|
| 216 |
+ size = sec.Size / uint64(len(syms)) |
|
| 217 |
+ ) |
|
| 218 |
+ for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size {
|
|
| 219 |
+ mapSym := syms[offset] |
|
| 220 |
+ if mapSym == "" {
|
|
| 221 |
+ fmt.Println(syms) |
|
| 222 |
+ return nil, errors.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
|
|
| 223 |
+ } |
|
| 224 |
+ |
|
| 225 |
+ if maps[mapSym] != nil {
|
|
| 226 |
+ return nil, errors.Errorf("section %v: map %v already exists", sec.Name, mapSym)
|
|
| 227 |
+ } |
|
| 228 |
+ |
|
| 229 |
+ lr := io.LimitReader(r, int64(size)) |
|
| 230 |
+ |
|
| 231 |
+ var spec MapSpec |
|
| 232 |
+ switch {
|
|
| 233 |
+ case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil: |
|
| 234 |
+ return nil, errors.Errorf("map %v: missing type", mapSym)
|
|
| 235 |
+ case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil: |
|
| 236 |
+ return nil, errors.Errorf("map %v: missing key size", mapSym)
|
|
| 237 |
+ case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil: |
|
| 238 |
+ return nil, errors.Errorf("map %v: missing value size", mapSym)
|
|
| 239 |
+ case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil: |
|
| 240 |
+ return nil, errors.Errorf("map %v: missing max entries", mapSym)
|
|
| 241 |
+ case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil: |
|
| 242 |
+ return nil, errors.Errorf("map %v: missing flags", mapSym)
|
|
| 243 |
+ } |
|
| 244 |
+ |
|
| 245 |
+ for {
|
|
| 246 |
+ _, err := lr.Read(b) |
|
| 247 |
+ if err == io.EOF {
|
|
| 248 |
+ break |
|
| 249 |
+ } |
|
| 250 |
+ if err != nil {
|
|
| 251 |
+ return nil, err |
|
| 252 |
+ } |
|
| 253 |
+ if b[0] != 0 {
|
|
| 254 |
+ return nil, errors.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
|
|
| 255 |
+ } |
|
| 256 |
+ } |
|
| 257 |
+ |
|
| 258 |
+ maps[mapSym] = &spec |
|
| 259 |
+ } |
|
| 260 |
+ } |
|
| 261 |
+ return maps, nil |
|
| 262 |
+} |
|
| 263 |
+ |
|
| 264 |
+func getProgType(v string) (ProgramType, AttachType) {
|
|
| 265 |
+ types := map[string]ProgramType{
|
|
| 266 |
+ // From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c#n3568 |
|
| 267 |
+ "socket": SocketFilter, |
|
| 268 |
+ "seccomp": SocketFilter, |
|
| 269 |
+ "kprobe/": Kprobe, |
|
| 270 |
+ "kretprobe/": Kprobe, |
|
| 271 |
+ "tracepoint/": TracePoint, |
|
| 272 |
+ "xdp": XDP, |
|
| 273 |
+ "perf_event": PerfEvent, |
|
| 274 |
+ "sockops": SockOps, |
|
| 275 |
+ "sk_skb": SkSKB, |
|
| 276 |
+ "sk_msg": SkMsg, |
|
| 277 |
+ "lirc_mode2": LircMode2, |
|
| 278 |
+ "flow_dissector": FlowDissector, |
|
| 279 |
+ |
|
| 280 |
+ "cgroup_skb/": CGroupSKB, |
|
| 281 |
+ "cgroup/dev": CGroupDevice, |
|
| 282 |
+ "cgroup/skb": CGroupSKB, |
|
| 283 |
+ "cgroup/sock": CGroupSock, |
|
| 284 |
+ "cgroup/post_bind": CGroupSock, |
|
| 285 |
+ "cgroup/bind": CGroupSockAddr, |
|
| 286 |
+ "cgroup/connect": CGroupSockAddr, |
|
| 287 |
+ "cgroup/sendmsg": CGroupSockAddr, |
|
| 288 |
+ "cgroup/recvmsg": CGroupSockAddr, |
|
| 289 |
+ "cgroup/sysctl": CGroupSysctl, |
|
| 290 |
+ "cgroup/getsockopt": CGroupSockopt, |
|
| 291 |
+ "cgroup/setsockopt": CGroupSockopt, |
|
| 292 |
+ "classifier": SchedCLS, |
|
| 293 |
+ "action": SchedACT, |
|
| 294 |
+ } |
|
| 295 |
+ attachTypes := map[string]AttachType{
|
|
| 296 |
+ "cgroup_skb/ingress": AttachCGroupInetIngress, |
|
| 297 |
+ "cgroup_skb/egress": AttachCGroupInetEgress, |
|
| 298 |
+ "cgroup/sock": AttachCGroupInetSockCreate, |
|
| 299 |
+ "cgroup/post_bind4": AttachCGroupInet4PostBind, |
|
| 300 |
+ "cgroup/post_bind6": AttachCGroupInet6PostBind, |
|
| 301 |
+ "cgroup/dev": AttachCGroupDevice, |
|
| 302 |
+ "sockops": AttachCGroupSockOps, |
|
| 303 |
+ "sk_skb/stream_parser": AttachSkSKBStreamParser, |
|
| 304 |
+ "sk_skb/stream_verdict": AttachSkSKBStreamVerdict, |
|
| 305 |
+ "sk_msg": AttachSkSKBStreamVerdict, |
|
| 306 |
+ "lirc_mode2": AttachLircMode2, |
|
| 307 |
+ "flow_dissector": AttachFlowDissector, |
|
| 308 |
+ "cgroup/bind4": AttachCGroupInet4Bind, |
|
| 309 |
+ "cgroup/bind6": AttachCGroupInet6Bind, |
|
| 310 |
+ "cgroup/connect4": AttachCGroupInet4Connect, |
|
| 311 |
+ "cgroup/connect6": AttachCGroupInet6Connect, |
|
| 312 |
+ "cgroup/sendmsg4": AttachCGroupUDP4Sendmsg, |
|
| 313 |
+ "cgroup/sendmsg6": AttachCGroupUDP6Sendmsg, |
|
| 314 |
+ "cgroup/recvmsg4": AttachCGroupUDP4Recvmsg, |
|
| 315 |
+ "cgroup/recvmsg6": AttachCGroupUDP6Recvmsg, |
|
| 316 |
+ "cgroup/sysctl": AttachCGroupSysctl, |
|
| 317 |
+ "cgroup/getsockopt": AttachCGroupGetsockopt, |
|
| 318 |
+ "cgroup/setsockopt": AttachCGroupSetsockopt, |
|
| 319 |
+ } |
|
| 320 |
+ attachType := AttachNone |
|
| 321 |
+ for k, t := range attachTypes {
|
|
| 322 |
+ if strings.HasPrefix(v, k) {
|
|
| 323 |
+ attachType = t |
|
| 324 |
+ } |
|
| 325 |
+ } |
|
| 326 |
+ |
|
| 327 |
+ for k, t := range types {
|
|
| 328 |
+ if strings.HasPrefix(v, k) {
|
|
| 329 |
+ return t, attachType |
|
| 330 |
+ } |
|
| 331 |
+ } |
|
| 332 |
+ return UnspecifiedProgram, AttachNone |
|
| 333 |
+} |
|
| 334 |
+ |
|
| 335 |
+func (ec *elfCode) loadRelocations(sec *elf.Section) (map[uint64]string, error) {
|
|
| 336 |
+ rels := make(map[uint64]string) |
|
| 337 |
+ if sec == nil {
|
|
| 338 |
+ return rels, nil |
|
| 339 |
+ } |
|
| 340 |
+ |
|
| 341 |
+ if sec.Entsize < 16 {
|
|
| 342 |
+ return nil, errors.New("rels are less than 16 bytes")
|
|
| 343 |
+ } |
|
| 344 |
+ |
|
| 345 |
+ r := sec.Open() |
|
| 346 |
+ for off := uint64(0); off < sec.Size; off += sec.Entsize {
|
|
| 347 |
+ ent := io.LimitReader(r, int64(sec.Entsize)) |
|
| 348 |
+ |
|
| 349 |
+ var rel elf.Rel64 |
|
| 350 |
+ if binary.Read(ent, ec.ByteOrder, &rel) != nil {
|
|
| 351 |
+ return nil, errors.Errorf("can't parse relocation at offset %v", off)
|
|
| 352 |
+ } |
|
| 353 |
+ |
|
| 354 |
+ symNo := int(elf.R_SYM64(rel.Info) - 1) |
|
| 355 |
+ if symNo >= len(ec.symbols) {
|
|
| 356 |
+ return nil, errors.Errorf("relocation at offset %d: symbol %v doesnt exist", off, symNo)
|
|
| 357 |
+ } |
|
| 358 |
+ |
|
| 359 |
+ rels[rel.Off] = ec.symbols[symNo].Name |
|
| 360 |
+ } |
|
| 361 |
+ return rels, nil |
|
| 362 |
+} |
|
| 363 |
+ |
|
| 364 |
// symbolsPerSection groups symbol names by the section they belong to and,
// within a section, by the symbol's value.
//
// Only named symbols of type STT_NOTYPE, STT_OBJECT or STT_FUNC are kept.
// When two kept symbols share a section and value, the later one in symbols
// wins.
func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]string {
	result := make(map[elf.SectionIndex]map[uint64]string)
	for _, sym := range symbols {
		switch elf.ST_TYPE(sym.Info) {
		case elf.STT_NOTYPE, elf.STT_OBJECT, elf.STT_FUNC:
			// Accepted. STT_NOTYPE is included because older versions of
			// LLVM don't tag symbols correctly.
		default:
			continue
		}

		if sym.Name == "" {
			continue
		}

		byValue := result[sym.Section]
		if byValue == nil {
			byValue = make(map[uint64]string)
			result[sym.Section] = byValue
		}
		byValue[sym.Value] = sym.Name
	}
	return result
}
| 0 | 8 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,64 @@ |
| 0 |
+package internal |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "os" |
|
| 5 |
+ "sync" |
|
| 6 |
+ |
|
| 7 |
+ "github.com/pkg/errors" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
// sysCPU caches the result of parsing /sys/devices/system/cpu/possible.
// PossibleCPUs fills it in through once, so the file is parsed at most one
// time and both the count and the error are remembered.
var sysCPU struct {
	once sync.Once
	err  error
	num  int
}
|
| 15 |
+ |
|
| 16 |
+// PossibleCPUs returns the max number of CPUs a system may possibly have |
|
| 17 |
+// Logical CPU numbers must be of the form 0-n |
|
| 18 |
+func PossibleCPUs() (int, error) {
|
|
| 19 |
+ sysCPU.once.Do(func() {
|
|
| 20 |
+ sysCPU.num, sysCPU.err = parseCPUs("/sys/devices/system/cpu/possible")
|
|
| 21 |
+ }) |
|
| 22 |
+ |
|
| 23 |
+ return sysCPU.num, sysCPU.err |
|
| 24 |
+} |
|
| 25 |
+ |
|
| 26 |
// onlineCPU caches the result of parsing /sys/devices/system/cpu/online.
// OnlineCPUs fills it in through once, so the file is parsed at most one
// time and both the count and the error are remembered.
var onlineCPU struct {
	once sync.Once
	err  error
	num  int
}
|
| 31 |
+ |
|
| 32 |
+// OnlineCPUs returns the number of currently online CPUs |
|
| 33 |
+// Logical CPU numbers must be of the form 0-n |
|
| 34 |
+func OnlineCPUs() (int, error) {
|
|
| 35 |
+ onlineCPU.once.Do(func() {
|
|
| 36 |
+ onlineCPU.num, onlineCPU.err = parseCPUs("/sys/devices/system/cpu/online")
|
|
| 37 |
+ }) |
|
| 38 |
+ |
|
| 39 |
+ return onlineCPU.num, onlineCPU.err |
|
| 40 |
+} |
|
| 41 |
+ |
|
| 42 |
+// parseCPUs parses the number of cpus from sysfs, |
|
| 43 |
+// in the format of "/sys/devices/system/cpu/{possible,online,..}.
|
|
| 44 |
+// Logical CPU numbers must be of the form 0-n |
|
| 45 |
+func parseCPUs(path string) (int, error) {
|
|
| 46 |
+ file, err := os.Open(path) |
|
| 47 |
+ if err != nil {
|
|
| 48 |
+ return 0, err |
|
| 49 |
+ } |
|
| 50 |
+ defer file.Close() |
|
| 51 |
+ |
|
| 52 |
+ var low, high int |
|
| 53 |
+ n, _ := fmt.Fscanf(file, "%d-%d", &low, &high) |
|
| 54 |
+ if n < 1 || low != 0 {
|
|
| 55 |
+ return 0, errors.Wrapf(err, "%s has unknown format", path) |
|
| 56 |
+ } |
|
| 57 |
+ if n == 1 {
|
|
| 58 |
+ high = low |
|
| 59 |
+ } |
|
| 60 |
+ |
|
| 61 |
+ // cpus is 0 indexed |
|
| 62 |
+ return high + 1, nil |
|
| 63 |
+} |
| 0 | 64 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,24 @@ |
| 0 |
+package internal |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "encoding/binary" |
|
| 4 |
+ "unsafe" |
|
| 5 |
+) |
|
| 6 |
+ |
|
| 7 |
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness. It is assigned in this package's init
// function.
var NativeEndian binary.ByteOrder
|
| 10 |
+ |
|
| 11 |
+func init() {
|
|
| 12 |
+ if isBigEndian() {
|
|
| 13 |
+ NativeEndian = binary.BigEndian |
|
| 14 |
+ } else {
|
|
| 15 |
+ NativeEndian = binary.LittleEndian |
|
| 16 |
+ } |
|
| 17 |
+} |
|
| 18 |
+ |
|
| 19 |
// isBigEndian reports whether the host stores the most significant byte of an
// integer first. It inspects the first byte in memory of the value 1: on a
// big endian machine that byte is zero.
func isBigEndian() (ret bool) {
	probe := int(0x1)
	firstByte := *(*byte)(unsafe.Pointer(&probe))
	return firstByte == 0
}
| 0 | 24 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,85 @@ |
| 0 |
+package internal |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "sync" |
|
| 5 |
+ |
|
| 6 |
+ "github.com/pkg/errors" |
|
| 7 |
+) |
|
| 8 |
+ |
|
| 9 |
// UnsupportedFeatureError is returned by FeatureTest() functions when the
// wrapped probe reports that a feature is not available.
type UnsupportedFeatureError struct {
	// The minimum Linux mainline version required for this feature.
	// Used for the error string, and for sanity checking during testing.
	MinimumVersion Version

	// The name of the feature that isn't supported.
	Name string
}
|
| 18 |
+ |
|
| 19 |
+func (ufe *UnsupportedFeatureError) Error() string {
|
|
| 20 |
+ return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion)
|
|
| 21 |
+} |
|
| 22 |
+ |
|
| 23 |
+// FeatureTest wraps a function so that it is run at most once. |
|
| 24 |
+// |
|
| 25 |
+// name should identify the tested feature, while version must be in the |
|
| 26 |
+// form Major.Minor[.Patch]. |
|
| 27 |
+// |
|
| 28 |
+// Returns a descriptive UnsupportedFeatureError if the feature is not available. |
|
| 29 |
+func FeatureTest(name, version string, fn func() bool) func() error {
|
|
| 30 |
+ v, err := NewVersion(version) |
|
| 31 |
+ if err != nil {
|
|
| 32 |
+ return func() error { return err }
|
|
| 33 |
+ } |
|
| 34 |
+ |
|
| 35 |
+ var ( |
|
| 36 |
+ once sync.Once |
|
| 37 |
+ result error |
|
| 38 |
+ ) |
|
| 39 |
+ |
|
| 40 |
+ return func() error {
|
|
| 41 |
+ once.Do(func() {
|
|
| 42 |
+ if !fn() {
|
|
| 43 |
+ result = &UnsupportedFeatureError{
|
|
| 44 |
+ MinimumVersion: v, |
|
| 45 |
+ Name: name, |
|
| 46 |
+ } |
|
| 47 |
+ } |
|
| 48 |
+ }) |
|
| 49 |
+ return result |
|
| 50 |
+ } |
|
| 51 |
+} |
|
| 52 |
+ |
|
| 53 |
// A Version in the form Major.Minor.Patch.
//
// Index 0 holds Major, index 1 Minor and index 2 Patch.
type Version [3]uint16
|
| 55 |
+ |
|
| 56 |
+// NewVersion creates a version from a string like "Major.Minor.Patch". |
|
| 57 |
+// |
|
| 58 |
+// Patch is optional. |
|
| 59 |
+func NewVersion(ver string) (Version, error) {
|
|
| 60 |
+ var major, minor, patch uint16 |
|
| 61 |
+ n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch) |
|
| 62 |
+ if n < 2 {
|
|
| 63 |
+ return Version{}, errors.Errorf("invalid version: %s", ver)
|
|
| 64 |
+ } |
|
| 65 |
+ return Version{major, minor, patch}, nil
|
|
| 66 |
+} |
|
| 67 |
+ |
|
| 68 |
+func (v Version) String() string {
|
|
| 69 |
+ if v[2] == 0 {
|
|
| 70 |
+ return fmt.Sprintf("v%d.%d", v[0], v[1])
|
|
| 71 |
+ } |
|
| 72 |
+ return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2])
|
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+// Less returns true if the version is less than another version. |
|
| 76 |
+func (v Version) Less(other Version) bool {
|
|
| 77 |
+ for i, a := range v {
|
|
| 78 |
+ if a == other[i] {
|
|
| 79 |
+ continue |
|
| 80 |
+ } |
|
| 81 |
+ return a < other[i] |
|
| 82 |
+ } |
|
| 83 |
+ return false |
|
| 84 |
+} |
| 0 | 85 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,127 @@ |
| 0 |
+// +build linux |
|
| 1 |
+ |
|
| 2 |
+package unix |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "syscall" |
|
| 6 |
+ |
|
| 7 |
+ linux "golang.org/x/sys/unix" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
// Constants re-exported from golang.org/x/sys/unix, which this file imports
// under the name linux.
const (
	ENOENT                   = linux.ENOENT
	EAGAIN                   = linux.EAGAIN
	ENOSPC                   = linux.ENOSPC
	EINVAL                   = linux.EINVAL
	EPOLLIN                  = linux.EPOLLIN
	BPF_OBJ_NAME_LEN         = linux.BPF_OBJ_NAME_LEN
	BPF_TAG_SIZE             = linux.BPF_TAG_SIZE
	SYS_BPF                  = linux.SYS_BPF
	F_DUPFD_CLOEXEC          = linux.F_DUPFD_CLOEXEC
	EPOLL_CTL_ADD            = linux.EPOLL_CTL_ADD
	EPOLL_CLOEXEC            = linux.EPOLL_CLOEXEC
	O_CLOEXEC                = linux.O_CLOEXEC
	O_NONBLOCK               = linux.O_NONBLOCK
	PROT_READ                = linux.PROT_READ
	PROT_WRITE               = linux.PROT_WRITE
	MAP_SHARED               = linux.MAP_SHARED
	PERF_TYPE_SOFTWARE       = linux.PERF_TYPE_SOFTWARE
	PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
	PerfBitWatermark         = linux.PerfBitWatermark
	PERF_SAMPLE_RAW          = linux.PERF_SAMPLE_RAW
	PERF_FLAG_FD_CLOEXEC     = linux.PERF_FLAG_FD_CLOEXEC
	RLIM_INFINITY            = linux.RLIM_INFINITY
)
|
| 34 |
+ |
|
| 35 |
// Statfs_t is an alias for golang.org/x/sys/unix.Statfs_t.
type Statfs_t = linux.Statfs_t
|
| 37 |
+ |
|
| 38 |
// Rlimit is an alias for golang.org/x/sys/unix.Rlimit.
type Rlimit = linux.Rlimit
|
| 40 |
+ |
|
| 41 |
// Setrlimit forwards its arguments to golang.org/x/sys/unix.Setrlimit and
// returns its result.
func Setrlimit(resource int, rlim *Rlimit) (err error) {
	return linux.Setrlimit(resource, rlim)
}
|
| 45 |
+ |
|
| 46 |
// Syscall forwards its arguments to golang.org/x/sys/unix.Syscall and returns
// its results.
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
	return linux.Syscall(trap, a1, a2, a3)
}
|
| 50 |
+ |
|
| 51 |
// FcntlInt forwards its arguments to golang.org/x/sys/unix.FcntlInt and
// returns its results.
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
	return linux.FcntlInt(fd, cmd, arg)
}
|
| 55 |
+ |
|
| 56 |
// Statfs forwards its arguments to golang.org/x/sys/unix.Statfs and returns
// its result.
func Statfs(path string, buf *Statfs_t) (err error) {
	return linux.Statfs(path, buf)
}
|
| 60 |
+ |
|
| 61 |
// Close forwards fd to golang.org/x/sys/unix.Close and returns its result.
func Close(fd int) (err error) {
	return linux.Close(fd)
}
|
| 65 |
+ |
|
| 66 |
// EpollEvent is an alias for golang.org/x/sys/unix.EpollEvent.
type EpollEvent = linux.EpollEvent
|
| 68 |
+ |
|
| 69 |
// EpollWait forwards its arguments to golang.org/x/sys/unix.EpollWait and
// returns its results.
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
	return linux.EpollWait(epfd, events, msec)
}
|
| 73 |
+ |
|
| 74 |
// EpollCtl forwards its arguments to golang.org/x/sys/unix.EpollCtl and
// returns its result.
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
	return linux.EpollCtl(epfd, op, fd, event)
}
|
| 78 |
+ |
|
| 79 |
// Eventfd forwards its arguments to golang.org/x/sys/unix.Eventfd and returns
// its results.
func Eventfd(initval uint, flags int) (fd int, err error) {
	return linux.Eventfd(initval, flags)
}
|
| 83 |
+ |
|
| 84 |
// Write forwards its arguments to golang.org/x/sys/unix.Write and returns its
// results.
func Write(fd int, p []byte) (n int, err error) {
	return linux.Write(fd, p)
}
|
| 88 |
+ |
|
| 89 |
// EpollCreate1 forwards flag to golang.org/x/sys/unix.EpollCreate1 and returns
// its results.
func EpollCreate1(flag int) (fd int, err error) {
	return linux.EpollCreate1(flag)
}
|
| 93 |
+ |
|
| 94 |
// PerfEventMmapPage is a named type whose underlying type is
// golang.org/x/sys/unix.PerfEventMmapPage. Unlike the other types in this
// file it is declared without "=", so it is a distinct type, not an alias.
type PerfEventMmapPage linux.PerfEventMmapPage
|
| 96 |
+ |
|
| 97 |
// SetNonblock forwards its arguments to golang.org/x/sys/unix.SetNonblock and
// returns its result.
func SetNonblock(fd int, nonblocking bool) (err error) {
	return linux.SetNonblock(fd, nonblocking)
}
|
| 101 |
+ |
|
| 102 |
// Mmap forwards its arguments to golang.org/x/sys/unix.Mmap and returns its
// results.
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
	return linux.Mmap(fd, offset, length, prot, flags)
}
|
| 106 |
+ |
|
| 107 |
// Munmap forwards b to golang.org/x/sys/unix.Munmap and returns its result.
func Munmap(b []byte) (err error) {
	return linux.Munmap(b)
}
|
| 111 |
+ |
|
| 112 |
// PerfEventAttr is an alias for golang.org/x/sys/unix.PerfEventAttr.
type PerfEventAttr = linux.PerfEventAttr
|
| 114 |
+ |
|
| 115 |
// PerfEventOpen forwards its arguments to golang.org/x/sys/unix.PerfEventOpen
// and returns its results.
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
	return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
}
|
| 119 |
+ |
|
| 120 |
// Utsname is an alias for golang.org/x/sys/unix.Utsname.
type Utsname = linux.Utsname
|
| 122 |
+ |
|
| 123 |
// Uname forwards buf to golang.org/x/sys/unix.Uname and returns its result.
func Uname(buf *Utsname) (err error) {
	return linux.Uname(buf)
}
| 0 | 127 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,193 @@ |
| 0 |
+// +build !linux |
|
| 1 |
+ |
|
| 2 |
+package unix |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "fmt" |
|
| 6 |
+ "runtime" |
|
| 7 |
+ "syscall" |
|
| 8 |
+) |
|
| 9 |
+ |
|
| 10 |
// errNonLinux is the error returned by the stubs in this file. Its message
// contains the GOOS/GOARCH pair the binary was built for.
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
|
| 11 |
+ |
|
| 12 |
// Constants for builds that do not target Linux. The errno values come from
// the syscall package; everything else is hard-coded.
//
// NOTE(review): the numeric values look like the linux/amd64 ones (for
// example SYS_BPF = 321) — confirm if they are ever used for real work.
const (
	ENOENT                   = syscall.ENOENT
	EAGAIN                   = syscall.EAGAIN
	ENOSPC                   = syscall.ENOSPC
	EINVAL                   = syscall.EINVAL
	BPF_OBJ_NAME_LEN         = 0x10
	BPF_TAG_SIZE             = 0x8
	SYS_BPF                  = 321
	F_DUPFD_CLOEXEC          = 0x406
	EPOLLIN                  = 0x1
	EPOLL_CTL_ADD            = 0x1
	EPOLL_CLOEXEC            = 0x80000
	O_CLOEXEC                = 0x80000
	O_NONBLOCK               = 0x800
	PROT_READ                = 0x1
	PROT_WRITE               = 0x2
	MAP_SHARED               = 0x1
	PERF_TYPE_SOFTWARE       = 0x1
	PERF_COUNT_SW_BPF_OUTPUT = 0xa
	PerfBitWatermark         = 0x4000
	PERF_SAMPLE_RAW          = 0x400
	PERF_FLAG_FD_CLOEXEC     = 0x8
	// RLIM_INFINITY is exported by the Linux variant of this package, so it
	// has to exist here as well for callers to build on other platforms.
	// The value is a placeholder: Setrlimit always fails in this file.
	RLIM_INFINITY = 0x7fffffffffffffff
)
|
| 35 |
+ |
|
| 36 |
// Statfs_t is a stand-in for the Linux type of the same name, declared so
// that code referring to it compiles on other platforms.
//
// NOTE(review): the field layout presumably mirrors the linux/amd64
// definition in golang.org/x/sys/unix — confirm.
type Statfs_t struct {
	Type    int64
	Bsize   int64
	Blocks  uint64
	Bfree   uint64
	Bavail  uint64
	Files   uint64
	Ffree   uint64
	Fsid    [2]int32
	Namelen int64
	Frsize  int64
	Flags   int64
	Spare   [4]int64
}
|
| 51 |
+ |
|
| 52 |
// Rlimit is a stand-in for the Linux type of the same name, declared so that
// code referring to it compiles on other platforms.
type Rlimit struct {
	Cur uint64
	Max uint64
}
|
| 57 |
+ |
|
| 58 |
// Setrlimit is a stub: it ignores its arguments and always returns
// errNonLinux.
func Setrlimit(resource int, rlim *Rlimit) (err error) {
	return errNonLinux
}
|
| 62 |
+ |
|
| 63 |
// Syscall is a stub: it ignores its arguments and always returns zero results
// together with syscall.Errno(1).
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
	return 0, 0, syscall.Errno(1)
}
|
| 67 |
+ |
|
| 68 |
// FcntlInt is a stub: it ignores its arguments and always returns -1 and
// errNonLinux.
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
	return -1, errNonLinux
}
|
| 72 |
+ |
|
| 73 |
// Statfs is a stub: it ignores its arguments, leaves buf untouched and always
// returns errNonLinux.
func Statfs(path string, buf *Statfs_t) error {
	return errNonLinux
}
|
| 77 |
+ |
|
| 78 |
// Close is a stub: it does not close fd and always returns errNonLinux.
func Close(fd int) (err error) {
	return errNonLinux
}
|
| 82 |
+ |
|
| 83 |
// EpollEvent is a stand-in for the Linux type of the same name, declared so
// that code referring to it compiles on other platforms.
type EpollEvent struct {
	Events uint32
	Fd     int32
	Pad    int32
}
|
| 89 |
+ |
|
| 90 |
// EpollWait is a stub: it ignores its arguments and always returns 0 and
// errNonLinux.
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
	return 0, errNonLinux
}
|
| 94 |
+ |
|
| 95 |
// EpollCtl is a stub: it ignores its arguments and always returns
// errNonLinux.
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
	return errNonLinux
}
|
| 99 |
+ |
|
| 100 |
// Eventfd is a stub: it ignores its arguments and always returns 0 and
// errNonLinux.
func Eventfd(initval uint, flags int) (fd int, err error) {
	return 0, errNonLinux
}
|
| 104 |
+ |
|
| 105 |
// Write is a stub: it writes nothing and always returns 0 and errNonLinux.
func Write(fd int, p []byte) (n int, err error) {
	return 0, errNonLinux
}
|
| 109 |
+ |
|
| 110 |
// EpollCreate1 is a stub: it ignores flag and always returns 0 and
// errNonLinux.
func EpollCreate1(flag int) (fd int, err error) {
	return 0, errNonLinux
}
|
| 114 |
+ |
|
| 115 |
// PerfEventMmapPage is a stand-in for the Linux type of the same name,
// declared so that code referring to it compiles on other platforms.
//
// NOTE(review): the fields presumably mirror the definition in
// golang.org/x/sys/unix — confirm before relying on the layout.
type PerfEventMmapPage struct {
	Version        uint32
	Compat_version uint32
	Lock           uint32
	Index          uint32
	Offset         int64
	Time_enabled   uint64
	Time_running   uint64
	Capabilities   uint64
	Pmc_width      uint16
	Time_shift     uint16
	Time_mult      uint32
	Time_offset    uint64
	Time_zero      uint64
	Size           uint32

	Data_head   uint64
	Data_tail   uint64
	Data_offset uint64
	Data_size   uint64
	Aux_head    uint64
	Aux_tail    uint64
	Aux_offset  uint64
	Aux_size    uint64
}
|
| 141 |
+ |
|
| 142 |
// SetNonblock is a stub: it ignores its arguments and always returns
// errNonLinux.
func SetNonblock(fd int, nonblocking bool) (err error) {
	return errNonLinux
}
|
| 146 |
+ |
|
| 147 |
// Mmap is a stub: it maps nothing and always returns an empty, non-nil slice
// together with errNonLinux.
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
	return []byte{}, errNonLinux
}
|
| 151 |
+ |
|
| 152 |
// Munmap is a stub: it ignores b and always returns errNonLinux.
func Munmap(b []byte) (err error) {
	return errNonLinux
}
|
| 156 |
+ |
|
| 157 |
// PerfEventAttr is a stand-in for the Linux type of the same name, declared
// so that code referring to it compiles on other platforms.
//
// NOTE(review): the fields presumably mirror the definition in
// golang.org/x/sys/unix — confirm before relying on the layout.
type PerfEventAttr struct {
	Type               uint32
	Size               uint32
	Config             uint64
	Sample             uint64
	Sample_type        uint64
	Read_format        uint64
	Bits               uint64
	Wakeup             uint32
	Bp_type            uint32
	Ext1               uint64
	Ext2               uint64
	Branch_sample_type uint64
	Sample_regs_user   uint64
	Sample_stack_user  uint32
	Clockid            int32
	Sample_regs_intr   uint64
	Aux_watermark      uint32
	Sample_max_stack   uint16
}
|
| 178 |
+ |
|
| 179 |
// PerfEventOpen is a stub: it ignores its arguments and always returns 0 and
// errNonLinux.
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
	return 0, errNonLinux
}
|
| 183 |
+ |
|
| 184 |
// Utsname is a stand-in for the Linux type of the same name. Only the Release
// field is declared here.
type Utsname struct {
	Release [65]byte
}
|
| 188 |
+ |
|
| 189 |
// Uname is a stub: it leaves buf untouched and always returns errNonLinux.
func Uname(buf *Utsname) (err error) {
	return errNonLinux
}
| 0 | 193 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,58 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "github.com/cilium/ebpf/asm" |
|
| 4 |
+) |
|
| 5 |
+ |
|
| 6 |
+// link resolves bpf-to-bpf calls. |
|
| 7 |
+// |
|
| 8 |
+// Each section may contain multiple functions / labels, and is only linked |
|
| 9 |
+// if the program being edited references one of these functions. |
|
| 10 |
+// |
|
| 11 |
+// Sections must not require linking themselves. |
|
| 12 |
+func link(insns asm.Instructions, sections ...asm.Instructions) (asm.Instructions, error) {
|
|
| 13 |
+ for _, section := range sections {
|
|
| 14 |
+ var err error |
|
| 15 |
+ insns, err = linkSection(insns, section) |
|
| 16 |
+ if err != nil {
|
|
| 17 |
+ return nil, err |
|
| 18 |
+ } |
|
| 19 |
+ } |
|
| 20 |
+ return insns, nil |
|
| 21 |
+} |
|
| 22 |
+ |
|
| 23 |
+func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
|
|
| 24 |
+ // A map of symbols to the libraries which contain them. |
|
| 25 |
+ symbols, err := section.SymbolOffsets() |
|
| 26 |
+ if err != nil {
|
|
| 27 |
+ return nil, err |
|
| 28 |
+ } |
|
| 29 |
+ |
|
| 30 |
+ for _, ins := range insns {
|
|
| 31 |
+ if ins.Reference == "" {
|
|
| 32 |
+ continue |
|
| 33 |
+ } |
|
| 34 |
+ |
|
| 35 |
+ if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.R1 {
|
|
| 36 |
+ continue |
|
| 37 |
+ } |
|
| 38 |
+ |
|
| 39 |
+ if ins.Constant != -1 {
|
|
| 40 |
+ // This is already a valid call, no need to link again. |
|
| 41 |
+ continue |
|
| 42 |
+ } |
|
| 43 |
+ |
|
| 44 |
+ if _, ok := symbols[ins.Reference]; !ok {
|
|
| 45 |
+ // Symbol isn't available in this section |
|
| 46 |
+ continue |
|
| 47 |
+ } |
|
| 48 |
+ |
|
| 49 |
+ // At this point we know that at least one function in the |
|
| 50 |
+ // library is called from insns. Merge the two sections. |
|
| 51 |
+ // The rewrite of ins.Constant happens in asm.Instruction.Marshal. |
|
| 52 |
+ return append(insns, section...), nil |
|
| 53 |
+ } |
|
| 54 |
+ |
|
| 55 |
+ // None of the functions in the section are called. Do nothing. |
|
| 56 |
+ return insns, nil |
|
| 57 |
+} |
| 0 | 58 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,604 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "fmt" |
|
| 4 |
+ "unsafe" |
|
| 5 |
+ |
|
| 6 |
+ "github.com/cilium/ebpf/internal" |
|
| 7 |
+ "github.com/cilium/ebpf/internal/unix" |
|
| 8 |
+ |
|
| 9 |
+ "github.com/pkg/errors" |
|
| 10 |
+) |
|
| 11 |
+ |
|
| 12 |
// MapSpec defines a Map. It holds the parameters that are handed to the
// kernel when the map is created.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name       string
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32
	Flags      uint32
	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
	InnerMap *MapSpec
}
|
| 25 |
+ |
|
| 26 |
// String returns a human readable description of the spec, consisting of the
// map type, key and value size, maximum number of entries and flags. Name and
// InnerMap are not part of the output.
func (ms *MapSpec) String() string {
	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
}
|
| 29 |
+ |
|
| 30 |
+// Copy returns a copy of the spec. |
|
| 31 |
+func (ms *MapSpec) Copy() *MapSpec {
|
|
| 32 |
+ if ms == nil {
|
|
| 33 |
+ return nil |
|
| 34 |
+ } |
|
| 35 |
+ |
|
| 36 |
+ cpy := *ms |
|
| 37 |
+ cpy.InnerMap = ms.InnerMap.Copy() |
|
| 38 |
+ return &cpy |
|
| 39 |
+} |
|
| 40 |
+ |
|
| 41 |
// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
//
// Methods which take interface{} arguments by default encode
// them using binary.Read/Write in the machine's native endianness.
//
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
// if you require custom encoding.
type Map struct {
	// name is the name the map was created or loaded with; it may be empty.
	name string
	fd   *bpfFD
	abi  MapABI
	// Per CPU maps return values larger than the size in the spec
	fullValueSize int
}
|
| 57 |
+ |
|
| 58 |
+// NewMapFromFD creates a map from a raw fd. |
|
| 59 |
+// |
|
| 60 |
+// You should not use fd after calling this function. |
|
| 61 |
+func NewMapFromFD(fd int) (*Map, error) {
|
|
| 62 |
+ if fd < 0 {
|
|
| 63 |
+ return nil, errors.New("invalid fd")
|
|
| 64 |
+ } |
|
| 65 |
+ bpfFd := newBPFFD(uint32(fd)) |
|
| 66 |
+ |
|
| 67 |
+ name, abi, err := newMapABIFromFd(bpfFd) |
|
| 68 |
+ if err != nil {
|
|
| 69 |
+ bpfFd.forget() |
|
| 70 |
+ return nil, err |
|
| 71 |
+ } |
|
| 72 |
+ return newMap(bpfFd, name, abi) |
|
| 73 |
+} |
|
| 74 |
+ |
|
| 75 |
+// NewMap creates a new Map. |
|
| 76 |
+// |
|
| 77 |
+// Creating a map for the first time will perform feature detection |
|
| 78 |
+// by creating small, temporary maps. |
|
| 79 |
+func NewMap(spec *MapSpec) (*Map, error) {
|
|
| 80 |
+ if spec.Type != ArrayOfMaps && spec.Type != HashOfMaps {
|
|
| 81 |
+ return createMap(spec, nil) |
|
| 82 |
+ } |
|
| 83 |
+ |
|
| 84 |
+ if spec.InnerMap == nil {
|
|
| 85 |
+ return nil, errors.Errorf("%s requires InnerMap", spec.Type)
|
|
| 86 |
+ } |
|
| 87 |
+ |
|
| 88 |
+ template, err := createMap(spec.InnerMap, nil) |
|
| 89 |
+ if err != nil {
|
|
| 90 |
+ return nil, err |
|
| 91 |
+ } |
|
| 92 |
+ defer template.Close() |
|
| 93 |
+ |
|
| 94 |
+ return createMap(spec, template.fd) |
|
| 95 |
+} |
|
| 96 |
+ |
|
| 97 |
+func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
|
|
| 98 |
+ spec = spec.Copy() |
|
| 99 |
+ |
|
| 100 |
+ switch spec.Type {
|
|
| 101 |
+ case ArrayOfMaps: |
|
| 102 |
+ fallthrough |
|
| 103 |
+ case HashOfMaps: |
|
| 104 |
+ if err := haveNestedMaps(); err != nil {
|
|
| 105 |
+ return nil, err |
|
| 106 |
+ } |
|
| 107 |
+ |
|
| 108 |
+ if spec.ValueSize != 0 && spec.ValueSize != 4 {
|
|
| 109 |
+ return nil, errors.Errorf("ValueSize must be zero or four for map of map")
|
|
| 110 |
+ } |
|
| 111 |
+ spec.ValueSize = 4 |
|
| 112 |
+ |
|
| 113 |
+ case PerfEventArray: |
|
| 114 |
+ if spec.KeySize != 0 {
|
|
| 115 |
+ return nil, errors.Errorf("KeySize must be zero for perf event array")
|
|
| 116 |
+ } |
|
| 117 |
+ if spec.ValueSize != 0 {
|
|
| 118 |
+ return nil, errors.Errorf("ValueSize must be zero for perf event array")
|
|
| 119 |
+ } |
|
| 120 |
+ if spec.MaxEntries == 0 {
|
|
| 121 |
+ n, err := internal.OnlineCPUs() |
|
| 122 |
+ if err != nil {
|
|
| 123 |
+ return nil, errors.Wrap(err, "perf event array") |
|
| 124 |
+ } |
|
| 125 |
+ spec.MaxEntries = uint32(n) |
|
| 126 |
+ } |
|
| 127 |
+ |
|
| 128 |
+ spec.KeySize = 4 |
|
| 129 |
+ spec.ValueSize = 4 |
|
| 130 |
+ } |
|
| 131 |
+ |
|
| 132 |
+ attr := bpfMapCreateAttr{
|
|
| 133 |
+ mapType: spec.Type, |
|
| 134 |
+ keySize: spec.KeySize, |
|
| 135 |
+ valueSize: spec.ValueSize, |
|
| 136 |
+ maxEntries: spec.MaxEntries, |
|
| 137 |
+ flags: spec.Flags, |
|
| 138 |
+ } |
|
| 139 |
+ |
|
| 140 |
+ if inner != nil {
|
|
| 141 |
+ var err error |
|
| 142 |
+ attr.innerMapFd, err = inner.value() |
|
| 143 |
+ if err != nil {
|
|
| 144 |
+ return nil, errors.Wrap(err, "map create") |
|
| 145 |
+ } |
|
| 146 |
+ } |
|
| 147 |
+ |
|
| 148 |
+ name, err := newBPFObjName(spec.Name) |
|
| 149 |
+ if err != nil {
|
|
| 150 |
+ return nil, errors.Wrap(err, "map create") |
|
| 151 |
+ } |
|
| 152 |
+ |
|
| 153 |
+ if haveObjName() == nil {
|
|
| 154 |
+ attr.mapName = name |
|
| 155 |
+ } |
|
| 156 |
+ |
|
| 157 |
+ fd, err := bpfMapCreate(&attr) |
|
| 158 |
+ if err != nil {
|
|
| 159 |
+ return nil, errors.Wrap(err, "map create") |
|
| 160 |
+ } |
|
| 161 |
+ |
|
| 162 |
+ return newMap(fd, spec.Name, newMapABIFromSpec(spec)) |
|
| 163 |
+} |
|
| 164 |
+ |
|
| 165 |
+func newMap(fd *bpfFD, name string, abi *MapABI) (*Map, error) {
|
|
| 166 |
+ m := &Map{
|
|
| 167 |
+ name, |
|
| 168 |
+ fd, |
|
| 169 |
+ *abi, |
|
| 170 |
+ int(abi.ValueSize), |
|
| 171 |
+ } |
|
| 172 |
+ |
|
| 173 |
+ if !abi.Type.hasPerCPUValue() {
|
|
| 174 |
+ return m, nil |
|
| 175 |
+ } |
|
| 176 |
+ |
|
| 177 |
+ possibleCPUs, err := internal.PossibleCPUs() |
|
| 178 |
+ if err != nil {
|
|
| 179 |
+ return nil, err |
|
| 180 |
+ } |
|
| 181 |
+ |
|
| 182 |
+ m.fullValueSize = align(int(abi.ValueSize), 8) * possibleCPUs |
|
| 183 |
+ return m, nil |
|
| 184 |
+} |
|
| 185 |
+ |
|
| 186 |
+func (m *Map) String() string {
|
|
| 187 |
+ if m.name != "" {
|
|
| 188 |
+ return fmt.Sprintf("%s(%s)#%v", m.abi.Type, m.name, m.fd)
|
|
| 189 |
+ } |
|
| 190 |
+ return fmt.Sprintf("%s#%v", m.abi.Type, m.fd)
|
|
| 191 |
+} |
|
| 192 |
+ |
|
| 193 |
// ABI gets the ABI of the Map. The ABI is returned by value, so modifying
// the result has no effect on the Map.
func (m *Map) ABI() MapABI {
	return m.abi
}
|
| 197 |
+ |
|
| 198 |
+// Lookup retrieves a value from a Map. |
|
| 199 |
+// |
|
| 200 |
+// Calls Close() on valueOut if it is of type **Map or **Program, |
|
| 201 |
+// and *valueOut is not nil. |
|
| 202 |
+// |
|
| 203 |
+// Returns an error if the key doesn't exist, see IsNotExist. |
|
| 204 |
+func (m *Map) Lookup(key, valueOut interface{}) error {
|
|
| 205 |
+ valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) |
|
| 206 |
+ |
|
| 207 |
+ if err := m.lookup(key, valuePtr); err != nil {
|
|
| 208 |
+ return err |
|
| 209 |
+ } |
|
| 210 |
+ |
|
| 211 |
+ if valueBytes == nil {
|
|
| 212 |
+ return nil |
|
| 213 |
+ } |
|
| 214 |
+ |
|
| 215 |
+ if m.abi.Type.hasPerCPUValue() {
|
|
| 216 |
+ return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), valueBytes) |
|
| 217 |
+ } |
|
| 218 |
+ |
|
| 219 |
+ switch value := valueOut.(type) {
|
|
| 220 |
+ case **Map: |
|
| 221 |
+ m, err := unmarshalMap(valueBytes) |
|
| 222 |
+ if err != nil {
|
|
| 223 |
+ return err |
|
| 224 |
+ } |
|
| 225 |
+ |
|
| 226 |
+ (*value).Close() |
|
| 227 |
+ *value = m |
|
| 228 |
+ return nil |
|
| 229 |
+ case *Map: |
|
| 230 |
+ return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
|
|
| 231 |
+ case Map: |
|
| 232 |
+ return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
|
|
| 233 |
+ |
|
| 234 |
+ case **Program: |
|
| 235 |
+ p, err := unmarshalProgram(valueBytes) |
|
| 236 |
+ if err != nil {
|
|
| 237 |
+ return err |
|
| 238 |
+ } |
|
| 239 |
+ |
|
| 240 |
+ (*value).Close() |
|
| 241 |
+ *value = p |
|
| 242 |
+ return nil |
|
| 243 |
+ case *Program: |
|
| 244 |
+ return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
|
|
| 245 |
+ case Program: |
|
| 246 |
+ return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
|
|
| 247 |
+ |
|
| 248 |
+ default: |
|
| 249 |
+ return unmarshalBytes(valueOut, valueBytes) |
|
| 250 |
+ } |
|
| 251 |
+} |
|
| 252 |
+ |
|
| 253 |
+// LookupBytes gets a value from Map. |
|
| 254 |
+// |
|
| 255 |
+// Returns a nil value if a key doesn't exist. |
|
| 256 |
+func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
|
|
| 257 |
+ valueBytes := make([]byte, m.fullValueSize) |
|
| 258 |
+ valuePtr := newPtr(unsafe.Pointer(&valueBytes[0])) |
|
| 259 |
+ |
|
| 260 |
+ err := m.lookup(key, valuePtr) |
|
| 261 |
+ if IsNotExist(err) {
|
|
| 262 |
+ return nil, nil |
|
| 263 |
+ } |
|
| 264 |
+ |
|
| 265 |
+ return valueBytes, err |
|
| 266 |
+} |
|
| 267 |
+ |
|
| 268 |
+func (m *Map) lookup(key interface{}, valueOut syscallPtr) error {
|
|
| 269 |
+ keyPtr, err := marshalPtr(key, int(m.abi.KeySize)) |
|
| 270 |
+ if err != nil {
|
|
| 271 |
+ return errors.WithMessage(err, "can't marshal key") |
|
| 272 |
+ } |
|
| 273 |
+ |
|
| 274 |
+ err = bpfMapLookupElem(m.fd, keyPtr, valueOut) |
|
| 275 |
+ return errors.WithMessage(err, "lookup failed") |
|
| 276 |
+} |
|
| 277 |
+ |
|
| 278 |
// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
//
// The value is converted to uint64 and handed to the update call as is.
type MapUpdateFlags uint64
|
| 282 |
+ |
|
| 283 |
// The flags evaluate to UpdateAny = 0, UpdateNoExist = 1 and UpdateExist = 2:
// the shift expression is repeated implicitly for UpdateExist with iota = 2.
const (
	// UpdateAny creates a new element or update an existing one.
	UpdateAny MapUpdateFlags = iota
	// UpdateNoExist creates a new element.
	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
	// UpdateExist updates an existing element.
	UpdateExist
)
|
| 291 |
+ |
|
| 292 |
// Put replaces or creates a value in map.
//
// It is equivalent to calling Update with UpdateAny, and returns whatever
// Update returns.
func (m *Map) Put(key, value interface{}) error {
	return m.Update(key, value, UpdateAny)
}
|
| 298 |
+ |
|
| 299 |
+// Update changes the value of a key. |
|
| 300 |
+func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
|
|
| 301 |
+ keyPtr, err := marshalPtr(key, int(m.abi.KeySize)) |
|
| 302 |
+ if err != nil {
|
|
| 303 |
+ return errors.WithMessage(err, "can't marshal key") |
|
| 304 |
+ } |
|
| 305 |
+ |
|
| 306 |
+ var valuePtr syscallPtr |
|
| 307 |
+ if m.abi.Type.hasPerCPUValue() {
|
|
| 308 |
+ valuePtr, err = marshalPerCPUValue(value, int(m.abi.ValueSize)) |
|
| 309 |
+ } else {
|
|
| 310 |
+ valuePtr, err = marshalPtr(value, int(m.abi.ValueSize)) |
|
| 311 |
+ } |
|
| 312 |
+ if err != nil {
|
|
| 313 |
+ return errors.WithMessage(err, "can't marshal value") |
|
| 314 |
+ } |
|
| 315 |
+ |
|
| 316 |
+ return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)) |
|
| 317 |
+} |
|
| 318 |
+ |
|
| 319 |
+// Delete removes a value. |
|
| 320 |
+// |
|
| 321 |
+// Returns an error if the key does not exist, see IsNotExist. |
|
| 322 |
+func (m *Map) Delete(key interface{}) error {
|
|
| 323 |
+ keyPtr, err := marshalPtr(key, int(m.abi.KeySize)) |
|
| 324 |
+ if err != nil {
|
|
| 325 |
+ return errors.WithMessage(err, "can't marshal key") |
|
| 326 |
+ } |
|
| 327 |
+ |
|
| 328 |
+ err = bpfMapDeleteElem(m.fd, keyPtr) |
|
| 329 |
+ return errors.WithMessage(err, "can't delete key") |
|
| 330 |
+} |
|
| 331 |
+ |
|
| 332 |
+// NextKey finds the key following an initial key. |
|
| 333 |
+// |
|
| 334 |
+// See NextKeyBytes for details. |
|
| 335 |
+func (m *Map) NextKey(key, nextKeyOut interface{}) error {
|
|
| 336 |
+ nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize)) |
|
| 337 |
+ |
|
| 338 |
+ if err := m.nextKey(key, nextKeyPtr); err != nil {
|
|
| 339 |
+ return err |
|
| 340 |
+ } |
|
| 341 |
+ |
|
| 342 |
+ if nextKeyBytes == nil {
|
|
| 343 |
+ return nil |
|
| 344 |
+ } |
|
| 345 |
+ |
|
| 346 |
+ err := unmarshalBytes(nextKeyOut, nextKeyBytes) |
|
| 347 |
+ return errors.WithMessage(err, "can't unmarshal next key") |
|
| 348 |
+} |
|
| 349 |
+ |
|
| 350 |
+// NextKeyBytes returns the key following an initial key as a byte slice. |
|
| 351 |
+// |
|
| 352 |
+// Passing nil will return the first key. |
|
| 353 |
+// |
|
| 354 |
+// Use Iterate if you want to traverse all entries in the map. |
|
| 355 |
+func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
|
|
| 356 |
+ nextKey := make([]byte, m.abi.KeySize) |
|
| 357 |
+ nextKeyPtr := newPtr(unsafe.Pointer(&nextKey[0])) |
|
| 358 |
+ |
|
| 359 |
+ err := m.nextKey(key, nextKeyPtr) |
|
| 360 |
+ if IsNotExist(err) {
|
|
| 361 |
+ return nil, nil |
|
| 362 |
+ } |
|
| 363 |
+ |
|
| 364 |
+ return nextKey, err |
|
| 365 |
+} |
|
| 366 |
+ |
|
| 367 |
+func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error {
|
|
| 368 |
+ var ( |
|
| 369 |
+ keyPtr syscallPtr |
|
| 370 |
+ err error |
|
| 371 |
+ ) |
|
| 372 |
+ |
|
| 373 |
+ if key != nil {
|
|
| 374 |
+ keyPtr, err = marshalPtr(key, int(m.abi.KeySize)) |
|
| 375 |
+ if err != nil {
|
|
| 376 |
+ return errors.WithMessage(err, "can't marshal key") |
|
| 377 |
+ } |
|
| 378 |
+ } |
|
| 379 |
+ |
|
| 380 |
+ err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut) |
|
| 381 |
+ return errors.WithMessage(err, "can't get next key") |
|
| 382 |
+} |
|
| 383 |
+ |
|
| 384 |
+// Iterate traverses a map. |
|
| 385 |
+// |
|
| 386 |
+// It's safe to create multiple iterators at the same time. |
|
| 387 |
+// |
|
| 388 |
+// It's not possible to guarantee that all keys in a map will be |
|
| 389 |
+// returned if there are concurrent modifications to the map. |
|
| 390 |
+func (m *Map) Iterate() *MapIterator {
|
|
| 391 |
+ return newMapIterator(m) |
|
| 392 |
+} |
|
| 393 |
+ |
|
| 394 |
+// Close removes a Map |
|
| 395 |
+func (m *Map) Close() error {
|
|
| 396 |
+ if m == nil {
|
|
| 397 |
+ // This makes it easier to clean up when iterating maps |
|
| 398 |
+ // of maps / programs. |
|
| 399 |
+ return nil |
|
| 400 |
+ } |
|
| 401 |
+ |
|
| 402 |
+ return m.fd.close() |
|
| 403 |
+} |
|
| 404 |
+ |
|
| 405 |
+// FD gets the file descriptor of the Map. |
|
| 406 |
+// |
|
| 407 |
+// Calling this function is invalid after Close has been called. |
|
| 408 |
+func (m *Map) FD() int {
|
|
| 409 |
+ fd, err := m.fd.value() |
|
| 410 |
+ if err != nil {
|
|
| 411 |
+ // Best effort: -1 is the number most likely to be an |
|
| 412 |
+ // invalid file descriptor. |
|
| 413 |
+ return -1 |
|
| 414 |
+ } |
|
| 415 |
+ |
|
| 416 |
+ return int(fd) |
|
| 417 |
+} |
|
| 418 |
+ |
|
| 419 |
+// Clone creates a duplicate of the Map. |
|
| 420 |
+// |
|
| 421 |
+// Closing the duplicate does not affect the original, and vice versa. |
|
| 422 |
+// Changes made to the map are reflected by both instances however. |
|
| 423 |
+// |
|
| 424 |
+// Cloning a nil Map returns nil. |
|
| 425 |
+func (m *Map) Clone() (*Map, error) {
|
|
| 426 |
+ if m == nil {
|
|
| 427 |
+ return nil, nil |
|
| 428 |
+ } |
|
| 429 |
+ |
|
| 430 |
+ dup, err := m.fd.dup() |
|
| 431 |
+ if err != nil {
|
|
| 432 |
+ return nil, errors.Wrap(err, "can't clone map") |
|
| 433 |
+ } |
|
| 434 |
+ |
|
| 435 |
+ return newMap(dup, m.name, &m.abi) |
|
| 436 |
+} |
|
| 437 |
+ |
|
| 438 |
+// Pin persists the map past the lifetime of the process that created it. |
|
| 439 |
+// |
|
| 440 |
+// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional |
|
| 441 |
+func (m *Map) Pin(fileName string) error {
|
|
| 442 |
+ return bpfPinObject(fileName, m.fd) |
|
| 443 |
+} |
|
| 444 |
+ |
|
| 445 |
+// LoadPinnedMap load a Map from a BPF file. |
|
| 446 |
+// |
|
| 447 |
+// The function is not compatible with nested maps. |
|
| 448 |
+// Use LoadPinnedMapExplicit in these situations. |
|
| 449 |
+func LoadPinnedMap(fileName string) (*Map, error) {
|
|
| 450 |
+ fd, err := bpfGetObject(fileName) |
|
| 451 |
+ if err != nil {
|
|
| 452 |
+ return nil, err |
|
| 453 |
+ } |
|
| 454 |
+ name, abi, err := newMapABIFromFd(fd) |
|
| 455 |
+ if err != nil {
|
|
| 456 |
+ _ = fd.close() |
|
| 457 |
+ return nil, err |
|
| 458 |
+ } |
|
| 459 |
+ return newMap(fd, name, abi) |
|
| 460 |
+} |
|
| 461 |
+ |
|
| 462 |
+// LoadPinnedMapExplicit loads a map with explicit parameters. |
|
| 463 |
+func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
|
|
| 464 |
+ fd, err := bpfGetObject(fileName) |
|
| 465 |
+ if err != nil {
|
|
| 466 |
+ return nil, err |
|
| 467 |
+ } |
|
| 468 |
+ return newMap(fd, "", abi) |
|
| 469 |
+} |
|
| 470 |
+ |
|
| 471 |
+func unmarshalMap(buf []byte) (*Map, error) {
|
|
| 472 |
+ if len(buf) != 4 {
|
|
| 473 |
+ return nil, errors.New("map id requires 4 byte value")
|
|
| 474 |
+ } |
|
| 475 |
+ |
|
| 476 |
+ // Looking up an entry in a nested map or prog array returns an id, |
|
| 477 |
+ // not an fd. |
|
| 478 |
+ id := internal.NativeEndian.Uint32(buf) |
|
| 479 |
+ fd, err := bpfGetMapFDByID(id) |
|
| 480 |
+ if err != nil {
|
|
| 481 |
+ return nil, err |
|
| 482 |
+ } |
|
| 483 |
+ |
|
| 484 |
+ name, abi, err := newMapABIFromFd(fd) |
|
| 485 |
+ if err != nil {
|
|
| 486 |
+ _ = fd.close() |
|
| 487 |
+ return nil, err |
|
| 488 |
+ } |
|
| 489 |
+ |
|
| 490 |
+ return newMap(fd, name, abi) |
|
| 491 |
+} |
|
| 492 |
+ |
|
| 493 |
+// MarshalBinary implements BinaryMarshaler. |
|
| 494 |
+func (m *Map) MarshalBinary() ([]byte, error) {
|
|
| 495 |
+ fd, err := m.fd.value() |
|
| 496 |
+ if err != nil {
|
|
| 497 |
+ return nil, err |
|
| 498 |
+ } |
|
| 499 |
+ |
|
| 500 |
+ buf := make([]byte, 4) |
|
| 501 |
+ internal.NativeEndian.PutUint32(buf, fd) |
|
| 502 |
+ return buf, nil |
|
| 503 |
+} |
|
| 504 |
+ |
|
| 505 |
// MapIterator iterates a Map.
//
// See Map.Iterate.
type MapIterator struct {
	// target is the map being traversed.
	target *Map
	// prevKey is handed to NextKeyBytes to find the following key. It is
	// nil until a key has been read, afterwards it refers to prevBytes.
	prevKey interface{}
	// prevBytes holds the iterator's own copy of the most recent key.
	prevBytes []byte
	// count is the number of keys Next skipped because their value could
	// not be looked up; iteration is aborted once it reaches maxEntries,
	// which is taken from the target's ABI.
	count, maxEntries uint32
	// done is set once NextKeyBytes returns no further key.
	done bool
	// err is the error reported by Err.
	err error
}
|
| 516 |
+ |
|
| 517 |
+func newMapIterator(target *Map) *MapIterator {
|
|
| 518 |
+ return &MapIterator{
|
|
| 519 |
+ target: target, |
|
| 520 |
+ maxEntries: target.abi.MaxEntries, |
|
| 521 |
+ prevBytes: make([]byte, int(target.abi.KeySize)), |
|
| 522 |
+ } |
|
| 523 |
+} |
|
| 524 |
+ |
|
| 525 |
// errIterationAborted is recorded by MapIterator.Next when it gives up
// after skipping too many keys. Detect it with IsIterationAborted.
var errIterationAborted = errors.New("iteration aborted")
|
|
| 526 |
+ |
|
| 527 |
+// Next decodes the next key and value. |
|
| 528 |
+// |
|
| 529 |
+// Iterating a hash map from which keys are being deleted is not |
|
| 530 |
+// safe. You may see the same key multiple times. Iteration may |
|
| 531 |
+// also abort with an error, see IsIterationAborted. |
|
| 532 |
+// |
|
| 533 |
+// Returns false if there are no more entries. You must check |
|
| 534 |
+// the result of Err afterwards. |
|
| 535 |
+// |
|
| 536 |
+// See Map.Get for further caveats around valueOut. |
|
| 537 |
+func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
|
|
| 538 |
+ if mi.err != nil || mi.done {
|
|
| 539 |
+ return false |
|
| 540 |
+ } |
|
| 541 |
+ |
|
| 542 |
+ for ; mi.count < mi.maxEntries; mi.count++ {
|
|
| 543 |
+ var nextBytes []byte |
|
| 544 |
+ nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) |
|
| 545 |
+ if mi.err != nil {
|
|
| 546 |
+ return false |
|
| 547 |
+ } |
|
| 548 |
+ |
|
| 549 |
+ if nextBytes == nil {
|
|
| 550 |
+ mi.done = true |
|
| 551 |
+ return false |
|
| 552 |
+ } |
|
| 553 |
+ |
|
| 554 |
+ // The user can get access to nextBytes since unmarshalBytes |
|
| 555 |
+ // does not copy when unmarshaling into a []byte. |
|
| 556 |
+ // Make a copy to prevent accidental corruption of |
|
| 557 |
+ // iterator state. |
|
| 558 |
+ copy(mi.prevBytes, nextBytes) |
|
| 559 |
+ mi.prevKey = mi.prevBytes |
|
| 560 |
+ |
|
| 561 |
+ mi.err = mi.target.Lookup(nextBytes, valueOut) |
|
| 562 |
+ if IsNotExist(mi.err) {
|
|
| 563 |
+ // Even though the key should be valid, we couldn't look up |
|
| 564 |
+ // its value. If we're iterating a hash map this is probably |
|
| 565 |
+ // because a concurrent delete removed the value before we |
|
| 566 |
+ // could get it. This means that the next call to NextKeyBytes |
|
| 567 |
+ // is very likely to restart iteration. |
|
| 568 |
+ // If we're iterating one of the fd maps like |
|
| 569 |
+ // ProgramArray it means that a given slot doesn't have |
|
| 570 |
+ // a valid fd associated. It's OK to continue to the next slot. |
|
| 571 |
+ continue |
|
| 572 |
+ } |
|
| 573 |
+ if mi.err != nil {
|
|
| 574 |
+ return false |
|
| 575 |
+ } |
|
| 576 |
+ |
|
| 577 |
+ mi.err = unmarshalBytes(keyOut, nextBytes) |
|
| 578 |
+ return mi.err == nil |
|
| 579 |
+ } |
|
| 580 |
+ |
|
| 581 |
+ mi.err = errIterationAborted |
|
| 582 |
+ return false |
|
| 583 |
+} |
|
| 584 |
+ |
|
| 585 |
// Err returns any encountered error.
//
// The method must be called after Next returns false.
func (mi *MapIterator) Err() error {
	return mi.err
}
|
| 591 |
+ |
|
| 592 |
+// IsNotExist returns true if the error indicates that a |
|
| 593 |
+// key doesn't exist. |
|
| 594 |
+func IsNotExist(err error) bool {
|
|
| 595 |
+ return errors.Cause(err) == unix.ENOENT |
|
| 596 |
+} |
|
| 597 |
+ |
|
| 598 |
+// IsIterationAborted returns true if the iteration was aborted. |
|
| 599 |
+// |
|
| 600 |
+// This occurs when keys are deleted from a hash map during iteration. |
|
| 601 |
+func IsIterationAborted(err error) bool {
|
|
| 602 |
+ return errors.Cause(err) == errIterationAborted |
|
| 603 |
+} |
| 0 | 604 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,192 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bytes" |
|
| 4 |
+ "encoding" |
|
| 5 |
+ "encoding/binary" |
|
| 6 |
+ "reflect" |
|
| 7 |
+ "runtime" |
|
| 8 |
+ "unsafe" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/cilium/ebpf/internal" |
|
| 11 |
+ |
|
| 12 |
+ "github.com/pkg/errors" |
|
| 13 |
+) |
|
| 14 |
+ |
|
| 15 |
+func marshalPtr(data interface{}, length int) (syscallPtr, error) {
|
|
| 16 |
+ if ptr, ok := data.(unsafe.Pointer); ok {
|
|
| 17 |
+ return newPtr(ptr), nil |
|
| 18 |
+ } |
|
| 19 |
+ |
|
| 20 |
+ buf, err := marshalBytes(data, length) |
|
| 21 |
+ if err != nil {
|
|
| 22 |
+ return syscallPtr{}, err
|
|
| 23 |
+ } |
|
| 24 |
+ |
|
| 25 |
+ return newPtr(unsafe.Pointer(&buf[0])), nil |
|
| 26 |
+} |
|
| 27 |
+ |
|
| 28 |
+func marshalBytes(data interface{}, length int) (buf []byte, err error) {
|
|
| 29 |
+ switch value := data.(type) {
|
|
| 30 |
+ case encoding.BinaryMarshaler: |
|
| 31 |
+ buf, err = value.MarshalBinary() |
|
| 32 |
+ case string: |
|
| 33 |
+ buf = []byte(value) |
|
| 34 |
+ case []byte: |
|
| 35 |
+ buf = value |
|
| 36 |
+ case unsafe.Pointer: |
|
| 37 |
+ err = errors.New("can't marshal from unsafe.Pointer")
|
|
| 38 |
+ default: |
|
| 39 |
+ var wr bytes.Buffer |
|
| 40 |
+ err = binary.Write(&wr, internal.NativeEndian, value) |
|
| 41 |
+ err = errors.Wrapf(err, "encoding %T", value) |
|
| 42 |
+ buf = wr.Bytes() |
|
| 43 |
+ } |
|
| 44 |
+ if err != nil {
|
|
| 45 |
+ return nil, err |
|
| 46 |
+ } |
|
| 47 |
+ |
|
| 48 |
+ if len(buf) != length {
|
|
| 49 |
+ return nil, errors.Errorf("%T doesn't marshal to %d bytes", data, length)
|
|
| 50 |
+ } |
|
| 51 |
+ return buf, nil |
|
| 52 |
+} |
|
| 53 |
+ |
|
| 54 |
+func makeBuffer(dst interface{}, length int) (syscallPtr, []byte) {
|
|
| 55 |
+ if ptr, ok := dst.(unsafe.Pointer); ok {
|
|
| 56 |
+ return newPtr(ptr), nil |
|
| 57 |
+ } |
|
| 58 |
+ |
|
| 59 |
+ buf := make([]byte, length) |
|
| 60 |
+ return newPtr(unsafe.Pointer(&buf[0])), buf |
|
| 61 |
+} |
|
| 62 |
+ |
|
| 63 |
+func unmarshalBytes(data interface{}, buf []byte) error {
|
|
| 64 |
+ switch value := data.(type) {
|
|
| 65 |
+ case unsafe.Pointer: |
|
| 66 |
+ sh := &reflect.SliceHeader{
|
|
| 67 |
+ Data: uintptr(value), |
|
| 68 |
+ Len: len(buf), |
|
| 69 |
+ Cap: len(buf), |
|
| 70 |
+ } |
|
| 71 |
+ |
|
| 72 |
+ dst := *(*[]byte)(unsafe.Pointer(sh)) |
|
| 73 |
+ copy(dst, buf) |
|
| 74 |
+ runtime.KeepAlive(value) |
|
| 75 |
+ return nil |
|
| 76 |
+ case encoding.BinaryUnmarshaler: |
|
| 77 |
+ return value.UnmarshalBinary(buf) |
|
| 78 |
+ case *string: |
|
| 79 |
+ *value = string(buf) |
|
| 80 |
+ return nil |
|
| 81 |
+ case *[]byte: |
|
| 82 |
+ *value = buf |
|
| 83 |
+ return nil |
|
| 84 |
+ case string: |
|
| 85 |
+ return errors.New("require pointer to string")
|
|
| 86 |
+ case []byte: |
|
| 87 |
+ return errors.New("require pointer to []byte")
|
|
| 88 |
+ default: |
|
| 89 |
+ rd := bytes.NewReader(buf) |
|
| 90 |
+ err := binary.Read(rd, internal.NativeEndian, value) |
|
| 91 |
+ return errors.Wrapf(err, "decoding %T", value) |
|
| 92 |
+ } |
|
| 93 |
+} |
|
| 94 |
+ |
|
| 95 |
+// marshalPerCPUValue encodes a slice containing one value per |
|
| 96 |
+// possible CPU into a buffer of bytes. |
|
| 97 |
+// |
|
| 98 |
+// Values are initialized to zero if the slice has less elements than CPUs. |
|
| 99 |
+// |
|
| 100 |
+// slice must have a type like []elementType. |
|
| 101 |
+func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
|
|
| 102 |
+ sliceType := reflect.TypeOf(slice) |
|
| 103 |
+ if sliceType.Kind() != reflect.Slice {
|
|
| 104 |
+ return syscallPtr{}, errors.New("per-CPU value requires slice")
|
|
| 105 |
+ } |
|
| 106 |
+ |
|
| 107 |
+ possibleCPUs, err := internal.PossibleCPUs() |
|
| 108 |
+ if err != nil {
|
|
| 109 |
+ return syscallPtr{}, err
|
|
| 110 |
+ } |
|
| 111 |
+ |
|
| 112 |
+ sliceValue := reflect.ValueOf(slice) |
|
| 113 |
+ sliceLen := sliceValue.Len() |
|
| 114 |
+ if sliceLen > possibleCPUs {
|
|
| 115 |
+ return syscallPtr{}, errors.Errorf("per-CPU value exceeds number of CPUs")
|
|
| 116 |
+ } |
|
| 117 |
+ |
|
| 118 |
+ alignedElemLength := align(elemLength, 8) |
|
| 119 |
+ buf := make([]byte, alignedElemLength*possibleCPUs) |
|
| 120 |
+ |
|
| 121 |
+ for i := 0; i < sliceLen; i++ {
|
|
| 122 |
+ elem := sliceValue.Index(i).Interface() |
|
| 123 |
+ elemBytes, err := marshalBytes(elem, elemLength) |
|
| 124 |
+ if err != nil {
|
|
| 125 |
+ return syscallPtr{}, err
|
|
| 126 |
+ } |
|
| 127 |
+ |
|
| 128 |
+ offset := i * alignedElemLength |
|
| 129 |
+ copy(buf[offset:offset+elemLength], elemBytes) |
|
| 130 |
+ } |
|
| 131 |
+ |
|
| 132 |
+ return newPtr(unsafe.Pointer(&buf[0])), nil |
|
| 133 |
+} |
|
| 134 |
+ |
|
| 135 |
+// unmarshalPerCPUValue decodes a buffer into a slice containing one value per |
|
| 136 |
+// possible CPU. |
|
| 137 |
+// |
|
| 138 |
+// valueOut must have a type like *[]elementType |
|
| 139 |
+func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
|
|
| 140 |
+ slicePtrType := reflect.TypeOf(slicePtr) |
|
| 141 |
+ if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
|
|
| 142 |
+ return errors.Errorf("per-cpu value requires pointer to slice")
|
|
| 143 |
+ } |
|
| 144 |
+ |
|
| 145 |
+ possibleCPUs, err := internal.PossibleCPUs() |
|
| 146 |
+ if err != nil {
|
|
| 147 |
+ return err |
|
| 148 |
+ } |
|
| 149 |
+ |
|
| 150 |
+ sliceType := slicePtrType.Elem() |
|
| 151 |
+ slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) |
|
| 152 |
+ |
|
| 153 |
+ sliceElemType := sliceType.Elem() |
|
| 154 |
+ sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr |
|
| 155 |
+ if sliceElemIsPointer {
|
|
| 156 |
+ sliceElemType = sliceElemType.Elem() |
|
| 157 |
+ } |
|
| 158 |
+ |
|
| 159 |
+ step := len(buf) / possibleCPUs |
|
| 160 |
+ if step < elemLength {
|
|
| 161 |
+ return errors.Errorf("per-cpu element length is larger than available data")
|
|
| 162 |
+ } |
|
| 163 |
+ for i := 0; i < possibleCPUs; i++ {
|
|
| 164 |
+ var elem interface{}
|
|
| 165 |
+ if sliceElemIsPointer {
|
|
| 166 |
+ newElem := reflect.New(sliceElemType) |
|
| 167 |
+ slice.Index(i).Set(newElem) |
|
| 168 |
+ elem = newElem.Interface() |
|
| 169 |
+ } else {
|
|
| 170 |
+ elem = slice.Index(i).Addr().Interface() |
|
| 171 |
+ } |
|
| 172 |
+ |
|
| 173 |
+ // Make a copy, since unmarshal can hold on to itemBytes |
|
| 174 |
+ elemBytes := make([]byte, elemLength) |
|
| 175 |
+ copy(elemBytes, buf[:elemLength]) |
|
| 176 |
+ |
|
| 177 |
+ err := unmarshalBytes(elem, elemBytes) |
|
| 178 |
+ if err != nil {
|
|
| 179 |
+ return errors.Wrapf(err, "cpu %d", i) |
|
| 180 |
+ } |
|
| 181 |
+ |
|
| 182 |
+ buf = buf[step:] |
|
| 183 |
+ } |
|
| 184 |
+ |
|
| 185 |
+ reflect.ValueOf(slicePtr).Elem().Set(slice) |
|
| 186 |
+ return nil |
|
| 187 |
+} |
|
| 188 |
+ |
|
| 189 |
// align rounds n up to the next multiple of alignment.
func align(n, alignment int) int {
	padded := n + alignment - 1
	// Dropping the remainder is the same as dividing and multiplying back.
	return padded - padded%alignment
}
| 0 | 192 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,504 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bytes" |
|
| 4 |
+ "fmt" |
|
| 5 |
+ "math" |
|
| 6 |
+ "strings" |
|
| 7 |
+ "time" |
|
| 8 |
+ "unsafe" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/cilium/ebpf/asm" |
|
| 11 |
+ "github.com/cilium/ebpf/internal" |
|
| 12 |
+ "github.com/cilium/ebpf/internal/unix" |
|
| 13 |
+ |
|
| 14 |
+ "github.com/pkg/errors" |
|
| 15 |
+) |
|
| 16 |
+ |
|
| 17 |
const (
	// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
	// This is currently the maximum of spare space allocated for SKB
	// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
	// testRun adds it to the input length when sizing the output buffer.
	outputPad = 256 + 2
)
|
| 23 |
+ |
|
| 24 |
// DefaultVerifierLogSize is the default number of bytes allocated for the
// verifier log. It is used when ProgramOptions.LogSize is not positive.
const DefaultVerifierLogSize = 64 * 1024
|
| 27 |
+ |
|
| 28 |
// ProgramOptions control loading a program into the kernel.
type ProgramOptions struct {
	// Controls the detail emitted by the kernel verifier. Set to non-zero
	// to enable logging. If it is zero and loading fails, the load is
	// attempted again with level 1 to obtain a log for the error.
	LogLevel uint32
	// Controls the output buffer size for the verifier. Defaults to
	// DefaultVerifierLogSize. Values that are not positive select
	// the default.
	LogSize int
}
|
| 37 |
+ |
|
| 38 |
// ProgramSpec defines a Program
type ProgramSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name string
	// Type is passed to the kernel as the program type.
	Type ProgramType
	// AttachType is passed to the kernel as the expected attach type.
	AttachType AttachType
	// Instructions is the body of the program. Must not be empty.
	Instructions asm.Instructions
	// License is passed to the kernel. Must not be empty.
	License string
	// KernelVersion is the kernel version of the spec.
	// NOTE(review): not read by any code visible in this chunk — confirm its use.
	KernelVersion uint32
}
|
| 49 |
+ |
|
| 50 |
+// Copy returns a copy of the spec. |
|
| 51 |
+func (ps *ProgramSpec) Copy() *ProgramSpec {
|
|
| 52 |
+ if ps == nil {
|
|
| 53 |
+ return nil |
|
| 54 |
+ } |
|
| 55 |
+ |
|
| 56 |
+ cpy := *ps |
|
| 57 |
+ cpy.Instructions = make(asm.Instructions, len(ps.Instructions)) |
|
| 58 |
+ copy(cpy.Instructions, ps.Instructions) |
|
| 59 |
+ return &cpy |
|
| 60 |
+} |
|
| 61 |
+ |
|
| 62 |
// Program represents BPF program loaded into the kernel.
//
// It is not safe to close a Program which is used by other goroutines.
type Program struct {
	// Contains the output of the kernel verifier if enabled,
	// otherwise it is empty.
	VerifierLog string

	// fd wraps the file descriptor of the loaded program.
	fd *bpfFD
	// name is included in the output of String when it is not empty.
	name string
	// abi is the value returned by ABI.
	abi ProgramABI
}
|
| 74 |
+ |
|
| 75 |
+// NewProgram creates a new Program. |
|
| 76 |
+// |
|
| 77 |
+// Loading a program for the first time will perform |
|
| 78 |
+// feature detection by loading small, temporary programs. |
|
| 79 |
+func NewProgram(spec *ProgramSpec) (*Program, error) {
|
|
| 80 |
+ return NewProgramWithOptions(spec, ProgramOptions{})
|
|
| 81 |
+} |
|
| 82 |
+ |
|
| 83 |
+// NewProgramWithOptions creates a new Program. |
|
| 84 |
+// |
|
| 85 |
+// Loading a program for the first time will perform |
|
| 86 |
+// feature detection by loading small, temporary programs. |
|
| 87 |
+func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
|
|
| 88 |
+ attr, err := convertProgramSpec(spec) |
|
| 89 |
+ if err != nil {
|
|
| 90 |
+ return nil, err |
|
| 91 |
+ } |
|
| 92 |
+ |
|
| 93 |
+ logSize := DefaultVerifierLogSize |
|
| 94 |
+ if opts.LogSize > 0 {
|
|
| 95 |
+ logSize = opts.LogSize |
|
| 96 |
+ } |
|
| 97 |
+ |
|
| 98 |
+ var logBuf []byte |
|
| 99 |
+ if opts.LogLevel > 0 {
|
|
| 100 |
+ logBuf = make([]byte, logSize) |
|
| 101 |
+ attr.logLevel = opts.LogLevel |
|
| 102 |
+ attr.logSize = uint32(len(logBuf)) |
|
| 103 |
+ attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0])) |
|
| 104 |
+ } |
|
| 105 |
+ |
|
| 106 |
+ fd, err := bpfProgLoad(attr) |
|
| 107 |
+ if err == nil {
|
|
| 108 |
+ prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type})
|
|
| 109 |
+ prog.VerifierLog = convertCString(logBuf) |
|
| 110 |
+ return prog, nil |
|
| 111 |
+ } |
|
| 112 |
+ |
|
| 113 |
+ truncated := errors.Cause(err) == unix.ENOSPC |
|
| 114 |
+ if opts.LogLevel == 0 {
|
|
| 115 |
+ // Re-run with the verifier enabled to get better error messages. |
|
| 116 |
+ logBuf = make([]byte, logSize) |
|
| 117 |
+ attr.logLevel = 1 |
|
| 118 |
+ attr.logSize = uint32(len(logBuf)) |
|
| 119 |
+ attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0])) |
|
| 120 |
+ |
|
| 121 |
+ _, nerr := bpfProgLoad(attr) |
|
| 122 |
+ truncated = errors.Cause(nerr) == unix.ENOSPC |
|
| 123 |
+ } |
|
| 124 |
+ |
|
| 125 |
+ logs := convertCString(logBuf) |
|
| 126 |
+ if truncated {
|
|
| 127 |
+ logs += "\n(truncated...)" |
|
| 128 |
+ } |
|
| 129 |
+ |
|
| 130 |
+ return nil, &loadError{err, logs}
|
|
| 131 |
+} |
|
| 132 |
+ |
|
| 133 |
+// NewProgramFromFD creates a program from a raw fd. |
|
| 134 |
+// |
|
| 135 |
+// You should not use fd after calling this function. |
|
| 136 |
+// |
|
| 137 |
+// Requires at least Linux 4.11. |
|
| 138 |
+func NewProgramFromFD(fd int) (*Program, error) {
|
|
| 139 |
+ if fd < 0 {
|
|
| 140 |
+ return nil, errors.New("invalid fd")
|
|
| 141 |
+ } |
|
| 142 |
+ bpfFd := newBPFFD(uint32(fd)) |
|
| 143 |
+ |
|
| 144 |
+ name, abi, err := newProgramABIFromFd(bpfFd) |
|
| 145 |
+ if err != nil {
|
|
| 146 |
+ bpfFd.forget() |
|
| 147 |
+ return nil, err |
|
| 148 |
+ } |
|
| 149 |
+ |
|
| 150 |
+ return newProgram(bpfFd, name, abi), nil |
|
| 151 |
+} |
|
| 152 |
+ |
|
| 153 |
+func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program {
|
|
| 154 |
+ return &Program{
|
|
| 155 |
+ name: name, |
|
| 156 |
+ fd: fd, |
|
| 157 |
+ abi: *abi, |
|
| 158 |
+ } |
|
| 159 |
+} |
|
| 160 |
+ |
|
| 161 |
+func convertProgramSpec(spec *ProgramSpec) (*bpfProgLoadAttr, error) {
|
|
| 162 |
+ if len(spec.Instructions) == 0 {
|
|
| 163 |
+ return nil, errors.New("Instructions cannot be empty")
|
|
| 164 |
+ } |
|
| 165 |
+ |
|
| 166 |
+ if len(spec.License) == 0 {
|
|
| 167 |
+ return nil, errors.New("License cannot be empty")
|
|
| 168 |
+ } |
|
| 169 |
+ |
|
| 170 |
+ buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize)) |
|
| 171 |
+ err := spec.Instructions.Marshal(buf, internal.NativeEndian) |
|
| 172 |
+ if err != nil {
|
|
| 173 |
+ return nil, err |
|
| 174 |
+ } |
|
| 175 |
+ |
|
| 176 |
+ bytecode := buf.Bytes() |
|
| 177 |
+ insCount := uint32(len(bytecode) / asm.InstructionSize) |
|
| 178 |
+ lic := []byte(spec.License) |
|
| 179 |
+ attr := &bpfProgLoadAttr{
|
|
| 180 |
+ progType: spec.Type, |
|
| 181 |
+ expectedAttachType: spec.AttachType, |
|
| 182 |
+ insCount: insCount, |
|
| 183 |
+ instructions: newPtr(unsafe.Pointer(&bytecode[0])), |
|
| 184 |
+ license: newPtr(unsafe.Pointer(&lic[0])), |
|
| 185 |
+ } |
|
| 186 |
+ |
|
| 187 |
+ name, err := newBPFObjName(spec.Name) |
|
| 188 |
+ if err != nil {
|
|
| 189 |
+ return nil, err |
|
| 190 |
+ } |
|
| 191 |
+ |
|
| 192 |
+ if haveObjName() == nil {
|
|
| 193 |
+ attr.progName = name |
|
| 194 |
+ } |
|
| 195 |
+ |
|
| 196 |
+ return attr, nil |
|
| 197 |
+} |
|
| 198 |
+ |
|
| 199 |
+func (p *Program) String() string {
|
|
| 200 |
+ if p.name != "" {
|
|
| 201 |
+ return fmt.Sprintf("%s(%s)#%v", p.abi.Type, p.name, p.fd)
|
|
| 202 |
+ } |
|
| 203 |
+ return fmt.Sprintf("%s#%v", p.abi.Type, p.fd)
|
|
| 204 |
+} |
|
| 205 |
+ |
|
| 206 |
+// ABI gets the ABI of the Program |
|
| 207 |
+func (p *Program) ABI() ProgramABI {
|
|
| 208 |
+ return p.abi |
|
| 209 |
+} |
|
| 210 |
+ |
|
| 211 |
+// FD gets the file descriptor of the Program. |
|
| 212 |
+// |
|
| 213 |
+// It is invalid to call this function after Close has been called. |
|
| 214 |
+func (p *Program) FD() int {
|
|
| 215 |
+ fd, err := p.fd.value() |
|
| 216 |
+ if err != nil {
|
|
| 217 |
+ // Best effort: -1 is the number most likely to be an |
|
| 218 |
+ // invalid file descriptor. |
|
| 219 |
+ return -1 |
|
| 220 |
+ } |
|
| 221 |
+ |
|
| 222 |
+ return int(fd) |
|
| 223 |
+} |
|
| 224 |
+ |
|
| 225 |
+// Clone creates a duplicate of the Program. |
|
| 226 |
+// |
|
| 227 |
+// Closing the duplicate does not affect the original, and vice versa. |
|
| 228 |
+// |
|
| 229 |
+// Cloning a nil Program returns nil. |
|
| 230 |
+func (p *Program) Clone() (*Program, error) {
|
|
| 231 |
+ if p == nil {
|
|
| 232 |
+ return nil, nil |
|
| 233 |
+ } |
|
| 234 |
+ |
|
| 235 |
+ dup, err := p.fd.dup() |
|
| 236 |
+ if err != nil {
|
|
| 237 |
+ return nil, errors.Wrap(err, "can't clone program") |
|
| 238 |
+ } |
|
| 239 |
+ |
|
| 240 |
+ return newProgram(dup, p.name, &p.abi), nil |
|
| 241 |
+} |
|
| 242 |
+ |
|
| 243 |
+// Pin persists the Program past the lifetime of the process that created it |
|
| 244 |
+// |
|
| 245 |
+// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional |
|
| 246 |
+func (p *Program) Pin(fileName string) error {
|
|
| 247 |
+ return errors.Wrap(bpfPinObject(fileName, p.fd), "can't pin program") |
|
| 248 |
+} |
|
| 249 |
+ |
|
| 250 |
+// Close unloads the program from the kernel. |
|
| 251 |
+func (p *Program) Close() error {
|
|
| 252 |
+ if p == nil {
|
|
| 253 |
+ return nil |
|
| 254 |
+ } |
|
| 255 |
+ |
|
| 256 |
+ return p.fd.close() |
|
| 257 |
+} |
|
| 258 |
+ |
|
| 259 |
+// Test runs the Program in the kernel with the given input and returns the |
|
| 260 |
+// value returned by the eBPF program. outLen may be zero. |
|
| 261 |
+// |
|
| 262 |
+// Note: the kernel expects at least 14 bytes input for an ethernet header for |
|
| 263 |
+// XDP and SKB programs. |
|
| 264 |
+// |
|
| 265 |
+// This function requires at least Linux 4.12. |
|
| 266 |
+func (p *Program) Test(in []byte) (uint32, []byte, error) {
|
|
| 267 |
+ ret, out, _, err := p.testRun(in, 1) |
|
| 268 |
+ return ret, out, errors.Wrap(err, "can't test program") |
|
| 269 |
+} |
|
| 270 |
+ |
|
| 271 |
+// Benchmark runs the Program with the given input for a number of times |
|
| 272 |
+// and returns the time taken per iteration. |
|
| 273 |
+// |
|
| 274 |
+// The returned value is the return value of the last execution of |
|
| 275 |
+// the program. |
|
| 276 |
+// |
|
| 277 |
+// This function requires at least Linux 4.12. |
|
| 278 |
+func (p *Program) Benchmark(in []byte, repeat int) (uint32, time.Duration, error) {
|
|
| 279 |
+ ret, _, total, err := p.testRun(in, repeat) |
|
| 280 |
+ return ret, total, errors.Wrap(err, "can't benchmark program") |
|
| 281 |
+} |
|
| 282 |
+ |
|
| 283 |
+var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() bool {
|
|
| 284 |
+ prog, err := NewProgram(&ProgramSpec{
|
|
| 285 |
+ Type: SocketFilter, |
|
| 286 |
+ Instructions: asm.Instructions{
|
|
| 287 |
+ asm.LoadImm(asm.R0, 0, asm.DWord), |
|
| 288 |
+ asm.Return(), |
|
| 289 |
+ }, |
|
| 290 |
+ License: "MIT", |
|
| 291 |
+ }) |
|
| 292 |
+ if err != nil {
|
|
| 293 |
+ // This may be because we lack sufficient permissions, etc. |
|
| 294 |
+ return false |
|
| 295 |
+ } |
|
| 296 |
+ defer prog.Close() |
|
| 297 |
+ |
|
| 298 |
+ fd, err := prog.fd.value() |
|
| 299 |
+ if err != nil {
|
|
| 300 |
+ return false |
|
| 301 |
+ } |
|
| 302 |
+ |
|
| 303 |
+ // Programs require at least 14 bytes input |
|
| 304 |
+ in := make([]byte, 14) |
|
| 305 |
+ attr := bpfProgTestRunAttr{
|
|
| 306 |
+ fd: fd, |
|
| 307 |
+ dataSizeIn: uint32(len(in)), |
|
| 308 |
+ dataIn: newPtr(unsafe.Pointer(&in[0])), |
|
| 309 |
+ } |
|
| 310 |
+ |
|
| 311 |
+ _, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 312 |
+ |
|
| 313 |
+ // Check for EINVAL specifically, rather than err != nil since we |
|
| 314 |
+ // otherwise misdetect due to insufficient permissions. |
|
| 315 |
+ return errors.Cause(err) != unix.EINVAL |
|
| 316 |
+}) |
|
| 317 |
+ |
|
| 318 |
+func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration, error) {
|
|
| 319 |
+ if uint(repeat) > math.MaxUint32 {
|
|
| 320 |
+ return 0, nil, 0, fmt.Errorf("repeat is too high")
|
|
| 321 |
+ } |
|
| 322 |
+ |
|
| 323 |
+ if len(in) == 0 {
|
|
| 324 |
+ return 0, nil, 0, fmt.Errorf("missing input")
|
|
| 325 |
+ } |
|
| 326 |
+ |
|
| 327 |
+ if uint(len(in)) > math.MaxUint32 {
|
|
| 328 |
+ return 0, nil, 0, fmt.Errorf("input is too long")
|
|
| 329 |
+ } |
|
| 330 |
+ |
|
| 331 |
+ if err := haveProgTestRun(); err != nil {
|
|
| 332 |
+ return 0, nil, 0, err |
|
| 333 |
+ } |
|
| 334 |
+ |
|
| 335 |
+ // Older kernels ignore the dataSizeOut argument when copying to user space. |
|
| 336 |
+ // Combined with things like bpf_xdp_adjust_head() we don't really know what the final |
|
| 337 |
+ // size will be. Hence we allocate an output buffer which we hope will always be large |
|
| 338 |
+ // enough, and panic if the kernel wrote past the end of the allocation. |
|
| 339 |
+ // See https://patchwork.ozlabs.org/cover/1006822/ |
|
| 340 |
+ out := make([]byte, len(in)+outputPad) |
|
| 341 |
+ |
|
| 342 |
+ fd, err := p.fd.value() |
|
| 343 |
+ if err != nil {
|
|
| 344 |
+ return 0, nil, 0, err |
|
| 345 |
+ } |
|
| 346 |
+ |
|
| 347 |
+ attr := bpfProgTestRunAttr{
|
|
| 348 |
+ fd: fd, |
|
| 349 |
+ dataSizeIn: uint32(len(in)), |
|
| 350 |
+ dataSizeOut: uint32(len(out)), |
|
| 351 |
+ dataIn: newPtr(unsafe.Pointer(&in[0])), |
|
| 352 |
+ dataOut: newPtr(unsafe.Pointer(&out[0])), |
|
| 353 |
+ repeat: uint32(repeat), |
|
| 354 |
+ } |
|
| 355 |
+ |
|
| 356 |
+ _, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 357 |
+ if err != nil {
|
|
| 358 |
+ return 0, nil, 0, errors.Wrap(err, "can't run test") |
|
| 359 |
+ } |
|
| 360 |
+ |
|
| 361 |
+ if int(attr.dataSizeOut) > cap(out) {
|
|
| 362 |
+ // Houston, we have a problem. The program created more data than we allocated, |
|
| 363 |
+ // and the kernel wrote past the end of our buffer. |
|
| 364 |
+ panic("kernel wrote past end of output buffer")
|
|
| 365 |
+ } |
|
| 366 |
+ out = out[:int(attr.dataSizeOut)] |
|
| 367 |
+ |
|
| 368 |
+ total := time.Duration(attr.duration) * time.Nanosecond |
|
| 369 |
+ return attr.retval, out, total, nil |
|
| 370 |
+} |
|
| 371 |
+ |
|
| 372 |
+func unmarshalProgram(buf []byte) (*Program, error) {
|
|
| 373 |
+ if len(buf) != 4 {
|
|
| 374 |
+ return nil, errors.New("program id requires 4 byte value")
|
|
| 375 |
+ } |
|
| 376 |
+ |
|
| 377 |
+ // Looking up an entry in a nested map or prog array returns an id, |
|
| 378 |
+ // not an fd. |
|
| 379 |
+ id := internal.NativeEndian.Uint32(buf) |
|
| 380 |
+ fd, err := bpfGetProgramFDByID(id) |
|
| 381 |
+ if err != nil {
|
|
| 382 |
+ return nil, err |
|
| 383 |
+ } |
|
| 384 |
+ |
|
| 385 |
+ name, abi, err := newProgramABIFromFd(fd) |
|
| 386 |
+ if err != nil {
|
|
| 387 |
+ _ = fd.close() |
|
| 388 |
+ return nil, err |
|
| 389 |
+ } |
|
| 390 |
+ |
|
| 391 |
+ return newProgram(fd, name, abi), nil |
|
| 392 |
+} |
|
| 393 |
+ |
|
| 394 |
+// MarshalBinary implements BinaryMarshaler. |
|
| 395 |
+func (p *Program) MarshalBinary() ([]byte, error) {
|
|
| 396 |
+ value, err := p.fd.value() |
|
| 397 |
+ if err != nil {
|
|
| 398 |
+ return nil, err |
|
| 399 |
+ } |
|
| 400 |
+ |
|
| 401 |
+ buf := make([]byte, 4) |
|
| 402 |
+ internal.NativeEndian.PutUint32(buf, value) |
|
| 403 |
+ return buf, nil |
|
| 404 |
+} |
|
| 405 |
+ |
|
| 406 |
+// Attach a Program to a container object fd |
|
| 407 |
+func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
|
|
| 408 |
+ if fd < 0 {
|
|
| 409 |
+ return errors.New("invalid fd")
|
|
| 410 |
+ } |
|
| 411 |
+ |
|
| 412 |
+ pfd, err := p.fd.value() |
|
| 413 |
+ if err != nil {
|
|
| 414 |
+ return err |
|
| 415 |
+ } |
|
| 416 |
+ |
|
| 417 |
+ attr := bpfProgAlterAttr{
|
|
| 418 |
+ targetFd: uint32(fd), |
|
| 419 |
+ attachBpfFd: pfd, |
|
| 420 |
+ attachType: uint32(typ), |
|
| 421 |
+ attachFlags: uint32(flags), |
|
| 422 |
+ } |
|
| 423 |
+ |
|
| 424 |
+ return bpfProgAlter(_ProgAttach, &attr) |
|
| 425 |
+} |
|
| 426 |
+ |
|
| 427 |
+// Detach a Program from a container object fd |
|
| 428 |
+func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
|
|
| 429 |
+ if fd < 0 {
|
|
| 430 |
+ return errors.New("invalid fd")
|
|
| 431 |
+ } |
|
| 432 |
+ |
|
| 433 |
+ pfd, err := p.fd.value() |
|
| 434 |
+ if err != nil {
|
|
| 435 |
+ return err |
|
| 436 |
+ } |
|
| 437 |
+ |
|
| 438 |
+ attr := bpfProgAlterAttr{
|
|
| 439 |
+ targetFd: uint32(fd), |
|
| 440 |
+ attachBpfFd: pfd, |
|
| 441 |
+ attachType: uint32(typ), |
|
| 442 |
+ attachFlags: uint32(flags), |
|
| 443 |
+ } |
|
| 444 |
+ |
|
| 445 |
+ return bpfProgAlter(_ProgDetach, &attr) |
|
| 446 |
+} |
|
| 447 |
+ |
|
| 448 |
+// LoadPinnedProgram loads a Program from a BPF file. |
|
| 449 |
+// |
|
| 450 |
+// Requires at least Linux 4.11. |
|
| 451 |
+func LoadPinnedProgram(fileName string) (*Program, error) {
|
|
| 452 |
+ fd, err := bpfGetObject(fileName) |
|
| 453 |
+ if err != nil {
|
|
| 454 |
+ return nil, err |
|
| 455 |
+ } |
|
| 456 |
+ |
|
| 457 |
+ name, abi, err := newProgramABIFromFd(fd) |
|
| 458 |
+ if err != nil {
|
|
| 459 |
+ _ = fd.close() |
|
| 460 |
+ return nil, errors.Wrapf(err, "can't get ABI for %s", fileName) |
|
| 461 |
+ } |
|
| 462 |
+ |
|
| 463 |
+ return newProgram(fd, name, abi), nil |
|
| 464 |
+} |
|
| 465 |
+ |
|
| 466 |
+// SanitizeName replaces all invalid characters in name. |
|
| 467 |
+// |
|
| 468 |
+// Use this to automatically generate valid names for maps and |
|
| 469 |
+// programs at run time. |
|
| 470 |
+// |
|
| 471 |
+// Passing a negative value for replacement will delete characters |
|
| 472 |
+// instead of replacing them. |
|
| 473 |
+func SanitizeName(name string, replacement rune) string {
|
|
| 474 |
+ return strings.Map(func(char rune) rune {
|
|
| 475 |
+ if invalidBPFObjNameChar(char) {
|
|
| 476 |
+ return replacement |
|
| 477 |
+ } |
|
| 478 |
+ return char |
|
| 479 |
+ }, name) |
|
| 480 |
+} |
|
| 481 |
+ |
|
| 482 |
// loadError is returned when loading a program fails. It carries the
// underlying error together with the verifier log, if there is one.
type loadError struct {
	cause       error
	verifierLog string
}

// Error returns the cause, followed by the verifier log unless it is empty.
func (le *loadError) Error() string {
	msg := fmt.Sprintf("failed to load program: %s", le.cause)
	if le.verifierLog != "" {
		msg += ": " + le.verifierLog
	}
	return msg
}

// Cause returns the underlying error.
func (le *loadError) Cause() error {
	cause := le.cause
	return cause
}
|
| 497 |
+ |
|
| 498 |
+// IsNotSupported returns true if an error occurred because |
|
| 499 |
+// the kernel does not have support for a specific feature. |
|
| 500 |
+func IsNotSupported(err error) bool {
|
|
| 501 |
+ _, notSupported := errors.Cause(err).(*internal.UnsupportedFeatureError) |
|
| 502 |
+ return notSupported |
|
| 503 |
+} |
| 0 | 504 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,14 @@ |
| 0 |
+// +build armbe mips mips64p32 |
|
| 1 |
+ |
|
| 2 |
+package ebpf |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "unsafe" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
// syscallPtr wraps an unsafe.Pointer so that it occupies 64 bits, to
// conform to the syscall specification.
//
// In this build variant (armbe, mips, mips64p32) the 32-bit padding is
// placed before the pointer.
type syscallPtr struct {
	pad uint32
	ptr unsafe.Pointer
}
| 0 | 14 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,14 @@ |
| 0 |
+// +build 386 amd64p32 arm mipsle mips64p32le |
|
| 1 |
+ |
|
| 2 |
+package ebpf |
|
| 3 |
+ |
|
| 4 |
+import ( |
|
| 5 |
+ "unsafe" |
|
| 6 |
+) |
|
| 7 |
+ |
|
| 8 |
// syscallPtr wraps an unsafe.Pointer so that it occupies 64 bits, to
// conform to the syscall specification.
//
// In this build variant (386, amd64p32, arm, mipsle, mips64p32le) the
// 32-bit padding is placed after the pointer.
type syscallPtr struct {
	ptr unsafe.Pointer
	pad uint32
}
| 0 | 14 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,14 @@ |
| 0 |
+// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le |
|
| 1 |
+// +build !armbe,!mips,!mips64p32 |
|
| 2 |
+ |
|
| 3 |
+package ebpf |
|
| 4 |
+ |
|
| 5 |
+import ( |
|
| 6 |
+ "unsafe" |
|
| 7 |
+) |
|
| 8 |
+ |
|
| 9 |
// syscallPtr wraps an unsafe.Pointer to conform to the syscall
// specification.
//
// This variant is built for every architecture not covered by the
// padded variants (see the build constraints above), so no padding
// field is added here.
type syscallPtr struct {
	ptr unsafe.Pointer
}
| 0 | 14 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,20 @@ |
| 0 |
+eBPF |
|
| 1 |
+------- |
|
| 2 |
+[GoDoc](https://godoc.org/github.com/cilium/ebpf) |
|
| 3 |
+ |
|
| 4 |
+eBPF is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes. |
|
| 5 |
+ |
|
| 6 |
+[ebpf/asm](https://godoc.org/github.com/cilium/ebpf/asm) contains a basic assembler. |
|
| 7 |
+ |
|
| 8 |
+The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Cilium](https://www.cilium.io). Feel free to [join](https://cilium.herokuapp.com/) the [libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack. |
|
| 9 |
+ |
|
| 10 |
+## Current status |
|
| 11 |
+ |
|
| 12 |
+The package is production ready, but **the API is explicitly unstable |
|
| 13 |
+right now**. Expect to update your code if you want to follow along. |
|
| 14 |
+ |
|
| 15 |
+## Useful resources |
|
| 16 |
+ |
|
| 17 |
+* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended) |
|
| 18 |
+* [Linux documentation on BPF](http://elixir.free-electrons.com/linux/latest/source/Documentation/networking/filter.txt) |
|
| 19 |
+* [eBPF features by Linux version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md) |
| 0 | 20 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,447 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+import ( |
|
| 3 |
+ "bytes" |
|
| 4 |
+ "path/filepath" |
|
| 5 |
+ "runtime" |
|
| 6 |
+ "strconv" |
|
| 7 |
+ "strings" |
|
| 8 |
+ "unsafe" |
|
| 9 |
+ |
|
| 10 |
+ "github.com/cilium/ebpf/internal" |
|
| 11 |
+ "github.com/cilium/ebpf/internal/unix" |
|
| 12 |
+ |
|
| 13 |
+ "github.com/pkg/errors" |
|
| 14 |
+) |
|
| 15 |
+ |
|
| 16 |
+var errClosedFd = errors.New("use of closed file descriptor")
|
|
| 17 |
+ |
|
| 18 |
+type bpfFD struct {
|
|
| 19 |
+ raw int64 |
|
| 20 |
+} |
|
| 21 |
+ |
|
| 22 |
+func newBPFFD(value uint32) *bpfFD {
|
|
| 23 |
+ fd := &bpfFD{int64(value)}
|
|
| 24 |
+ runtime.SetFinalizer(fd, (*bpfFD).close) |
|
| 25 |
+ return fd |
|
| 26 |
+} |
|
| 27 |
+ |
|
| 28 |
+func (fd *bpfFD) String() string {
|
|
| 29 |
+ return strconv.FormatInt(fd.raw, 10) |
|
| 30 |
+} |
|
| 31 |
+ |
|
| 32 |
+func (fd *bpfFD) value() (uint32, error) {
|
|
| 33 |
+ if fd.raw < 0 {
|
|
| 34 |
+ return 0, errClosedFd |
|
| 35 |
+ } |
|
| 36 |
+ |
|
| 37 |
+ return uint32(fd.raw), nil |
|
| 38 |
+} |
|
| 39 |
+ |
|
| 40 |
+func (fd *bpfFD) close() error {
|
|
| 41 |
+ if fd.raw < 0 {
|
|
| 42 |
+ return nil |
|
| 43 |
+ } |
|
| 44 |
+ |
|
| 45 |
+ value := int(fd.raw) |
|
| 46 |
+ fd.raw = -1 |
|
| 47 |
+ |
|
| 48 |
+ fd.forget() |
|
| 49 |
+ return unix.Close(value) |
|
| 50 |
+} |
|
| 51 |
+ |
|
| 52 |
// forget clears the finalizer set on fd, so garbage collection of the
// wrapper no longer triggers close.
func (fd *bpfFD) forget() {
	runtime.SetFinalizer(fd, nil)
}
|
| 55 |
+ |
|
| 56 |
+func (fd *bpfFD) dup() (*bpfFD, error) {
|
|
| 57 |
+ if fd.raw < 0 {
|
|
| 58 |
+ return nil, errClosedFd |
|
| 59 |
+ } |
|
| 60 |
+ |
|
| 61 |
+ dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0) |
|
| 62 |
+ if err != nil {
|
|
| 63 |
+ return nil, errors.Wrap(err, "can't dup fd") |
|
| 64 |
+ } |
|
| 65 |
+ |
|
| 66 |
+ return newBPFFD(uint32(dup)), nil |
|
| 67 |
+} |
|
| 68 |
+ |
|
| 69 |
+// bpfObjName is a null-terminated string made up of |
|
| 70 |
+// 'A-Za-z0-9_' characters. |
|
| 71 |
+type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte |
|
| 72 |
+ |
|
| 73 |
+// newBPFObjName truncates the result if it is too long. |
|
| 74 |
+func newBPFObjName(name string) (bpfObjName, error) {
|
|
| 75 |
+ idx := strings.IndexFunc(name, invalidBPFObjNameChar) |
|
| 76 |
+ if idx != -1 {
|
|
| 77 |
+ return bpfObjName{}, errors.Errorf("invalid character '%c' in name '%s'", name[idx], name)
|
|
| 78 |
+ } |
|
| 79 |
+ |
|
| 80 |
+ var result bpfObjName |
|
| 81 |
+ copy(result[:unix.BPF_OBJ_NAME_LEN-1], name) |
|
| 82 |
+ return result, nil |
|
| 83 |
+} |
|
| 84 |
+ |
|
| 85 |
// invalidBPFObjNameChar reports whether char is outside the set
// 'A-Za-z0-9_' allowed in BPF object names.
func invalidBPFObjNameChar(char rune) bool {
	isUpper := 'A' <= char && char <= 'Z'
	isLower := 'a' <= char && char <= 'z'
	isDigit := '0' <= char && char <= '9'
	return !(isUpper || isLower || isDigit || char == '_')
}
|
| 99 |
+ |
|
| 100 |
// bpfMapCreateAttr is the attribute block handed to the kernel by
// bpfMapCreate (command _MapCreate). The trailing comments give the
// kernel version and commit that introduced the field.
type bpfMapCreateAttr struct {
	mapType    MapType
	keySize    uint32
	valueSize  uint32
	maxEntries uint32
	flags      uint32
	innerMapFd uint32     // since 4.12 56f668dfe00d
	numaNode   uint32     // since 4.14 96eabe7a40aa
	mapName    bpfObjName // since 4.15 ad5b177bd73f
}
|
| 110 |
+ |
|
| 111 |
// bpfMapOpAttr is the attribute block shared by the per-element map
// commands (lookup, update, delete, get-next-key). For get-next-key the
// value field carries the pointer that receives the next key.
type bpfMapOpAttr struct {
	mapFd   uint32
	padding uint32
	key     syscallPtr
	value   syscallPtr
	flags   uint64
}
|
| 118 |
+ |
|
| 119 |
// bpfMapInfo receives the map description filled in by the kernel via
// bpfGetMapInfoByFD.
type bpfMapInfo struct {
	mapType    uint32
	id         uint32
	keySize    uint32
	valueSize  uint32
	maxEntries uint32
	flags      uint32
	mapName    bpfObjName // since 4.15 ad5b177bd73f
}
|
| 128 |
+ |
|
| 129 |
// bpfPinObjAttr is the attribute block used by bpfPinObject (_ObjPin)
// and bpfGetObject (_ObjGet). fileName points at the path bytes; fd is
// only set when pinning.
type bpfPinObjAttr struct {
	fileName syscallPtr
	fd       uint32
	padding  uint32
}
|
| 134 |
+ |
|
| 135 |
// bpfProgLoadAttr is the attribute block handed to the kernel by
// bpfProgLoad (command _ProgLoad). The trailing comments give the
// kernel version and commit that introduced the field.
type bpfProgLoadAttr struct {
	progType           ProgramType
	insCount           uint32
	instructions       syscallPtr
	license            syscallPtr
	logLevel           uint32
	logSize            uint32
	logBuf             syscallPtr
	kernelVersion      uint32     // since 4.1  2541517c32be
	progFlags          uint32     // since 4.11 e07b98d9bffe
	progName           bpfObjName // since 4.15 067cae47771c
	progIfIndex        uint32     // since 4.15 1f6f4cb7ba21
	expectedAttachType AttachType // since 4.17 5e43f899b03a
}
|
| 149 |
+ |
|
| 150 |
// bpfProgInfo receives the program description filled in by the kernel
// via bpfGetProgInfoByFD.
type bpfProgInfo struct {
	progType     uint32
	id           uint32
	tag          [unix.BPF_TAG_SIZE]byte
	jitedLen     uint32
	xlatedLen    uint32
	jited        syscallPtr
	xlated       syscallPtr
	loadTime     uint64 // since 4.15 cb4d2b3f03d8
	createdByUID uint32
	nrMapIDs     uint32
	mapIds       syscallPtr
	name         bpfObjName
}
|
| 164 |
+ |
|
| 165 |
// bpfProgTestRunAttr is an attribute block for a program test run.
// NOTE(review): presumably used with the _ProgTestRun command; the
// caller is not part of this file, confirm there.
type bpfProgTestRunAttr struct {
	fd          uint32
	retval      uint32
	dataSizeIn  uint32
	dataSizeOut uint32
	dataIn      syscallPtr
	dataOut     syscallPtr
	repeat      uint32
	duration    uint32
}
|
| 175 |
+ |
|
| 176 |
// bpfProgAlterAttr is the attribute block passed to bpfProgAlter
// together with the command to execute.
type bpfProgAlterAttr struct {
	targetFd    uint32
	attachBpfFd uint32
	attachType  uint32
	attachFlags uint32
}
|
| 182 |
+ |
|
| 183 |
// bpfObjGetInfoByFDAttr is the attribute block used by
// bpfGetObjectInfoByFD: the descriptor to query plus the length and
// address of the buffer that receives the info.
type bpfObjGetInfoByFDAttr struct {
	fd      uint32
	infoLen uint32
	info    syscallPtr // May be either bpfMapInfo or bpfProgInfo
}
|
| 188 |
+ |
|
| 189 |
// bpfGetFDByIDAttr is the attribute block used by bpfGetMapFDByID and
// bpfGetProgramFDByID; both only set id.
type bpfGetFDByIDAttr struct {
	id   uint32
	next uint32
}
|
| 193 |
+ |
|
| 194 |
+func newPtr(ptr unsafe.Pointer) syscallPtr {
|
|
| 195 |
+ return syscallPtr{ptr: ptr}
|
|
| 196 |
+} |
|
| 197 |
+ |
|
| 198 |
+func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) {
|
|
| 199 |
+ for {
|
|
| 200 |
+ fd, err := bpfCall(_ProgLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) |
|
| 201 |
+ // As of ~4.20 the verifier can be interrupted by a signal, |
|
| 202 |
+ // and returns EAGAIN in that case. |
|
| 203 |
+ if err == unix.EAGAIN {
|
|
| 204 |
+ continue |
|
| 205 |
+ } |
|
| 206 |
+ |
|
| 207 |
+ if err != nil {
|
|
| 208 |
+ return nil, err |
|
| 209 |
+ } |
|
| 210 |
+ |
|
| 211 |
+ return newBPFFD(uint32(fd)), nil |
|
| 212 |
+ } |
|
| 213 |
+} |
|
| 214 |
+ |
|
| 215 |
+func bpfProgAlter(cmd int, attr *bpfProgAlterAttr) error {
|
|
| 216 |
+ _, err := bpfCall(cmd, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) |
|
| 217 |
+ return err |
|
| 218 |
+} |
|
| 219 |
+ |
|
| 220 |
+func bpfMapCreate(attr *bpfMapCreateAttr) (*bpfFD, error) {
|
|
| 221 |
+ fd, err := bpfCall(_MapCreate, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) |
|
| 222 |
+ if err != nil {
|
|
| 223 |
+ return nil, err |
|
| 224 |
+ } |
|
| 225 |
+ |
|
| 226 |
+ return newBPFFD(uint32(fd)), nil |
|
| 227 |
+} |
|
| 228 |
+ |
|
| 229 |
+var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() bool {
|
|
| 230 |
+ inner, err := bpfMapCreate(&bpfMapCreateAttr{
|
|
| 231 |
+ mapType: Array, |
|
| 232 |
+ keySize: 4, |
|
| 233 |
+ valueSize: 4, |
|
| 234 |
+ maxEntries: 1, |
|
| 235 |
+ }) |
|
| 236 |
+ if err != nil {
|
|
| 237 |
+ return false |
|
| 238 |
+ } |
|
| 239 |
+ defer inner.close() |
|
| 240 |
+ |
|
| 241 |
+ innerFd, _ := inner.value() |
|
| 242 |
+ nested, err := bpfMapCreate(&bpfMapCreateAttr{
|
|
| 243 |
+ mapType: ArrayOfMaps, |
|
| 244 |
+ keySize: 4, |
|
| 245 |
+ valueSize: 4, |
|
| 246 |
+ maxEntries: 1, |
|
| 247 |
+ innerMapFd: innerFd, |
|
| 248 |
+ }) |
|
| 249 |
+ if err != nil {
|
|
| 250 |
+ return false |
|
| 251 |
+ } |
|
| 252 |
+ |
|
| 253 |
+ _ = nested.close() |
|
| 254 |
+ return true |
|
| 255 |
+}) |
|
| 256 |
+ |
|
| 257 |
+func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error {
|
|
| 258 |
+ fd, err := m.value() |
|
| 259 |
+ if err != nil {
|
|
| 260 |
+ return err |
|
| 261 |
+ } |
|
| 262 |
+ |
|
| 263 |
+ attr := bpfMapOpAttr{
|
|
| 264 |
+ mapFd: fd, |
|
| 265 |
+ key: key, |
|
| 266 |
+ value: valueOut, |
|
| 267 |
+ } |
|
| 268 |
+ _, err = bpfCall(_MapLookupElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 269 |
+ return err |
|
| 270 |
+} |
|
| 271 |
+ |
|
| 272 |
+func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error {
|
|
| 273 |
+ fd, err := m.value() |
|
| 274 |
+ if err != nil {
|
|
| 275 |
+ return err |
|
| 276 |
+ } |
|
| 277 |
+ |
|
| 278 |
+ attr := bpfMapOpAttr{
|
|
| 279 |
+ mapFd: fd, |
|
| 280 |
+ key: key, |
|
| 281 |
+ value: valueOut, |
|
| 282 |
+ flags: flags, |
|
| 283 |
+ } |
|
| 284 |
+ _, err = bpfCall(_MapUpdateElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 285 |
+ return err |
|
| 286 |
+} |
|
| 287 |
+ |
|
| 288 |
+func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error {
|
|
| 289 |
+ fd, err := m.value() |
|
| 290 |
+ if err != nil {
|
|
| 291 |
+ return err |
|
| 292 |
+ } |
|
| 293 |
+ |
|
| 294 |
+ attr := bpfMapOpAttr{
|
|
| 295 |
+ mapFd: fd, |
|
| 296 |
+ key: key, |
|
| 297 |
+ } |
|
| 298 |
+ _, err = bpfCall(_MapDeleteElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 299 |
+ return err |
|
| 300 |
+} |
|
| 301 |
+ |
|
| 302 |
+func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error {
|
|
| 303 |
+ fd, err := m.value() |
|
| 304 |
+ if err != nil {
|
|
| 305 |
+ return err |
|
| 306 |
+ } |
|
| 307 |
+ |
|
| 308 |
+ attr := bpfMapOpAttr{
|
|
| 309 |
+ mapFd: fd, |
|
| 310 |
+ key: key, |
|
| 311 |
+ value: nextKeyOut, |
|
| 312 |
+ } |
|
| 313 |
+ _, err = bpfCall(_MapGetNextKey, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 314 |
+ return err |
|
| 315 |
+} |
|
| 316 |
+ |
|
| 317 |
+const bpfFSType = 0xcafe4a11 |
|
| 318 |
+ |
|
| 319 |
+func bpfPinObject(fileName string, fd *bpfFD) error {
|
|
| 320 |
+ dirName := filepath.Dir(fileName) |
|
| 321 |
+ var statfs unix.Statfs_t |
|
| 322 |
+ if err := unix.Statfs(dirName, &statfs); err != nil {
|
|
| 323 |
+ return err |
|
| 324 |
+ } |
|
| 325 |
+ if uint64(statfs.Type) != bpfFSType {
|
|
| 326 |
+ return errors.Errorf("%s is not on a bpf filesystem", fileName)
|
|
| 327 |
+ } |
|
| 328 |
+ |
|
| 329 |
+ value, err := fd.value() |
|
| 330 |
+ if err != nil {
|
|
| 331 |
+ return err |
|
| 332 |
+ } |
|
| 333 |
+ |
|
| 334 |
+ _, err = bpfCall(_ObjPin, unsafe.Pointer(&bpfPinObjAttr{
|
|
| 335 |
+ fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])), |
|
| 336 |
+ fd: value, |
|
| 337 |
+ }), 16) |
|
| 338 |
+ return errors.Wrapf(err, "pin object %s", fileName) |
|
| 339 |
+} |
|
| 340 |
+ |
|
| 341 |
+func bpfGetObject(fileName string) (*bpfFD, error) {
|
|
| 342 |
+ ptr, err := bpfCall(_ObjGet, unsafe.Pointer(&bpfPinObjAttr{
|
|
| 343 |
+ fileName: newPtr(unsafe.Pointer(&[]byte(fileName)[0])), |
|
| 344 |
+ }), 16) |
|
| 345 |
+ if err != nil {
|
|
| 346 |
+ return nil, errors.Wrapf(err, "get object %s", fileName) |
|
| 347 |
+ } |
|
| 348 |
+ return newBPFFD(uint32(ptr)), nil |
|
| 349 |
+} |
|
| 350 |
+ |
|
| 351 |
+func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error {
|
|
| 352 |
+ value, err := fd.value() |
|
| 353 |
+ if err != nil {
|
|
| 354 |
+ return err |
|
| 355 |
+ } |
|
| 356 |
+ |
|
| 357 |
+ // available from 4.13 |
|
| 358 |
+ attr := bpfObjGetInfoByFDAttr{
|
|
| 359 |
+ fd: value, |
|
| 360 |
+ infoLen: uint32(size), |
|
| 361 |
+ info: newPtr(info), |
|
| 362 |
+ } |
|
| 363 |
+ _, err = bpfCall(_ObjGetInfoByFD, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 364 |
+ return errors.Wrapf(err, "fd %d", value) |
|
| 365 |
+} |
|
| 366 |
+ |
|
| 367 |
+func bpfGetProgInfoByFD(fd *bpfFD) (*bpfProgInfo, error) {
|
|
| 368 |
+ var info bpfProgInfo |
|
| 369 |
+ err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)) |
|
| 370 |
+ return &info, errors.Wrap(err, "can't get program info") |
|
| 371 |
+} |
|
| 372 |
+ |
|
| 373 |
+func bpfGetMapInfoByFD(fd *bpfFD) (*bpfMapInfo, error) {
|
|
| 374 |
+ var info bpfMapInfo |
|
| 375 |
+ err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)) |
|
| 376 |
+ return &info, errors.Wrap(err, "can't get map info") |
|
| 377 |
+} |
|
| 378 |
+ |
|
| 379 |
+var haveObjName = internal.FeatureTest("object names", "4.15", func() bool {
|
|
| 380 |
+ name, err := newBPFObjName("feature_test")
|
|
| 381 |
+ if err != nil {
|
|
| 382 |
+ // This really is a fatal error, but it should be caught |
|
| 383 |
+ // by the unit tests not working. |
|
| 384 |
+ return false |
|
| 385 |
+ } |
|
| 386 |
+ |
|
| 387 |
+ attr := bpfMapCreateAttr{
|
|
| 388 |
+ mapType: Array, |
|
| 389 |
+ keySize: 4, |
|
| 390 |
+ valueSize: 4, |
|
| 391 |
+ maxEntries: 1, |
|
| 392 |
+ mapName: name, |
|
| 393 |
+ } |
|
| 394 |
+ |
|
| 395 |
+ fd, err := bpfMapCreate(&attr) |
|
| 396 |
+ if err != nil {
|
|
| 397 |
+ return false |
|
| 398 |
+ } |
|
| 399 |
+ |
|
| 400 |
+ _ = fd.close() |
|
| 401 |
+ return true |
|
| 402 |
+}) |
|
| 403 |
+ |
|
| 404 |
+func bpfGetMapFDByID(id uint32) (*bpfFD, error) {
|
|
| 405 |
+ // available from 4.13 |
|
| 406 |
+ attr := bpfGetFDByIDAttr{
|
|
| 407 |
+ id: id, |
|
| 408 |
+ } |
|
| 409 |
+ ptr, err := bpfCall(_MapGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 410 |
+ if err != nil {
|
|
| 411 |
+ return nil, errors.Wrapf(err, "can't get fd for map id %d", id) |
|
| 412 |
+ } |
|
| 413 |
+ return newBPFFD(uint32(ptr)), nil |
|
| 414 |
+} |
|
| 415 |
+ |
|
| 416 |
+func bpfGetProgramFDByID(id uint32) (*bpfFD, error) {
|
|
| 417 |
+ // available from 4.13 |
|
| 418 |
+ attr := bpfGetFDByIDAttr{
|
|
| 419 |
+ id: id, |
|
| 420 |
+ } |
|
| 421 |
+ ptr, err := bpfCall(_ProgGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) |
|
| 422 |
+ if err != nil {
|
|
| 423 |
+ return nil, errors.Wrapf(err, "can't get fd for program id %d", id) |
|
| 424 |
+ } |
|
| 425 |
+ return newBPFFD(uint32(ptr)), nil |
|
| 426 |
+} |
|
| 427 |
+ |
|
| 428 |
+func bpfCall(cmd int, attr unsafe.Pointer, size uintptr) (uintptr, error) {
|
|
| 429 |
+ r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size) |
|
| 430 |
+ runtime.KeepAlive(attr) |
|
| 431 |
+ |
|
| 432 |
+ var err error |
|
| 433 |
+ if errNo != 0 {
|
|
| 434 |
+ err = errNo |
|
| 435 |
+ } |
|
| 436 |
+ |
|
| 437 |
+ return r1, err |
|
| 438 |
+} |
|
| 439 |
+ |
|
| 440 |
// convertCString returns the bytes of in up to, but not including, the
// first NUL byte. If in contains no NUL byte the result is the empty
// string.
func convertCString(in []byte) string {
	if end := bytes.IndexByte(in, 0); end >= 0 {
		return string(in[:end])
	}
	return ""
}
| 0 | 447 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,189 @@ |
| 0 |
+package ebpf |
|
| 1 |
+ |
|
| 2 |
+//go:generate stringer -output types_string.go -type=MapType,ProgramType |
|
| 3 |
+ |
|
| 4 |
// MapType indicates the type of map structure
// that will be initialized in the kernel.
type MapType uint32

// All the various map types that can be created.
// The values are assigned by iota in declaration order.
const (
	UnspecifiedMap MapType = iota
	// Hash is a hash map
	Hash
	// Array is an array map
	Array
	// ProgramArray - A program array map is a special kind of array map whose map
	// values contain only file descriptors referring to other eBPF
	// programs.  Thus, both the key_size and value_size must be
	// exactly four bytes.  This map is used in conjunction with the
	// TailCall helper.
	ProgramArray
	// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
	// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
	PerfEventArray
	// PerCPUHash - This data structure is useful for people who have high performance
	// network needs and can reconcile adds at the end of some cycle, so that
	// hashes can be lock free without the use of XAdd, which can be costly.
	PerCPUHash
	// PerCPUArray - This data structure is useful for people who have high performance
	// network needs and can reconcile adds at the end of some cycle, so that
	// arrays can be lock free without the use of XAdd, which can be costly.
	// Each CPU gets a copy of this array, the contents of all of which can be reconciled
	// later.
	PerCPUArray
	// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
	// GetStackID
	StackTrace
	// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
	// if an skb is from a socket belonging to a specific cgroup
	CGroupArray
	// LRUHash - This allows you to create a small hash structure that will purge the
	// least recently used items rather than throw an error when you run out of memory
	LRUHash
	// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
	// it has more to do with including the CPU id with the LRU calculation so that if a
	// particular CPU is using a value over-and-over again, then it will be saved, but if
	// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
	// giving weight to CPU locality over overall usage.
	LRUCPUHash
	// LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful,
	// for storing things like IP addresses which can be bit masked allowing for keys of differing
	// values to refer to the same reference based on their masks. See wikipedia for more details.
	LPMTrie
	// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
	// itself.
	ArrayOfMaps
	// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
	// itself.
	HashOfMaps
)
|
| 60 |
+ |
|
| 61 |
+// hasPerCPUValue returns true if the Map stores a value per CPU. |
|
| 62 |
+func (mt MapType) hasPerCPUValue() bool {
|
|
| 63 |
+ if mt == PerCPUHash || mt == PerCPUArray {
|
|
| 64 |
+ return true |
|
| 65 |
+ } |
|
| 66 |
+ return false |
|
| 67 |
+} |
|
| 68 |
+ |
|
| 69 |
// Command numbers passed as the cmd argument of bpfCall. They are
// assigned by iota, so the declaration order must not change.
const (
	_MapCreate = iota
	_MapLookupElem
	_MapUpdateElem
	_MapDeleteElem
	_MapGetNextKey
	_ProgLoad
	_ObjPin
	_ObjGet
	_ProgAttach
	_ProgDetach
	_ProgTestRun
	_ProgGetNextID
	_MapGetNextID
	_ProgGetFDByID
	_MapGetFDByID
	_ObjGetInfoByFD
)
|
| 87 |
+ |
|
| 88 |
// Flag values assigned by iota (0, 1, 2).
// NOTE(review): presumably the flags accepted by map updates (any /
// only if absent / only if present); the users are outside this file,
// confirm there.
const (
	_Any = iota
	_NoExist
	_Exist
)
|
| 93 |
+ |
|
| 94 |
// ProgramType of the eBPF program
type ProgramType uint32

// eBPF program types.
// The values are assigned by iota in declaration order.
const (
	// Unrecognized program type
	UnspecifiedProgram ProgramType = iota
	// SocketFilter socket or seccomp filter
	SocketFilter
	// Kprobe program
	Kprobe
	// SchedCLS traffic control shaper
	SchedCLS
	// SchedACT routing control shaper
	SchedACT
	// TracePoint program
	TracePoint
	// XDP program
	XDP
	// PerfEvent program
	PerfEvent
	// CGroupSKB program
	CGroupSKB
	// CGroupSock program
	CGroupSock
	// LWTIn program
	LWTIn
	// LWTOut program
	LWTOut
	// LWTXmit program
	LWTXmit
	// SockOps program
	SockOps
	// SkSKB program
	SkSKB
	// CGroupDevice program
	CGroupDevice
	// SkMsg program
	SkMsg
	// RawTracepoint program
	RawTracepoint
	// CGroupSockAddr program
	CGroupSockAddr
	// LWTSeg6Local program
	LWTSeg6Local
	// LircMode2 program
	LircMode2
	// SkReuseport program
	SkReuseport
	// FlowDissector program
	FlowDissector
	// CGroupSysctl program
	CGroupSysctl
	// RawTracepointWritable program
	RawTracepointWritable
	// CGroupSockopt program
	CGroupSockopt
)
|
| 152 |
+ |
|
| 153 |
// AttachType of the eBPF program, needed to differentiate allowed context accesses in
// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required.
// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
type AttachType uint32

// AttachNone is an alias for AttachCGroupInetIngress for readability reasons
const AttachNone AttachType = 0

// Attach types. The values are assigned by iota in declaration order,
// starting at zero, so AttachCGroupInetIngress equals AttachNone.
const (
	AttachCGroupInetIngress AttachType = iota
	AttachCGroupInetEgress
	AttachCGroupInetSockCreate
	AttachCGroupSockOps
	AttachSkSKBStreamParser
	AttachSkSKBStreamVerdict
	AttachCGroupDevice
	AttachSkMsgVerdict
	AttachCGroupInet4Bind
	AttachCGroupInet6Bind
	AttachCGroupInet4Connect
	AttachCGroupInet6Connect
	AttachCGroupInet4PostBind
	AttachCGroupInet6PostBind
	AttachCGroupUDP4Sendmsg
	AttachCGroupUDP6Sendmsg
	AttachLircMode2
	AttachFlowDissector
	AttachCGroupSysctl
	AttachCGroupUDP4Recvmsg
	AttachCGroupUDP6Recvmsg
	AttachCGroupGetsockopt
	AttachCGroupSetsockopt
)

// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
type AttachFlags uint32
| 0 | 189 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,78 @@ |
| 0 |
+// Code generated by "stringer -output types_string.go -type=MapType,ProgramType"; DO NOT EDIT. |
|
| 1 |
+ |
|
| 2 |
+package ebpf |
|
| 3 |
+ |
|
| 4 |
+import "strconv" |
|
| 5 |
+ |
|
| 6 |
+func _() {
|
|
| 7 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 8 |
+ // Re-run the stringer command to generate them again. |
|
| 9 |
+ var x [1]struct{}
|
|
| 10 |
+ _ = x[UnspecifiedMap-0] |
|
| 11 |
+ _ = x[Hash-1] |
|
| 12 |
+ _ = x[Array-2] |
|
| 13 |
+ _ = x[ProgramArray-3] |
|
| 14 |
+ _ = x[PerfEventArray-4] |
|
| 15 |
+ _ = x[PerCPUHash-5] |
|
| 16 |
+ _ = x[PerCPUArray-6] |
|
| 17 |
+ _ = x[StackTrace-7] |
|
| 18 |
+ _ = x[CGroupArray-8] |
|
| 19 |
+ _ = x[LRUHash-9] |
|
| 20 |
+ _ = x[LRUCPUHash-10] |
|
| 21 |
+ _ = x[LPMTrie-11] |
|
| 22 |
+ _ = x[ArrayOfMaps-12] |
|
| 23 |
+ _ = x[HashOfMaps-13] |
|
| 24 |
+} |
|
| 25 |
+ |
|
| 26 |
+const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMaps" |
|
| 27 |
+ |
|
| 28 |
+var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136}
|
|
| 29 |
+ |
|
| 30 |
+func (i MapType) String() string {
|
|
| 31 |
+ if i >= MapType(len(_MapType_index)-1) {
|
|
| 32 |
+ return "MapType(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 33 |
+ } |
|
| 34 |
+ return _MapType_name[_MapType_index[i]:_MapType_index[i+1]] |
|
| 35 |
+} |
|
| 36 |
+func _() {
|
|
| 37 |
+ // An "invalid array index" compiler error signifies that the constant values have changed. |
|
| 38 |
+ // Re-run the stringer command to generate them again. |
|
| 39 |
+ var x [1]struct{}
|
|
| 40 |
+ _ = x[UnspecifiedProgram-0] |
|
| 41 |
+ _ = x[SocketFilter-1] |
|
| 42 |
+ _ = x[Kprobe-2] |
|
| 43 |
+ _ = x[SchedCLS-3] |
|
| 44 |
+ _ = x[SchedACT-4] |
|
| 45 |
+ _ = x[TracePoint-5] |
|
| 46 |
+ _ = x[XDP-6] |
|
| 47 |
+ _ = x[PerfEvent-7] |
|
| 48 |
+ _ = x[CGroupSKB-8] |
|
| 49 |
+ _ = x[CGroupSock-9] |
|
| 50 |
+ _ = x[LWTIn-10] |
|
| 51 |
+ _ = x[LWTOut-11] |
|
| 52 |
+ _ = x[LWTXmit-12] |
|
| 53 |
+ _ = x[SockOps-13] |
|
| 54 |
+ _ = x[SkSKB-14] |
|
| 55 |
+ _ = x[CGroupDevice-15] |
|
| 56 |
+ _ = x[SkMsg-16] |
|
| 57 |
+ _ = x[RawTracepoint-17] |
|
| 58 |
+ _ = x[CGroupSockAddr-18] |
|
| 59 |
+ _ = x[LWTSeg6Local-19] |
|
| 60 |
+ _ = x[LircMode2-20] |
|
| 61 |
+ _ = x[SkReuseport-21] |
|
| 62 |
+ _ = x[FlowDissector-22] |
|
| 63 |
+ _ = x[CGroupSysctl-23] |
|
| 64 |
+ _ = x[RawTracepointWritable-24] |
|
| 65 |
+ _ = x[CGroupSockopt-25] |
|
| 66 |
+} |
|
| 67 |
+ |
|
| 68 |
+const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockopt" |
|
| 69 |
+ |
|
| 70 |
+var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258}
|
|
| 71 |
+ |
|
| 72 |
+func (i ProgramType) String() string {
|
|
| 73 |
+ if i >= ProgramType(len(_ProgramType_index)-1) {
|
|
| 74 |
+ return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")"
|
|
| 75 |
+ } |
|
| 76 |
+ return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]] |
|
| 77 |
+} |
| 0 | 78 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,83 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "math" |
|
| 20 |
+ "strconv" |
|
| 21 |
+ "strings" |
|
| 22 |
+) |
|
| 23 |
+ |
|
| 24 |
// CPUMax is a value of the cgroup v2 "cpu.max" file, formatted as
// "<quota> <period>" where the quota may be the literal "max".
type CPUMax string

// NewCPUMax builds a CPUMax from quota and period.
//
// A nil quota is rendered as "max". A nil period is rendered as 100000,
// the default period documented by the kernel for cpu.max, instead of
// being dereferenced.
func NewCPUMax(quota *int64, period *uint64) CPUMax {
	const defaultPeriod uint64 = 100000

	max := "max"
	if quota != nil {
		max = strconv.FormatInt(*quota, 10)
	}
	p := defaultPeriod
	if period != nil {
		p = *period
	}
	return CPUMax(strings.Join([]string{max, strconv.FormatUint(p, 10)}, " "))
}

// CPU holds the cpu and cpuset settings that Values turns into cgroup
// file writes. Nil or empty fields are left out.
type CPU struct {
	Weight *uint64
	Max    CPUMax
	Cpus   string
	Mems   string
}

// extractQuotaAndPeriod splits c into its quota and period.
//
// A quota of "max" is reported as math.MaxInt64. A field that is
// missing or cannot be parsed is reported as the value returned by the
// strconv parser (0 for syntax errors); the method never panics on
// malformed input. Fields may be separated by any whitespace.
func (c CPUMax) extractQuotaAndPeriod() (int64, uint64) {
	var (
		quota  int64
		period uint64
	)
	values := strings.Fields(string(c))
	if len(values) > 0 {
		if values[0] == "max" {
			quota = math.MaxInt64
		} else {
			quota, _ = strconv.ParseInt(values[0], 10, 64)
		}
	}
	if len(values) > 1 {
		period, _ = strconv.ParseUint(values[1], 10, 64)
	}
	return quota, period
}
|
| 55 |
+ |
|
| 56 |
+func (r *CPU) Values() (o []Value) {
|
|
| 57 |
+ if r.Weight != nil {
|
|
| 58 |
+ o = append(o, Value{
|
|
| 59 |
+ filename: "cpu.weight", |
|
| 60 |
+ value: *r.Weight, |
|
| 61 |
+ }) |
|
| 62 |
+ } |
|
| 63 |
+ if r.Max != "" {
|
|
| 64 |
+ o = append(o, Value{
|
|
| 65 |
+ filename: "cpu.max", |
|
| 66 |
+ value: r.Max, |
|
| 67 |
+ }) |
|
| 68 |
+ } |
|
| 69 |
+ if r.Cpus != "" {
|
|
| 70 |
+ o = append(o, Value{
|
|
| 71 |
+ filename: "cpuset.cpus", |
|
| 72 |
+ value: r.Cpus, |
|
| 73 |
+ }) |
|
| 74 |
+ } |
|
| 75 |
+ if r.Mems != "" {
|
|
| 76 |
+ o = append(o, Value{
|
|
| 77 |
+ filename: "cpuset.mems", |
|
| 78 |
+ value: r.Mems, |
|
| 79 |
+ }) |
|
| 80 |
+ } |
|
| 81 |
+ return o |
|
| 82 |
+} |
| 0 | 83 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,199 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+// Devicefilter contains the eBPF device filter program |
|
| 17 |
+// |
|
| 18 |
+// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c |
|
| 19 |
+// |
|
| 20 |
+// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) |
|
| 21 |
+// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 |
|
| 22 |
+// |
|
| 23 |
+// This particular Go implementation is based on the runc version |
|
| 24 |
+// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go |
|
| 25 |
+package v2 |
|
| 26 |
+ |
|
| 27 |
+import ( |
|
| 28 |
+ "fmt" |
|
| 29 |
+ "math" |
|
| 30 |
+ |
|
| 31 |
+ "github.com/cilium/ebpf/asm" |
|
| 32 |
+ "github.com/opencontainers/runtime-spec/specs-go" |
|
| 33 |
+ "github.com/pkg/errors" |
|
| 34 |
+ "golang.org/x/sys/unix" |
|
| 35 |
+) |
|
| 36 |
+ |
|
| 37 |
const (
	// license string format is same as kernel MODULE_LICENSE macro
	// It is returned by DeviceFilter together with the generated program.
	license = "Apache"
)
|
| 41 |
+ |
|
| 42 |
+// DeviceFilter returns eBPF device filter program and its license string |
|
| 43 |
+func DeviceFilter(devices []specs.LinuxDeviceCgroup) (asm.Instructions, string, error) {
|
|
| 44 |
+ p := &program{}
|
|
| 45 |
+ p.init() |
|
| 46 |
+ for i := len(devices) - 1; i >= 0; i-- {
|
|
| 47 |
+ if err := p.appendDevice(devices[i]); err != nil {
|
|
| 48 |
+ return nil, "", err |
|
| 49 |
+ } |
|
| 50 |
+ } |
|
| 51 |
+ insts, err := p.finalize() |
|
| 52 |
+ return insts, license, err |
|
| 53 |
+} |
|
| 54 |
+ |
|
| 55 |
// program accumulates the eBPF instructions of a device filter while rules
// are appended one at a time.
type program struct {
	// insts is the instruction stream built so far.
	insts asm.Instructions
	// hasWildCard is set once a rule with no type, access, major or minor
	// constraint has been appended; later rules are then ignored.
	hasWildCard bool
	// blockID is the index used for the next "block-N" label; finalize sets
	// it to -1 to mark the program as finished.
	blockID int
}
|
| 60 |
+ |
|
| 61 |
+func (p *program) init() {
|
|
| 62 |
+ // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 |
|
| 63 |
+ /* |
|
| 64 |
+ u32 access_type |
|
| 65 |
+ u32 major |
|
| 66 |
+ u32 minor |
|
| 67 |
+ */ |
|
| 68 |
+ // R2 <- type (lower 16 bit of u32 access_type at R1[0]) |
|
| 69 |
+ p.insts = append(p.insts, |
|
| 70 |
+ asm.LoadMem(asm.R2, asm.R1, 0, asm.Half)) |
|
| 71 |
+ |
|
| 72 |
+ // R3 <- access (upper 16 bit of u32 access_type at R1[0]) |
|
| 73 |
+ p.insts = append(p.insts, |
|
| 74 |
+ asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), |
|
| 75 |
+ // RSh: bitwise shift right |
|
| 76 |
+ asm.RSh.Imm32(asm.R3, 16)) |
|
| 77 |
+ |
|
| 78 |
+ // R4 <- major (u32 major at R1[4]) |
|
| 79 |
+ p.insts = append(p.insts, |
|
| 80 |
+ asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) |
|
| 81 |
+ |
|
| 82 |
+ // R5 <- minor (u32 minor at R1[8]) |
|
| 83 |
+ p.insts = append(p.insts, |
|
| 84 |
+ asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) |
|
| 85 |
+} |
|
| 86 |
+ |
|
| 87 |
+// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element. |
|
| 88 |
+func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
|
|
| 89 |
+ if p.blockID < 0 {
|
|
| 90 |
+ return errors.New("the program is finalized")
|
|
| 91 |
+ } |
|
| 92 |
+ if p.hasWildCard {
|
|
| 93 |
+ // All entries after wildcard entry are ignored |
|
| 94 |
+ return nil |
|
| 95 |
+ } |
|
| 96 |
+ |
|
| 97 |
+ bpfType := int32(-1) |
|
| 98 |
+ hasType := true |
|
| 99 |
+ switch dev.Type {
|
|
| 100 |
+ case string('c'):
|
|
| 101 |
+ bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) |
|
| 102 |
+ case string('b'):
|
|
| 103 |
+ bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) |
|
| 104 |
+ case string('a'):
|
|
| 105 |
+ hasType = false |
|
| 106 |
+ default: |
|
| 107 |
+ // if not specified in OCI json, typ is set to DeviceTypeAll |
|
| 108 |
+ return errors.Errorf("invalid DeviceType %q", dev.Type)
|
|
| 109 |
+ } |
|
| 110 |
+ if *dev.Major > math.MaxUint32 {
|
|
| 111 |
+ return errors.Errorf("invalid major %d", *dev.Major)
|
|
| 112 |
+ } |
|
| 113 |
+ if *dev.Minor > math.MaxUint32 {
|
|
| 114 |
+ return errors.Errorf("invalid minor %d", *dev.Major)
|
|
| 115 |
+ } |
|
| 116 |
+ hasMajor := *dev.Major >= 0 // if not specified in OCI json, major is set to -1 |
|
| 117 |
+ hasMinor := *dev.Minor >= 0 |
|
| 118 |
+ bpfAccess := int32(0) |
|
| 119 |
+ for _, r := range dev.Access {
|
|
| 120 |
+ switch r {
|
|
| 121 |
+ case 'r': |
|
| 122 |
+ bpfAccess |= unix.BPF_DEVCG_ACC_READ |
|
| 123 |
+ case 'w': |
|
| 124 |
+ bpfAccess |= unix.BPF_DEVCG_ACC_WRITE |
|
| 125 |
+ case 'm': |
|
| 126 |
+ bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD |
|
| 127 |
+ default: |
|
| 128 |
+ return errors.Errorf("unknown device access %v", r)
|
|
| 129 |
+ } |
|
| 130 |
+ } |
|
| 131 |
+ // If the access is rwm, skip the check. |
|
| 132 |
+ hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) |
|
| 133 |
+ |
|
| 134 |
+ blockSym := fmt.Sprintf("block-%d", p.blockID)
|
|
| 135 |
+ nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
|
| 136 |
+ prevBlockLastIdx := len(p.insts) - 1 |
|
| 137 |
+ if hasType {
|
|
| 138 |
+ p.insts = append(p.insts, |
|
| 139 |
+ // if (R2 != bpfType) goto next |
|
| 140 |
+ asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), |
|
| 141 |
+ ) |
|
| 142 |
+ } |
|
| 143 |
+ if hasAccess {
|
|
| 144 |
+ p.insts = append(p.insts, |
|
| 145 |
+ // if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next |
|
| 146 |
+ asm.Mov.Reg32(asm.R1, asm.R3), |
|
| 147 |
+ asm.And.Imm32(asm.R1, bpfAccess), |
|
| 148 |
+ asm.JEq.Imm(asm.R1, 0, nextBlockSym), |
|
| 149 |
+ ) |
|
| 150 |
+ } |
|
| 151 |
+ if hasMajor {
|
|
| 152 |
+ p.insts = append(p.insts, |
|
| 153 |
+ // if (R4 != major) goto next |
|
| 154 |
+ asm.JNE.Imm(asm.R4, int32(*dev.Major), nextBlockSym), |
|
| 155 |
+ ) |
|
| 156 |
+ } |
|
| 157 |
+ if hasMinor {
|
|
| 158 |
+ p.insts = append(p.insts, |
|
| 159 |
+ // if (R5 != minor) goto next |
|
| 160 |
+ asm.JNE.Imm(asm.R5, int32(*dev.Minor), nextBlockSym), |
|
| 161 |
+ ) |
|
| 162 |
+ } |
|
| 163 |
+ if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
|
| 164 |
+ p.hasWildCard = true |
|
| 165 |
+ } |
|
| 166 |
+ p.insts = append(p.insts, acceptBlock(dev.Allow)...) |
|
| 167 |
+ // set blockSym to the first instruction we added in this iteration |
|
| 168 |
+ p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) |
|
| 169 |
+ p.blockID++ |
|
| 170 |
+ return nil |
|
| 171 |
+} |
|
| 172 |
+ |
|
| 173 |
+func (p *program) finalize() (asm.Instructions, error) {
|
|
| 174 |
+ if p.hasWildCard {
|
|
| 175 |
+ // acceptBlock with asm.Return() is already inserted |
|
| 176 |
+ return p.insts, nil |
|
| 177 |
+ } |
|
| 178 |
+ blockSym := fmt.Sprintf("block-%d", p.blockID)
|
|
| 179 |
+ p.insts = append(p.insts, |
|
| 180 |
+ // R0 <- 0 |
|
| 181 |
+ asm.Mov.Imm32(asm.R0, 0).Sym(blockSym), |
|
| 182 |
+ asm.Return(), |
|
| 183 |
+ ) |
|
| 184 |
+ p.blockID = -1 |
|
| 185 |
+ return p.insts, nil |
|
| 186 |
+} |
|
| 187 |
+ |
|
| 188 |
+func acceptBlock(accept bool) asm.Instructions {
|
|
| 189 |
+ v := int32(0) |
|
| 190 |
+ if accept {
|
|
| 191 |
+ v = 1 |
|
| 192 |
+ } |
|
| 193 |
+ return []asm.Instruction{
|
|
| 194 |
+ // R0 <- v |
|
| 195 |
+ asm.Mov.Imm32(asm.R0, v), |
|
| 196 |
+ asm.Return(), |
|
| 197 |
+ } |
|
| 198 |
+} |
| 0 | 199 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,83 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "github.com/cilium/ebpf" |
|
| 20 |
+ "github.com/cilium/ebpf/asm" |
|
| 21 |
+ "github.com/opencontainers/runtime-spec/specs-go" |
|
| 22 |
+ "github.com/pkg/errors" |
|
| 23 |
+ "golang.org/x/sys/unix" |
|
| 24 |
+) |
|
| 25 |
+ |
|
| 26 |
+// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory. |
|
| 27 |
+// |
|
| 28 |
+// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . |
|
| 29 |
+// |
|
| 30 |
+// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 |
|
| 31 |
+func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
|
| 32 |
+ nilCloser := func() error {
|
|
| 33 |
+ return nil |
|
| 34 |
+ } |
|
| 35 |
+ spec := &ebpf.ProgramSpec{
|
|
| 36 |
+ Type: ebpf.CGroupDevice, |
|
| 37 |
+ Instructions: insts, |
|
| 38 |
+ License: license, |
|
| 39 |
+ } |
|
| 40 |
+ prog, err := ebpf.NewProgram(spec) |
|
| 41 |
+ if err != nil {
|
|
| 42 |
+ return nilCloser, err |
|
| 43 |
+ } |
|
| 44 |
+ if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
|
| 45 |
+ return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)") |
|
| 46 |
+ } |
|
| 47 |
+ closer := func() error {
|
|
| 48 |
+ if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
|
| 49 |
+ return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)") |
|
| 50 |
+ } |
|
| 51 |
+ return nil |
|
| 52 |
+ } |
|
| 53 |
+ return closer, nil |
|
| 54 |
+} |
|
| 55 |
+ |
|
| 56 |
// isRWM reports whether cgroupPermissions contains all three of 'r', 'w' and
// 'm'. Order, repetition and any other characters are ignored.
func isRWM(cgroupPermissions string) bool {
	const (
		read = 1 << iota
		write
		mknod
	)
	var seen uint8
	for _, ch := range cgroupPermissions {
		switch ch {
		case 'r':
			seen |= read
		case 'w':
			seen |= write
		case 'm':
			seen |= mknod
		}
	}
	return seen == read|write|mknod
}
|
| 72 |
+ |
|
| 73 |
+// the logic is from runc |
|
| 74 |
+// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/devices_v2.go#L44 |
|
| 75 |
+func canSkipEBPFError(devices []specs.LinuxDeviceCgroup) bool {
|
|
| 76 |
+ for _, dev := range devices {
|
|
| 77 |
+ if dev.Allow || !isRWM(dev.Access) {
|
|
| 78 |
+ return false |
|
| 79 |
+ } |
|
| 80 |
+ } |
|
| 81 |
+ return true |
|
| 82 |
+} |
| 0 | 83 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,50 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "errors" |
|
| 20 |
+ "os" |
|
| 21 |
+) |
|
| 22 |
+ |
|
| 23 |
// Sentinel errors defined by this package. Each is a distinct value created
// with errors.New; its message describes the condition it stands for.
var (
	// ErrInvalidPid: a pid was not greater than 0.
	ErrInvalidPid = errors.New("cgroups: pid must be greater than 0")
	// ErrMountPointNotExist: the cgroup mountpoint does not exist.
	ErrMountPointNotExist = errors.New("cgroups: cgroup mountpoint does not exist")
	// ErrInvalidFormat: a file or value had an unexpected format.
	ErrInvalidFormat = errors.New("cgroups: parsing file with invalid format failed")
	// ErrFreezerNotSupported: the freezer cgroup (v2) is unavailable.
	ErrFreezerNotSupported = errors.New("cgroups: freezer cgroup (v2) not supported on this system")
	// ErrMemoryNotSupported: the memory cgroup (v2) is unavailable.
	ErrMemoryNotSupported = errors.New("cgroups: memory cgroup (v2) not supported on this system")
	// ErrPidsNotSupported: the pids cgroup (v2) is unavailable.
	ErrPidsNotSupported = errors.New("cgroups: pids cgroup (v2) not supported on this system")
	// ErrCPUNotSupported: the cpu cgroup (v2) is unavailable.
	ErrCPUNotSupported = errors.New("cgroups: cpu cgroup (v2) not supported on this system")
	// ErrCgroupDeleted: the cgroup has been deleted.
	ErrCgroupDeleted = errors.New("cgroups: cgroup deleted")
	// ErrNoCgroupMountDestination: no cgroup mount destination was found.
	ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination")
	// ErrInvalidGroupPath: the group path is not valid.
	ErrInvalidGroupPath = errors.New("cgroups: invalid group path")
)
|
| 35 |
+ |
|
| 36 |
// ErrorHandler is a hook that receives an error and returns the error that
// should be acted on instead (possibly nil to suppress it).
type ErrorHandler func(err error) error

// IgnoreNotExist suppresses errors that os.IsNotExist recognizes by returning
// nil for them; every other error (including nil) is returned unchanged.
func IgnoreNotExist(err error) error {
	if !os.IsNotExist(err) {
		return err
	}
	return nil
}

// errPassthrough returns err unchanged.
func errPassthrough(err error) error {
	return err
}
| 0 | 50 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,37 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import "strings" |
|
| 19 |
+ |
|
| 20 |
+type HugeTlb []HugeTlbEntry |
|
| 21 |
+ |
|
| 22 |
+type HugeTlbEntry struct {
|
|
| 23 |
+ HugePageSize string |
|
| 24 |
+ Limit uint64 |
|
| 25 |
+} |
|
| 26 |
+ |
|
| 27 |
+func (r *HugeTlb) Values() (o []Value) {
|
|
| 28 |
+ for _, e := range *r {
|
|
| 29 |
+ o = append(o, Value{
|
|
| 30 |
+ filename: strings.Join([]string{"hugetlb", e.HugePageSize, "max"}, "."),
|
|
| 31 |
+ value: e.Limit, |
|
| 32 |
+ }) |
|
| 33 |
+ } |
|
| 34 |
+ |
|
| 35 |
+ return o |
|
| 36 |
+} |
| 0 | 37 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,64 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import "fmt" |
|
| 19 |
+ |
|
| 20 |
// IOType is the key of a single limit inside an "io.max" line.
type IOType string

// Keys understood in "io.max" entries.
const (
	ReadBPS   IOType = "rbps"
	WriteBPS  IOType = "wbps"
	ReadIOPS  IOType = "riops"
	WriteIOPS IOType = "wiops"
)

// BFQ carries the weight written to "io.bfq.weight".
type BFQ struct {
	Weight uint16
}

// Entry is one per-device limit: a device number, the kind of limit and its
// rate.
type Entry struct {
	Type  IOType
	Major int64
	Minor int64
	Rate  uint64
}

// String renders the entry as "<major>:<minor> <type>=<rate>".
func (e Entry) String() string {
	device := fmt.Sprintf("%d:%d", e.Major, e.Minor)
	limit := fmt.Sprintf("%s=%d", e.Type, e.Rate)
	return device + " " + limit
}
|
| 43 |
+ |
|
| 44 |
+type IO struct {
|
|
| 45 |
+ BFQ BFQ |
|
| 46 |
+ Max []Entry |
|
| 47 |
+} |
|
| 48 |
+ |
|
| 49 |
+func (i *IO) Values() (o []Value) {
|
|
| 50 |
+ if i.BFQ.Weight != 0 {
|
|
| 51 |
+ o = append(o, Value{
|
|
| 52 |
+ filename: "io.bfq.weight", |
|
| 53 |
+ value: i.BFQ.Weight, |
|
| 54 |
+ }) |
|
| 55 |
+ } |
|
| 56 |
+ for _, e := range i.Max {
|
|
| 57 |
+ o = append(o, Value{
|
|
| 58 |
+ filename: "io.max", |
|
| 59 |
+ value: e.String(), |
|
| 60 |
+ }) |
|
| 61 |
+ } |
|
| 62 |
+ return o |
|
| 63 |
+} |
| 0 | 64 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,739 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "bufio" |
|
| 20 |
+ "fmt" |
|
| 21 |
+ "io/ioutil" |
|
| 22 |
+ "math" |
|
| 23 |
+ "os" |
|
| 24 |
+ "path/filepath" |
|
| 25 |
+ "strconv" |
|
| 26 |
+ "strings" |
|
| 27 |
+ "sync" |
|
| 28 |
+ "syscall" |
|
| 29 |
+ "time" |
|
| 30 |
+ |
|
| 31 |
+ "golang.org/x/sys/unix" |
|
| 32 |
+ |
|
| 33 |
+ "github.com/containerd/cgroups/v2/stats" |
|
| 34 |
+ "github.com/godbus/dbus/v5" |
|
| 35 |
+ "github.com/opencontainers/runtime-spec/specs-go" |
|
| 36 |
+ "github.com/pkg/errors" |
|
| 37 |
+ "github.com/sirupsen/logrus" |
|
| 38 |
+ |
|
| 39 |
+ systemdDbus "github.com/coreos/go-systemd/v22/dbus" |
|
| 40 |
+) |
|
| 41 |
+ |
|
| 42 |
const (
	// subtreeControl is the file written to enable or disable controllers
	// for the children of a cgroup.
	subtreeControl = "cgroup.subtree_control"
	// controllersFile is the file listing the controllers of a cgroup.
	controllersFile = "cgroup.controllers"
	// defaultCgroup2Path is presumably the conventional unified mountpoint;
	// its use is outside this part of the file.
	defaultCgroup2Path = "/sys/fs/cgroup"
	// defaultSlice is presumably the systemd slice used by default; its use
	// is outside this part of the file.
	defaultSlice = "system.slice"
)

// NOTE(review): canDelegate and once are not referenced in this part of the
// file; presumably once guards a one-time computation of canDelegate.
var (
	canDelegate bool
	once        sync.Once
)
|
| 53 |
+ |
|
| 54 |
// cgValuer is implemented by resource descriptions that can render
// themselves as a list of control-file writes.
type cgValuer interface {
	Values() []Value
}

// Event is a set of counters.
// NOTE(review): the field names suggest the keys of a cgroup v2
// "memory.events" file; confirm against the code that fills it.
type Event struct {
	Low     uint64
	High    uint64
	Max     uint64
	OOM     uint64
	OOMKill uint64
}
|
| 65 |
+ |
|
| 66 |
// Resources for a cgroups v2 unified hierarchy
//
// Every pointer field is optional: a nil field contributes nothing to Values
// and does not appear in EnabledControllers.
type Resources struct {
	CPU     *CPU
	Memory  *Memory
	Pids    *Pids
	IO      *IO
	RDMA    *RDMA
	HugeTlb *HugeTlb
	// When len(Devices) is zero, devices are not controlled
	Devices []specs.LinuxDeviceCgroup
}
|
| 77 |
+ |
|
| 78 |
+// Values returns the raw filenames and values that |
|
| 79 |
+// can be written to the unified hierarchy |
|
| 80 |
+func (r *Resources) Values() (o []Value) {
|
|
| 81 |
+ if r.CPU != nil {
|
|
| 82 |
+ o = append(o, r.CPU.Values()...) |
|
| 83 |
+ } |
|
| 84 |
+ if r.Memory != nil {
|
|
| 85 |
+ o = append(o, r.Memory.Values()...) |
|
| 86 |
+ } |
|
| 87 |
+ if r.Pids != nil {
|
|
| 88 |
+ o = append(o, r.Pids.Values()...) |
|
| 89 |
+ } |
|
| 90 |
+ if r.IO != nil {
|
|
| 91 |
+ o = append(o, r.IO.Values()...) |
|
| 92 |
+ } |
|
| 93 |
+ if r.RDMA != nil {
|
|
| 94 |
+ o = append(o, r.RDMA.Values()...) |
|
| 95 |
+ } |
|
| 96 |
+ if r.HugeTlb != nil {
|
|
| 97 |
+ o = append(o, r.HugeTlb.Values()...) |
|
| 98 |
+ } |
|
| 99 |
+ return o |
|
| 100 |
+} |
|
| 101 |
+ |
|
| 102 |
+// EnabledControllers returns the list of all not nil resource controllers |
|
| 103 |
+func (r *Resources) EnabledControllers() (c []string) {
|
|
| 104 |
+ if r.CPU != nil {
|
|
| 105 |
+ c = append(c, "cpu") |
|
| 106 |
+ c = append(c, "cpuset") |
|
| 107 |
+ } |
|
| 108 |
+ if r.Memory != nil {
|
|
| 109 |
+ c = append(c, "memory") |
|
| 110 |
+ } |
|
| 111 |
+ if r.Pids != nil {
|
|
| 112 |
+ c = append(c, "pids") |
|
| 113 |
+ } |
|
| 114 |
+ if r.IO != nil {
|
|
| 115 |
+ c = append(c, "io") |
|
| 116 |
+ } |
|
| 117 |
+ if r.RDMA != nil {
|
|
| 118 |
+ c = append(c, "rdma") |
|
| 119 |
+ } |
|
| 120 |
+ if r.HugeTlb != nil {
|
|
| 121 |
+ c = append(c, "hugetlb") |
|
| 122 |
+ } |
|
| 123 |
+ return |
|
| 124 |
+} |
|
| 125 |
+ |
|
| 126 |
+// Value of a cgroup setting |
|
| 127 |
+type Value struct {
|
|
| 128 |
+ filename string |
|
| 129 |
+ value interface{}
|
|
| 130 |
+} |
|
| 131 |
+ |
|
| 132 |
+// write the value to the full, absolute path, of a unified hierarchy |
|
| 133 |
+func (c *Value) write(path string, perm os.FileMode) error {
|
|
| 134 |
+ var data []byte |
|
| 135 |
+ switch t := c.value.(type) {
|
|
| 136 |
+ case uint64: |
|
| 137 |
+ data = []byte(strconv.FormatUint(t, 10)) |
|
| 138 |
+ case uint16: |
|
| 139 |
+ data = []byte(strconv.FormatUint(uint64(t), 10)) |
|
| 140 |
+ case int64: |
|
| 141 |
+ data = []byte(strconv.FormatInt(t, 10)) |
|
| 142 |
+ case []byte: |
|
| 143 |
+ data = t |
|
| 144 |
+ case string: |
|
| 145 |
+ data = []byte(t) |
|
| 146 |
+ case CPUMax: |
|
| 147 |
+ data = []byte(t) |
|
| 148 |
+ default: |
|
| 149 |
+ return ErrInvalidFormat |
|
| 150 |
+ } |
|
| 151 |
+ return ioutil.WriteFile( |
|
| 152 |
+ filepath.Join(path, c.filename), |
|
| 153 |
+ data, |
|
| 154 |
+ perm, |
|
| 155 |
+ ) |
|
| 156 |
+} |
|
| 157 |
+ |
|
| 158 |
+func writeValues(path string, values []Value) error {
|
|
| 159 |
+ for _, o := range values {
|
|
| 160 |
+ if err := o.write(path, defaultFilePerm); err != nil {
|
|
| 161 |
+ return err |
|
| 162 |
+ } |
|
| 163 |
+ } |
|
| 164 |
+ return nil |
|
| 165 |
+} |
|
| 166 |
+ |
|
| 167 |
+func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
|
|
| 168 |
+ if err := VerifyGroupPath(group); err != nil {
|
|
| 169 |
+ return nil, err |
|
| 170 |
+ } |
|
| 171 |
+ path := filepath.Join(mountpoint, group) |
|
| 172 |
+ if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
|
| 173 |
+ return nil, err |
|
| 174 |
+ } |
|
| 175 |
+ m := Manager{
|
|
| 176 |
+ unifiedMountpoint: mountpoint, |
|
| 177 |
+ path: path, |
|
| 178 |
+ } |
|
| 179 |
+ if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
|
|
| 180 |
+ // clean up cgroup dir on failure |
|
| 181 |
+ os.Remove(path) |
|
| 182 |
+ return nil, err |
|
| 183 |
+ } |
|
| 184 |
+ if err := setResources(path, resources); err != nil {
|
|
| 185 |
+ os.Remove(path) |
|
| 186 |
+ return nil, err |
|
| 187 |
+ } |
|
| 188 |
+ return &m, nil |
|
| 189 |
+} |
|
| 190 |
+ |
|
| 191 |
+func LoadManager(mountpoint string, group string) (*Manager, error) {
|
|
| 192 |
+ if err := VerifyGroupPath(group); err != nil {
|
|
| 193 |
+ return nil, err |
|
| 194 |
+ } |
|
| 195 |
+ path := filepath.Join(mountpoint, group) |
|
| 196 |
+ return &Manager{
|
|
| 197 |
+ unifiedMountpoint: mountpoint, |
|
| 198 |
+ path: path, |
|
| 199 |
+ }, nil |
|
| 200 |
+} |
|
| 201 |
+ |
|
| 202 |
+type Manager struct {
|
|
| 203 |
+ unifiedMountpoint string |
|
| 204 |
+ path string |
|
| 205 |
+} |
|
| 206 |
+ |
|
| 207 |
+func setResources(path string, resources *Resources) error {
|
|
| 208 |
+ if resources != nil {
|
|
| 209 |
+ if err := writeValues(path, resources.Values()); err != nil {
|
|
| 210 |
+ return err |
|
| 211 |
+ } |
|
| 212 |
+ if err := setDevices(path, resources.Devices); err != nil {
|
|
| 213 |
+ return err |
|
| 214 |
+ } |
|
| 215 |
+ } |
|
| 216 |
+ return nil |
|
| 217 |
+} |
|
| 218 |
+ |
|
| 219 |
+func (c *Manager) RootControllers() ([]string, error) {
|
|
| 220 |
+ b, err := ioutil.ReadFile(filepath.Join(c.unifiedMountpoint, controllersFile)) |
|
| 221 |
+ if err != nil {
|
|
| 222 |
+ return nil, err |
|
| 223 |
+ } |
|
| 224 |
+ return strings.Fields(string(b)), nil |
|
| 225 |
+} |
|
| 226 |
+ |
|
| 227 |
+func (c *Manager) Controllers() ([]string, error) {
|
|
| 228 |
+ b, err := ioutil.ReadFile(filepath.Join(c.path, controllersFile)) |
|
| 229 |
+ if err != nil {
|
|
| 230 |
+ return nil, err |
|
| 231 |
+ } |
|
| 232 |
+ return strings.Fields(string(b)), nil |
|
| 233 |
+} |
|
| 234 |
+ |
|
| 235 |
// ControllerToggle selects whether controllers are switched on or off when
// writing a subtree control file.
type ControllerToggle int

const (
	// Enable prefixes each controller name with "+". Its value is 1, so the
	// zero value of ControllerToggle is neither Enable nor Disable.
	Enable ControllerToggle = iota + 1
	// Disable prefixes each controller name with "-".
	Disable
)
|
| 241 |
+ |
|
| 242 |
// toggleFunc returns a new slice holding every controller name with prefix
// prepended. The input slice is left untouched; the result is never nil.
func toggleFunc(controllers []string, prefix string) []string {
	prefixed := make([]string, 0, len(controllers))
	for _, name := range controllers {
		prefixed = append(prefixed, prefix+name)
	}
	return prefixed
}
|
| 249 |
+ |
|
| 250 |
+func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error {
|
|
| 251 |
+ // when c.path is like /foo/bar/baz, the following files need to be written: |
|
| 252 |
+ // * /sys/fs/cgroup/cgroup.subtree_control |
|
| 253 |
+ // * /sys/fs/cgroup/foo/cgroup.subtree_control |
|
| 254 |
+ // * /sys/fs/cgroup/foo/bar/cgroup.subtree_control |
|
| 255 |
+ // Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written. |
|
| 256 |
+ split := strings.Split(c.path, "/") |
|
| 257 |
+ var lastErr error |
|
| 258 |
+ for i, _ := range split {
|
|
| 259 |
+ f := strings.Join(split[:i], "/") |
|
| 260 |
+ if !strings.HasPrefix(f, c.unifiedMountpoint) || f == c.path {
|
|
| 261 |
+ continue |
|
| 262 |
+ } |
|
| 263 |
+ filePath := filepath.Join(f, subtreeControl) |
|
| 264 |
+ if err := c.writeSubtreeControl(filePath, controllers, t); err != nil {
|
|
| 265 |
+ // When running as rootless, the user may face EPERM on parent groups, but it is neglible when the |
|
| 266 |
+ // controller is already written. |
|
| 267 |
+ // So we only return the last error. |
|
| 268 |
+ lastErr = errors.Wrapf(err, "failed to write subtree controllers %+v to %q", controllers, filePath) |
|
| 269 |
+ } |
|
| 270 |
+ } |
|
| 271 |
+ return lastErr |
|
| 272 |
+} |
|
| 273 |
+ |
|
| 274 |
+func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error {
|
|
| 275 |
+ f, err := os.OpenFile(filePath, os.O_WRONLY, 0) |
|
| 276 |
+ if err != nil {
|
|
| 277 |
+ return err |
|
| 278 |
+ } |
|
| 279 |
+ defer f.Close() |
|
| 280 |
+ switch t {
|
|
| 281 |
+ case Enable: |
|
| 282 |
+ controllers = toggleFunc(controllers, "+") |
|
| 283 |
+ case Disable: |
|
| 284 |
+ controllers = toggleFunc(controllers, "-") |
|
| 285 |
+ } |
|
| 286 |
+ _, err = f.WriteString(strings.Join(controllers, " ")) |
|
| 287 |
+ return err |
|
| 288 |
+} |
|
| 289 |
+ |
|
| 290 |
+func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) {
|
|
| 291 |
+ if strings.HasPrefix(name, "/") {
|
|
| 292 |
+ return nil, errors.New("name must be relative")
|
|
| 293 |
+ } |
|
| 294 |
+ path := filepath.Join(c.path, name) |
|
| 295 |
+ if err := os.MkdirAll(path, defaultDirPerm); err != nil {
|
|
| 296 |
+ return nil, err |
|
| 297 |
+ } |
|
| 298 |
+ if err := setResources(path, resources); err != nil {
|
|
| 299 |
+ // clean up cgroup dir on failure |
|
| 300 |
+ os.Remove(path) |
|
| 301 |
+ return nil, err |
|
| 302 |
+ } |
|
| 303 |
+ return &Manager{
|
|
| 304 |
+ unifiedMountpoint: c.unifiedMountpoint, |
|
| 305 |
+ path: path, |
|
| 306 |
+ }, nil |
|
| 307 |
+} |
|
| 308 |
+ |
|
| 309 |
+func (c *Manager) AddProc(pid uint64) error {
|
|
| 310 |
+ v := Value{
|
|
| 311 |
+ filename: cgroupProcs, |
|
| 312 |
+ value: pid, |
|
| 313 |
+ } |
|
| 314 |
+ return writeValues(c.path, []Value{v})
|
|
| 315 |
+} |
|
| 316 |
+ |
|
| 317 |
// Delete removes the group by passing its path to remove and returns that
// function's result.
func (c *Manager) Delete() error {
	return remove(c.path)
}
|
| 320 |
+ |
|
| 321 |
+func (c *Manager) Procs(recursive bool) ([]uint64, error) {
|
|
| 322 |
+ var processes []uint64 |
|
| 323 |
+ err := filepath.Walk(c.path, func(p string, info os.FileInfo, err error) error {
|
|
| 324 |
+ if err != nil {
|
|
| 325 |
+ return err |
|
| 326 |
+ } |
|
| 327 |
+ if !recursive && info.IsDir() {
|
|
| 328 |
+ if p == c.path {
|
|
| 329 |
+ return nil |
|
| 330 |
+ } |
|
| 331 |
+ return filepath.SkipDir |
|
| 332 |
+ } |
|
| 333 |
+ _, name := filepath.Split(p) |
|
| 334 |
+ if name != cgroupProcs {
|
|
| 335 |
+ return nil |
|
| 336 |
+ } |
|
| 337 |
+ procs, err := parseCgroupProcsFile(p) |
|
| 338 |
+ if err != nil {
|
|
| 339 |
+ return err |
|
| 340 |
+ } |
|
| 341 |
+ processes = append(processes, procs...) |
|
| 342 |
+ return nil |
|
| 343 |
+ }) |
|
| 344 |
+ return processes, err |
|
| 345 |
+} |
|
| 346 |
+ |
|
| 347 |
// singleValueFiles lists the files that Stat reads with readSingleFile;
// a file that does not exist is skipped there.
var singleValueFiles = []string{
	"pids.current",
	"pids.max",
}
|
| 351 |
+ |
|
| 352 |
+func (c *Manager) Stat() (*stats.Metrics, error) {
|
|
| 353 |
+ controllers, err := c.Controllers() |
|
| 354 |
+ if err != nil {
|
|
| 355 |
+ return nil, err |
|
| 356 |
+ } |
|
| 357 |
+ out := make(map[string]interface{})
|
|
| 358 |
+ for _, controller := range controllers {
|
|
| 359 |
+ switch controller {
|
|
| 360 |
+ case "cpu", "memory": |
|
| 361 |
+ filename := fmt.Sprintf("%s.stat", controller)
|
|
| 362 |
+ if err := readKVStatsFile(c.path, filename, out); err != nil {
|
|
| 363 |
+ if os.IsNotExist(err) {
|
|
| 364 |
+ continue |
|
| 365 |
+ } |
|
| 366 |
+ return nil, err |
|
| 367 |
+ } |
|
| 368 |
+ } |
|
| 369 |
+ } |
|
| 370 |
+ for _, name := range singleValueFiles {
|
|
| 371 |
+ if err := readSingleFile(c.path, name, out); err != nil {
|
|
| 372 |
+ if os.IsNotExist(err) {
|
|
| 373 |
+ continue |
|
| 374 |
+ } |
|
| 375 |
+ return nil, err |
|
| 376 |
+ } |
|
| 377 |
+ } |
|
| 378 |
+ var metrics stats.Metrics |
|
| 379 |
+ |
|
| 380 |
+ metrics.Pids = &stats.PidsStat{
|
|
| 381 |
+ Current: getPidValue("pids.current", out),
|
|
| 382 |
+ Limit: getPidValue("pids.max", out),
|
|
| 383 |
+ } |
|
| 384 |
+ metrics.CPU = &stats.CPUStat{
|
|
| 385 |
+ UsageUsec: getUint64Value("usage_usec", out),
|
|
| 386 |
+ UserUsec: getUint64Value("user_usec", out),
|
|
| 387 |
+ SystemUsec: getUint64Value("system_usec", out),
|
|
| 388 |
+ NrPeriods: getUint64Value("nr_periods", out),
|
|
| 389 |
+ NrThrottled: getUint64Value("nr_throttled", out),
|
|
| 390 |
+ ThrottledUsec: getUint64Value("throttled_usec", out),
|
|
| 391 |
+ } |
|
| 392 |
+ metrics.Memory = &stats.MemoryStat{
|
|
| 393 |
+ Anon: getUint64Value("anon", out),
|
|
| 394 |
+ File: getUint64Value("file", out),
|
|
| 395 |
+ KernelStack: getUint64Value("kernel_stack", out),
|
|
| 396 |
+ Slab: getUint64Value("slab", out),
|
|
| 397 |
+ Sock: getUint64Value("sock", out),
|
|
| 398 |
+ Shmem: getUint64Value("shmem", out),
|
|
| 399 |
+ FileMapped: getUint64Value("file_mapped", out),
|
|
| 400 |
+ FileDirty: getUint64Value("file_dirty", out),
|
|
| 401 |
+ FileWriteback: getUint64Value("file_writeback", out),
|
|
| 402 |
+ AnonThp: getUint64Value("anon_thp", out),
|
|
| 403 |
+ InactiveAnon: getUint64Value("inactive_anon", out),
|
|
| 404 |
+ ActiveAnon: getUint64Value("active_anon", out),
|
|
| 405 |
+ InactiveFile: getUint64Value("inactive_file", out),
|
|
| 406 |
+ ActiveFile: getUint64Value("active_file", out),
|
|
| 407 |
+ Unevictable: getUint64Value("unevictable", out),
|
|
| 408 |
+ SlabReclaimable: getUint64Value("slab_reclaimable", out),
|
|
| 409 |
+ SlabUnreclaimable: getUint64Value("slab_unreclaimable", out),
|
|
| 410 |
+ Pgfault: getUint64Value("pgfault", out),
|
|
| 411 |
+ Pgmajfault: getUint64Value("pgmajfault", out),
|
|
| 412 |
+ WorkingsetRefault: getUint64Value("workingset_refault", out),
|
|
| 413 |
+ WorkingsetActivate: getUint64Value("workingset_activate", out),
|
|
| 414 |
+ WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out),
|
|
| 415 |
+ Pgrefill: getUint64Value("pgrefill", out),
|
|
| 416 |
+ Pgscan: getUint64Value("pgscan", out),
|
|
| 417 |
+ Pgsteal: getUint64Value("pgsteal", out),
|
|
| 418 |
+ Pgactivate: getUint64Value("pgactivate", out),
|
|
| 419 |
+ Pgdeactivate: getUint64Value("pgdeactivate", out),
|
|
| 420 |
+ Pglazyfree: getUint64Value("pglazyfree", out),
|
|
| 421 |
+ Pglazyfreed: getUint64Value("pglazyfreed", out),
|
|
| 422 |
+ ThpFaultAlloc: getUint64Value("thp_fault_alloc", out),
|
|
| 423 |
+ ThpCollapseAlloc: getUint64Value("thp_collapse_alloc", out),
|
|
| 424 |
+ Usage: getStatFileContentUint64(filepath.Join(c.path, "memory.current")), |
|
| 425 |
+ UsageLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.max")), |
|
| 426 |
+ SwapUsage: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")), |
|
| 427 |
+ SwapLimit: getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")), |
|
| 428 |
+ } |
|
| 429 |
+ |
|
| 430 |
+ metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
|
|
| 431 |
+ metrics.Rdma = &stats.RdmaStat{
|
|
| 432 |
+ Current: rdmaStats(filepath.Join(c.path, "rdma.current")), |
|
| 433 |
+ Limit: rdmaStats(filepath.Join(c.path, "rdma.max")), |
|
| 434 |
+ } |
|
| 435 |
+ metrics.Hugetlb = readHugeTlbStats(c.path) |
|
| 436 |
+ |
|
| 437 |
+ return &metrics, nil |
|
| 438 |
+} |
|
| 439 |
+ |
|
| 440 |
// getUint64Value returns the value stored under key in out when that value is
// a uint64. A missing key, or a value of any other dynamic type, yields 0.
func getUint64Value(key string, out map[string]interface{}) uint64 {
	if n, isUint := out[key].(uint64); isUint {
		return n
	}
	return 0
}
|
| 451 |
+ |
|
| 452 |
// getPidValue returns the value stored under key in out as a uint64.
//
// A uint64 value is returned unchanged. The string "max" is mapped to
// math.MaxUint64. A missing key, any other string, or any other dynamic type
// yields 0.
func getPidValue(key string, out map[string]interface{}) uint64 {
	raw, found := out[key]
	if !found {
		return 0
	}
	if n, isUint := raw.(uint64); isUint {
		return n
	}
	if s, isString := raw.(string); isString && s == "max" {
		return math.MaxUint64
	}
	return 0
}
|
| 467 |
+ |
|
| 468 |
+func readSingleFile(path string, file string, out map[string]interface{}) error {
|
|
| 469 |
+ f, err := os.Open(filepath.Join(path, file)) |
|
| 470 |
+ if err != nil {
|
|
| 471 |
+ return err |
|
| 472 |
+ } |
|
| 473 |
+ defer f.Close() |
|
| 474 |
+ data, err := ioutil.ReadAll(f) |
|
| 475 |
+ if err != nil {
|
|
| 476 |
+ return err |
|
| 477 |
+ } |
|
| 478 |
+ s := strings.TrimSpace(string(data)) |
|
| 479 |
+ v, err := parseUint(s, 10, 64) |
|
| 480 |
+ if err != nil {
|
|
| 481 |
+ // if we cannot parse as a uint, parse as a string |
|
| 482 |
+ out[file] = s |
|
| 483 |
+ return nil |
|
| 484 |
+ } |
|
| 485 |
+ out[file] = v |
|
| 486 |
+ return nil |
|
| 487 |
+} |
|
| 488 |
+ |
|
| 489 |
+func readKVStatsFile(path string, file string, out map[string]interface{}) error {
|
|
| 490 |
+ f, err := os.Open(filepath.Join(path, file)) |
|
| 491 |
+ if err != nil {
|
|
| 492 |
+ return err |
|
| 493 |
+ } |
|
| 494 |
+ defer f.Close() |
|
| 495 |
+ |
|
| 496 |
+ s := bufio.NewScanner(f) |
|
| 497 |
+ for s.Scan() {
|
|
| 498 |
+ if err := s.Err(); err != nil {
|
|
| 499 |
+ return err |
|
| 500 |
+ } |
|
| 501 |
+ name, value, err := parseKV(s.Text()) |
|
| 502 |
+ if err != nil {
|
|
| 503 |
+ return errors.Wrapf(err, "error while parsing %s (line=%q)", filepath.Join(path, file), s.Text()) |
|
| 504 |
+ } |
|
| 505 |
+ out[name] = value |
|
| 506 |
+ } |
|
| 507 |
+ return nil |
|
| 508 |
+} |
|
| 509 |
+ |
|
| 510 |
+func (c *Manager) Freeze() error {
|
|
| 511 |
+ return c.freeze(c.path, Frozen) |
|
| 512 |
+} |
|
| 513 |
+ |
|
| 514 |
+func (c *Manager) Thaw() error {
|
|
| 515 |
+ return c.freeze(c.path, Thawed) |
|
| 516 |
+} |
|
| 517 |
+ |
|
| 518 |
+func (c *Manager) freeze(path string, state State) error {
|
|
| 519 |
+ values := state.Values() |
|
| 520 |
+ for {
|
|
| 521 |
+ if err := writeValues(path, values); err != nil {
|
|
| 522 |
+ return err |
|
| 523 |
+ } |
|
| 524 |
+ current, err := fetchState(path) |
|
| 525 |
+ if err != nil {
|
|
| 526 |
+ return err |
|
| 527 |
+ } |
|
| 528 |
+ if current == state {
|
|
| 529 |
+ return nil |
|
| 530 |
+ } |
|
| 531 |
+ time.Sleep(1 * time.Millisecond) |
|
| 532 |
+ } |
|
| 533 |
+} |
|
| 534 |
+ |
|
| 535 |
+// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor |
|
| 536 |
+func (c *Manager) MemoryEventFD() (int, uint32, error) {
|
|
| 537 |
+ fpath := filepath.Join(c.path, "memory.events") |
|
| 538 |
+ fd, err := syscall.InotifyInit() |
|
| 539 |
+ if err != nil {
|
|
| 540 |
+ return 0, 0, errors.Errorf("Failed to create inotify fd")
|
|
| 541 |
+ } |
|
| 542 |
+ wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY) |
|
| 543 |
+ if wd < 0 {
|
|
| 544 |
+ syscall.Close(fd) |
|
| 545 |
+ return 0, 0, errors.Errorf("Failed to add inotify watch for %q", fpath)
|
|
| 546 |
+ } |
|
| 547 |
+ |
|
| 548 |
+ return fd, uint32(wd), nil |
|
| 549 |
+} |
|
| 550 |
+ |
|
| 551 |
+func (c *Manager) EventChan() (<-chan Event, <-chan error) {
|
|
| 552 |
+ ec := make(chan Event) |
|
| 553 |
+ errCh := make(chan error) |
|
| 554 |
+ go c.waitForEvents(ec, errCh) |
|
| 555 |
+ |
|
| 556 |
+ return ec, nil |
|
| 557 |
+} |
|
| 558 |
+ |
|
| 559 |
+func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
|
|
| 560 |
+ fd, wd, err := c.MemoryEventFD() |
|
| 561 |
+ |
|
| 562 |
+ defer syscall.InotifyRmWatch(fd, wd) |
|
| 563 |
+ defer syscall.Close(fd) |
|
| 564 |
+ |
|
| 565 |
+ if err != nil {
|
|
| 566 |
+ errCh <- err |
|
| 567 |
+ return |
|
| 568 |
+ } |
|
| 569 |
+ |
|
| 570 |
+ for {
|
|
| 571 |
+ buffer := make([]byte, syscall.SizeofInotifyEvent*10) |
|
| 572 |
+ bytesRead, err := syscall.Read(fd, buffer) |
|
| 573 |
+ if err != nil {
|
|
| 574 |
+ errCh <- err |
|
| 575 |
+ return |
|
| 576 |
+ } |
|
| 577 |
+ var out map[string]interface{}
|
|
| 578 |
+ if bytesRead >= syscall.SizeofInotifyEvent {
|
|
| 579 |
+ if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
|
|
| 580 |
+ e := Event{
|
|
| 581 |
+ High: out["high"].(uint64), |
|
| 582 |
+ Low: out["low"].(uint64), |
|
| 583 |
+ Max: out["max"].(uint64), |
|
| 584 |
+ OOM: out["oom"].(uint64), |
|
| 585 |
+ OOMKill: out["oom_kill"].(uint64), |
|
| 586 |
+ } |
|
| 587 |
+ ec <- e |
|
| 588 |
+ } else {
|
|
| 589 |
+ errCh <- err |
|
| 590 |
+ return |
|
| 591 |
+ } |
|
| 592 |
+ } |
|
| 593 |
+ } |
|
| 594 |
+} |
|
| 595 |
+ |
|
| 596 |
+func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
|
|
| 597 |
+ if len(devices) == 0 {
|
|
| 598 |
+ return nil |
|
| 599 |
+ } |
|
| 600 |
+ insts, license, err := DeviceFilter(devices) |
|
| 601 |
+ if err != nil {
|
|
| 602 |
+ return err |
|
| 603 |
+ } |
|
| 604 |
+ dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY, 0600) |
|
| 605 |
+ if err != nil {
|
|
| 606 |
+ return errors.Errorf("cannot get dir FD for %s", path)
|
|
| 607 |
+ } |
|
| 608 |
+ defer unix.Close(dirFD) |
|
| 609 |
+ if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
|
| 610 |
+ if !canSkipEBPFError(devices) {
|
|
| 611 |
+ return err |
|
| 612 |
+ } |
|
| 613 |
+ } |
|
| 614 |
+ return nil |
|
| 615 |
+} |
|
| 616 |
+ |
|
| 617 |
+func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) {
|
|
| 618 |
+ if slice == "" {
|
|
| 619 |
+ slice = defaultSlice |
|
| 620 |
+ } |
|
| 621 |
+ path := filepath.Join(defaultCgroup2Path, slice, group) |
|
| 622 |
+ conn, err := systemdDbus.New() |
|
| 623 |
+ if err != nil {
|
|
| 624 |
+ return &Manager{}, err
|
|
| 625 |
+ } |
|
| 626 |
+ defer conn.Close() |
|
| 627 |
+ |
|
| 628 |
+ properties := []systemdDbus.Property{
|
|
| 629 |
+ systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", group)),
|
|
| 630 |
+ newSystemdProperty("DefaultDependencies", false),
|
|
| 631 |
+ newSystemdProperty("MemoryAccounting", true),
|
|
| 632 |
+ newSystemdProperty("CPUAccounting", true),
|
|
| 633 |
+ newSystemdProperty("IOAccounting", true),
|
|
| 634 |
+ } |
|
| 635 |
+ |
|
| 636 |
+ // if we create a slice, the parent is defined via a Wants= |
|
| 637 |
+ if strings.HasSuffix(group, ".slice") {
|
|
| 638 |
+ properties = append(properties, systemdDbus.PropWants(defaultSlice)) |
|
| 639 |
+ } else {
|
|
| 640 |
+ // otherwise, we use Slice= |
|
| 641 |
+ properties = append(properties, systemdDbus.PropSlice(defaultSlice)) |
|
| 642 |
+ } |
|
| 643 |
+ |
|
| 644 |
+ // only add pid if its valid, -1 is used w/ general slice creation. |
|
| 645 |
+ if pid != -1 {
|
|
| 646 |
+ properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)}))
|
|
| 647 |
+ } |
|
| 648 |
+ |
|
| 649 |
+ if resources.Memory != nil && *resources.Memory.Max != 0 {
|
|
| 650 |
+ properties = append(properties, |
|
| 651 |
+ newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max)))
|
|
| 652 |
+ } |
|
| 653 |
+ |
|
| 654 |
+ if resources.CPU != nil && *resources.CPU.Weight != 0 {
|
|
| 655 |
+ properties = append(properties, |
|
| 656 |
+ newSystemdProperty("CPUWeight", *resources.CPU.Weight))
|
|
| 657 |
+ } |
|
| 658 |
+ |
|
| 659 |
+ if resources.CPU != nil && resources.CPU.Max != "" {
|
|
| 660 |
+ quota, period := resources.CPU.Max.extractQuotaAndPeriod() |
|
| 661 |
+ // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. |
|
| 662 |
+ // corresponds to USEC_INFINITY in systemd |
|
| 663 |
+ // if USEC_INFINITY is provided, CPUQuota is left unbound by systemd |
|
| 664 |
+ // always setting a property value ensures we can apply a quota and remove it later |
|
| 665 |
+ cpuQuotaPerSecUSec := uint64(math.MaxUint64) |
|
| 666 |
+ if quota > 0 {
|
|
| 667 |
+ // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota |
|
| 668 |
+ // (integer percentage of CPU) internally. This means that if a fractional percent of |
|
| 669 |
+ // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest |
|
| 670 |
+ // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. |
|
| 671 |
+ cpuQuotaPerSecUSec = uint64(quota*1000000) / period |
|
| 672 |
+ if cpuQuotaPerSecUSec%10000 != 0 {
|
|
| 673 |
+ cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 |
|
| 674 |
+ } |
|
| 675 |
+ } |
|
| 676 |
+ properties = append(properties, |
|
| 677 |
+ newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
|
| 678 |
+ } |
|
| 679 |
+ |
|
| 680 |
+ // If we can delegate, we add the property back in |
|
| 681 |
+ if canDelegate {
|
|
| 682 |
+ properties = append(properties, newSystemdProperty("Delegate", true))
|
|
| 683 |
+ } |
|
| 684 |
+ |
|
| 685 |
+ if resources.Pids != nil && resources.Pids.Max > 0 {
|
|
| 686 |
+ properties = append(properties, |
|
| 687 |
+ newSystemdProperty("TasksAccounting", true),
|
|
| 688 |
+ newSystemdProperty("TasksMax", uint64(resources.Pids.Max)))
|
|
| 689 |
+ } |
|
| 690 |
+ |
|
| 691 |
+ statusChan := make(chan string, 1) |
|
| 692 |
+ if _, err := conn.StartTransientUnit(group, "replace", properties, statusChan); err == nil {
|
|
| 693 |
+ select {
|
|
| 694 |
+ case <-statusChan: |
|
| 695 |
+ case <-time.After(time.Second): |
|
| 696 |
+ logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group)
|
|
| 697 |
+ } |
|
| 698 |
+ } else if !isUnitExists(err) {
|
|
| 699 |
+ return &Manager{}, err
|
|
| 700 |
+ } |
|
| 701 |
+ |
|
| 702 |
+ return &Manager{
|
|
| 703 |
+ path: path, |
|
| 704 |
+ }, nil |
|
| 705 |
+} |
|
| 706 |
+ |
|
| 707 |
+func LoadSystemd(slice, group string) (*Manager, error) {
|
|
| 708 |
+ if slice == "" {
|
|
| 709 |
+ slice = defaultSlice |
|
| 710 |
+ } |
|
| 711 |
+ group = filepath.Join(defaultCgroup2Path, slice, group) |
|
| 712 |
+ return &Manager{
|
|
| 713 |
+ path: group, |
|
| 714 |
+ }, nil |
|
| 715 |
+} |
|
| 716 |
+ |
|
| 717 |
+func (c *Manager) DeleteSystemd() error {
|
|
| 718 |
+ conn, err := systemdDbus.New() |
|
| 719 |
+ if err != nil {
|
|
| 720 |
+ return err |
|
| 721 |
+ } |
|
| 722 |
+ defer conn.Close() |
|
| 723 |
+ group := systemdUnitFromPath(c.path) |
|
| 724 |
+ ch := make(chan string) |
|
| 725 |
+ _, err = conn.StopUnit(group, "replace", ch) |
|
| 726 |
+ if err != nil {
|
|
| 727 |
+ return err |
|
| 728 |
+ } |
|
| 729 |
+ <-ch |
|
| 730 |
+ return nil |
|
| 731 |
+} |
|
| 732 |
+ |
|
| 733 |
+func newSystemdProperty(name string, units interface{}) systemdDbus.Property {
|
|
| 734 |
+ return systemdDbus.Property{
|
|
| 735 |
+ Name: name, |
|
| 736 |
+ Value: dbus.MakeVariant(units), |
|
| 737 |
+ } |
|
| 738 |
+} |
| 0 | 739 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,52 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+type Memory struct {
|
|
| 19 |
+ Swap *int64 |
|
| 20 |
+ Max *int64 |
|
| 21 |
+ Low *int64 |
|
| 22 |
+ High *int64 |
|
| 23 |
+} |
|
| 24 |
+ |
|
| 25 |
+func (r *Memory) Values() (o []Value) {
|
|
| 26 |
+ if r.Swap != nil {
|
|
| 27 |
+ o = append(o, Value{
|
|
| 28 |
+ filename: "memory.swap.max", |
|
| 29 |
+ value: *r.Swap, |
|
| 30 |
+ }) |
|
| 31 |
+ } |
|
| 32 |
+ if r.Max != nil {
|
|
| 33 |
+ o = append(o, Value{
|
|
| 34 |
+ filename: "memory.max", |
|
| 35 |
+ value: *r.Max, |
|
| 36 |
+ }) |
|
| 37 |
+ } |
|
| 38 |
+ if r.Low != nil {
|
|
| 39 |
+ o = append(o, Value{
|
|
| 40 |
+ filename: "memory.low", |
|
| 41 |
+ value: *r.Low, |
|
| 42 |
+ }) |
|
| 43 |
+ } |
|
| 44 |
+ if r.High != nil {
|
|
| 45 |
+ o = append(o, Value{
|
|
| 46 |
+ filename: "memory.high", |
|
| 47 |
+ value: *r.High, |
|
| 48 |
+ }) |
|
| 49 |
+ } |
|
| 50 |
+ return o |
|
| 51 |
+} |
| 0 | 52 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,60 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "fmt" |
|
| 20 |
+ "path/filepath" |
|
| 21 |
+ "strings" |
|
| 22 |
+) |
|
| 23 |
+ |
|
| 24 |
+// NestedGroupPath will nest the cgroups based on the calling processes cgroup |
|
| 25 |
+// placing its child processes inside its own path |
|
| 26 |
+func NestedGroupPath(suffix string) (string, error) {
|
|
| 27 |
+ path, err := parseCgroupFile("/proc/self/cgroup")
|
|
| 28 |
+ if err != nil {
|
|
| 29 |
+ return "", err |
|
| 30 |
+ } |
|
| 31 |
+ return filepath.Join(string(path), suffix), nil |
|
| 32 |
+} |
|
| 33 |
+ |
|
| 34 |
+// PidGroupPath will return the correct cgroup paths for an existing process running inside a cgroup |
|
| 35 |
+// This is commonly used for the Load function to restore an existing container |
|
| 36 |
+func PidGroupPath(pid int) (string, error) {
|
|
| 37 |
+ p := fmt.Sprintf("/proc/%d/cgroup", pid)
|
|
| 38 |
+ return parseCgroupFile(p) |
|
| 39 |
+} |
|
| 40 |
+ |
|
| 41 |
+// VerifyGroupPath verifies the format of group path string g. |
|
| 42 |
+// The format is same as the third field in /proc/PID/cgroup. |
|
| 43 |
+// e.g. "/user.slice/user-1001.slice/session-1.scope" |
|
| 44 |
+// |
|
| 45 |
+// g must be a "clean" absolute path starts with "/", and must not contain "/sys/fs/cgroup" prefix. |
|
| 46 |
+// |
|
| 47 |
+// VerifyGroupPath doesn't verify whether g actually exists on the system. |
|
| 48 |
+func VerifyGroupPath(g string) error {
|
|
| 49 |
+ if !strings.HasPrefix(g, "/") {
|
|
| 50 |
+ return ErrInvalidGroupPath |
|
| 51 |
+ } |
|
| 52 |
+ if filepath.Clean(g) != g {
|
|
| 53 |
+ return ErrInvalidGroupPath |
|
| 54 |
+ } |
|
| 55 |
+ if strings.HasPrefix(g, "/sys/fs/cgroup") {
|
|
| 56 |
+ return ErrInvalidGroupPath |
|
| 57 |
+ } |
|
| 58 |
+ return nil |
|
| 59 |
+} |
| 0 | 60 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,37 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import "strconv" |
|
| 19 |
+ |
|
| 20 |
+type Pids struct {
|
|
| 21 |
+ Max int64 |
|
| 22 |
+} |
|
| 23 |
+ |
|
| 24 |
+func (r *Pids) Values() (o []Value) {
|
|
| 25 |
+ if r.Max != 0 {
|
|
| 26 |
+ limit := "max" |
|
| 27 |
+ if r.Max > 0 {
|
|
| 28 |
+ limit = strconv.FormatInt(r.Max, 10) |
|
| 29 |
+ } |
|
| 30 |
+ o = append(o, Value{
|
|
| 31 |
+ filename: "pids.max", |
|
| 32 |
+ value: limit, |
|
| 33 |
+ }) |
|
| 34 |
+ } |
|
| 35 |
+ return o |
|
| 36 |
+} |
| 0 | 37 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,46 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "fmt" |
|
| 20 |
+) |
|
| 21 |
+ |
|
| 22 |
+type RDMA struct {
|
|
| 23 |
+ Limit []RDMAEntry |
|
| 24 |
+} |
|
| 25 |
+ |
|
| 26 |
// RDMAEntry is the RDMA limit for a single device.
type RDMAEntry struct {
	Device     string // device name, printed first by String
	HcaHandles uint32 // printed as hca_handle=<n>
	HcaObjects uint32 // printed as hca_object=<n>
}

// String renders the entry as "<device> hca_handle=<n> hca_object=<n>".
func (e RDMAEntry) String() string {
	return fmt.Sprintf("%s hca_handle=%d hca_object=%d", e.Device, e.HcaHandles, e.HcaObjects)
}
|
| 35 |
+ |
|
| 36 |
+func (r *RDMA) Values() (o []Value) {
|
|
| 37 |
+ for _, e := range r.Limit {
|
|
| 38 |
+ o = append(o, Value{
|
|
| 39 |
+ filename: "rdma.max", |
|
| 40 |
+ value: e.String(), |
|
| 41 |
+ }) |
|
| 42 |
+ } |
|
| 43 |
+ |
|
| 44 |
+ return o |
|
| 45 |
+} |
| 0 | 46 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,65 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "io/ioutil" |
|
| 20 |
+ "path/filepath" |
|
| 21 |
+ "strings" |
|
| 22 |
+) |
|
| 23 |
+ |
|
| 24 |
// State is a type that represents the state of the current cgroup.
type State string

const (
	// Unknown is the zero State; fetchState reports it when the freeze file
	// holds neither "0" nor "1".
	Unknown State = ""
	// Thawed corresponds to the value "0" in the freeze file.
	Thawed State = "thawed"
	// Frozen corresponds to the value "1" in the freeze file.
	Frozen State = "frozen"
	// Deleted is declared alongside the other states.
	Deleted State = "deleted"

	// cgroupFreeze is the name of the control file holding the freeze flag.
	cgroupFreeze = "cgroup.freeze"
)
|
| 35 |
+ |
|
| 36 |
+func (s State) Values() []Value {
|
|
| 37 |
+ v := Value{
|
|
| 38 |
+ filename: cgroupFreeze, |
|
| 39 |
+ } |
|
| 40 |
+ switch s {
|
|
| 41 |
+ case Frozen: |
|
| 42 |
+ v.value = "1" |
|
| 43 |
+ case Thawed: |
|
| 44 |
+ v.value = "0" |
|
| 45 |
+ } |
|
| 46 |
+ return []Value{
|
|
| 47 |
+ v, |
|
| 48 |
+ } |
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+func fetchState(path string) (State, error) {
|
|
| 52 |
+ current, err := ioutil.ReadFile(filepath.Join(path, cgroupFreeze)) |
|
| 53 |
+ if err != nil {
|
|
| 54 |
+ return Unknown, err |
|
| 55 |
+ } |
|
| 56 |
+ switch strings.TrimSpace(string(current)) {
|
|
| 57 |
+ case "1": |
|
| 58 |
+ return Frozen, nil |
|
| 59 |
+ case "0": |
|
| 60 |
+ return Thawed, nil |
|
| 61 |
+ default: |
|
| 62 |
+ return Unknown, nil |
|
| 63 |
+ } |
|
| 64 |
+} |
| 0 | 65 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,442 @@ |
| 0 |
+/* |
|
| 1 |
+ Copyright The containerd Authors. |
|
| 2 |
+ |
|
| 3 |
+ Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 4 |
+ you may not use this file except in compliance with the License. |
|
| 5 |
+ You may obtain a copy of the License at |
|
| 6 |
+ |
|
| 7 |
+ http://www.apache.org/licenses/LICENSE-2.0 |
|
| 8 |
+ |
|
| 9 |
+ Unless required by applicable law or agreed to in writing, software |
|
| 10 |
+ distributed under the License is distributed on an "AS IS" BASIS, |
|
| 11 |
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 12 |
+ See the License for the specific language governing permissions and |
|
| 13 |
+ limitations under the License. |
|
| 14 |
+*/ |
|
| 15 |
+ |
|
| 16 |
+package v2 |
|
| 17 |
+ |
|
| 18 |
+import ( |
|
| 19 |
+ "bufio" |
|
| 20 |
+ "fmt" |
|
| 21 |
+ "io" |
|
| 22 |
+ "io/ioutil" |
|
| 23 |
+ "math" |
|
| 24 |
+ "os" |
|
| 25 |
+ "path/filepath" |
|
| 26 |
+ "strconv" |
|
| 27 |
+ "strings" |
|
| 28 |
+ "time" |
|
| 29 |
+ |
|
| 30 |
+ "github.com/godbus/dbus/v5" |
|
| 31 |
+ |
|
| 32 |
+ "github.com/containerd/cgroups/v2/stats" |
|
| 33 |
+ "github.com/opencontainers/runtime-spec/specs-go" |
|
| 34 |
+ "github.com/pkg/errors" |
|
| 35 |
+ "github.com/sirupsen/logrus" |
|
| 36 |
+) |
|
| 37 |
+ |
|
| 38 |
const (
	// cgroupProcs is the file name "cgroup.procs".
	cgroupProcs = "cgroup.procs"
	// defaultDirPerm is the permission mode 0755.
	defaultDirPerm = 0755
)
|
| 42 |
+ |
|
| 43 |
+// defaultFilePerm is a var so that the test framework can change the filemode |
|
| 44 |
+// of all files created when the tests are running. The difference between the |
|
| 45 |
+// tests and real world use is that files like "cgroup.procs" will exist when writing |
|
| 46 |
+// to a real cgroup filesystem and do not exist prior when running in the tests. |
|
| 47 |
+// this is set to a non 0 value in the test code |
|
| 48 |
+var defaultFilePerm = os.FileMode(0) |
|
| 49 |
+ |
|
| 50 |
+// remove will remove a cgroup path handling EAGAIN and EBUSY errors and |
|
| 51 |
+// retrying the remove after a exp timeout |
|
| 52 |
+func remove(path string) error {
|
|
| 53 |
+ var err error |
|
| 54 |
+ delay := 10 * time.Millisecond |
|
| 55 |
+ for i := 0; i < 5; i++ {
|
|
| 56 |
+ if i != 0 {
|
|
| 57 |
+ time.Sleep(delay) |
|
| 58 |
+ delay *= 2 |
|
| 59 |
+ } |
|
| 60 |
+ if err = os.RemoveAll(path); err == nil {
|
|
| 61 |
+ return nil |
|
| 62 |
+ } |
|
| 63 |
+ } |
|
| 64 |
+ return errors.Wrapf(err, "cgroups: unable to remove path %q", path) |
|
| 65 |
+} |
|
| 66 |
+ |
|
| 67 |
// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs.
//
// Every non-empty line of the file at path is parsed as a decimal unsigned
// integer and appended to the result in file order. It returns an error when
// the file cannot be opened, when a line is not a valid number, or when the
// scanner fails while reading; in all error cases the slice is nil.
func parseCgroupProcsFile(path string) ([]uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	var (
		out []uint64
		s   = bufio.NewScanner(f)
	)
	for s.Scan() {
		if t := s.Text(); t != "" {
			pid, err := strconv.ParseUint(t, 10, 0)
			if err != nil {
				return nil, err
			}
			out = append(out, pid)
		}
	}
	// Scan returns false at EOF and on failure alike; without this check a
	// read error would be reported as a successful, truncated listing.
	if err := s.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
|
| 89 |
+ |
|
| 90 |
+func parseKV(raw string) (string, interface{}, error) {
|
|
| 91 |
+ parts := strings.Fields(raw) |
|
| 92 |
+ switch len(parts) {
|
|
| 93 |
+ case 2: |
|
| 94 |
+ v, err := parseUint(parts[1], 10, 64) |
|
| 95 |
+ if err != nil {
|
|
| 96 |
+ // if we cannot parse as a uint, parse as a string |
|
| 97 |
+ return parts[0], parts[1], nil |
|
| 98 |
+ } |
|
| 99 |
+ return parts[0], v, nil |
|
| 100 |
+ default: |
|
| 101 |
+ return "", 0, ErrInvalidFormat |
|
| 102 |
+ } |
|
| 103 |
+} |
|
| 104 |
+ |
|
| 105 |
+func readUint(path string) (uint64, error) {
|
|
| 106 |
+ v, err := ioutil.ReadFile(path) |
|
| 107 |
+ if err != nil {
|
|
| 108 |
+ return 0, err |
|
| 109 |
+ } |
|
| 110 |
+ return parseUint(strings.TrimSpace(string(v)), 10, 64) |
|
| 111 |
+} |
|
| 112 |
+ |
|
| 113 |
// parseUint parses s as an unsigned integer like strconv.ParseUint, except
// that negative numbers are clamped to 0 instead of being rejected. This
// covers both negatives that fit in a signed integer of bitSize bits and
// negatives that overflow it. Every other failure returns the original
// strconv.ParseUint error.
func parseUint(s string, base, bitSize int) (uint64, error) {
	v, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return v, nil
	}
	signed, signedErr := strconv.ParseInt(s, base, bitSize)
	if signed >= 0 {
		// Not a negative number: report the unsigned parse failure.
		return 0, err
	}
	// signed < 0 here: either a valid negative value, or a negative value
	// that was saturated by ParseInt because it is out of range.
	if signedErr == nil || signedErr.(*strconv.NumError).Err == strconv.ErrRange {
		return 0, nil
	}
	return 0, err
}
|
| 130 |
+ |
|
| 131 |
+// parseCgroupFile parses /proc/PID/cgroup file and return string |
|
| 132 |
+func parseCgroupFile(path string) (string, error) {
|
|
| 133 |
+ f, err := os.Open(path) |
|
| 134 |
+ if err != nil {
|
|
| 135 |
+ return "", err |
|
| 136 |
+ } |
|
| 137 |
+ defer f.Close() |
|
| 138 |
+ return parseCgroupFromReader(f) |
|
| 139 |
+} |
|
| 140 |
+ |
|
| 141 |
// parseCgroupFromReader scans r line by line in the /proc/PID/cgroup format
// ("id:controllers:path") and returns the path of the first entry whose id is
// "0" and whose controller list is empty, e.g.
// "0::/user.slice/user-1001.slice/session-1.scope".
//
// It returns an error when a line has fewer than three colon-separated
// fields, when reading from r fails, or when no matching entry is found.
func parseCgroupFromReader(r io.Reader) (string, error) {
	s := bufio.NewScanner(r)
	for s.Scan() {
		var (
			text  = s.Text()
			parts = strings.SplitN(text, ":", 3)
		)
		if len(parts) < 3 {
			return "", fmt.Errorf("invalid cgroup entry: %q", text)
		}
		// text is like "0::/user.slice/user-1001.slice/session-1.scope"
		if parts[0] == "0" && parts[1] == "" {
			return parts[2], nil
		}
	}
	// Scan returns false at EOF and on failure alike; report a read failure
	// as such rather than as a missing entry.
	if err := s.Err(); err != nil {
		return "", err
	}
	return "", fmt.Errorf("cgroup path not found")
}
|
| 163 |
+ |
|
| 164 |
+// ToResources converts the oci LinuxResources struct into a |
|
| 165 |
+// v2 Resources type for use with this package. |
|
| 166 |
+// |
|
| 167 |
+// converting cgroups configuration from v1 to v2 |
|
| 168 |
+// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2 |
|
| 169 |
+func ToResources(spec *specs.LinuxResources) *Resources {
|
|
| 170 |
+ var resources Resources |
|
| 171 |
+ if cpu := spec.CPU; cpu != nil {
|
|
| 172 |
+ resources.CPU = &CPU{
|
|
| 173 |
+ Cpus: cpu.Cpus, |
|
| 174 |
+ Mems: cpu.Mems, |
|
| 175 |
+ } |
|
| 176 |
+ if shares := cpu.Shares; shares != nil {
|
|
| 177 |
+ convertedWeight := (1 + ((*shares-2)*9999)/262142) |
|
| 178 |
+ resources.CPU.Weight = &convertedWeight |
|
| 179 |
+ } |
|
| 180 |
+ if period := cpu.Period; period != nil {
|
|
| 181 |
+ resources.CPU.Max = NewCPUMax(cpu.Quota, period) |
|
| 182 |
+ } |
|
| 183 |
+ } |
|
| 184 |
+ if mem := spec.Memory; mem != nil {
|
|
| 185 |
+ resources.Memory = &Memory{}
|
|
| 186 |
+ if swap := mem.Swap; swap != nil {
|
|
| 187 |
+ resources.Memory.Swap = swap |
|
| 188 |
+ } |
|
| 189 |
+ if l := mem.Limit; l != nil {
|
|
| 190 |
+ resources.Memory.Max = l |
|
| 191 |
+ } |
|
| 192 |
+ if l := mem.Reservation; l != nil {
|
|
| 193 |
+ resources.Memory.Low = l |
|
| 194 |
+ } |
|
| 195 |
+ } |
|
| 196 |
+ if hugetlbs := spec.HugepageLimits; hugetlbs != nil {
|
|
| 197 |
+ hugeTlbUsage := HugeTlb{}
|
|
| 198 |
+ for _, hugetlb := range hugetlbs {
|
|
| 199 |
+ hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{
|
|
| 200 |
+ HugePageSize: hugetlb.Pagesize, |
|
| 201 |
+ Limit: hugetlb.Limit, |
|
| 202 |
+ }) |
|
| 203 |
+ } |
|
| 204 |
+ resources.HugeTlb = &hugeTlbUsage |
|
| 205 |
+ } |
|
| 206 |
+ if pids := spec.Pids; pids != nil {
|
|
| 207 |
+ resources.Pids = &Pids{
|
|
| 208 |
+ Max: pids.Limit, |
|
| 209 |
+ } |
|
| 210 |
+ } |
|
| 211 |
+ if i := spec.BlockIO; i != nil {
|
|
| 212 |
+ resources.IO = &IO{}
|
|
| 213 |
+ if i.Weight != nil {
|
|
| 214 |
+ resources.IO.BFQ.Weight = 1 + (*i.Weight-10)*9999/990 |
|
| 215 |
+ } |
|
| 216 |
+ for t, devices := range map[IOType][]specs.LinuxThrottleDevice{
|
|
| 217 |
+ ReadBPS: i.ThrottleReadBpsDevice, |
|
| 218 |
+ WriteBPS: i.ThrottleWriteBpsDevice, |
|
| 219 |
+ ReadIOPS: i.ThrottleReadIOPSDevice, |
|
| 220 |
+ WriteIOPS: i.ThrottleWriteIOPSDevice, |
|
| 221 |
+ } {
|
|
| 222 |
+ for _, d := range devices {
|
|
| 223 |
+ resources.IO.Max = append(resources.IO.Max, Entry{
|
|
| 224 |
+ Type: t, |
|
| 225 |
+ Major: d.Major, |
|
| 226 |
+ Minor: d.Minor, |
|
| 227 |
+ Rate: d.Rate, |
|
| 228 |
+ }) |
|
| 229 |
+ } |
|
| 230 |
+ } |
|
| 231 |
+ } |
|
| 232 |
+ if i := spec.Rdma; i != nil {
|
|
| 233 |
+ resources.RDMA = &RDMA{}
|
|
| 234 |
+ for device, value := range spec.Rdma {
|
|
| 235 |
+ if device != "" && (value.HcaHandles != nil || value.HcaObjects != nil) {
|
|
| 236 |
+ resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{
|
|
| 237 |
+ Device: device, |
|
| 238 |
+ HcaHandles: *value.HcaHandles, |
|
| 239 |
+ HcaObjects: *value.HcaObjects, |
|
| 240 |
+ }) |
|
| 241 |
+ } |
|
| 242 |
+ } |
|
| 243 |
+ } |
|
| 244 |
+ |
|
| 245 |
+ return &resources |
|
| 246 |
+} |
|
| 247 |
+ |
|
| 248 |
+// Gets uint64 parsed content of single value cgroup stat file |
|
| 249 |
+func getStatFileContentUint64(filePath string) uint64 {
|
|
| 250 |
+ contents, err := ioutil.ReadFile(filePath) |
|
| 251 |
+ if err != nil {
|
|
| 252 |
+ return 0 |
|
| 253 |
+ } |
|
| 254 |
+ trimmed := strings.TrimSpace(string(contents)) |
|
| 255 |
+ if trimmed == "max" {
|
|
| 256 |
+ return math.MaxUint64 |
|
| 257 |
+ } |
|
| 258 |
+ |
|
| 259 |
+ res, err := parseUint(trimmed, 10, 64) |
|
| 260 |
+ if err != nil {
|
|
| 261 |
+ logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath)
|
|
| 262 |
+ return res |
|
| 263 |
+ } |
|
| 264 |
+ |
|
| 265 |
+ return res |
|
| 266 |
+} |
|
| 267 |
+ |
|
| 268 |
+func readIoStats(path string) []*stats.IOEntry {
|
|
| 269 |
+ // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt |
|
| 270 |
+ var usage []*stats.IOEntry |
|
| 271 |
+ fpath := filepath.Join(path, "io.stat") |
|
| 272 |
+ currentData, err := ioutil.ReadFile(fpath) |
|
| 273 |
+ if err != nil {
|
|
| 274 |
+ return usage |
|
| 275 |
+ } |
|
| 276 |
+ entries := strings.Split(string(currentData), "\n") |
|
| 277 |
+ |
|
| 278 |
+ for _, entry := range entries {
|
|
| 279 |
+ parts := strings.Split(entry, " ") |
|
| 280 |
+ if len(parts) < 2 {
|
|
| 281 |
+ continue |
|
| 282 |
+ } |
|
| 283 |
+ majmin := strings.Split(parts[0], ":") |
|
| 284 |
+ if len(majmin) != 2 {
|
|
| 285 |
+ continue |
|
| 286 |
+ } |
|
| 287 |
+ major, err := strconv.ParseUint(majmin[0], 10, 0) |
|
| 288 |
+ if err != nil {
|
|
| 289 |
+ return usage |
|
| 290 |
+ } |
|
| 291 |
+ minor, err := strconv.ParseUint(majmin[1], 10, 0) |
|
| 292 |
+ if err != nil {
|
|
| 293 |
+ return usage |
|
| 294 |
+ } |
|
| 295 |
+ parts = parts[1:] |
|
| 296 |
+ ioEntry := stats.IOEntry{
|
|
| 297 |
+ Major: major, |
|
| 298 |
+ Minor: minor, |
|
| 299 |
+ } |
|
| 300 |
+ for _, stats := range parts {
|
|
| 301 |
+ keyPairValue := strings.Split(stats, "=") |
|
| 302 |
+ if len(keyPairValue) != 2 {
|
|
| 303 |
+ continue |
|
| 304 |
+ } |
|
| 305 |
+ v, err := strconv.ParseUint(keyPairValue[1], 10, 0) |
|
| 306 |
+ if err != nil {
|
|
| 307 |
+ continue |
|
| 308 |
+ } |
|
| 309 |
+ switch keyPairValue[0] {
|
|
| 310 |
+ case "rbytes": |
|
| 311 |
+ ioEntry.Rbytes = v |
|
| 312 |
+ case "wbytes": |
|
| 313 |
+ ioEntry.Wbytes = v |
|
| 314 |
+ case "rios": |
|
| 315 |
+ ioEntry.Rios = v |
|
| 316 |
+ case "wios": |
|
| 317 |
+ ioEntry.Wios = v |
|
| 318 |
+ } |
|
| 319 |
+ } |
|
| 320 |
+ usage = append(usage, &ioEntry) |
|
| 321 |
+ } |
|
| 322 |
+ return usage |
|
| 323 |
+} |
|
| 324 |
+ |
|
| 325 |
+func rdmaStats(filepath string) []*stats.RdmaEntry {
|
|
| 326 |
+ currentData, err := ioutil.ReadFile(filepath) |
|
| 327 |
+ if err != nil {
|
|
| 328 |
+ return []*stats.RdmaEntry{}
|
|
| 329 |
+ } |
|
| 330 |
+ return toRdmaEntry(strings.Split(string(currentData), "\n")) |
|
| 331 |
+} |
|
| 332 |
+ |
|
| 333 |
+func parseRdmaKV(raw string, entry *stats.RdmaEntry) {
|
|
| 334 |
+ var value uint64 |
|
| 335 |
+ var err error |
|
| 336 |
+ |
|
| 337 |
+ parts := strings.Split(raw, "=") |
|
| 338 |
+ switch len(parts) {
|
|
| 339 |
+ case 2: |
|
| 340 |
+ if parts[1] == "max" {
|
|
| 341 |
+ value = math.MaxUint32 |
|
| 342 |
+ } else {
|
|
| 343 |
+ value, err = parseUint(parts[1], 10, 32) |
|
| 344 |
+ if err != nil {
|
|
| 345 |
+ return |
|
| 346 |
+ } |
|
| 347 |
+ } |
|
| 348 |
+ if parts[0] == "hca_handle" {
|
|
| 349 |
+ entry.HcaHandles = uint32(value) |
|
| 350 |
+ } else if parts[0] == "hca_object" {
|
|
| 351 |
+ entry.HcaObjects = uint32(value) |
|
| 352 |
+ } |
|
| 353 |
+ } |
|
| 354 |
+} |
|
| 355 |
+ |
|
| 356 |
+func toRdmaEntry(strEntries []string) []*stats.RdmaEntry {
|
|
| 357 |
+ var rdmaEntries []*stats.RdmaEntry |
|
| 358 |
+ for i := range strEntries {
|
|
| 359 |
+ parts := strings.Fields(strEntries[i]) |
|
| 360 |
+ switch len(parts) {
|
|
| 361 |
+ case 3: |
|
| 362 |
+ entry := new(stats.RdmaEntry) |
|
| 363 |
+ entry.Device = parts[0] |
|
| 364 |
+ parseRdmaKV(parts[1], entry) |
|
| 365 |
+ parseRdmaKV(parts[2], entry) |
|
| 366 |
+ |
|
| 367 |
+ rdmaEntries = append(rdmaEntries, entry) |
|
| 368 |
+ default: |
|
| 369 |
+ continue |
|
| 370 |
+ } |
|
| 371 |
+ } |
|
| 372 |
+ return rdmaEntries |
|
| 373 |
+} |
|
| 374 |
+ |
|
| 375 |
+// isUnitExists returns true if the error is that a systemd unit already exists. |
|
| 376 |
+func isUnitExists(err error) bool {
|
|
| 377 |
+ if err != nil {
|
|
| 378 |
+ if dbusError, ok := err.(dbus.Error); ok {
|
|
| 379 |
+ return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists") |
|
| 380 |
+ } |
|
| 381 |
+ } |
|
| 382 |
+ return false |
|
| 383 |
+} |
|
| 384 |
+ |
|
| 385 |
// systemdUnitFromPath returns the final element of path, i.e. everything
// after the last path separator. The result is empty when path is empty or
// ends in a separator.
func systemdUnitFromPath(path string) string {
	// filepath.Split guarantees path == dir+file, so dropping the directory
	// prefix leaves exactly the file component.
	dir, _ := filepath.Split(path)
	return path[len(dir):]
}
|
| 389 |
+ |
|
| 390 |
+func readHugeTlbStats(path string) []*stats.HugeTlbStat {
|
|
| 391 |
+ var usage = []*stats.HugeTlbStat{}
|
|
| 392 |
+ var keyUsage = make(map[string]*stats.HugeTlbStat) |
|
| 393 |
+ f, err := os.Open(path) |
|
| 394 |
+ if err != nil {
|
|
| 395 |
+ return usage |
|
| 396 |
+ } |
|
| 397 |
+ files, err := f.Readdir(-1) |
|
| 398 |
+ f.Close() |
|
| 399 |
+ if err != nil {
|
|
| 400 |
+ return usage |
|
| 401 |
+ } |
|
| 402 |
+ |
|
| 403 |
+ for _, file := range files {
|
|
| 404 |
+ if strings.Contains(file.Name(), "hugetlb") && |
|
| 405 |
+ (strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) {
|
|
| 406 |
+ var hugeTlb *stats.HugeTlbStat |
|
| 407 |
+ var ok bool |
|
| 408 |
+ fileName := strings.Split(file.Name(), ".") |
|
| 409 |
+ pageSize := fileName[1] |
|
| 410 |
+ if hugeTlb, ok = keyUsage[pageSize]; !ok {
|
|
| 411 |
+ hugeTlb = &stats.HugeTlbStat{}
|
|
| 412 |
+ } |
|
| 413 |
+ hugeTlb.Pagesize = pageSize |
|
| 414 |
+ out, err := ioutil.ReadFile(filepath.Join(path, file.Name())) |
|
| 415 |
+ if err != nil {
|
|
| 416 |
+ continue |
|
| 417 |
+ } |
|
| 418 |
+ var value uint64 |
|
| 419 |
+ stringVal := strings.TrimSpace(string(out)) |
|
| 420 |
+ if stringVal == "max" {
|
|
| 421 |
+ value = math.MaxUint64 |
|
| 422 |
+ } else {
|
|
| 423 |
+ value, err = strconv.ParseUint(stringVal, 10, 64) |
|
| 424 |
+ } |
|
| 425 |
+ if err != nil {
|
|
| 426 |
+ continue |
|
| 427 |
+ } |
|
| 428 |
+ switch fileName[2] {
|
|
| 429 |
+ case "max": |
|
| 430 |
+ hugeTlb.Max = value |
|
| 431 |
+ case "current": |
|
| 432 |
+ hugeTlb.Current = value |
|
| 433 |
+ } |
|
| 434 |
+ keyUsage[pageSize] = hugeTlb |
|
| 435 |
+ } |
|
| 436 |
+ } |
|
| 437 |
+ for _, entry := range keyUsage {
|
|
| 438 |
+ usage = append(usage, entry) |
|
| 439 |
+ } |
|
| 440 |
+ return usage |
|
| 441 |
+} |