In https://github.com/torvalds/linux/commit/5ca3726 (released in v4.7-rc1) the
content of the `cpuacct.usage_percpu` file in the cpuacct cgroup was changed to include both
online and offline cpus. This broke the arithmetic in the stats helpers used by
`docker stats`, since it was using the length of the PerCPUUsage array as a
proxy for the number of online CPUs.
Add current number of online CPUs to types.StatsJSON and use it in the
calculation.
Keep a fallback to `len(v.CPUStats.CPUUsage.PercpuUsage)` so this code
continues to work when talking to an older daemon. An old client talking to a
new daemon will ignore the new field and behave as before.
Fixes #28941.
Signed-off-by: Ian Campbell <ian.campbell@docker.com>
| ... | ... |
@@ -3468,6 +3468,10 @@ paths: |
| 3468 | 3468 |
The `precpu_stats` is the CPU statistic of last read, which is used |
| 3469 | 3469 |
for calculating the CPU usage percentage. It is not the same as the |
| 3470 | 3470 |
`cpu_stats` field. |
| 3471 |
+ |
|
| 3472 |
+ If either `precpu_stats.online_cpus` or `cpu_stats.online_cpus` is |
|
| 3473 |
+ missing or `0` then for compatibility with older daemons the length of the |
|
| 3474 |
+ corresponding `cpu_usage.percpu_usage` array should be used. |
|
| 3471 | 3475 |
operationId: "ContainerStats" |
| 3472 | 3476 |
produces: ["application/json"] |
| 3473 | 3477 |
responses: |
| ... | ... |
@@ -3546,6 +3550,7 @@ paths: |
| 3546 | 3546 |
total_usage: 100215355 |
| 3547 | 3547 |
usage_in_kernelmode: 30000000 |
| 3548 | 3548 |
system_cpu_usage: 739306590000000 |
| 3549 |
+ online_cpus: 4 |
|
| 3549 | 3550 |
throttling_data: |
| 3550 | 3551 |
periods: 0 |
| 3551 | 3552 |
throttled_periods: 0 |
| ... | ... |
@@ -3561,6 +3566,7 @@ paths: |
| 3561 | 3561 |
total_usage: 100093996 |
| 3562 | 3562 |
usage_in_kernelmode: 30000000 |
| 3563 | 3563 |
system_cpu_usage: 9492140000000 |
| 3564 |
+ online_cpus: 4 |
|
| 3564 | 3565 |
throttling_data: |
| 3565 | 3566 |
periods: 0 |
| 3566 | 3567 |
throttled_periods: 0 |
| ... | ... |
@@ -47,6 +47,9 @@ type CPUStats struct {
|
| 47 | 47 |
// System Usage. Linux only. |
| 48 | 48 |
SystemUsage uint64 `json:"system_cpu_usage,omitempty"` |
| 49 | 49 |
|
| 50 |
+ // Online CPUs. Linux only. |
|
| 51 |
+ OnlineCPUs uint32 `json:"online_cpus,omitempty"` |
|
| 52 |
+ |
|
| 50 | 53 |
// Throttling Data. Linux only. |
| 51 | 54 |
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` |
| 52 | 55 |
} |
| ... | ... |
@@ -178,10 +178,14 @@ func calculateCPUPercentUnix(previousCPU, previousSystem uint64, v *types.StatsJ |
| 178 | 178 |
cpuDelta = float64(v.CPUStats.CPUUsage.TotalUsage) - float64(previousCPU) |
| 179 | 179 |
// calculate the change for the entire system between readings |
| 180 | 180 |
systemDelta = float64(v.CPUStats.SystemUsage) - float64(previousSystem) |
| 181 |
+ onlineCPUs = float64(v.CPUStats.OnlineCPUs) |
|
| 181 | 182 |
) |
| 182 | 183 |
|
| 184 |
+ if onlineCPUs == 0.0 {
|
|
| 185 |
+ onlineCPUs = float64(len(v.CPUStats.CPUUsage.PercpuUsage)) |
|
| 186 |
+ } |
|
| 183 | 187 |
if systemDelta > 0.0 && cpuDelta > 0.0 {
|
| 184 |
- cpuPercent = (cpuDelta / systemDelta) * float64(len(v.CPUStats.CPUUsage.PercpuUsage)) * 100.0 |
|
| 188 |
+ cpuPercent = (cpuDelta / systemDelta) * onlineCPUs * 100.0 |
|
| 185 | 189 |
} |
| 186 | 190 |
return cpuPercent |
| 187 | 191 |
} |
| ... | ... |
@@ -80,6 +80,12 @@ func (s *Collector) Run() {
|
| 80 | 80 |
continue |
| 81 | 81 |
} |
| 82 | 82 |
|
| 83 |
+ onlineCPUs, err := s.getNumberOnlineCPUs() |
|
| 84 |
+ if err != nil {
|
|
| 85 |
+ logrus.Errorf("collecting system online cpu count: %v", err)
|
|
| 86 |
+ continue |
|
| 87 |
+ } |
|
| 88 |
+ |
|
| 83 | 89 |
for _, pair := range pairs {
|
| 84 | 90 |
stats, err := s.supervisor.GetContainerStats(pair.container) |
| 85 | 91 |
if err != nil {
|
| ... | ... |
@@ -97,6 +103,7 @@ func (s *Collector) Run() {
|
| 97 | 97 |
} |
| 98 | 98 |
// FIXME: move to containerd on Linux (not Windows) |
| 99 | 99 |
stats.CPUStats.SystemUsage = systemUsage |
| 100 |
+ stats.CPUStats.OnlineCPUs = onlineCPUs |
|
| 100 | 101 |
|
| 101 | 102 |
pair.publisher.Publish(*stats) |
| 102 | 103 |
} |
| ... | ... |
@@ -11,6 +11,11 @@ import ( |
| 11 | 11 |
"github.com/opencontainers/runc/libcontainer/system" |
| 12 | 12 |
) |
| 13 | 13 |
|
| 14 |
+/* |
|
| 15 |
+#include <unistd.h> |
|
| 16 |
+*/ |
|
| 17 |
+import "C" |
|
| 18 |
+ |
|
| 14 | 19 |
// platformNewStatsCollector performs platform specific initialisation of the |
| 15 | 20 |
// Collector structure. |
| 16 | 21 |
func platformNewStatsCollector(s *Collector) {
|
| ... | ... |
@@ -64,3 +69,11 @@ func (s *Collector) getSystemCPUUsage() (uint64, error) {
|
| 64 | 64 |
} |
| 65 | 65 |
return 0, fmt.Errorf("invalid stat format. Error trying to parse the '/proc/stat' file")
|
| 66 | 66 |
} |
| 67 |
+ |
|
| 68 |
+func (s *Collector) getNumberOnlineCPUs() (uint32, error) {
|
|
| 69 |
+ i, err := C.sysconf(C._SC_NPROCESSORS_ONLN) |
|
| 70 |
+ if err != nil {
|
|
| 71 |
+ return 0, err |
|
| 72 |
+ } |
|
| 73 |
+ return uint32(i), nil |
|
| 74 |
+} |
| ... | ... |
@@ -24,6 +24,7 @@ keywords: "API, Docker, rcli, REST, documentation" |
| 24 | 24 |
* `POST /build` now accepts `extrahosts` parameter to specify a host to ip mapping to use during the build. |
| 25 | 25 |
* `POST /services/create` and `POST /services/(id or name)/update` now accept a `rollback` value for `FailureAction`. |
| 26 | 26 |
* `POST /services/create` and `POST /services/(id or name)/update` now accept an optional `RollbackConfig` object which specifies rollback options. |
| 27 |
+* `GET /containers/(id or name)/stats` now includes an `online_cpus` field in both `precpu_stats` and `cpu_stats`. If this field is missing or `0` then for compatibility with older daemons the length of the corresponding `cpu_usage.percpu_usage` array should be used. |
|
| 27 | 28 |
|
| 28 | 29 |
## v1.26 API changes |
| 29 | 30 |
|