Browse code

Correct CPU usage calculation in presence of offline CPUs and newer Linux

In https://github.com/torvalds/linux/commit/5ca3726 (released in v4.7-rc1) the
content of the `cpuacct.usage_percpu` file in the cpuacct cgroup filesystem was
changed to include both online and offline CPUs. This broke the arithmetic in
the stats helpers used by `docker stats`, since it was using the length of the
PerCPUUsage array as a proxy for the number of online CPUs.

Add current number of online CPUs to types.StatsJSON and use it in the
calculation.

Keep a fallback to `len(v.CPUStats.CPUUsage.PercpuUsage)` so this code
continues to work when talking to an older daemon. An old client talking to a
new daemon will ignore the new field and behave as before.

Fixes #28941.

Signed-off-by: Ian Campbell <ian.campbell@docker.com>
(cherry picked from commit 115f91d7575d6de6c7781a96a082f144fd17e400)
Signed-off-by: Victor Vieux <victorvieux@gmail.com>

Ian Campbell authored on 2017/03/07 02:29:09
Showing 9 changed files
... ...
@@ -21,7 +21,7 @@ import (
21 21
 // Common constants for daemon and client.
22 22
 const (
23 23
 	// DefaultVersion of Current REST API
24
-	DefaultVersion string = "1.26"
24
+	DefaultVersion string = "1.27"
25 25
 
26 26
 	// NoBaseImageSpecifier is the symbol used by the FROM
27 27
 	// command to specify that no base image is to be used.
... ...
@@ -19,10 +19,10 @@ produces:
19 19
 consumes:
20 20
   - "application/json"
21 21
   - "text/plain"
22
-basePath: "/v1.26"
22
+basePath: "/v1.27"
23 23
 info:
24 24
   title: "Docker Engine API"
25
-  version: "1.26"
25
+  version: "1.27"
26 26
   x-logo:
27 27
     url: "https://docs.docker.com/images/logo-docker-main.png"
28 28
   description: |
... ...
@@ -44,7 +44,7 @@ info:
44 44
 
45 45
     The API is usually changed in each release of Docker, so API calls are versioned to ensure that clients don't break.
46 46
 
47
-    For Docker Engine >= 1.13.1, the API version is 1.26. To lock to this version, you prefix the URL with `/v1.26`. For example, calling `/info` is the same as calling `/v1.26/info`.
47
+    For Docker Engine >= 17.03.1, the API version is 1.27. To lock to this version, you prefix the URL with `/v1.27`. For example, calling `/info` is the same as calling `/v1.27/info`.
48 48
 
49 49
     Engine releases in the near future should support this version of the API, so your client will continue to work even if it is talking to a newer Engine.
50 50
 
... ...
@@ -52,10 +52,11 @@ info:
52 52
 
53 53
     The API uses an open schema model, which means the server may add extra properties to responses. Likewise, the server will ignore any extra query parameters and request body properties. When you write clients, you need to ignore additional properties in responses to ensure they do not break when talking to newer Docker daemons.
54 54
 
55
-    This documentation is for version 1.26 of the API, which was introduced with Docker 1.13.1. Use this table to find documentation for previous versions of the API:
55
+    This documentation is for version 1.27 of the API, which was introduced with Docker 17.03.1. Use this table to find documentation for previous versions of the API:
56 56
 
57 57
     Docker version  | API version | Changes
58 58
     ----------------|-------------|---------
59
+    1.13.1 & 17.03.0 | [1.26](https://docs.docker.com/engine/api/v1.26/) | [API changes](https://docs.docker.com/engine/api/version-history/#v1-26-api-changes)
59 60
     1.13.0 | [1.25](https://docs.docker.com/engine/api/v1.25/) | [API changes](https://docs.docker.com/engine/api/version-history/#v1-25-api-changes)
60 61
     1.12.x | [1.24](https://docs.docker.com/engine/api/v1.24/) | [API changes](https://docs.docker.com/engine/api/version-history/#v1-24-api-changes)
61 62
     1.11.x | [1.23](https://docs.docker.com/engine/api/v1.23/) | [API changes](https://docs.docker.com/engine/api/version-history/#v1-23-api-changes)
... ...
@@ -3378,7 +3379,13 @@ paths:
3378 3378
       description: |
3379 3379
         This endpoint returns a live stream of a container’s resource usage statistics.
3380 3380
 
3381
-        The `precpu_stats` is the CPU statistic of last read, which is used for calculating the CPU usage percentage. It is not the same as the `cpu_stats` field.
3381
+        The `precpu_stats` is the CPU statistic of last read, which is used
3382
+        for calculating the CPU usage percentage. It is not the same as the
3383
+        `cpu_stats` field.
3384
+
3385
+        If either `precpu_stats.online_cpus` or `cpu_stats.online_cpus` is
3386
+        missing or `0` then for compatibility with older daemons the length of
3387
+        the corresponding `cpu_usage.percpu_usage` array should be used.
3382 3388
       operationId: "ContainerStats"
3383 3389
       produces:
3384 3390
         - "application/json"
... ...
@@ -3458,6 +3465,7 @@ paths:
3458 3458
                   total_usage: 100215355
3459 3459
                   usage_in_kernelmode: 30000000
3460 3460
                 system_cpu_usage: 739306590000000
3461
+                online_cpus: 4
3461 3462
                 throttling_data:
3462 3463
                   periods: 0
3463 3464
                   throttled_periods: 0
... ...
@@ -3473,6 +3481,7 @@ paths:
3473 3473
                   total_usage: 100093996
3474 3474
                   usage_in_kernelmode: 30000000
3475 3475
                 system_cpu_usage: 9492140000000
3476
+                online_cpus: 4
3476 3477
                 throttling_data:
3477 3478
                   periods: 0
3478 3479
                   throttled_periods: 0
... ...
@@ -47,6 +47,9 @@ type CPUStats struct {
47 47
 	// System Usage. Linux only.
48 48
 	SystemUsage uint64 `json:"system_cpu_usage,omitempty"`
49 49
 
50
+	// Online CPUs. Linux only.
51
+	OnlineCPUs uint32 `json:"online_cpus,omitempty"`
52
+
50 53
 	// Throttling Data. Linux only.
51 54
 	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
52 55
 }
... ...
@@ -179,10 +179,14 @@ func calculateCPUPercentUnix(previousCPU, previousSystem uint64, v *types.StatsJ
179 179
 		cpuDelta = float64(v.CPUStats.CPUUsage.TotalUsage) - float64(previousCPU)
180 180
 		// calculate the change for the entire system between readings
181 181
 		systemDelta = float64(v.CPUStats.SystemUsage) - float64(previousSystem)
182
+		onlineCPUs  = float64(v.CPUStats.OnlineCPUs)
182 183
 	)
183 184
 
185
+	if onlineCPUs == 0.0 {
186
+		onlineCPUs = float64(len(v.CPUStats.CPUUsage.PercpuUsage))
187
+	}
184 188
 	if systemDelta > 0.0 && cpuDelta > 0.0 {
185
-		cpuPercent = (cpuDelta / systemDelta) * float64(len(v.CPUStats.CPUUsage.PercpuUsage)) * 100.0
189
+		cpuPercent = (cpuDelta / systemDelta) * onlineCPUs * 100.0
186 190
 	}
187 191
 	return cpuPercent
188 192
 }
... ...
@@ -58,7 +58,7 @@ import (
58 58
 )
59 59
 
60 60
 // DefaultVersion is the version of the current stable API
61
-const DefaultVersion string = "1.26"
61
+const DefaultVersion string = "1.27"
62 62
 
63 63
 // Client is the API client that performs all operations
64 64
 // against a docker server.
... ...
@@ -115,6 +115,12 @@ func (s *statsCollector) run() {
115 115
 			continue
116 116
 		}
117 117
 
118
+		onlineCPUs, err := s.getNumberOnlineCPUs()
119
+		if err != nil {
120
+			logrus.Errorf("collecting system online cpu count: %v", err)
121
+			continue
122
+		}
123
+
118 124
 		for _, pair := range pairs {
119 125
 			stats, err := s.supervisor.GetContainerStats(pair.container)
120 126
 			if err != nil {
... ...
@@ -132,6 +138,7 @@ func (s *statsCollector) run() {
132 132
 			}
133 133
 			// FIXME: move to containerd on Linux (not Windows)
134 134
 			stats.CPUStats.SystemUsage = systemUsage
135
+			stats.CPUStats.OnlineCPUs = onlineCPUs
135 136
 
136 137
 			pair.publisher.Publish(*stats)
137 138
 		}
... ...
@@ -12,6 +12,11 @@ import (
12 12
 	"github.com/opencontainers/runc/libcontainer/system"
13 13
 )
14 14
 
15
+/*
16
+#include <unistd.h>
17
+*/
18
+import "C"
19
+
15 20
 // platformNewStatsCollector performs platform specific initialisation of the
16 21
 // statsCollector structure.
17 22
 func platformNewStatsCollector(s *statsCollector) {
... ...
@@ -69,3 +74,11 @@ func (s *statsCollector) getSystemCPUUsage() (uint64, error) {
69 69
 	}
70 70
 	return 0, fmt.Errorf("invalid stat format. Error trying to parse the '/proc/stat' file")
71 71
 }
72
+
73
+func (s *statsCollector) getNumberOnlineCPUs() (uint32, error) {
74
+	i, err := C.sysconf(C._SC_NPROCESSORS_ONLN)
75
+	if err != nil {
76
+		return 0, err
77
+	}
78
+	return uint32(i), nil
79
+}
... ...
@@ -13,3 +13,7 @@ func platformNewStatsCollector(s *statsCollector) {
13 13
 func (s *statsCollector) getSystemCPUUsage() (uint64, error) {
14 14
 	return 0, nil
15 15
 }
16
+
17
+func (s *statsCollector) getNumberOnlineCPUs() (uint32, error) {
18
+	return 0, nil
19
+}
... ...
@@ -13,6 +13,12 @@ keywords: "API, Docker, rcli, REST, documentation"
13 13
      will be rejected.
14 14
 -->
15 15
 
16
+## v1.27 API changes
17
+
18
+[Docker Engine API v1.27](https://docs.docker.com/engine/api/v1.27/) documentation
19
+
20
+* `GET /containers/(id or name)/stats` now includes an `online_cpus` field in both `precpu_stats` and `cpu_stats`. If this field is missing or `0` then for compatibility with older daemons the length of the corresponding `cpu_usage.percpu_usage` array should be used.
21
+
16 22
 ## v1.26 API changes
17 23
 
18 24
 [Docker Engine API v1.26](https://docs.docker.com/engine/api/v1.26/) documentation