Browse code

Add support for kernel memory limit

Signed-off-by: Qiang Huang <h.huangqiang@huawei.com>

Qiang Huang authored on 2015/08/20 00:56:55
Showing 17 changed files
... ...
@@ -1075,6 +1075,7 @@ _docker_run() {
1075 1075
 		--group-add
1076 1076
 		--hostname -h
1077 1077
 		--ipc
1078
+		--kernel-memory
1078 1079
 		--label-file
1079 1080
 		--label -l
1080 1081
 		--link
... ...
@@ -248,6 +248,7 @@ func populateCommand(c *Container, env []string) error {
248 248
 	resources := &execdriver.Resources{
249 249
 		Memory:           c.hostConfig.Memory,
250 250
 		MemorySwap:       c.hostConfig.MemorySwap,
251
+		KernelMemory:     c.hostConfig.KernelMemory,
251 252
 		CPUShares:        c.hostConfig.CPUShares,
252 253
 		CpusetCpus:       c.hostConfig.CpusetCpus,
253 254
 		CpusetMems:       c.hostConfig.CpusetMems,
... ...
@@ -156,6 +156,15 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *runconfig.HostC
156 156
 			return warnings, fmt.Errorf("Invalid value: %v, valid memory swappiness range is 0-100.", swappiness)
157 157
 		}
158 158
 	}
159
+	if hostConfig.KernelMemory > 0 && !sysInfo.KernelMemory {
160
+		warnings = append(warnings, "Your kernel does not support kernel memory limit capabilities. Limitation discarded.")
161
+		logrus.Warnf("Your kernel does not support kernel memory limit capabilities. Limitation discarded.")
162
+		hostConfig.KernelMemory = 0
163
+	}
164
+	if hostConfig.KernelMemory > 0 && !CheckKernelVersion(4, 0, 0) {
165
+		warnings = append(warnings, "You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
166
+		logrus.Warnf("You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.")
167
+	}
159 168
 	if hostConfig.CPUShares > 0 && !sysInfo.CPUShares {
160 169
 		warnings = append(warnings, "Your kernel does not support CPU shares. Shares discarded.")
161 170
 		logrus.Warnf("Your kernel does not support CPU shares. Shares discarded.")
... ...
@@ -127,6 +127,7 @@ type UTS struct {
127 127
 type Resources struct {
128 128
 	Memory           int64            `json:"memory"`
129 129
 	MemorySwap       int64            `json:"memory_swap"`
130
+	KernelMemory     int64            `json:"kernel_memory"`
130 131
 	CPUShares        int64            `json:"cpu_shares"`
131 132
 	CpusetCpus       string           `json:"cpuset_cpus"`
132 133
 	CpusetMems       string           `json:"cpuset_mems"`
... ...
@@ -96,6 +96,9 @@ lxc.cgroup.memory.soft_limit_in_bytes = {{.Resources.Memory}}
96 96
 lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}}
97 97
 {{end}}
98 98
 {{end}}
99
+{{if .Resources.KernelMemory}}
100
+lxc.cgroup.memory.kmem.limit_in_bytes = {{.Resources.KernelMemory}}
101
+{{end}}
99 102
 {{if .Resources.CPUShares}}
100 103
 lxc.cgroup.cpu.shares = {{.Resources.CPUShares}}
101 104
 {{end}}
... ...
@@ -172,6 +172,7 @@ Create a container
172 172
              "LxcConf": {"lxc.utsname":"docker"},
173 173
              "Memory": 0,
174 174
              "MemorySwap": 0,
175
+             "KernelMemory": 0,
175 176
              "CpuShares": 512,
176 177
              "CpuPeriod": 100000,
177 178
              "CpusetCpus": "0,1",
... ...
@@ -217,8 +218,9 @@ Json Parameters:
217 217
       for the container.
218 218
 -   **User** - A string value specifying the user inside the container.
219 219
 -   **Memory** - Memory limit in bytes.
220
--   **MemorySwap**- Total memory limit (memory + swap); set `-1` to disable swap
220
+-   **MemorySwap** - Total memory limit (memory + swap); set `-1` to disable swap
221 221
       You must use this with `memory` and make the swap value larger than `memory`.
222
+-   **KernelMemory** - Kernel memory limit in bytes.
222 223
 -   **CpuShares** - An integer value containing the container's CPU Shares
223 224
       (ie. the relative weight vs other containers).
224 225
 -   **CpuPeriod** - The length of a CPU period in microseconds.
... ...
@@ -387,6 +389,7 @@ Return low-level information on the container `id`
387 387
 			"LxcConf": [],
388 388
 			"Memory": 0,
389 389
 			"MemorySwap": 0,
390
+			"KernelMemory": 0,
390 391
 			"OomKillDisable": false,
391 392
 			"NetworkMode": "bridge",
392 393
 			"PortBindings": {},
... ...
@@ -40,6 +40,7 @@ Creates a new container.
40 40
       --help=false                  Print usage
41 41
       -i, --interactive=false       Keep STDIN open even if not attached
42 42
       --ipc=""                      IPC namespace to use
43
+      --kernel-memory=""            Kernel memory limit
43 44
       -l, --label=[]                Set metadata on the container (e.g., --label=com.example.key=value)
44 45
       --label-file=[]               Read in a line delimited file of labels
45 46
       --link=[]                     Add link to another container
... ...
@@ -40,6 +40,7 @@ weight=1
40 40
       --help=false                  Print usage
41 41
       -i, --interactive=false       Keep STDIN open even if not attached
42 42
       --ipc=""                      IPC namespace to use
43
+      --kernel-memory=""            Kernel memory limit
43 44
       -l, --label=[]                Set metadata on the container (e.g., --label=com.example.key=value)
44 45
       --label-file=[]               Read in a file of labels (EOL delimited)
45 46
       --link=[]                     Add link to another container
... ...
@@ -509,6 +509,7 @@ container:
509 509
 |----------------------------|---------------------------------------------------------------------------------------------|
510 510
 | `-m`, `--memory="" `       | Memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)                        |
511 511
 | `--memory-swap=""`         | Total memory limit (memory + swap, format: `<number>[<unit>]`, where unit = b, k, m or g)   |
512
+| `--kernel-memory=""`       | Kernel memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)                 |
512 513
 | `-c`, `--cpu-shares=0`     | CPU shares (relative weight)                                                                |
513 514
 | `--cpu-period=0`           | Limit the CPU CFS (Completely Fair Scheduler) period                                        |
514 515
 | `--cpuset-cpus="" `        | CPUs in which to allow execution (0-3, 0,1)                                                 |
... ...
@@ -518,9 +519,9 @@ container:
518 518
 | `--oom-kill-disable=false` | Whether to disable OOM Killer for the container or not.                                     |
519 519
 | `--memory-swappiness=""  ` | Tune a container's memory swappiness behavior. Accepts an integer between 0 and 100.        |
520 520
 
521
-### Memory constraints
521
+### User memory constraints
522 522
 
523
-We have four ways to set memory usage:
523
+We have four ways to set user memory usage:
524 524
 
525 525
 <table>
526 526
   <thead>
... ...
@@ -568,7 +569,7 @@ We have four ways to set memory usage:
568 568
   </tbody>
569 569
 </table>
570 570
 
571
-### Examples
571
+Examples:
572 572
 
573 573
     $ docker run -ti ubuntu:14.04 /bin/bash
574 574
 
... ...
@@ -612,6 +613,76 @@ The following example, illustrates a dangerous way to use the flag:
612 612
 The container has unlimited memory which can cause the host to run out memory
613 613
 and require killing system processes to free memory.
614 614
 
615
+### Kernel memory constraints
616
+
617
+Kernel memory is fundamentally different from user memory as kernel memory can't
618
+be swapped out. The inability to swap makes it possible for the container to
619
+block system services by consuming too much kernel memory. Kernel memory includes:
620
+
621
+ - stack pages
622
+ - slab pages
623
+ - sockets memory pressure
624
+ - tcp memory pressure
625
+
626
+You can set up a kernel memory limit to constrain these kinds of memory. For example,
627
+every process consumes some stack pages. By limiting kernel memory, you can
628
+prevent new processes from being created when the kernel memory usage is too high.
629
+
630
+Kernel memory is never completely independent of user memory. Instead, you limit
631
+kernel memory in the context of the user memory limit. Assume "U" is the user memory
632
+limit and "K" the kernel limit. There are three possible ways to set limits:
633
+
634
+<table>
635
+  <thead>
636
+    <tr>
637
+      <th>Option</th>
638
+      <th>Result</th>
639
+    </tr>
640
+  </thead>
641
+  <tbody>
642
+    <tr>
643
+      <td class="no-wrap"><strong>U != 0, K = inf</strong> (default)</td>
644
+      <td>
645
+        This is the standard memory limitation mechanism already present before using
646
+        kernel memory. Kernel memory is completely ignored.
647
+      </td>
648
+    </tr>
649
+    <tr>
650
+      <td class="no-wrap"><strong>U != 0, K &lt; U</strong></td>
651
+      <td>
652
+        Kernel memory is a subset of the user memory. This setup is useful in
653
+        deployments where the total amount of memory per-cgroup is overcommitted.
654
+        Overcommitting kernel memory limits is definitely not recommended, since the
655
+        box can still run out of non-reclaimable memory.
656
+        In this case, you can configure K so that the sum of all groups is
657
+        never greater than the total memory. Then, freely set U at the expense of
658
+        the system's service quality.
659
+      </td>
660
+    </tr>
661
+    <tr>
662
+      <td class="no-wrap"><strong>U != 0, K &gt; U</strong></td>
663
+      <td>
664
+        Since kernel memory charges are also fed to the user counter, reclamation
665
+        is triggered for the container for both kinds of memory. This configuration
666
+        gives the admin a unified view of memory. It is also useful for people
667
+        who just want to track kernel memory usage.
668
+      </td>
669
+    </tr>
670
+  </tbody>
671
+</table>
672
+
673
+Examples:
674
+
675
+    $ docker run -ti -m 500M --kernel-memory 50M ubuntu:14.04 /bin/bash
676
+
677
+We set memory and kernel memory, so the processes in the container can use
678
+500M of memory in total; within that 500M, at most 50M can be kernel memory.
679
+
680
+    $ docker run -ti --kernel-memory 50M ubuntu:14.04 /bin/bash
681
+
682
+We set kernel memory without **-m**, so the processes in the container can
683
+use as much memory as they want, but they can only use 50M kernel memory.
684
+
615 685
 ### Swappiness constraint
616 686
 
617 687
 By default, a container's kernel can swap out a percentage of anonymous pages.
... ...
@@ -283,6 +283,18 @@ func (s *DockerSuite) TestRunWithCpuPeriod(c *check.C) {
283 283
 	}
284 284
 }
285 285
 
286
+func (s *DockerSuite) TestRunWithKernelMemory(c *check.C) {
287
+	testRequires(c, kernelMemorySupport)
288
+
289
+	dockerCmd(c, "run", "--kernel-memory", "50M", "--name", "test", "busybox", "true")
290
+
291
+	out, err := inspectField("test", "HostConfig.KernelMemory")
292
+	c.Assert(err, check.IsNil)
293
+	if out != "52428800" {
294
+		c.Fatalf("setting the kernel memory limit failed")
295
+	}
296
+}
297
+
286 298
 func (s *DockerSuite) TestRunOOMExitCode(c *check.C) {
287 299
 	testRequires(c, oomControl)
288 300
 	errChan := make(chan error)
... ...
@@ -54,4 +54,17 @@ var (
54 54
 		},
55 55
 		"Test requires Oom control enabled.",
56 56
 	}
57
+	kernelMemorySupport = testRequirement{
58
+		func() bool {
59
+			cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory")
60
+			if err != nil {
61
+				return false
62
+			}
63
+			if _, err := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.kmem.limit_in_bytes")); err != nil {
64
+				return false
65
+			}
66
+			return true
67
+		},
68
+		"Test requires an environment that supports cgroup kernel memory.",
69
+	}
57 70
 )
... ...
@@ -30,6 +30,7 @@ docker-create - Create a new container
30 30
 [**--help**]
31 31
 [**-i**|**--interactive**[=*false*]]
32 32
 [**--ipc**[=*IPC*]]
33
+[**--kernel-memory**[=*KERNEL-MEMORY*]]
33 34
 [**-l**|**--label**[=*[]*]]
34 35
 [**--label-file**[=*[]*]]
35 36
 [**--link**[=*[]*]]
... ...
@@ -148,6 +149,15 @@ two memory nodes.
148 148
                                'container:<name|id>': reuses another container shared memory, semaphores and message queues
149 149
                                'host': use the host shared memory,semaphores and message queues inside the container.  Note: the host mode gives the container full access to local shared memory and is therefore considered insecure.
150 150
 
151
+**--kernel-memory**=""
152
+   Kernel memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)
153
+
154
+   Constrains the kernel memory available to a container. If a limit of 0
155
+is specified (not using `--kernel-memory`), the container's kernel memory
156
+is not limited. If you specify a limit, it may be rounded up to a multiple
157
+of the operating system's page size and the value can be very large,
158
+millions of trillions.
159
+
151 160
 **-l**, **--label**=[]
152 161
    Adds metadata to a container (e.g., --label=com.example.key=value)
153 162
 
... ...
@@ -31,6 +31,7 @@ docker-run - Run a command in a new container
31 31
 [**--help**]
32 32
 [**-i**|**--interactive**[=*false*]]
33 33
 [**--ipc**[=*IPC*]]
34
+[**--kernel-memory**[=*KERNEL-MEMORY*]]
34 35
 [**-l**|**--label**[=*[]*]]
35 36
 [**--label-file**[=*[]*]]
36 37
 [**--link**[=*[]*]]
... ...
@@ -242,6 +243,15 @@ ENTRYPOINT.
242 242
 **-l**, **--label**=[]
243 243
    Set metadata on the container (e.g., --label com.example.key=value)
244 244
 
245
+**--kernel-memory**=""
246
+   Kernel memory limit (format: `<number>[<unit>]`, where unit = b, k, m or g)
247
+
248
+   Constrains the kernel memory available to a container. If a limit of 0
249
+is specified (not using `--kernel-memory`), the container's kernel memory
250
+is not limited. If you specify a limit, it may be rounded up to a multiple
251
+of the operating system's page size and the value can be very large,
252
+millions of trillions.
253
+
245 254
 **--label-file**=[]
246 255
    Read in a line delimited file of labels
247 256
 
... ...
@@ -36,6 +36,9 @@ type cgroupMemInfo struct {
36 36
 
37 37
 	// Whether memory swappiness is supported or not
38 38
 	MemorySwappiness bool
39
+
40
+	// Whether kernel memory limit is supported or not
41
+	KernelMemory bool
39 42
 }
40 43
 
41 44
 type cgroupCPUInfo struct {
... ...
@@ -57,12 +57,17 @@ func checkCgroupMem(quiet bool) cgroupMemInfo {
57 57
 	if !quiet && !memorySwappiness {
58 58
 		logrus.Warnf("Your kernel does not support memory swappiness.")
59 59
 	}
60
+	kernelMemory := cgroupEnabled(mountPoint, "memory.kmem.limit_in_bytes")
61
+	if !quiet && !kernelMemory {
62
+		logrus.Warnf("Your kernel does not support kernel memory limit.")
63
+	}
60 64
 
61 65
 	return cgroupMemInfo{
62 66
 		MemoryLimit:      true,
63 67
 		SwapLimit:        swapLimit,
64 68
 		OomKillDisable:   oomKillDisable,
65 69
 		MemorySwappiness: memorySwappiness,
70
+		KernelMemory:     kernelMemory,
66 71
 	}
67 72
 }
68 73
 
... ...
@@ -265,6 +265,7 @@ type HostConfig struct {
265 265
 	LxcConf          *LxcConfig       // Additional lxc configuration
266 266
 	Memory           int64            // Memory limit (in bytes)
267 267
 	MemorySwap       int64            // Total memory usage (memory + swap); set `-1` to disable swap
268
+	KernelMemory     int64            // Kernel memory limit (in bytes)
268 269
 	CPUShares        int64            `json:"CpuShares"` // CPU shares (relative weight vs. other containers)
269 270
 	CPUPeriod        int64            `json:"CpuPeriod"` // CPU CFS (Completely Fair Scheduler) period
270 271
 	CpusetCpus       string           // CpusetCpus 0-2, 0,1
... ...
@@ -74,6 +74,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
74 74
 		flHostname        = cmd.String([]string{"h", "-hostname"}, "", "Container host name")
75 75
 		flMemoryString    = cmd.String([]string{"m", "-memory"}, "", "Memory limit")
76 76
 		flMemorySwap      = cmd.String([]string{"-memory-swap"}, "", "Total memory (memory + swap), '-1' to disable swap")
77
+		flKernelMemory    = cmd.String([]string{"-kernel-memory"}, "", "Kernel memory limit")
77 78
 		flUser            = cmd.String([]string{"u", "-user"}, "", "Username or UID (format: <name|uid>[:<group|gid>])")
78 79
 		flWorkingDir      = cmd.String([]string{"w", "-workdir"}, "", "Working directory inside the container")
79 80
 		flCPUShares       = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)")
... ...
@@ -166,6 +167,15 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
166 166
 		}
167 167
 	}
168 168
 
169
+	var KernelMemory int64
170
+	if *flKernelMemory != "" {
171
+		parsedKernelMemory, err := units.RAMInBytes(*flKernelMemory)
172
+		if err != nil {
173
+			return nil, nil, cmd, err
174
+		}
175
+		KernelMemory = parsedKernelMemory
176
+	}
177
+
169 178
 	swappiness := *flSwappiness
170 179
 	if swappiness != -1 && (swappiness < 0 || swappiness > 100) {
171 180
 		return nil, nil, cmd, fmt.Errorf("Invalid value: %d. Valid memory swappiness range is 0-100", swappiness)
... ...
@@ -320,6 +330,7 @@ func Parse(cmd *flag.FlagSet, args []string) (*Config, *HostConfig, *flag.FlagSe
320 320
 		LxcConf:          lxcConf,
321 321
 		Memory:           flMemory,
322 322
 		MemorySwap:       memorySwap,
323
+		KernelMemory:     KernelMemory,
323 324
 		CPUShares:        *flCPUShares,
324 325
 		CPUPeriod:        *flCPUPeriod,
325 326
 		CpusetCpus:       *flCpusetCpus,