Browse code

Add DeviceRequests to HostConfig to support NVIDIA GPUs

This patch hard-codes support for NVIDIA GPUs.
In a future patch it should move out into its own Device Plugin.

Signed-off-by: Tibor Vass <tibor@docker.com>

Tibor Vass authored on 2019/03/01 09:32:08
Showing 10 changed files
... ...
@@ -210,6 +210,43 @@ definitions:
210 210
       PathInContainer: "/dev/deviceName"
211 211
       CgroupPermissions: "mrw"
212 212
 
213
+  DeviceRequest:
214
+    type: "object"
215
+    description: "A request for devices to be sent to device drivers"
216
+    properties:
217
+      Driver:
218
+        type: "string"
219
+        example: "nvidia"
220
+      Count:
221
+        type: "integer"
222
+        example: -1
223
+      DeviceIDs:
224
+        type: "array"
225
+        items:
226
+          type: "string"
227
+        example:
228
+          - "0"
229
+          - "1"
230
+          - "GPU-fef8089b-4820-abfc-e83e-94318197576e"
231
+      Capabilities:
232
+        description: |
233
+          A list of capabilities; an OR list of AND lists of capabilities.
234
+        type: "array"
235
+        items:
236
+          type: "array"
237
+          items:
238
+            type: "string"
239
+        example:
240
+          # gpu AND nvidia AND compute
241
+          - ["gpu", "nvidia", "compute"]
242
+      Options:
243
+        description: |
244
+          Driver-specific options, specified as key/value pairs. These options
245
+          are passed directly to the driver.
246
+        type: "object"
247
+        additionalProperties:
248
+          type: "string"
249
+
213 250
   ThrottleDevice:
214 251
     type: "object"
215 252
     properties:
... ...
@@ -421,6 +458,11 @@ definitions:
421 421
         items:
422 422
           type: "string"
423 423
           example: "c 13:* rwm"
424
+      DeviceRequests:
425
+        description: "A list of requests for devices to be sent to device drivers"
426
+        type: "array"
427
+        items:
428
+          $ref: "#/definitions/DeviceRequest"
424 429
       DiskQuota:
425 430
         description: "Disk limit (in bytes)."
426 431
         type: "integer"
... ...
@@ -244,6 +244,16 @@ func (n PidMode) Container() string {
244 244
 	return ""
245 245
 }
246 246
 
247
// DeviceRequest represents a request for devices from a device driver.
// Used by GPU device drivers.
type DeviceRequest struct {
	// Driver is the name of the device driver.
	Driver string
	// Count is the number of devices to request (-1 = All).
	Count int
	// DeviceIDs lists device IDs as recognizable by the device driver.
	DeviceIDs []string
	// Capabilities is an OR list of AND lists of device capabilities
	// (e.g. "gpu").
	Capabilities [][]string
	// Options are passed on to the device driver.
	Options map[string]string
}
256
+
247 257
 // DeviceMapping represents the device mapping between the host and the container.
248 258
 type DeviceMapping struct {
249 259
 	PathOnHost        string
... ...
@@ -327,6 +337,7 @@ type Resources struct {
327 327
 	CpusetMems           string          // CpusetMems 0-2, 0,1
328 328
 	Devices              []DeviceMapping // List of devices to map inside the container
329 329
 	DeviceCgroupRules    []string        // List of rule to be added to the device cgroup
330
+	DeviceRequests       []DeviceRequest // List of device requests for device drivers
330 331
 	DiskQuota            int64           // Disk limit (in bytes)
331 332
 	KernelMemory         int64           // Kernel memory limit (in bytes)
332 333
 	KernelMemoryTCP      int64           // Hard limit for kernel TCP buffer memory (in bytes)
333 334
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+package daemon // import "github.com/docker/docker/daemon"
1
+
2
+import (
3
+	"github.com/docker/docker/api/types/container"
4
+	"github.com/docker/docker/pkg/capabilities"
5
+	specs "github.com/opencontainers/runtime-spec/specs-go"
6
+)
7
+
8
+var deviceDrivers = map[string]*deviceDriver{}
9
+
10
+type deviceDriver struct {
11
+	capset     capabilities.Set
12
+	updateSpec func(*specs.Spec, *deviceInstance) error
13
+}
14
+
15
+type deviceInstance struct {
16
+	req          container.DeviceRequest
17
+	selectedCaps []string
18
+}
19
+
20
+func registerDeviceDriver(name string, d *deviceDriver) {
21
+	deviceDrivers[name] = d
22
+}
23
+
24
+func (daemon *Daemon) handleDevice(req container.DeviceRequest, spec *specs.Spec) error {
25
+	if req.Driver == "" {
26
+		for _, dd := range deviceDrivers {
27
+			if selected := dd.capset.Match(req.Capabilities); selected != nil {
28
+				return dd.updateSpec(spec, &deviceInstance{req: req, selectedCaps: selected})
29
+			}
30
+		}
31
+	} else if dd := deviceDrivers[req.Driver]; dd != nil {
32
+		if selected := dd.capset.Match(req.Capabilities); selected != nil {
33
+			return dd.updateSpec(spec, &deviceInstance{req: req, selectedCaps: selected})
34
+		}
35
+	}
36
+	return incompatibleDeviceRequest{req.Driver, req.Capabilities}
37
+}
... ...
@@ -80,6 +80,17 @@ func (e invalidIdentifier) Error() string {
80 80
 
81 81
 func (invalidIdentifier) InvalidParameter() {}
82 82
 
83
// incompatibleDeviceRequest is the error reported when no device driver could
// be selected for a device request.
type incompatibleDeviceRequest struct {
	driver string     // driver name taken from the request (may be empty)
	caps   [][]string // capabilities taken from the request
}

// Error implements the error interface.
func (e incompatibleDeviceRequest) Error() string {
	const format = "could not select device driver %q with capabilities: %v"
	return fmt.Sprintf(format, e.driver, e.caps)
}

// InvalidParameter is a no-op marker method, mirroring the one declared on
// invalidIdentifier in this file.
// NOTE(review): presumably lets callers classify this as an invalid-parameter
// error — confirm against the error-handling package in use.
func (incompatibleDeviceRequest) InvalidParameter() {}
93
+
83 94
 type duplicateMountPointError string
84 95
 
85 96
 func (e duplicateMountPointError) Error() string {
86 97
new file mode 100644
... ...
@@ -0,0 +1,107 @@
0
+package daemon
1
+
2
+import (
3
+	"os/exec"
4
+	"strconv"
5
+
6
+	"github.com/containerd/containerd/contrib/nvidia"
7
+	"github.com/docker/docker/pkg/capabilities"
8
+	"github.com/opencontainers/runtime-spec/specs-go"
9
+	"github.com/pkg/errors"
10
+)
11
+
12
+// TODO: nvidia should not be hard-coded, and should be a device plugin instead on the daemon object.
13
+// TODO: add list of device capabilities in daemon/node info
14
+
15
+var errConflictCountDeviceIDs = errors.New("cannot set both Count and DeviceIDs on device request")
16
+
17
+// stolen from github.com/containerd/containerd/contrib/nvidia
18
+const nvidiaCLI = "nvidia-container-cli"
19
+
20
+// These are NVIDIA-specific capabilities stolen from github.com/containerd/containerd/contrib/nvidia.allCaps
21
+var allNvidiaCaps = map[nvidia.Capability]struct{}{
22
+	nvidia.Compute:  {},
23
+	nvidia.Compat32: {},
24
+	nvidia.Graphics: {},
25
+	nvidia.Utility:  {},
26
+	nvidia.Video:    {},
27
+	nvidia.Display:  {},
28
+}
29
+
30
+func init() {
31
+	if _, err := exec.LookPath(nvidiaCLI); err != nil {
32
+		// do not register Nvidia driver if helper binary is not present.
33
+		return
34
+	}
35
+	capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}}
36
+	nvidiaDriver := &deviceDriver{
37
+		capset:     capset,
38
+		updateSpec: setNvidiaGPUs,
39
+	}
40
+	for c := range capset {
41
+		nvidiaDriver.capset[c] = struct{}{}
42
+	}
43
+	registerDeviceDriver("nvidia", nvidiaDriver)
44
+}
45
+
46
+func setNvidiaGPUs(s *specs.Spec, dev *deviceInstance) error {
47
+	var opts []nvidia.Opts
48
+
49
+	req := dev.req
50
+	if req.Count != 0 && len(req.DeviceIDs) > 0 {
51
+		return errConflictCountDeviceIDs
52
+	}
53
+
54
+	if len(req.DeviceIDs) > 0 {
55
+		var ids []int
56
+		var uuids []string
57
+		for _, devID := range req.DeviceIDs {
58
+			id, err := strconv.Atoi(devID)
59
+			if err == nil {
60
+				ids = append(ids, id)
61
+				continue
62
+			}
63
+			// if not an integer, then assume UUID.
64
+			uuids = append(uuids, devID)
65
+		}
66
+		if len(ids) > 0 {
67
+			opts = append(opts, nvidia.WithDevices(ids...))
68
+		}
69
+		if len(uuids) > 0 {
70
+			opts = append(opts, nvidia.WithDeviceUUIDs(uuids...))
71
+		}
72
+	}
73
+
74
+	if req.Count < 0 {
75
+		opts = append(opts, nvidia.WithAllDevices)
76
+	} else if req.Count > 0 {
77
+		opts = append(opts, nvidia.WithDevices(countToDevices(req.Count)...))
78
+	}
79
+
80
+	var nvidiaCaps []nvidia.Capability
81
+	// req.Capabilities contains device capabilities, some but not all are NVIDIA driver capabilities.
82
+	for _, c := range dev.selectedCaps {
83
+		nvcap := nvidia.Capability(c)
84
+		if _, isNvidiaCap := allNvidiaCaps[nvcap]; isNvidiaCap {
85
+			nvidiaCaps = append(nvidiaCaps, nvcap)
86
+			continue
87
+		}
88
+		// TODO: nvidia.WithRequiredCUDAVersion
89
+		// for now we let the prestart hook verify cuda versions but errors are not pretty.
90
+	}
91
+
92
+	if nvidiaCaps != nil {
93
+		opts = append(opts, nvidia.WithCapabilities(nvidiaCaps...))
94
+	}
95
+
96
+	return nvidia.WithGPUs(opts...)(nil, nil, nil, s)
97
+}
98
+
99
// countToDevices returns the list 0, 1, ... count-1 of deviceIDs.
//
// A count of zero or less yields nil. Negative values are legal in the API
// (-1 means "all devices"), so they must not reach make, which would panic.
func countToDevices(count int) []int {
	if count <= 0 {
		return nil
	}
	devices := make([]int, count)
	for i := range devices {
		devices[i] = i
	}
	return devices
}
... ...
@@ -85,7 +85,7 @@ func setResources(s *specs.Spec, r containertypes.Resources) error {
85 85
 	return nil
86 86
 }
87 87
 
88
-func setDevices(s *specs.Spec, c *container.Container) error {
88
+func (daemon *Daemon) setDevices(s *specs.Spec, c *container.Container) error {
89 89
 	// Build lists of devices allowed and created within the container.
90 90
 	var devs []specs.LinuxDevice
91 91
 	devPermissions := s.Linux.Resources.Devices
... ...
@@ -122,6 +122,13 @@ func setDevices(s *specs.Spec, c *container.Container) error {
122 122
 
123 123
 	s.Linux.Devices = append(s.Linux.Devices, devs...)
124 124
 	s.Linux.Resources.Devices = devPermissions
125
+
126
+	for _, req := range c.HostConfig.DeviceRequests {
127
+		if err := daemon.handleDevice(req, s); err != nil {
128
+			return err
129
+		}
130
+	}
131
+
125 132
 	return nil
126 133
 }
127 134
 
... ...
@@ -751,7 +758,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
751 751
 	if err := daemon.initCgroupsPath(parentPath); err != nil {
752 752
 		return nil, fmt.Errorf("linux init cgroups path: %v", err)
753 753
 	}
754
-	if err := setDevices(&s, c); err != nil {
754
+	if err := daemon.setDevices(&s, c); err != nil {
755 755
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
756 756
 	}
757 757
 	if err := daemon.setRlimits(&s, c); err != nil {
... ...
@@ -818,15 +825,16 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
818 818
 		return nil, fmt.Errorf("linux mounts: %v", err)
819 819
 	}
820 820
 
821
+	if s.Hooks == nil {
822
+		s.Hooks = &specs.Hooks{}
823
+	}
821 824
 	for _, ns := range s.Linux.Namespaces {
822 825
 		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
823 826
 			target := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")
824
-			s.Hooks = &specs.Hooks{
825
-				Prestart: []specs.Hook{{
826
-					Path: target,
827
-					Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
828
-				}},
829
-			}
827
+			s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
828
+				Path: target,
829
+				Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
830
+			})
830 831
 		}
831 832
 	}
832 833
 
... ...
@@ -49,6 +49,8 @@ keywords: "API, Docker, rcli, REST, documentation"
49 49
 * `GET /info` now returns information about `DataPathPort` that is currently used in swarm
50 50
 * `GET /info` now returns `PidsLimit` boolean to indicate if the host kernel has
51 51
   PID limit support enabled.
52
+* `POST /containers/create` now accepts `DeviceRequests` as part of `HostConfig`.
53
+  This can be used to request NVIDIA GPUs.
52 54
 * `GET /swarm` endpoint now returns DataPathPort info
53 55
 * `POST /containers/create` now takes `KernelMemoryTCP` field to set hard limit for kernel TCP buffer memory.
54 56
 * `GET /service` now  returns `MaxReplicas` as part of the `Placement`.
55 57
new file mode 100644
... ...
@@ -0,0 +1,23 @@
0
+// Package capabilities allows capabilities to be handled generically.
1
+package capabilities
2
+
3
// Set represents a set of capabilities.
type Set map[string]struct{}

// Match tries to match set with caps, which is an OR list of AND lists of
// capabilities.
//
// It returns the first AND list whose capabilities are all members of set
// (an empty AND list therefore always matches), or nil if none match. A nil
// set never matches anything.
func (set Set) Match(caps [][]string) []string {
	if set == nil {
		return nil
	}
	for _, candidate := range caps {
		satisfied := true
		for _, name := range candidate {
			if _, present := set[name]; !present {
				satisfied = false
				break
			}
		}
		if satisfied {
			return candidate
		}
	}
	return nil
}
0 23
new file mode 100644
... ...
@@ -0,0 +1,72 @@
0
+package capabilities
1
+
2
+import (
3
+	"fmt"
4
+	"testing"
5
+)
6
+
7
+func TestMatch(t *testing.T) {
8
+	set := Set{
9
+		"foo": struct{}{},
10
+		"bar": struct{}{},
11
+	}
12
+	type testcase struct {
13
+		caps     [][]string
14
+		expected []string
15
+	}
16
+	var testcases = []testcase{
17
+		// matches
18
+		{
19
+			caps:     [][]string{{}},
20
+			expected: []string{},
21
+		},
22
+		{
23
+			caps:     [][]string{{"foo"}},
24
+			expected: []string{"foo"},
25
+		},
26
+		{
27
+			caps:     [][]string{{"bar"}, {"foo"}},
28
+			expected: []string{"bar"},
29
+		},
30
+		{
31
+			caps:     [][]string{{"foo", "bar"}},
32
+			expected: []string{"foo", "bar"},
33
+		},
34
+		{
35
+			caps:     [][]string{{"qux"}, {"foo"}},
36
+			expected: []string{"foo"},
37
+		},
38
+		{
39
+			caps:     [][]string{{"foo", "bar"}, {"baz"}, {"bar"}},
40
+			expected: []string{"foo", "bar"},
41
+		},
42
+
43
+		// non matches
44
+		{caps: nil},
45
+		{caps: [][]string{}},
46
+		{caps: [][]string{{"qux"}}},
47
+		{caps: [][]string{{"foo", "bar", "qux"}}},
48
+		{caps: [][]string{{"qux"}, {"baz"}}},
49
+		{caps: [][]string{{"foo", "baz"}}},
50
+	}
51
+
52
+	for _, m := range testcases {
53
+		t.Run(fmt.Sprintf("%v", m.caps), func(t *testing.T) {
54
+			selected := set.Match(m.caps)
55
+			if m.expected == nil || selected == nil {
56
+				if m.expected == nil && selected == nil {
57
+					return
58
+				}
59
+				t.Fatalf("selected = %v, expected = %v", selected, m.expected)
60
+			}
61
+			if len(selected) != len(m.expected) {
62
+				t.Fatalf("len(selected) = %d, len(expected) = %d", len(selected), len(m.expected))
63
+			}
64
+			for i, s := range selected {
65
+				if m.expected[i] != s {
66
+					t.Fatalf("selected[%d] = %s, expected[%d] = %s", i, s, i, m.expected[i])
67
+				}
68
+			}
69
+		})
70
+	}
71
+}
0 72
new file mode 100644
... ...
@@ -0,0 +1,207 @@
0
+/*
1
+   Copyright The containerd Authors.
2
+
3
+   Licensed under the Apache License, Version 2.0 (the "License");
4
+   you may not use this file except in compliance with the License.
5
+   You may obtain a copy of the License at
6
+
7
+       http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+   Unless required by applicable law or agreed to in writing, software
10
+   distributed under the License is distributed on an "AS IS" BASIS,
11
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+   See the License for the specific language governing permissions and
13
+   limitations under the License.
14
+*/
15
+
16
+package nvidia
17
+
18
+import (
19
+	"context"
20
+	"fmt"
21
+	"os"
22
+	"os/exec"
23
+	"strconv"
24
+	"strings"
25
+
26
+	"github.com/containerd/containerd/containers"
27
+	"github.com/containerd/containerd/oci"
28
+	specs "github.com/opencontainers/runtime-spec/specs-go"
29
+)
30
+
31
// nvidiaCLI is the name of the NVIDIA helper binary looked up in PATH.
const nvidiaCLI = "nvidia-container-cli"

// Capability specifies capabilities for the gpu inside the container
// Detailed explanation of options can be found:
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
type Capability string

// The capabilities known to this package.
const (
	Compute  Capability = "compute"  // Compute capability
	Compat32 Capability = "compat32" // Compat32 capability
	Graphics Capability = "graphics" // Graphics capability
	Utility  Capability = "utility"  // Utility capability
	Video    Capability = "video"    // Video capability
	Display  Capability = "display"  // Display capability
)

// allCaps lists every known capability; it is what WithAllCapabilities
// configures.
var allCaps = []Capability{Compute, Compat32, Graphics, Utility, Video, Display}
61
+
62
+// WithGPUs adds NVIDIA gpu support to a container
63
+func WithGPUs(opts ...Opts) oci.SpecOpts {
64
+	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
65
+		c := &config{}
66
+		for _, o := range opts {
67
+			if err := o(c); err != nil {
68
+				return err
69
+			}
70
+		}
71
+		if c.OCIHookPath == "" {
72
+			path, err := exec.LookPath("containerd")
73
+			if err != nil {
74
+				return err
75
+			}
76
+			c.OCIHookPath = path
77
+		}
78
+		nvidiaPath, err := exec.LookPath(nvidiaCLI)
79
+		if err != nil {
80
+			return err
81
+		}
82
+		if s.Hooks == nil {
83
+			s.Hooks = &specs.Hooks{}
84
+		}
85
+		s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
86
+			Path: c.OCIHookPath,
87
+			Args: append([]string{
88
+				"containerd",
89
+				"oci-hook",
90
+				"--",
91
+				nvidiaPath,
92
+				// ensures the required kernel modules are properly loaded
93
+				"--load-kmods",
94
+			}, c.args()...),
95
+			Env: os.Environ(),
96
+		})
97
+		return nil
98
+	}
99
+}
100
+
101
+type config struct {
102
+	Devices      []string
103
+	Capabilities []Capability
104
+	LoadKmods    bool
105
+	LDCache      string
106
+	LDConfig     string
107
+	Requirements []string
108
+	OCIHookPath  string
109
+}
110
+
111
+func (c *config) args() []string {
112
+	var args []string
113
+
114
+	if c.LoadKmods {
115
+		args = append(args, "--load-kmods")
116
+	}
117
+	if c.LDCache != "" {
118
+		args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
119
+	}
120
+	args = append(args,
121
+		"configure",
122
+	)
123
+	if len(c.Devices) > 0 {
124
+		args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
125
+	}
126
+	for _, c := range c.Capabilities {
127
+		args = append(args, fmt.Sprintf("--%s", c))
128
+	}
129
+	if c.LDConfig != "" {
130
+		args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
131
+	}
132
+	for _, r := range c.Requirements {
133
+		args = append(args, fmt.Sprintf("--require=%s", r))
134
+	}
135
+	args = append(args, "--pid={{pid}}", "{{rootfs}}")
136
+	return args
137
+}
138
+
139
+// Opts are options for configuring gpu support
140
+type Opts func(*config) error
141
+
142
+// WithDevices adds the provided device indexes to the container
143
+func WithDevices(ids ...int) Opts {
144
+	return func(c *config) error {
145
+		for _, i := range ids {
146
+			c.Devices = append(c.Devices, strconv.Itoa(i))
147
+		}
148
+		return nil
149
+	}
150
+}
151
+
152
+// WithDeviceUUIDs adds the specific device UUID to the container
153
+func WithDeviceUUIDs(uuids ...string) Opts {
154
+	return func(c *config) error {
155
+		c.Devices = append(c.Devices, uuids...)
156
+		return nil
157
+	}
158
+}
159
+
160
+// WithAllDevices adds all gpus to the container
161
+func WithAllDevices(c *config) error {
162
+	c.Devices = []string{"all"}
163
+	return nil
164
+}
165
+
166
+// WithAllCapabilities adds all capabilities to the container for the gpus
167
+func WithAllCapabilities(c *config) error {
168
+	c.Capabilities = allCaps
169
+	return nil
170
+}
171
+
172
+// WithCapabilities adds the specified capabilities to the container for the gpus
173
+func WithCapabilities(caps ...Capability) Opts {
174
+	return func(c *config) error {
175
+		c.Capabilities = append(c.Capabilities, caps...)
176
+		return nil
177
+	}
178
+}
179
+
180
+// WithRequiredCUDAVersion sets the required cuda version
181
+func WithRequiredCUDAVersion(major, minor int) Opts {
182
+	return func(c *config) error {
183
+		c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
184
+		return nil
185
+	}
186
+}
187
+
188
+// WithOCIHookPath sets the hook path for the binary
189
+func WithOCIHookPath(path string) Opts {
190
+	return func(c *config) error {
191
+		c.OCIHookPath = path
192
+		return nil
193
+	}
194
+}
195
+
196
+// WithLookupOCIHookPath sets the hook path for the binary via a binary name
197
+func WithLookupOCIHookPath(name string) Opts {
198
+	return func(c *config) error {
199
+		path, err := exec.LookPath(name)
200
+		if err != nil {
201
+			return err
202
+		}
203
+		c.OCIHookPath = path
204
+		return nil
205
+	}
206
+}