Browse code

cgroup2: use shim V2

* Requires containerd binaries from containerd/containerd#3799. Metrics are not implemented yet.
* Works with crun v0.10.4, but `--security-opt seccomp=unconfined` is needed unless the master version of libseccomp is used
(containers/crun#156, seccomp/libseccomp#177)
* Doesn't work with master runc yet
* Resource limits are not implemented yet

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>

Akihiro Suda authored on 2019/11/05 16:10:19
Showing 10 changed files
... ...
@@ -794,6 +794,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
794 794
 		PluginStore: pluginStore,
795 795
 		startupDone: make(chan struct{}),
796 796
 	}
797
+
797 798
 	// Ensure the daemon is properly shutdown if there is a failure during
798 799
 	// initialization
799 800
 	defer func() {
... ...
@@ -914,7 +915,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
914 914
 			}
915 915
 		}
916 916
 
917
-		return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m)
917
+		return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, config.ContainerdPluginNamespace, m, d.useShimV2())
918 918
 	}
919 919
 
920 920
 	// Plugin system initialization should happen before restore. Do not change order.
... ...
@@ -1063,7 +1064,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
1063 1063
 
1064 1064
 	go d.execCommandGC()
1065 1065
 
1066
-	d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d)
1066
+	d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), config.ContainerdNamespace, d, d.useShimV2())
1067 1067
 	if err != nil {
1068 1068
 		return nil, err
1069 1069
 	}
... ...
@@ -1639,3 +1639,7 @@ func (daemon *Daemon) setupSeccompProfile() error {
1639 1639
 	}
1640 1640
 	return nil
1641 1641
 }
1642
+
1643
+func (daemon *Daemon) useShimV2() bool {
1644
+	return cgroups.IsCgroup2UnifiedMode()
1645
+}
... ...
@@ -653,3 +653,7 @@ func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {
653 653
 
654 654
 func setupResolvConf(config *config.Config) {
655 655
 }
656
+
657
+func (daemon *Daemon) useShimV2() bool {
658
+	return true
659
+}
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"path/filepath"
9 9
 
10 10
 	"github.com/containerd/containerd/runtime/linux/runctypes"
11
+	v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
11 12
 	"github.com/docker/docker/container"
12 13
 	"github.com/docker/docker/errdefs"
13 14
 	"github.com/pkg/errors"
... ...
@@ -43,6 +44,20 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
43 43
 	if err != nil {
44 44
 		return nil, err
45 45
 	}
46
+	if daemon.useShimV2() {
47
+		opts := &v2runcoptions.Options{
48
+			BinaryName: path,
49
+			Root: filepath.Join(daemon.configStore.ExecRoot,
50
+				fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
51
+		}
52
+
53
+		if UsingSystemd(daemon.configStore) {
54
+			opts.SystemdCgroup = true
55
+		}
56
+
57
+		return opts, nil
58
+
59
+	}
46 60
 	opts := &runctypes.RuncOptions{
47 61
 		Runtime: path,
48 62
 		RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,
... ...
@@ -9,6 +9,6 @@ import (
9 9
 )
10 10
 
11 11
 // NewClient creates a new libcontainerd client from a containerd client
12
-func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
13
-	return remote.NewClient(ctx, cli, stateDir, ns, b)
12
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
13
+	return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
14 14
 }
... ...
@@ -11,9 +11,10 @@ import (
11 11
 )
12 12
 
13 13
 // NewClient creates a new libcontainerd client from a containerd client
14
-func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
14
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
15 15
 	if !system.ContainerdRuntimeSupported() {
16
+		// useShimV2 is ignored for windows
16 17
 		return local.NewClient(ctx, cli, stateDir, ns, b)
17 18
 	}
18
-	return remote.NewClient(ctx, cli, stateDir, ns, b)
19
+	return remote.NewClient(ctx, cli, stateDir, ns, b, useShimV2)
19 20
 }
... ...
@@ -23,6 +23,7 @@ import (
23 23
 	"github.com/containerd/containerd/events"
24 24
 	"github.com/containerd/containerd/images"
25 25
 	"github.com/containerd/containerd/runtime/linux/runctypes"
26
+	v2runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
26 27
 	"github.com/containerd/typeurl"
27 28
 	"github.com/docker/docker/errdefs"
28 29
 	"github.com/docker/docker/libcontainerd/queue"
... ...
@@ -45,21 +46,27 @@ type client struct {
45 45
 	logger   *logrus.Entry
46 46
 	ns       string
47 47
 
48
-	backend libcontainerdtypes.Backend
49
-	eventQ  queue.Queue
50
-	oomMu   sync.Mutex
51
-	oom     map[string]bool
48
+	backend         libcontainerdtypes.Backend
49
+	eventQ          queue.Queue
50
+	oomMu           sync.Mutex
51
+	oom             map[string]bool
52
+	useShimV2       bool
53
+	v2runcoptionsMu sync.Mutex
54
+	// v2runcoptions is used for copying options specified on Create() to Start()
55
+	v2runcoptions map[string]v2runcoptions.Options
52 56
 }
53 57
 
54 58
 // NewClient creates a new libcontainerd client from a containerd client
55
-func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
59
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend, useShimV2 bool) (libcontainerdtypes.Client, error) {
56 60
 	c := &client{
57
-		client:   cli,
58
-		stateDir: stateDir,
59
-		logger:   logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
60
-		ns:       ns,
61
-		backend:  b,
62
-		oom:      make(map[string]bool),
61
+		client:        cli,
62
+		stateDir:      stateDir,
63
+		logger:        logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
64
+		ns:            ns,
65
+		backend:       b,
66
+		oom:           make(map[string]bool),
67
+		useShimV2:     useShimV2,
68
+		v2runcoptions: make(map[string]v2runcoptions.Options),
63 69
 	}
64 70
 
65 71
 	go c.processEventStream(ctx, ns)
... ...
@@ -126,9 +133,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
126 126
 	bdir := c.bundleDir(id)
127 127
 	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
128 128
 
129
+	rt := runtimeName
130
+	if c.useShimV2 {
131
+		rt = shimV2RuntimeName
132
+	}
129 133
 	newOpts := []containerd.NewContainerOpts{
130 134
 		containerd.WithSpec(ociSpec),
131
-		containerd.WithRuntime(runtimeName, runtimeOptions),
135
+		containerd.WithRuntime(rt, runtimeOptions),
132 136
 		WithBundle(bdir, ociSpec),
133 137
 	}
134 138
 	opts = append(opts, newOpts...)
... ...
@@ -140,6 +151,13 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
140 140
 		}
141 141
 		return wrapError(err)
142 142
 	}
143
+	if c.useShimV2 {
144
+		if x, ok := runtimeOptions.(*v2runcoptions.Options); ok {
145
+			c.v2runcoptionsMu.Lock()
146
+			c.v2runcoptions[id] = *x
147
+			c.v2runcoptionsMu.Unlock()
148
+		}
149
+	}
143 150
 	return nil
144 151
 }
145 152
 
... ...
@@ -200,11 +218,26 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
200 200
 
201 201
 	if runtime.GOOS != "windows" {
202 202
 		taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
203
-			info.Options = &runctypes.CreateOptions{
204
-				IoUid:       uint32(uid),
205
-				IoGid:       uint32(gid),
206
-				NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
203
+			if c.useShimV2 {
204
+				// For v2, we need to inherit options specified on Create
205
+				c.v2runcoptionsMu.Lock()
206
+				opts, ok := c.v2runcoptions[id]
207
+				c.v2runcoptionsMu.Unlock()
208
+				if !ok {
209
+					opts = v2runcoptions.Options{}
210
+				}
211
+				opts.IoUid = uint32(uid)
212
+				opts.IoGid = uint32(gid)
213
+				opts.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
214
+				info.Options = &opts
215
+			} else {
216
+				info.Options = &runctypes.CreateOptions{
217
+					IoUid:       uint32(uid),
218
+					IoGid:       uint32(gid),
219
+					NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
220
+				}
207 221
 			}
222
+
208 223
 			return nil
209 224
 		})
210 225
 	} else {
... ...
@@ -466,6 +499,9 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
466 466
 	c.oomMu.Lock()
467 467
 	delete(c.oom, containerID)
468 468
 	c.oomMu.Unlock()
469
+	c.v2runcoptionsMu.Lock()
470
+	delete(c.v2runcoptions, containerID)
471
+	c.v2runcoptionsMu.Unlock()
469 472
 	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
470 473
 		if err := os.RemoveAll(bundle); err != nil {
471 474
 			c.logger.WithError(err).WithFields(logrus.Fields{
... ...
@@ -16,7 +16,10 @@ import (
16 16
 	"github.com/sirupsen/logrus"
17 17
 )
18 18
 
19
-const runtimeName = "io.containerd.runtime.v1.linux"
19
+const (
20
+	runtimeName       = "io.containerd.runtime.v1.linux"
21
+	shimV2RuntimeName = "io.containerd.runc.v2"
22
+)
20 23
 
21 24
 func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
22 25
 	return &libcontainerdtypes.Summary{}, nil
... ...
@@ -16,7 +16,10 @@ import (
16 16
 	"github.com/sirupsen/logrus"
17 17
 )
18 18
 
19
-const runtimeName = "io.containerd.runhcs.v1"
19
+const (
20
+	runtimeName       = "io.containerd.runhcs.v1"
21
+	shimV2RuntimeName = runtimeName
22
+)
20 23
 
21 24
 func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
22 25
 	switch pd := i.(type) {
... ...
@@ -26,13 +26,13 @@ type ExitHandler interface {
26 26
 }
27 27
 
28 28
 // New creates a new containerd plugin executor
29
-func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string, exitHandler ExitHandler) (*Executor, error) {
29
+func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string, exitHandler ExitHandler, useShimV2 bool) (*Executor, error) {
30 30
 	e := &Executor{
31 31
 		rootDir:     rootDir,
32 32
 		exitHandler: exitHandler,
33 33
 	}
34 34
 
35
-	client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e)
35
+	client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e, useShimV2)
36 36
 	if err != nil {
37 37
 		return nil, errors.Wrap(err, "error creating containerd exec client")
38 38
 	}