Browse code

libcontainerd: split client and supervisor

Adds a supervisor package for starting and monitoring containerd.
Separates grpc connection allowing access from daemon.

Signed-off-by: Derek McGowan <derek@mcgstyle.net>

Derek McGowan authored on 2018/05/24 04:15:21
Showing 23 changed files
... ...
@@ -36,7 +36,7 @@ import (
36 36
 	"github.com/docker/docker/daemon/config"
37 37
 	"github.com/docker/docker/daemon/listeners"
38 38
 	"github.com/docker/docker/dockerversion"
39
-	"github.com/docker/docker/libcontainerd"
39
+	"github.com/docker/docker/libcontainerd/supervisor"
40 40
 	dopts "github.com/docker/docker/opts"
41 41
 	"github.com/docker/docker/pkg/authorization"
42 42
 	"github.com/docker/docker/pkg/jsonmessage"
... ...
@@ -45,7 +45,6 @@ import (
45 45
 	"github.com/docker/docker/pkg/signal"
46 46
 	"github.com/docker/docker/pkg/system"
47 47
 	"github.com/docker/docker/plugin"
48
-	"github.com/docker/docker/registry"
49 48
 	"github.com/docker/docker/runconfig"
50 49
 	"github.com/docker/go-connections/tlsconfig"
51 50
 	swarmapi "github.com/docker/swarmkit/api"
... ...
@@ -112,6 +111,10 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
112 112
 		return err
113 113
 	}
114 114
 
115
+	if err := system.MkdirAll(cli.Config.ExecRoot, 0700, ""); err != nil {
116
+		return err
117
+	}
118
+
115 119
 	if cli.Pidfile != "" {
116 120
 		pf, err := pidfile.New(cli.Pidfile)
117 121
 		if err != nil {
... ...
@@ -135,19 +138,27 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
135 135
 		return fmt.Errorf("Failed to load listeners: %v", err)
136 136
 	}
137 137
 
138
-	registryService, err := registry.NewService(cli.Config.ServiceOptions)
139
-	if err != nil {
140
-		return err
141
-	}
138
+	ctx, cancel := context.WithCancel(context.Background())
139
+	if cli.Config.ContainerdAddr == "" && runtime.GOOS != "windows" {
140
+		opts, err := cli.getContainerdDaemonOpts()
141
+		if err != nil {
142
+			cancel()
143
+			return fmt.Errorf("Failed to generate containerd options: %v", err)
144
+		}
142 145
 
143
-	rOpts, err := cli.getRemoteOptions()
144
-	if err != nil {
145
-		return fmt.Errorf("Failed to generate containerd options: %v", err)
146
-	}
147
-	containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), rOpts...)
148
-	if err != nil {
149
-		return err
146
+		r, err := supervisor.Start(ctx, filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), opts...)
147
+		if err != nil {
148
+			cancel()
149
+			return fmt.Errorf("Failed to start containerd: %v", err)
150
+		}
151
+
152
+		cli.Config.ContainerdAddr = r.Address()
153
+
154
+		// Try to wait for containerd to shutdown
155
+		defer r.WaitTimeout(10 * time.Second)
150 156
 	}
157
+	defer cancel()
158
+
151 159
 	signal.Trap(func() {
152 160
 		cli.stop()
153 161
 		<-stopc // wait for daemonCli.start() to return
... ...
@@ -162,7 +173,7 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
162 162
 		logrus.Fatalf("Error creating middlewares: %v", err)
163 163
 	}
164 164
 
165
-	d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote, pluginStore)
165
+	d, err := daemon.NewDaemon(ctx, cli.Config, pluginStore)
166 166
 	if err != nil {
167 167
 		return fmt.Errorf("Error starting daemon: %v", err)
168 168
 	}
... ...
@@ -207,10 +218,7 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
207 207
 
208 208
 	initRouter(routerOptions)
209 209
 
210
-	// process cluster change notifications
211
-	watchCtx, cancel := context.WithCancel(context.Background())
212
-	defer cancel()
213
-	go d.ProcessClusterNotifications(watchCtx, c.GetWatchStream())
210
+	go d.ProcessClusterNotifications(ctx, c.GetWatchStream())
214 211
 
215 212
 	cli.setupConfigReloadTrap()
216 213
 
... ...
@@ -227,8 +235,12 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
227 227
 	// Wait for serve API to complete
228 228
 	errAPI := <-serveAPIWait
229 229
 	c.Cleanup()
230
+
230 231
 	shutdownDaemon(d)
231
-	containerdRemote.Cleanup()
232
+
233
+	// Stop notification processing and any background processes
234
+	cancel()
235
+
232 236
 	if errAPI != nil {
233 237
 		return fmt.Errorf("Shutting down due to ServeAPI error: %v", errAPI)
234 238
 	}
... ...
@@ -511,14 +523,22 @@ func (cli *DaemonCli) initMiddlewares(s *apiserver.Server, cfg *apiserver.Config
511 511
 	return nil
512 512
 }
513 513
 
514
-func (cli *DaemonCli) getRemoteOptions() ([]libcontainerd.RemoteOption, error) {
515
-	opts := []libcontainerd.RemoteOption{}
516
-
517
-	pOpts, err := cli.getPlatformRemoteOptions()
514
+func (cli *DaemonCli) getContainerdDaemonOpts() ([]supervisor.DaemonOpt, error) {
515
+	opts, err := cli.getPlatformContainerdDaemonOpts()
518 516
 	if err != nil {
519 517
 		return nil, err
520 518
 	}
521
-	opts = append(opts, pOpts...)
519
+
520
+	if cli.Config.Debug {
521
+		opts = append(opts, supervisor.WithLogLevel("debug"))
522
+	} else if cli.Config.LogLevel != "" {
523
+		opts = append(opts, supervisor.WithLogLevel(cli.Config.LogLevel))
524
+	}
525
+
526
+	if !cli.Config.CriContainerd {
527
+		opts = append(opts, supervisor.WithPlugin("cri", nil))
528
+	}
529
+
522 530
 	return opts, nil
523 531
 }
524 532
 
... ...
@@ -13,7 +13,7 @@ import (
13 13
 	"github.com/containerd/containerd/runtime/linux"
14 14
 	"github.com/docker/docker/cmd/dockerd/hack"
15 15
 	"github.com/docker/docker/daemon"
16
-	"github.com/docker/docker/libcontainerd"
16
+	"github.com/docker/docker/libcontainerd/supervisor"
17 17
 	"github.com/docker/libnetwork/portallocator"
18 18
 	"golang.org/x/sys/unix"
19 19
 )
... ...
@@ -36,29 +36,16 @@ func getDaemonConfDir(_ string) string {
36 36
 	return "/etc/docker"
37 37
 }
38 38
 
39
-func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
40
-	opts := []libcontainerd.RemoteOption{
41
-		libcontainerd.WithOOMScore(cli.Config.OOMScoreAdjust),
42
-		libcontainerd.WithPlugin("linux", &linux.Config{
39
+func (cli *DaemonCli) getPlatformContainerdDaemonOpts() ([]supervisor.DaemonOpt, error) {
40
+	opts := []supervisor.DaemonOpt{
41
+		supervisor.WithOOMScore(cli.Config.OOMScoreAdjust),
42
+		supervisor.WithPlugin("linux", &linux.Config{
43 43
 			Shim:        daemon.DefaultShimBinary,
44 44
 			Runtime:     daemon.DefaultRuntimeBinary,
45 45
 			RuntimeRoot: filepath.Join(cli.Config.Root, "runc"),
46 46
 			ShimDebug:   cli.Config.Debug,
47 47
 		}),
48 48
 	}
49
-	if cli.Config.Debug {
50
-		opts = append(opts, libcontainerd.WithLogLevel("debug"))
51
-	} else if cli.Config.LogLevel != "" {
52
-		opts = append(opts, libcontainerd.WithLogLevel(cli.Config.LogLevel))
53
-	}
54
-	if cli.Config.ContainerdAddr != "" {
55
-		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
56
-	} else {
57
-		opts = append(opts, libcontainerd.WithStartDaemon(true))
58
-	}
59
-	if !cli.Config.CriContainerd {
60
-		opts = append(opts, libcontainerd.WithPlugin("cri", nil))
61
-	}
62 49
 
63 50
 	return opts, nil
64 51
 }
... ...
@@ -6,7 +6,7 @@ import (
6 6
 	"os"
7 7
 	"path/filepath"
8 8
 
9
-	"github.com/docker/docker/libcontainerd"
9
+	"github.com/docker/docker/libcontainerd/supervisor"
10 10
 	"github.com/sirupsen/logrus"
11 11
 	"golang.org/x/sys/windows"
12 12
 )
... ...
@@ -48,7 +48,7 @@ func notifyShutdown(err error) {
48 48
 	}
49 49
 }
50 50
 
51
-func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
51
+func (cli *DaemonCli) getPlatformContainerdDaemonOpts() ([]supervisor.DaemonOpt, error) {
52 52
 	return nil, nil
53 53
 }
54 54
 
... ...
@@ -18,6 +18,11 @@ import (
18 18
 	"sync"
19 19
 	"time"
20 20
 
21
+	"google.golang.org/grpc"
22
+
23
+	"github.com/containerd/containerd"
24
+	"github.com/containerd/containerd/defaults"
25
+	"github.com/containerd/containerd/pkg/dialer"
21 26
 	"github.com/docker/docker/api/types"
22 27
 	containertypes "github.com/docker/docker/api/types/container"
23 28
 	"github.com/docker/docker/api/types/swarm"
... ...
@@ -94,6 +99,7 @@ type Daemon struct {
94 94
 	PluginStore           *plugin.Store // todo: remove
95 95
 	pluginManager         *plugin.Manager
96 96
 	linkIndex             *linkIndex
97
+	containerdCli         *containerd.Client
97 98
 	containerd            libcontainerd.Client
98 99
 	defaultIsolation      containertypes.Isolation // Default isolation mode on Windows
99 100
 	clusterProvider       cluster.Provider
... ...
@@ -565,9 +571,14 @@ func (daemon *Daemon) IsSwarmCompatible() error {
565 565
 
566 566
 // NewDaemon sets up everything for the daemon to be able to service
567 567
 // requests from the webserver.
568
-func NewDaemon(config *config.Config, registryService registry.Service, containerdRemote libcontainerd.Remote, pluginStore *plugin.Store) (daemon *Daemon, err error) {
568
+func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.Store) (daemon *Daemon, err error) {
569 569
 	setDefaultMtu(config)
570 570
 
571
+	registryService, err := registry.NewService(config.ServiceOptions)
572
+	if err != nil {
573
+		return nil, err
574
+	}
575
+
571 576
 	// Ensure that we have a correct root key limit for launching containers.
572 577
 	if err := ModifyRootKeyLimit(); err != nil {
573 578
 		logrus.Warnf("unable to modify root key limit, number of containers could be limited by this quota: %v", err)
... ...
@@ -720,8 +731,35 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
720 720
 	}
721 721
 	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
722 722
 
723
+	gopts := []grpc.DialOption{
724
+		grpc.WithInsecure(),
725
+		grpc.WithBackoffMaxDelay(3 * time.Second),
726
+		grpc.WithDialer(dialer.Dialer),
727
+
728
+		// TODO(stevvooe): We may need to allow configuration of this on the client.
729
+		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
730
+		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
731
+	}
732
+	if config.ContainerdAddr != "" {
733
+		d.containerdCli, err = containerd.New(config.ContainerdAddr, containerd.WithDefaultNamespace(ContainersNamespace), containerd.WithDialOpts(gopts))
734
+		if err != nil {
735
+			return nil, errors.Wrapf(err, "failed to dial %q", config.ContainerdAddr)
736
+		}
737
+	}
738
+
723 739
 	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
724
-		return pluginexec.New(getPluginExecRoot(config.Root), containerdRemote, m)
740
+		var pluginCli *containerd.Client
741
+
742
+		// Windows is not currently using containerd, keep the
743
+		// client as nil
744
+		if config.ContainerdAddr != "" {
745
+			pluginCli, err = containerd.New(config.ContainerdAddr, containerd.WithDefaultNamespace(pluginexec.PluginNamespace), containerd.WithDialOpts(gopts))
746
+			if err != nil {
747
+				return nil, errors.Wrapf(err, "failed to dial %q", config.ContainerdAddr)
748
+			}
749
+		}
750
+
751
+		return pluginexec.New(ctx, getPluginExecRoot(config.Root), pluginCli, m)
725 752
 	}
726 753
 
727 754
 	// Plugin system initialization should happen before restore. Do not change order.
... ...
@@ -880,7 +918,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
880 880
 
881 881
 	go d.execCommandGC()
882 882
 
883
-	d.containerd, err = containerdRemote.NewClient(ContainersNamespace, d)
883
+	d.containerd, err = libcontainerd.NewClient(ctx, d.containerdCli, filepath.Join(config.ExecRoot, "containerd"), ContainersNamespace, d)
884 884
 	if err != nil {
885 885
 		return nil, err
886 886
 	}
... ...
@@ -1037,6 +1075,10 @@ func (daemon *Daemon) Shutdown() error {
1037 1037
 		daemon.netController.Stop()
1038 1038
 	}
1039 1039
 
1040
+	if daemon.containerdCli != nil {
1041
+		daemon.containerdCli.Close()
1042
+	}
1043
+
1040 1044
 	return daemon.cleanupMounts()
1041 1045
 }
1042 1046
 
... ...
@@ -102,38 +102,34 @@ func (c *container) getOOMKilled() bool {
102 102
 type client struct {
103 103
 	sync.RWMutex // protects containers map
104 104
 
105
-	remote   *containerd.Client
105
+	client   *containerd.Client
106 106
 	stateDir string
107 107
 	logger   *logrus.Entry
108
+	ns       string
108 109
 
109
-	namespace  string
110 110
 	backend    Backend
111 111
 	eventQ     queue
112 112
 	containers map[string]*container
113 113
 }
114 114
 
115
-func (c *client) reconnect() error {
116
-	c.Lock()
117
-	err := c.remote.Reconnect()
118
-	c.Unlock()
119
-	return err
120
-}
115
+// NewClient creates a new libcontainerd client from a containerd client
116
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b Backend) (Client, error) {
117
+	c := &client{
118
+		client:     cli,
119
+		stateDir:   stateDir,
120
+		logger:     logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
121
+		ns:         ns,
122
+		backend:    b,
123
+		containers: make(map[string]*container),
124
+	}
121 125
 
122
-func (c *client) setRemote(remote *containerd.Client) {
123
-	c.Lock()
124
-	c.remote = remote
125
-	c.Unlock()
126
-}
126
+	go c.processEventStream(ctx, ns)
127 127
 
128
-func (c *client) getRemote() *containerd.Client {
129
-	c.RLock()
130
-	remote := c.remote
131
-	c.RUnlock()
132
-	return remote
128
+	return c, nil
133 129
 }
134 130
 
135 131
 func (c *client) Version(ctx context.Context) (containerd.Version, error) {
136
-	return c.getRemote().Version(ctx)
132
+	return c.client.Version(ctx)
137 133
 }
138 134
 
139 135
 // Restore loads the containerd container.
... ...
@@ -170,7 +166,7 @@ func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallba
170 170
 		err = wrapError(err)
171 171
 	}()
172 172
 
173
-	ctr, err := c.getRemote().LoadContainer(ctx, id)
173
+	ctr, err := c.client.LoadContainer(ctx, id)
174 174
 	if err != nil {
175 175
 		return false, -1, errors.WithStack(wrapError(err))
176 176
 	}
... ...
@@ -225,7 +221,7 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
225 225
 
226 226
 	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
227 227
 
228
-	cdCtr, err := c.getRemote().NewContainer(ctx, id,
228
+	cdCtr, err := c.client.NewContainer(ctx, id,
229 229
 		containerd.WithSpec(ociSpec),
230 230
 		// TODO(mlaventure): when containerd support lcow, revisit runtime value
231 231
 		containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
... ...
@@ -268,7 +264,7 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
268 268
 		// remove the checkpoint when we're done
269 269
 		defer func() {
270 270
 			if cp != nil {
271
-				err := c.getRemote().ContentStore().Delete(context.Background(), cp.Digest)
271
+				err := c.client.ContentStore().Delete(context.Background(), cp.Digest)
272 272
 				if err != nil {
273 273
 					c.logger.WithError(err).WithFields(logrus.Fields{
274 274
 						"ref":    checkpointDir,
... ...
@@ -571,14 +567,14 @@ func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDi
571 571
 	}
572 572
 	// Whatever happens, delete the checkpoint from containerd
573 573
 	defer func() {
574
-		err := c.getRemote().ImageService().Delete(context.Background(), img.Name())
574
+		err := c.client.ImageService().Delete(context.Background(), img.Name())
575 575
 		if err != nil {
576 576
 			c.logger.WithError(err).WithField("digest", img.Target().Digest).
577 577
 				Warnf("failed to delete checkpoint image")
578 578
 		}
579 579
 	}()
580 580
 
581
-	b, err := content.ReadBlob(ctx, c.getRemote().ContentStore(), img.Target())
581
+	b, err := content.ReadBlob(ctx, c.client.ContentStore(), img.Target())
582 582
 	if err != nil {
583 583
 		return errdefs.System(errors.Wrapf(err, "failed to retrieve checkpoint data"))
584 584
 	}
... ...
@@ -598,7 +594,7 @@ func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDi
598 598
 		return errdefs.System(errors.Wrapf(err, "invalid checkpoint"))
599 599
 	}
600 600
 
601
-	rat, err := c.getRemote().ContentStore().ReaderAt(ctx, *cpDesc)
601
+	rat, err := c.client.ContentStore().ReaderAt(ctx, *cpDesc)
602 602
 	if err != nil {
603 603
 		return errdefs.System(errors.Wrapf(err, "failed to get checkpoint reader"))
604 604
 	}
... ...
@@ -735,7 +731,7 @@ func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
735 735
 	})
736 736
 }
737 737
 
738
-func (c *client) processEventStream(ctx context.Context) {
738
+func (c *client) processEventStream(ctx context.Context, ns string) {
739 739
 	var (
740 740
 		err error
741 741
 		ev  *events.Envelope
... ...
@@ -746,9 +742,9 @@ func (c *client) processEventStream(ctx context.Context) {
746 746
 
747 747
 	// Filter on both namespace *and* topic. To create an "and" filter,
748 748
 	// this must be a single, comma-separated string
749
-	eventStream, errC := c.getRemote().EventService().Subscribe(ctx, "namespace=="+c.namespace+",topic~=|^/tasks/|")
749
+	eventStream, errC := c.client.EventService().Subscribe(ctx, "namespace=="+ns+",topic~=|^/tasks/|")
750 750
 
751
-	c.logger.WithField("namespace", c.namespace).Debug("processing event stream")
751
+	c.logger.Debug("processing event stream")
752 752
 
753 753
 	var oomKilled bool
754 754
 	for {
... ...
@@ -758,7 +754,7 @@ func (c *client) processEventStream(ctx context.Context) {
758 758
 				errStatus, ok := status.FromError(err)
759 759
 				if !ok || errStatus.Code() != codes.Canceled {
760 760
 					c.logger.WithError(err).Error("failed to get event")
761
-					go c.processEventStream(ctx)
761
+					go c.processEventStream(ctx, ns)
762 762
 				} else {
763 763
 					c.logger.WithError(ctx.Err()).Info("stopping event stream following graceful shutdown")
764 764
 				}
... ...
@@ -858,7 +854,7 @@ func (c *client) processEventStream(ctx context.Context) {
858 858
 }
859 859
 
860 860
 func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
861
-	writer, err := c.getRemote().ContentStore().Writer(ctx, content.WithRef(ref))
861
+	writer, err := c.client.ContentStore().Writer(ctx, content.WithRef(ref))
862 862
 	if err != nil {
863 863
 		return nil, err
864 864
 	}
... ...
@@ -71,6 +71,28 @@ const (
71 71
 // of docker.
72 72
 const defaultOwner = "docker"
73 73
 
74
+type client struct {
75
+	sync.Mutex
76
+
77
+	stateDir   string
78
+	backend    Backend
79
+	logger     *logrus.Entry
80
+	eventQ     queue
81
+	containers map[string]*container
82
+}
83
+
84
+// NewClient creates a new local executor for windows
85
+func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b Backend) (Client, error) {
86
+	c := &client{
87
+		stateDir:   stateDir,
88
+		backend:    b,
89
+		logger:     logrus.WithField("module", "libcontainerd").WithField("module", "libcontainerd").WithField("namespace", ns),
90
+		containers: make(map[string]*container),
91
+	}
92
+
93
+	return c, nil
94
+}
95
+
74 96
 func (c *client) Version(ctx context.Context) (containerd.Version, error) {
75 97
 	return containerd.Version{}, errors.New("not implemented on Windows")
76 98
 }
77 99
deleted file mode 100644
... ...
@@ -1,349 +0,0 @@
1
-// +build !windows
2
-
3
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
4
-
5
-import (
6
-	"context"
7
-	"fmt"
8
-	"io"
9
-	"io/ioutil"
10
-	"os"
11
-	"os/exec"
12
-	"path/filepath"
13
-	"strconv"
14
-	"strings"
15
-	"sync"
16
-	"syscall"
17
-	"time"
18
-
19
-	"github.com/BurntSushi/toml"
20
-	"github.com/containerd/containerd"
21
-	"github.com/containerd/containerd/services/server"
22
-	"github.com/docker/docker/pkg/system"
23
-	"github.com/pkg/errors"
24
-	"github.com/sirupsen/logrus"
25
-)
26
-
27
-const (
28
-	maxConnectionRetryCount = 3
29
-	healthCheckTimeout      = 3 * time.Second
30
-	shutdownTimeout         = 15 * time.Second
31
-	configFile              = "containerd.toml"
32
-	binaryName              = "docker-containerd"
33
-	pidFile                 = "docker-containerd.pid"
34
-)
35
-
36
-type pluginConfigs struct {
37
-	Plugins map[string]interface{} `toml:"plugins"`
38
-}
39
-
40
-type remote struct {
41
-	sync.RWMutex
42
-	server.Config
43
-
44
-	daemonPid int
45
-	logger    *logrus.Entry
46
-
47
-	daemonWaitCh    chan struct{}
48
-	clients         []*client
49
-	shutdownContext context.Context
50
-	shutdownCancel  context.CancelFunc
51
-	shutdown        bool
52
-
53
-	// Options
54
-	startDaemon bool
55
-	rootDir     string
56
-	stateDir    string
57
-	snapshotter string
58
-	pluginConfs pluginConfigs
59
-}
60
-
61
-// New creates a fresh instance of libcontainerd remote.
62
-func New(rootDir, stateDir string, options ...RemoteOption) (rem Remote, err error) {
63
-	defer func() {
64
-		if err != nil {
65
-			err = errors.Wrap(err, "Failed to connect to containerd")
66
-		}
67
-	}()
68
-
69
-	r := &remote{
70
-		rootDir:  rootDir,
71
-		stateDir: stateDir,
72
-		Config: server.Config{
73
-			Root:  filepath.Join(rootDir, "daemon"),
74
-			State: filepath.Join(stateDir, "daemon"),
75
-		},
76
-		pluginConfs: pluginConfigs{make(map[string]interface{})},
77
-		daemonPid:   -1,
78
-		logger:      logrus.WithField("module", "libcontainerd"),
79
-	}
80
-	r.shutdownContext, r.shutdownCancel = context.WithCancel(context.Background())
81
-
82
-	rem = r
83
-	for _, option := range options {
84
-		if err = option.Apply(r); err != nil {
85
-			return
86
-		}
87
-	}
88
-	r.setDefaults()
89
-
90
-	if err = system.MkdirAll(stateDir, 0700, ""); err != nil {
91
-		return
92
-	}
93
-
94
-	if r.startDaemon {
95
-		os.Remove(r.GRPC.Address)
96
-		if err = r.startContainerd(); err != nil {
97
-			return
98
-		}
99
-		defer func() {
100
-			if err != nil {
101
-				r.Cleanup()
102
-			}
103
-		}()
104
-	}
105
-
106
-	// This connection is just used to monitor the connection
107
-	client, err := containerd.New(r.GRPC.Address)
108
-	if err != nil {
109
-		return
110
-	}
111
-	if _, err := client.Version(context.Background()); err != nil {
112
-		system.KillProcess(r.daemonPid)
113
-		return nil, errors.Wrapf(err, "unable to get containerd version")
114
-	}
115
-
116
-	go r.monitorConnection(client)
117
-
118
-	return r, nil
119
-}
120
-
121
-func (r *remote) NewClient(ns string, b Backend) (Client, error) {
122
-	c := &client{
123
-		stateDir:   r.stateDir,
124
-		logger:     r.logger.WithField("namespace", ns),
125
-		namespace:  ns,
126
-		backend:    b,
127
-		containers: make(map[string]*container),
128
-	}
129
-
130
-	rclient, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(ns))
131
-	if err != nil {
132
-		return nil, err
133
-	}
134
-	c.remote = rclient
135
-
136
-	go c.processEventStream(r.shutdownContext)
137
-
138
-	r.Lock()
139
-	r.clients = append(r.clients, c)
140
-	r.Unlock()
141
-	return c, nil
142
-}
143
-
144
-func (r *remote) Cleanup() {
145
-	if r.daemonPid != -1 {
146
-		r.shutdownCancel()
147
-		r.stopDaemon()
148
-	}
149
-
150
-	// cleanup some files
151
-	os.Remove(filepath.Join(r.stateDir, pidFile))
152
-
153
-	r.platformCleanup()
154
-}
155
-
156
-func (r *remote) getContainerdPid() (int, error) {
157
-	pidFile := filepath.Join(r.stateDir, pidFile)
158
-	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
159
-	if err != nil {
160
-		if os.IsNotExist(err) {
161
-			return -1, nil
162
-		}
163
-		return -1, err
164
-	}
165
-	defer f.Close()
166
-
167
-	b := make([]byte, 8)
168
-	n, err := f.Read(b)
169
-	if err != nil && err != io.EOF {
170
-		return -1, err
171
-	}
172
-
173
-	if n > 0 {
174
-		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
175
-		if err != nil {
176
-			return -1, err
177
-		}
178
-		if system.IsProcessAlive(int(pid)) {
179
-			return int(pid), nil
180
-		}
181
-	}
182
-
183
-	return -1, nil
184
-}
185
-
186
-func (r *remote) getContainerdConfig() (string, error) {
187
-	path := filepath.Join(r.stateDir, configFile)
188
-	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
189
-	if err != nil {
190
-		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
191
-	}
192
-	defer f.Close()
193
-
194
-	enc := toml.NewEncoder(f)
195
-	if err = enc.Encode(r.Config); err != nil {
196
-		return "", errors.Wrapf(err, "failed to encode general config")
197
-	}
198
-	if err = enc.Encode(r.pluginConfs); err != nil {
199
-		return "", errors.Wrapf(err, "failed to encode plugin configs")
200
-	}
201
-
202
-	return path, nil
203
-}
204
-
205
-func (r *remote) startContainerd() error {
206
-	pid, err := r.getContainerdPid()
207
-	if err != nil {
208
-		return err
209
-	}
210
-
211
-	if pid != -1 {
212
-		r.daemonPid = pid
213
-		logrus.WithField("pid", pid).
214
-			Infof("libcontainerd: %s is still running", binaryName)
215
-		return nil
216
-	}
217
-
218
-	configFile, err := r.getContainerdConfig()
219
-	if err != nil {
220
-		return err
221
-	}
222
-
223
-	args := []string{"--config", configFile}
224
-
225
-	if r.Debug.Level != "" {
226
-		args = append(args, "--log-level", r.Debug.Level)
227
-	}
228
-
229
-	cmd := exec.Command(binaryName, args...)
230
-	// redirect containerd logs to docker logs
231
-	cmd.Stdout = os.Stdout
232
-	cmd.Stderr = os.Stderr
233
-	cmd.SysProcAttr = containerdSysProcAttr()
234
-	// clear the NOTIFY_SOCKET from the env when starting containerd
235
-	cmd.Env = nil
236
-	for _, e := range os.Environ() {
237
-		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
238
-			cmd.Env = append(cmd.Env, e)
239
-		}
240
-	}
241
-	if err := cmd.Start(); err != nil {
242
-		return err
243
-	}
244
-
245
-	r.daemonWaitCh = make(chan struct{})
246
-	go func() {
247
-		// Reap our child when needed
248
-		if err := cmd.Wait(); err != nil {
249
-			r.logger.WithError(err).Errorf("containerd did not exit successfully")
250
-		}
251
-		close(r.daemonWaitCh)
252
-	}()
253
-
254
-	r.daemonPid = cmd.Process.Pid
255
-
256
-	err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
257
-	if err != nil {
258
-		system.KillProcess(r.daemonPid)
259
-		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
260
-	}
261
-
262
-	logrus.WithField("pid", r.daemonPid).
263
-		Infof("libcontainerd: started new %s process", binaryName)
264
-
265
-	return nil
266
-}
267
-
268
-func (r *remote) monitorConnection(monitor *containerd.Client) {
269
-	var transientFailureCount = 0
270
-
271
-	for {
272
-		select {
273
-		case <-r.shutdownContext.Done():
274
-			r.logger.Info("stopping healthcheck following graceful shutdown")
275
-			monitor.Close()
276
-			return
277
-		case <-time.After(500 * time.Millisecond):
278
-		}
279
-
280
-		ctx, cancel := context.WithTimeout(r.shutdownContext, healthCheckTimeout)
281
-		_, err := monitor.IsServing(ctx)
282
-		cancel()
283
-		if err == nil {
284
-			transientFailureCount = 0
285
-			continue
286
-		}
287
-
288
-		select {
289
-		case <-r.shutdownContext.Done():
290
-			r.logger.Info("stopping healthcheck following graceful shutdown")
291
-			monitor.Close()
292
-			return
293
-		default:
294
-		}
295
-
296
-		r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
297
-
298
-		if r.daemonPid == -1 {
299
-			continue
300
-		}
301
-
302
-		transientFailureCount++
303
-		if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
304
-			continue
305
-		}
306
-
307
-		transientFailureCount = 0
308
-		if system.IsProcessAlive(r.daemonPid) {
309
-			r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
310
-			// Try to get a stack trace
311
-			syscall.Kill(r.daemonPid, syscall.SIGUSR1)
312
-			<-time.After(100 * time.Millisecond)
313
-			system.KillProcess(r.daemonPid)
314
-		}
315
-		if r.daemonWaitCh != nil {
316
-			<-r.daemonWaitCh
317
-		}
318
-
319
-		os.Remove(r.GRPC.Address)
320
-		if err := r.startContainerd(); err != nil {
321
-			r.logger.WithError(err).Error("failed restarting containerd")
322
-			continue
323
-		}
324
-
325
-		if err := monitor.Reconnect(); err != nil {
326
-			r.logger.WithError(err).Error("failed connect to containerd")
327
-			continue
328
-		}
329
-
330
-		var wg sync.WaitGroup
331
-
332
-		for _, c := range r.clients {
333
-			wg.Add(1)
334
-
335
-			go func(c *client) {
336
-				defer wg.Done()
337
-				c.logger.WithField("namespace", c.namespace).Debug("creating new containerd remote client")
338
-				if err := c.reconnect(); err != nil {
339
-					r.logger.WithError(err).Error("failed to connect to containerd")
340
-					// TODO: Better way to handle this?
341
-					// This *shouldn't* happen, but this could wind up where the daemon
342
-					// is not able to communicate with an eventually up containerd
343
-				}
344
-			}(c)
345
-
346
-			wg.Wait()
347
-		}
348
-	}
349
-}
350 1
deleted file mode 100644
... ...
@@ -1,66 +0,0 @@
1
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
2
-
3
-import (
4
-	"os"
5
-	"path/filepath"
6
-	"syscall"
7
-	"time"
8
-
9
-	"github.com/containerd/containerd/defaults"
10
-	"github.com/docker/docker/pkg/system"
11
-)
12
-
13
-const (
14
-	sockFile      = "docker-containerd.sock"
15
-	debugSockFile = "docker-containerd-debug.sock"
16
-)
17
-
18
-func (r *remote) setDefaults() {
19
-	if r.GRPC.Address == "" {
20
-		r.GRPC.Address = filepath.Join(r.stateDir, sockFile)
21
-	}
22
-	if r.GRPC.MaxRecvMsgSize == 0 {
23
-		r.GRPC.MaxRecvMsgSize = defaults.DefaultMaxRecvMsgSize
24
-	}
25
-	if r.GRPC.MaxSendMsgSize == 0 {
26
-		r.GRPC.MaxSendMsgSize = defaults.DefaultMaxSendMsgSize
27
-	}
28
-	if r.Debug.Address == "" {
29
-		r.Debug.Address = filepath.Join(r.stateDir, debugSockFile)
30
-	}
31
-	if r.OOMScore == 0 {
32
-		r.OOMScore = -999
33
-	}
34
-
35
-	for key, conf := range r.pluginConfs.Plugins {
36
-		if conf == nil {
37
-			r.DisabledPlugins = append(r.DisabledPlugins, key)
38
-			delete(r.pluginConfs.Plugins, key)
39
-		}
40
-	}
41
-
42
-	if r.snapshotter == "" {
43
-		r.snapshotter = "overlay"
44
-	}
45
-}
46
-
47
-func (r *remote) stopDaemon() {
48
-	// Ask the daemon to quit
49
-	syscall.Kill(r.daemonPid, syscall.SIGTERM)
50
-	// Wait up to 15secs for it to stop
51
-	for i := time.Duration(0); i < shutdownTimeout; i += time.Second {
52
-		if !system.IsProcessAlive(r.daemonPid) {
53
-			break
54
-		}
55
-		time.Sleep(time.Second)
56
-	}
57
-
58
-	if system.IsProcessAlive(r.daemonPid) {
59
-		r.logger.WithField("pid", r.daemonPid).Warn("daemon didn't stop within 15 secs, killing it")
60
-		syscall.Kill(r.daemonPid, syscall.SIGKILL)
61
-	}
62
-}
63
-
64
-func (r *remote) platformCleanup() {
65
-	os.Remove(filepath.Join(r.stateDir, sockFile))
66
-}
67 1
deleted file mode 100644
... ...
@@ -1,142 +0,0 @@
1
-// +build !windows
2
-
3
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
4
-
5
-import "fmt"
6
-
7
-// WithRemoteAddr sets the external containerd socket to connect to.
8
-func WithRemoteAddr(addr string) RemoteOption {
9
-	return rpcAddr(addr)
10
-}
11
-
12
-type rpcAddr string
13
-
14
-func (a rpcAddr) Apply(r Remote) error {
15
-	if remote, ok := r.(*remote); ok {
16
-		remote.GRPC.Address = string(a)
17
-		return nil
18
-	}
19
-	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
20
-}
21
-
22
-// WithRemoteAddrUser sets the uid and gid to create the RPC address with
23
-func WithRemoteAddrUser(uid, gid int) RemoteOption {
24
-	return rpcUser{uid, gid}
25
-}
26
-
27
-type rpcUser struct {
28
-	uid int
29
-	gid int
30
-}
31
-
32
-func (u rpcUser) Apply(r Remote) error {
33
-	if remote, ok := r.(*remote); ok {
34
-		remote.GRPC.UID = u.uid
35
-		remote.GRPC.GID = u.gid
36
-		return nil
37
-	}
38
-	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
39
-}
40
-
41
-// WithStartDaemon defines if libcontainerd should also run containerd daemon.
42
-func WithStartDaemon(start bool) RemoteOption {
43
-	return startDaemon(start)
44
-}
45
-
46
-type startDaemon bool
47
-
48
-func (s startDaemon) Apply(r Remote) error {
49
-	if remote, ok := r.(*remote); ok {
50
-		remote.startDaemon = bool(s)
51
-		return nil
52
-	}
53
-	return fmt.Errorf("WithStartDaemon option not supported for this remote")
54
-}
55
-
56
-// WithLogLevel defines which log level to starts containerd with.
57
-// This only makes sense if WithStartDaemon() was set to true.
58
-func WithLogLevel(lvl string) RemoteOption {
59
-	return logLevel(lvl)
60
-}
61
-
62
-type logLevel string
63
-
64
-func (l logLevel) Apply(r Remote) error {
65
-	if remote, ok := r.(*remote); ok {
66
-		remote.Debug.Level = string(l)
67
-		return nil
68
-	}
69
-	return fmt.Errorf("WithDebugLog option not supported for this remote")
70
-}
71
-
72
-// WithDebugAddress defines at which location the debug GRPC connection
73
-// should be made
74
-func WithDebugAddress(addr string) RemoteOption {
75
-	return debugAddress(addr)
76
-}
77
-
78
-type debugAddress string
79
-
80
-func (d debugAddress) Apply(r Remote) error {
81
-	if remote, ok := r.(*remote); ok {
82
-		remote.Debug.Address = string(d)
83
-		return nil
84
-	}
85
-	return fmt.Errorf("WithDebugAddress option not supported for this remote")
86
-}
87
-
88
-// WithMetricsAddress defines at which location the debug GRPC connection
89
-// should be made
90
-func WithMetricsAddress(addr string) RemoteOption {
91
-	return metricsAddress(addr)
92
-}
93
-
94
-type metricsAddress string
95
-
96
-func (m metricsAddress) Apply(r Remote) error {
97
-	if remote, ok := r.(*remote); ok {
98
-		remote.Metrics.Address = string(m)
99
-		return nil
100
-	}
101
-	return fmt.Errorf("WithMetricsAddress option not supported for this remote")
102
-}
103
-
104
-// WithSnapshotter defines snapshotter driver should be used
105
-func WithSnapshotter(name string) RemoteOption {
106
-	return snapshotter(name)
107
-}
108
-
109
-type snapshotter string
110
-
111
-func (s snapshotter) Apply(r Remote) error {
112
-	if remote, ok := r.(*remote); ok {
113
-		remote.snapshotter = string(s)
114
-		return nil
115
-	}
116
-	return fmt.Errorf("WithSnapshotter option not supported for this remote")
117
-}
118
-
119
-// WithPlugin allow configuring a containerd plugin
120
-// configuration values passed needs to be quoted if quotes are needed in
121
-// the toml format.
122
-// Setting the config to nil will disable a built-in plugin
123
-func WithPlugin(name string, conf interface{}) RemoteOption {
124
-	return pluginConf{
125
-		name: name,
126
-		conf: conf,
127
-	}
128
-}
129
-
130
-type pluginConf struct {
131
-	// Name is the name of the plugin
132
-	name string
133
-	conf interface{}
134
-}
135
-
136
-func (p pluginConf) Apply(r Remote) error {
137
-	if remote, ok := r.(*remote); ok {
138
-		remote.pluginConfs.Plugins[p.name] = p.conf
139
-		return nil
140
-	}
141
-	return fmt.Errorf("WithPlugin option not supported for this remote")
142
-}
143 1
deleted file mode 100644
... ...
@@ -1,18 +0,0 @@
1
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
2
-
3
-import "fmt"
4
-
5
-// WithOOMScore defines the oom_score_adj to set for the containerd process.
6
-func WithOOMScore(score int) RemoteOption {
7
-	return oomScore(score)
8
-}
9
-
10
-type oomScore int
11
-
12
-func (o oomScore) Apply(r Remote) error {
13
-	if remote, ok := r.(*remote); ok {
14
-		remote.OOMScore = int(o)
15
-		return nil
16
-	}
17
-	return fmt.Errorf("WithOOMScore option not supported for this remote")
18
-}
19 1
deleted file mode 100644
... ...
@@ -1,47 +0,0 @@
1
-// +build remote_daemon
2
-
3
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
4
-
5
-import (
6
-	"os"
7
-)
8
-
9
-const (
10
-	grpcPipeName  = `\\.\pipe\docker-containerd-containerd`
11
-	debugPipeName = `\\.\pipe\docker-containerd-debug`
12
-)
13
-
14
-func (r *remote) setDefaults() {
15
-	if r.GRPC.Address == "" {
16
-		r.GRPC.Address = grpcPipeName
17
-	}
18
-	if r.Debug.Address == "" {
19
-		r.Debug.Address = debugPipeName
20
-	}
21
-	if r.snapshotter == "" {
22
-		r.snapshotter = "naive" // TODO(mlaventure): switch to "windows" once implemented
23
-	}
24
-}
25
-
26
-func (r *remote) stopDaemon() {
27
-	p, err := os.FindProcess(r.daemonPid)
28
-	if err != nil {
29
-		r.logger.WithField("pid", r.daemonPid).Warn("could not find daemon process")
30
-		return
31
-	}
32
-
33
-	if err = p.Kill(); err != nil {
34
-		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("could not kill daemon process")
35
-		return
36
-	}
37
-
38
-	_, err = p.Wait()
39
-	if err != nil {
40
-		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("wait for daemon process")
41
-		return
42
-	}
43
-}
44
-
45
-func (r *remote) platformCleanup() {
46
-	// Nothing to do
47
-}
48 1
deleted file mode 100644
... ...
@@ -1,59 +0,0 @@
1
-// +build windows
2
-
3
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
4
-
5
-import (
6
-	"sync"
7
-
8
-	"github.com/sirupsen/logrus"
9
-)
10
-
11
-type remote struct {
12
-	sync.RWMutex
13
-
14
-	logger  *logrus.Entry
15
-	clients []*client
16
-
17
-	// Options
18
-	rootDir  string
19
-	stateDir string
20
-}
21
-
22
-// New creates a fresh instance of libcontainerd remote.
23
-func New(rootDir, stateDir string, options ...RemoteOption) (Remote, error) {
24
-	return &remote{
25
-		logger:   logrus.WithField("module", "libcontainerd"),
26
-		rootDir:  rootDir,
27
-		stateDir: stateDir,
28
-	}, nil
29
-}
30
-
31
-type client struct {
32
-	sync.Mutex
33
-
34
-	rootDir    string
35
-	stateDir   string
36
-	backend    Backend
37
-	logger     *logrus.Entry
38
-	eventQ     queue
39
-	containers map[string]*container
40
-}
41
-
42
-func (r *remote) NewClient(ns string, b Backend) (Client, error) {
43
-	c := &client{
44
-		rootDir:    r.rootDir,
45
-		stateDir:   r.stateDir,
46
-		backend:    b,
47
-		logger:     r.logger.WithField("namespace", ns),
48
-		containers: make(map[string]*container),
49
-	}
50
-	r.Lock()
51
-	r.clients = append(r.clients, c)
52
-	r.Unlock()
53
-
54
-	return c, nil
55
-}
56
-
57
-func (r *remote) Cleanup() {
58
-	// Nothing to do
59
-}
60 1
new file mode 100644
... ...
@@ -0,0 +1,311 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+import (
3
+	"context"
4
+	"fmt"
5
+	"io"
6
+	"io/ioutil"
7
+	"os"
8
+	"os/exec"
9
+	"path/filepath"
10
+	"strconv"
11
+	"strings"
12
+	"sync"
13
+	"time"
14
+
15
+	"github.com/BurntSushi/toml"
16
+	"github.com/containerd/containerd"
17
+	"github.com/containerd/containerd/services/server"
18
+	"github.com/docker/docker/pkg/system"
19
+	"github.com/pkg/errors"
20
+	"github.com/sirupsen/logrus"
21
+)
22
+
23
+const (
24
+	maxConnectionRetryCount = 3
25
+	healthCheckTimeout      = 3 * time.Second
26
+	shutdownTimeout         = 15 * time.Second
27
+	startupTimeout          = 15 * time.Second
28
+	configFile              = "containerd.toml"
29
+	binaryName              = "docker-containerd"
30
+	pidFile                 = "docker-containerd.pid"
31
+)
32
+
33
+type pluginConfigs struct {
34
+	Plugins map[string]interface{} `toml:"plugins"`
35
+}
36
+
37
+type remote struct {
38
+	sync.RWMutex
39
+	server.Config
40
+
41
+	daemonPid int
42
+	logger    *logrus.Entry
43
+
44
+	daemonWaitCh  chan struct{}
45
+	daemonStartCh chan struct{}
46
+	daemonStopCh  chan struct{}
47
+
48
+	rootDir     string
49
+	stateDir    string
50
+	pluginConfs pluginConfigs
51
+}
52
+
53
+// Daemon represents a running containerd daemon
54
+type Daemon interface {
55
+	WaitTimeout(time.Duration) error
56
+	Address() string
57
+}
58
+
59
+// DaemonOpt allows configuring parameters of the containerd daemon
60
+type DaemonOpt func(c *remote) error
61
+
62
+// Start starts a containerd daemon and monitors it
63
+func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Daemon, error) {
64
+	r := &remote{
65
+		rootDir:  rootDir,
66
+		stateDir: stateDir,
67
+		Config: server.Config{
68
+			Root:  filepath.Join(rootDir, "daemon"),
69
+			State: filepath.Join(stateDir, "daemon"),
70
+		},
71
+		pluginConfs:   pluginConfigs{make(map[string]interface{})},
72
+		daemonPid:     -1,
73
+		logger:        logrus.WithField("module", "libcontainerd"),
74
+		daemonStartCh: make(chan struct{}),
75
+		daemonStopCh:  make(chan struct{}),
76
+	}
77
+
78
+	for _, opt := range opts {
79
+		if err := opt(r); err != nil {
80
+			return nil, err
81
+		}
82
+	}
83
+	r.setDefaults()
84
+
85
+	if err := system.MkdirAll(stateDir, 0700, ""); err != nil {
86
+		return nil, err
87
+	}
88
+
89
+	go r.monitorDaemon(ctx)
90
+
91
+	select {
92
+	case <-time.After(startupTimeout):
93
+		return nil, errors.New("timeout waiting for containerd to start")
94
+	case <-r.daemonStartCh:
95
+	}
96
+
97
+	return r, nil
98
+}
99
+func (r *remote) WaitTimeout(d time.Duration) error {
100
+	select {
101
+	case <-time.After(d):
102
+		return errors.New("timeout waiting for containerd to stop")
103
+	case <-r.daemonStopCh:
104
+	}
105
+
106
+	return nil
107
+}
108
+
109
+func (r *remote) Address() string {
110
+	return r.GRPC.Address
111
+}
112
+func (r *remote) getContainerdPid() (int, error) {
113
+	pidFile := filepath.Join(r.stateDir, pidFile)
114
+	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
115
+	if err != nil {
116
+		if os.IsNotExist(err) {
117
+			return -1, nil
118
+		}
119
+		return -1, err
120
+	}
121
+	defer f.Close()
122
+
123
+	b := make([]byte, 8)
124
+	n, err := f.Read(b)
125
+	if err != nil && err != io.EOF {
126
+		return -1, err
127
+	}
128
+
129
+	if n > 0 {
130
+		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
131
+		if err != nil {
132
+			return -1, err
133
+		}
134
+		if system.IsProcessAlive(int(pid)) {
135
+			return int(pid), nil
136
+		}
137
+	}
138
+
139
+	return -1, nil
140
+}
141
+
142
+func (r *remote) getContainerdConfig() (string, error) {
143
+	path := filepath.Join(r.stateDir, configFile)
144
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
145
+	if err != nil {
146
+		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
147
+	}
148
+	defer f.Close()
149
+
150
+	enc := toml.NewEncoder(f)
151
+	if err = enc.Encode(r.Config); err != nil {
152
+		return "", errors.Wrapf(err, "failed to encode general config")
153
+	}
154
+	if err = enc.Encode(r.pluginConfs); err != nil {
155
+		return "", errors.Wrapf(err, "failed to encode plugin configs")
156
+	}
157
+
158
+	return path, nil
159
+}
160
+
161
+func (r *remote) startContainerd() error {
162
+	pid, err := r.getContainerdPid()
163
+	if err != nil {
164
+		return err
165
+	}
166
+
167
+	if pid != -1 {
168
+		r.daemonPid = pid
169
+		logrus.WithField("pid", pid).
170
+			Infof("libcontainerd: %s is still running", binaryName)
171
+		return nil
172
+	}
173
+
174
+	configFile, err := r.getContainerdConfig()
175
+	if err != nil {
176
+		return err
177
+	}
178
+
179
+	args := []string{"--config", configFile}
180
+
181
+	if r.Debug.Level != "" {
182
+		args = append(args, "--log-level", r.Debug.Level)
183
+	}
184
+
185
+	cmd := exec.Command(binaryName, args...)
186
+	// redirect containerd logs to docker logs
187
+	cmd.Stdout = os.Stdout
188
+	cmd.Stderr = os.Stderr
189
+	cmd.SysProcAttr = containerdSysProcAttr()
190
+	// clear the NOTIFY_SOCKET from the env when starting containerd
191
+	cmd.Env = nil
192
+	for _, e := range os.Environ() {
193
+		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
194
+			cmd.Env = append(cmd.Env, e)
195
+		}
196
+	}
197
+	if err := cmd.Start(); err != nil {
198
+		return err
199
+	}
200
+
201
+	r.daemonWaitCh = make(chan struct{})
202
+	go func() {
203
+		// Reap our child when needed
204
+		if err := cmd.Wait(); err != nil {
205
+			r.logger.WithError(err).Errorf("containerd did not exit successfully")
206
+		}
207
+		close(r.daemonWaitCh)
208
+	}()
209
+
210
+	r.daemonPid = cmd.Process.Pid
211
+
212
+	err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
213
+	if err != nil {
214
+		system.KillProcess(r.daemonPid)
215
+		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
216
+	}
217
+
218
+	logrus.WithField("pid", r.daemonPid).
219
+		Infof("libcontainerd: started new %s process", binaryName)
220
+
221
+	return nil
222
+}
223
+
224
+func (r *remote) monitorDaemon(ctx context.Context) {
225
+	var (
226
+		transientFailureCount = 0
227
+		client                *containerd.Client
228
+		err                   error
229
+		delay                 <-chan time.Time
230
+		started               bool
231
+	)
232
+
233
+	defer func() {
234
+		if r.daemonPid != -1 {
235
+			r.stopDaemon()
236
+		}
237
+
238
+		// cleanup some files
239
+		os.Remove(filepath.Join(r.stateDir, pidFile))
240
+
241
+		r.platformCleanup()
242
+
243
+		close(r.daemonStopCh)
244
+	}()
245
+
246
+	for {
247
+		select {
248
+		case <-ctx.Done():
249
+			r.logger.Info("stopping healthcheck following graceful shutdown")
250
+			if client != nil {
251
+				client.Close()
252
+			}
253
+			return
254
+		case <-delay:
255
+		default:
256
+		}
257
+
258
+		if r.daemonPid == -1 {
259
+			if r.daemonWaitCh != nil {
260
+				<-r.daemonWaitCh
261
+			}
262
+
263
+			os.RemoveAll(r.GRPC.Address)
264
+			if err := r.startContainerd(); err != nil {
265
+				r.logger.WithError(err).Error("failed starting containerd")
266
+				delay = time.After(50 * time.Millisecond)
267
+				continue
268
+			}
269
+
270
+			client, err = containerd.New(r.GRPC.Address)
271
+			if err != nil {
272
+				r.logger.WithError(err).Error("failed connecting to containerd")
273
+				delay = time.After(100 * time.Millisecond)
274
+				continue
275
+			}
276
+		}
277
+
278
+		tctx, cancel := context.WithTimeout(ctx, healthCheckTimeout)
279
+		_, err := client.IsServing(tctx)
280
+		cancel()
281
+		if err == nil {
282
+			if !started {
283
+				close(r.daemonStartCh)
284
+				started = true
285
+			}
286
+
287
+			transientFailureCount = 0
288
+			delay = time.After(500 * time.Millisecond)
289
+			continue
290
+		}
291
+
292
+		r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
293
+
294
+		transientFailureCount++
295
+		if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
296
+			delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond)
297
+			continue
298
+		}
299
+
300
+		if system.IsProcessAlive(r.daemonPid) {
301
+			r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
302
+			r.killDaemon()
303
+		}
304
+
305
+		client.Close()
306
+		r.daemonPid = -1
307
+		delay = nil
308
+		transientFailureCount = 0
309
+	}
310
+}
0 311
new file mode 100644
... ...
@@ -0,0 +1,69 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+import (
3
+	"os"
4
+	"path/filepath"
5
+	"syscall"
6
+	"time"
7
+
8
+	"github.com/containerd/containerd/defaults"
9
+	"github.com/docker/docker/pkg/system"
10
+)
11
+
12
+const (
13
+	sockFile      = "docker-containerd.sock"
14
+	debugSockFile = "docker-containerd-debug.sock"
15
+)
16
+
17
+func (r *remote) setDefaults() {
18
+	if r.GRPC.Address == "" {
19
+		r.GRPC.Address = filepath.Join(r.stateDir, sockFile)
20
+	}
21
+	if r.GRPC.MaxRecvMsgSize == 0 {
22
+		r.GRPC.MaxRecvMsgSize = defaults.DefaultMaxRecvMsgSize
23
+	}
24
+	if r.GRPC.MaxSendMsgSize == 0 {
25
+		r.GRPC.MaxSendMsgSize = defaults.DefaultMaxSendMsgSize
26
+	}
27
+	if r.Debug.Address == "" {
28
+		r.Debug.Address = filepath.Join(r.stateDir, debugSockFile)
29
+	}
30
+	if r.OOMScore == 0 {
31
+		r.OOMScore = -999
32
+	}
33
+
34
+	for key, conf := range r.pluginConfs.Plugins {
35
+		if conf == nil {
36
+			r.DisabledPlugins = append(r.DisabledPlugins, key)
37
+			delete(r.pluginConfs.Plugins, key)
38
+		}
39
+	}
40
+}
41
+
42
+func (r *remote) stopDaemon() {
43
+	// Ask the daemon to quit
44
+	syscall.Kill(r.daemonPid, syscall.SIGTERM)
45
+	// Wait up to 15secs for it to stop
46
+	for i := time.Duration(0); i < shutdownTimeout; i += time.Second {
47
+		if !system.IsProcessAlive(r.daemonPid) {
48
+			break
49
+		}
50
+		time.Sleep(time.Second)
51
+	}
52
+
53
+	if system.IsProcessAlive(r.daemonPid) {
54
+		r.logger.WithField("pid", r.daemonPid).Warn("daemon didn't stop within 15 secs, killing it")
55
+		syscall.Kill(r.daemonPid, syscall.SIGKILL)
56
+	}
57
+}
58
+
59
+func (r *remote) killDaemon() {
60
+	// Try to get a stack trace
61
+	syscall.Kill(r.daemonPid, syscall.SIGUSR1)
62
+	<-time.After(100 * time.Millisecond)
63
+	system.KillProcess(r.daemonPid)
64
+}
65
+
66
+func (r *remote) platformCleanup() {
67
+	os.Remove(filepath.Join(r.stateDir, sockFile))
68
+}
0 69
new file mode 100644
... ...
@@ -0,0 +1,55 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+// WithRemoteAddr sets the external containerd socket to connect to.
3
+func WithRemoteAddr(addr string) DaemonOpt {
4
+	return func(r *remote) error {
5
+		r.GRPC.Address = addr
6
+		return nil
7
+	}
8
+}
9
+
10
+// WithRemoteAddrUser sets the uid and gid to create the RPC address with
11
+func WithRemoteAddrUser(uid, gid int) DaemonOpt {
12
+	return func(r *remote) error {
13
+		r.GRPC.UID = uid
14
+		r.GRPC.GID = gid
15
+		return nil
16
+	}
17
+}
18
+
19
+// WithLogLevel defines which log level to start containerd with.
20
+// This only makes sense if WithStartDaemon() was set to true.
21
+func WithLogLevel(lvl string) DaemonOpt {
22
+	return func(r *remote) error {
23
+		r.Debug.Level = lvl
24
+		return nil
25
+	}
26
+}
27
+
28
+// WithDebugAddress defines at which location the debug GRPC connection
29
+// should be made
30
+func WithDebugAddress(addr string) DaemonOpt {
31
+	return func(r *remote) error {
32
+		r.Debug.Address = addr
33
+		return nil
34
+	}
35
+}
36
+
37
+// WithMetricsAddress defines at which location the metrics endpoint
38
+// should be served
39
+func WithMetricsAddress(addr string) DaemonOpt {
40
+	return func(r *remote) error {
41
+		r.Metrics.Address = addr
42
+		return nil
43
+	}
44
+}
45
+
46
+// WithPlugin allows configuring a containerd plugin
47
+// configuration values passed needs to be quoted if quotes are needed in
48
+// the toml format.
49
+func WithPlugin(name string, conf interface{}) DaemonOpt {
50
+	return func(r *remote) error {
51
+		r.pluginConfs.Plugins[name] = conf
52
+		return nil
53
+	}
54
+}
0 55
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+// WithOOMScore defines the oom_score_adj to set for the containerd process.
3
+func WithOOMScore(score int) DaemonOpt {
4
+	return func(r *remote) error {
5
+		r.OOMScore = score
6
+		return nil
7
+	}
8
+}
0 9
new file mode 100644
... ...
@@ -0,0 +1,48 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+import (
3
+	"os"
4
+
5
+	"github.com/docker/docker/pkg/system"
6
+)
7
+
8
+const (
9
+	grpcPipeName  = `\\.\pipe\docker-containerd-containerd`
10
+	debugPipeName = `\\.\pipe\docker-containerd-debug`
11
+)
12
+
13
+func (r *remote) setDefaults() {
14
+	if r.GRPC.Address == "" {
15
+		r.GRPC.Address = grpcPipeName
16
+	}
17
+	if r.Debug.Address == "" {
18
+		r.Debug.Address = debugPipeName
19
+	}
20
+}
21
+
22
+func (r *remote) stopDaemon() {
23
+	p, err := os.FindProcess(r.daemonPid)
24
+	if err != nil {
25
+		r.logger.WithField("pid", r.daemonPid).Warn("could not find daemon process")
26
+		return
27
+	}
28
+
29
+	if err = p.Kill(); err != nil {
30
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("could not kill daemon process")
31
+		return
32
+	}
33
+
34
+	_, err = p.Wait()
35
+	if err != nil {
36
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("wait for daemon process")
37
+		return
38
+	}
39
+}
40
+
41
+func (r *remote) killDaemon() {
42
+	system.KillProcess(r.daemonPid)
43
+}
44
+
45
+func (r *remote) platformCleanup() {
46
+	// Nothing to do
47
+}
0 48
new file mode 100644
... ...
@@ -0,0 +1,12 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+import "syscall"
3
+
4
+// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
5
+// containerd
6
+func containerdSysProcAttr() *syscall.SysProcAttr {
7
+	return &syscall.SysProcAttr{
8
+		Setsid:    true,
9
+		Pdeathsig: syscall.SIGKILL,
10
+	}
11
+}
0 12
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+package supervisor // import "github.com/docker/docker/libcontainerd/supervisor"
1
+
2
+import "syscall"
3
+
4
+// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
5
+// containerd
6
+func containerdSysProcAttr() *syscall.SysProcAttr {
7
+	return nil
8
+}
... ...
@@ -46,23 +46,6 @@ const (
46 46
 	StatusUnknown Status = "unknown"
47 47
 )
48 48
 
49
-// Remote on Linux defines the accesspoint to the containerd grpc API.
50
-// Remote on Windows is largely an unimplemented interface as there is
51
-// no remote containerd.
52
-type Remote interface {
53
-	// Client returns a new Client instance connected with given Backend.
54
-	NewClient(namespace string, backend Backend) (Client, error)
55
-	// Cleanup stops containerd if it was started by libcontainerd.
56
-	// Note this is not used on Windows as there is no remote containerd.
57
-	Cleanup()
58
-}
59
-
60
-// RemoteOption allows to configure parameters of remotes.
61
-// This is unused on Windows.
62
-type RemoteOption interface {
63
-	Apply(Remote) error
64
-}
65
-
66 49
 // EventInfo contains the event info
67 50
 type EventInfo struct {
68 51
 	ContainerID string
69 52
deleted file mode 100644
... ...
@@ -1,12 +0,0 @@
1
-package libcontainerd // import "github.com/docker/docker/libcontainerd"
2
-
3
-import "syscall"
4
-
5
-// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
6
-// containerd
7
-func containerdSysProcAttr() *syscall.SysProcAttr {
8
-	return &syscall.SysProcAttr{
9
-		Setsid:    true,
10
-		Pdeathsig: syscall.SIGKILL,
11
-	}
12
-}
... ...
@@ -3,8 +3,6 @@ package libcontainerd // import "github.com/docker/docker/libcontainerd"
3 3
 import (
4 4
 	"strings"
5 5
 
6
-	"syscall"
7
-
8 6
 	opengcs "github.com/Microsoft/opengcs/client"
9 7
 )
10 8
 
... ...
@@ -38,9 +36,3 @@ func (c *container) debugGCS() {
38 38
 	}
39 39
 	cfg.DebugGCS()
40 40
 }
41
-
42
-// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
43
-// containerd
44
-func containerdSysProcAttr() *syscall.SysProcAttr {
45
-	return nil
46
-}
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"sync"
8 8
 	"time"
9 9
 
10
+	"github.com/containerd/containerd"
10 11
 	"github.com/containerd/containerd/cio"
11 12
 	"github.com/containerd/containerd/runtime/linux/runctypes"
12 13
 	"github.com/docker/docker/errdefs"
... ...
@@ -16,8 +17,8 @@ import (
16 16
 	"github.com/sirupsen/logrus"
17 17
 )
18 18
 
19
-// pluginNamespace is the name used for the plugins namespace
20
-const pluginNamespace = "plugins.moby"
19
+// PluginNamespace is the name used for the plugins namespace
20
+const PluginNamespace = "plugins.moby"
21 21
 
22 22
 // ExitHandler represents an object that is called when the exit event is received from containerd
23 23
 type ExitHandler interface {
... ...
@@ -38,12 +39,13 @@ type Client interface {
38 38
 }
39 39
 
40 40
 // New creates a new containerd plugin executor
41
-func New(rootDir string, remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) {
41
+func New(ctx context.Context, rootDir string, cli *containerd.Client, exitHandler ExitHandler) (*Executor, error) {
42 42
 	e := &Executor{
43 43
 		rootDir:     rootDir,
44 44
 		exitHandler: exitHandler,
45 45
 	}
46
-	client, err := remote.NewClient(pluginNamespace, e)
46
+
47
+	client, err := libcontainerd.NewClient(ctx, cli, rootDir, PluginNamespace, e)
47 48
 	if err != nil {
48 49
 		return nil, errors.Wrap(err, "error creating containerd exec client")
49 50
 	}