
Update libcontainerd to use containerd 1.0

Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@gmail.com>

Kenfe-Mickael Laventure authored on 2017/09/22 22:52:41
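In short, the daemon now drives containerd 1.0 through its namespaced API: libcontainerd.New takes separate persistent-state and exec-state directories, and each consumer opens a client bound to a namespace. A rough sketch of the new call shape, mirroring the code changed below (error handling abbreviated; "backend" stands in for the object implementing the libcontainerd callbacks, which in the diff is the daemon itself):

	rOpts, err := cli.getRemoteOptions() // platform-specific libcontainerd.RemoteOption values
	if err != nil {
		return fmt.Errorf("Failed to generate containerd options: %s", err)
	}
	remote, err := libcontainerd.New(
		filepath.Join(cli.Config.Root, "containerd"),     // persistent state
		filepath.Join(cli.Config.ExecRoot, "containerd"), // runtime/exec state
		rOpts...)
	if err != nil {
		return err
	}
	// Later, in daemon.NewDaemon, a client scoped to the "moby" namespace is created:
	client, err := remote.NewClient(MainNamespace, backend)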
Showing 115 changed files
... ...
@@ -126,7 +126,7 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
126 126
 			return err
127 127
 		}
128 128
 		stdout.Write([]byte(err.Error() + "\r\n"))
129
-		logrus.Errorf("Error running exec in container: %v", err)
129
+		logrus.Errorf("Error running exec %s in container: %v", execName, err)
130 130
 	}
131 131
 	return nil
132 132
 }
... ...
@@ -102,7 +102,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i
102 102
 
103 103
 func logCancellationError(cancelErrCh chan error, msg string) {
104 104
 	if cancelErr := <-cancelErrCh; cancelErr != nil {
105
-		logrus.Debugf("Build cancelled (%v): ", cancelErr, msg)
105
+		logrus.Debugf("Build cancelled (%v): %s", cancelErr, msg)
106 106
 	}
107 107
 }
108 108
 
... ...
@@ -27,6 +27,8 @@ func installCommonConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
27 27
 	flags.Var(opts.NewNamedListOptsRef("exec-opts", &conf.ExecOptions, nil), "exec-opt", "Runtime execution options")
28 28
 	flags.StringVarP(&conf.Pidfile, "pidfile", "p", defaultPidFile, "Path to use for daemon PID file")
29 29
 	flags.StringVarP(&conf.Root, "graph", "g", defaultDataRoot, "Root of the Docker runtime")
30
+	flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files")
31
+	flags.StringVar(&conf.ContainerdAddr, "containerd", "", "containerd grpc address")
30 32
 
31 33
 	// "--graph" is "soft-deprecated" in favor of "data-root". This flag was added
32 34
 	// before Docker 1.0, so won't be removed, only hidden, to discourage its usage.
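Both flags are now registered on the common flag set, so they are available on every platform. For illustration (socket and directory paths are hypothetical), pointing the daemon at an already-running containerd instead of letting it start one looks like:

	dockerd --containerd /run/containerd/containerd.sock --exec-root /var/run/docker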
... ...
@@ -29,13 +29,11 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
29 29
 	flags.BoolVar(&conf.BridgeConfig.EnableIPForward, "ip-forward", true, "Enable net.ipv4.ip_forward")
30 30
 	flags.BoolVar(&conf.BridgeConfig.EnableIPMasq, "ip-masq", true, "Enable IP masquerading")
31 31
 	flags.BoolVar(&conf.BridgeConfig.EnableIPv6, "ipv6", false, "Enable IPv6 networking")
32
-	flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files")
33 32
 	flags.StringVar(&conf.BridgeConfig.FixedCIDRv6, "fixed-cidr-v6", "", "IPv6 subnet for fixed IPs")
34 33
 	flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic")
35 34
 	flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", "", "Path to the userland proxy binary")
36 35
 	flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers")
37 36
 	flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces")
38
-	flags.StringVar(&conf.ContainerdAddr, "containerd", "", "Path to containerd socket")
39 37
 	flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running")
40 38
 	flags.IntVar(&conf.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon")
41 39
 	flags.BoolVar(&conf.Init, "init", false, "Run an init in the container to forward signals and reap processes")
... ...
@@ -11,6 +11,7 @@ import (
11 11
 var (
12 12
 	defaultPidFile  string
13 13
 	defaultDataRoot = filepath.Join(os.Getenv("programdata"), "docker")
14
+	defaultExecRoot = filepath.Join(os.Getenv("programdata"), "docker", "exec-root")
14 15
 )
15 16
 
16 17
 // installConfigFlags adds flags to the pflag.FlagSet to configure the daemon
... ...
@@ -204,7 +204,11 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
204 204
 		return err
205 205
 	}
206 206
 
207
-	containerdRemote, err := libcontainerd.New(cli.getLibcontainerdRoot(), cli.getPlatformRemoteOptions()...)
207
+	rOpts, err := cli.getRemoteOptions()
208
+	if err != nil {
209
+		return fmt.Errorf("Failed to generate containerd options: %s", err)
210
+	}
211
+	containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), rOpts...)
208 212
 	if err != nil {
209 213
 		return err
210 214
 	}
... ...
@@ -560,6 +564,17 @@ func (cli *DaemonCli) initMiddlewares(s *apiserver.Server, cfg *apiserver.Config
560 560
 	return nil
561 561
 }
562 562
 
563
+func (cli *DaemonCli) getRemoteOptions() ([]libcontainerd.RemoteOption, error) {
564
+	opts := []libcontainerd.RemoteOption{}
565
+
566
+	pOpts, err := cli.getPlatformRemoteOptions()
567
+	if err != nil {
568
+		return nil, err
569
+	}
570
+	opts = append(opts, pOpts...)
571
+	return opts, nil
572
+}
573
+
563 574
 // validates that the plugins requested with the --authorization-plugin flag are valid AuthzDriver
564 575
 // plugins present on the host and available to the daemon
565 576
 func validateAuthzPlugins(requestedPlugins []string, pg plugingetter.PluginGetter) error {
... ...
@@ -11,5 +11,5 @@ func preNotifySystem() {
11 11
 // notifySystem sends a message to the host when the server is ready to be used
12 12
 func notifySystem() {
13 13
 	// Tell the init daemon we are accepting requests
14
-	go systemdDaemon.SdNotify("READY=1")
14
+	go systemdDaemon.SdNotify(false, "READY=1")
15 15
 }
... ...
@@ -41,20 +41,8 @@ func preNotifySystem() {
41 41
 func notifySystem() {
42 42
 }
43 43
 
44
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
45
-	opts := []libcontainerd.RemoteOption{}
46
-	if cli.Config.ContainerdAddr != "" {
47
-		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
48
-	} else {
49
-		opts = append(opts, libcontainerd.WithStartDaemon(true))
50
-	}
51
-	return opts
52
-}
53
-
54
-// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to
55
-// store their state.
56
-func (cli *DaemonCli) getLibcontainerdRoot() string {
57
-	return filepath.Join(cli.Config.ExecRoot, "libcontainerd")
44
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
45
+	return nil, nil
58 46
 }
59 47
 
60 48
 // getSwarmRunRoot gets the root directory for swarm to store runtime state
... ...
@@ -10,9 +10,11 @@ import (
10 10
 	"path/filepath"
11 11
 	"strconv"
12 12
 
13
+	"github.com/containerd/containerd/linux"
13 14
 	"github.com/docker/docker/cmd/dockerd/hack"
14 15
 	"github.com/docker/docker/daemon"
15 16
 	"github.com/docker/docker/libcontainerd"
17
+	"github.com/docker/docker/pkg/parsers/kernel"
16 18
 	"github.com/docker/libnetwork/portallocator"
17 19
 	"golang.org/x/sys/unix"
18 20
 )
... ...
@@ -35,42 +37,48 @@ func getDaemonConfDir(_ string) string {
35 35
 	return "/etc/docker"
36 36
 }
37 37
 
38
-// setupConfigReloadTrap configures the USR2 signal to reload the configuration.
39
-func (cli *DaemonCli) setupConfigReloadTrap() {
40
-	c := make(chan os.Signal, 1)
41
-	signal.Notify(c, unix.SIGHUP)
42
-	go func() {
43
-		for range c {
44
-			cli.reloadConfig()
45
-		}
46
-	}()
47
-}
38
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
39
+	// On older kernels, putting the containerd-shim in its own mount
40
+	// namespace effectively prevents operations such as unlink, rename and
41
+	// remove on mountpoints that were present at the time the shim
42
+	// namespace was created. This leads to the infamous EBUSY error when
43
+	// trying to remove shm mounts.
44
+	var noNewNS bool
45
+	if !kernel.CheckKernelVersion(3, 18, 0) {
46
+		noNewNS = true
47
+	}
48 48
 
49
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
50 49
 	opts := []libcontainerd.RemoteOption{
51
-		libcontainerd.WithDebugLog(cli.Config.Debug),
52 50
 		libcontainerd.WithOOMScore(cli.Config.OOMScoreAdjust),
51
+		libcontainerd.WithPlugin("linux", &linux.Config{
52
+			Shim:          daemon.DefaultShimBinary,
53
+			Runtime:       daemon.DefaultRuntimeBinary,
54
+			RuntimeRoot:   filepath.Join(cli.Config.Root, "runc"),
55
+			ShimDebug:     cli.Config.Debug,
56
+			ShimNoMountNS: noNewNS,
57
+		}),
58
+	}
59
+	if cli.Config.Debug {
60
+		opts = append(opts, libcontainerd.WithLogLevel("debug"))
53 61
 	}
54 62
 	if cli.Config.ContainerdAddr != "" {
55 63
 		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
56 64
 	} else {
57 65
 		opts = append(opts, libcontainerd.WithStartDaemon(true))
58 66
 	}
59
-	if daemon.UsingSystemd(cli.Config) {
60
-		args := []string{"--systemd-cgroup=true"}
61
-		opts = append(opts, libcontainerd.WithRuntimeArgs(args))
62
-	}
63
-	if cli.Config.LiveRestoreEnabled {
64
-		opts = append(opts, libcontainerd.WithLiveRestore(true))
65
-	}
66
-	opts = append(opts, libcontainerd.WithRuntimePath(daemon.DefaultRuntimeBinary))
67
-	return opts
67
+
68
+	return opts, nil
68 69
 }
69 70
 
70
-// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to
71
-// store their state.
72
-func (cli *DaemonCli) getLibcontainerdRoot() string {
73
-	return filepath.Join(cli.Config.ExecRoot, "libcontainerd")
71
+// setupConfigReloadTrap configures the USR2 signal to reload the configuration.
72
+func (cli *DaemonCli) setupConfigReloadTrap() {
73
+	c := make(chan os.Signal, 1)
74
+	signal.Notify(c, unix.SIGHUP)
75
+	go func() {
76
+		for range c {
77
+			cli.reloadConfig()
78
+		}
79
+	}()
74 80
 }
75 81
 
76 82
 // getSwarmRunRoot gets the root directory for swarm to store runtime state
... ...
@@ -48,6 +48,10 @@ func notifyShutdown(err error) {
48 48
 	}
49 49
 }
50 50
 
51
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
52
+	return nil, nil
53
+}
54
+
51 55
 // setupConfigReloadTrap configures a Win32 event to reload the configuration.
52 56
 func (cli *DaemonCli) setupConfigReloadTrap() {
53 57
 	go func() {
... ...
@@ -65,17 +69,6 @@ func (cli *DaemonCli) setupConfigReloadTrap() {
65 65
 	}()
66 66
 }
67 67
 
68
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
69
-	return nil
70
-}
71
-
72
-// getLibcontainerdRoot gets the root directory for libcontainerd to store its
73
-// state. The Windows libcontainerd implementation does not need to write a spec
74
-// or state to disk, so this is a no-op.
75
-func (cli *DaemonCli) getLibcontainerdRoot() string {
76
-	return ""
77
-}
78
-
79 68
 // getSwarmRunRoot gets the root directory for swarm to store runtime state
80 69
 // For example, the control socket
81 70
 func (cli *DaemonCli) getSwarmRunRoot() string {
... ...
@@ -15,6 +15,7 @@ import (
15 15
 	"syscall"
16 16
 	"time"
17 17
 
18
+	"github.com/containerd/containerd"
18 19
 	containertypes "github.com/docker/docker/api/types/container"
19 20
 	mounttypes "github.com/docker/docker/api/types/mount"
20 21
 	networktypes "github.com/docker/docker/api/types/network"
... ...
@@ -61,6 +62,18 @@ var (
61 61
 	errInvalidNetwork  = errors.New("invalid network settings while building port map info")
62 62
 )
63 63
 
64
+// ExitStatus provides exit reasons for a container.
65
+type ExitStatus struct {
66
+	// The exit code with which the container exited.
67
+	ExitCode int
68
+
69
+	// Whether the container encountered an OOM.
70
+	OOMKilled bool
71
+
72
+	// Time at which the container died
73
+	ExitedAt time.Time
74
+}
75
+
64 76
 // Container holds the structure defining a container object.
65 77
 type Container struct {
66 78
 	StreamConfig *stream.Config
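ExitStatus, now including ExitedAt, is what the daemon passes to State.SetStopped and SetRestarting. For example, the restore path in daemon/daemon.go (changed later in this diff) rebuilds it from the exit code and time that containerd reports when the dead task is deleted:

	c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})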
... ...
@@ -996,10 +1009,10 @@ func (container *Container) CloseStreams() error {
996 996
 }
997 997
 
998 998
 // InitializeStdio is called by libcontainerd to connect the stdio.
999
-func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error {
999
+func (container *Container) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) {
1000 1000
 	if err := container.startLogging(); err != nil {
1001 1001
 		container.Reset(false)
1002
-		return err
1002
+		return nil, err
1003 1003
 	}
1004 1004
 
1005 1005
 	container.StreamConfig.CopyToPipe(iop)
... ...
@@ -1012,7 +1025,7 @@ func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error {
1012 1012
 		}
1013 1013
 	}
1014 1014
 
1015
-	return nil
1015
+	return &cio{IO: iop, sc: container.StreamConfig}, nil
1016 1016
 }
1017 1017
 
1018 1018
 // SecretMountPath returns the path of the secret mount for the container
... ...
@@ -1069,3 +1082,21 @@ func (container *Container) CreateDaemonEnvironment(tty bool, linkedEnv []string
1069 1069
 	env = ReplaceOrAppendEnvValues(env, container.Config.Env)
1070 1070
 	return env
1071 1071
 }
1072
+
1073
+type cio struct {
1074
+	containerd.IO
1075
+
1076
+	sc *stream.Config
1077
+}
1078
+
1079
+func (i *cio) Close() error {
1080
+	i.IO.Close()
1081
+
1082
+	return i.sc.CloseStreams()
1083
+}
1084
+
1085
+func (i *cio) Wait() {
1086
+	i.sc.Wait()
1087
+
1088
+	i.IO.Wait()
1089
+}
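InitializeStdio now hands libcontainerd a containerd.IO implementation; the cio wrapper above ties the container's stream config to the lifetime of the underlying IO. A minimal sketch of the call sequence this implies on the libcontainerd side (assumed here, not shown in this diff):

	io, err := ctr.InitializeStdio(iop) // iop is the *libcontainerd.IOPipe for the task's stdio
	if err != nil {
		return err
	}
	// ... task runs ...
	io.Wait()  // waits for the stream copies, then the underlying IO
	io.Close() // closes the underlying IO, then the container's streams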
... ...
@@ -24,15 +24,6 @@ const (
24 24
 	containerSecretMountPath = "/run/secrets"
25 25
 )
26 26
 
27
-// ExitStatus provides exit reasons for a container.
28
-type ExitStatus struct {
29
-	// The exit code with which the container exited.
30
-	ExitCode int
31
-
32
-	// Whether the container encountered an OOM.
33
-	OOMKilled bool
34
-}
35
-
36 27
 // TrySetNetworkMount attempts to set the network mounts given a provided destination and
37 28
 // the path to use for it; return true if the given destination was a network mount file
38 29
 func (container *Container) TrySetNetworkMount(destination string, path string) bool {
... ...
@@ -18,12 +18,6 @@ const (
18 18
 	containerInternalConfigsDirPath  = `C:\ProgramData\Docker\internal\configs`
19 19
 )
20 20
 
21
-// ExitStatus provides exit reasons for a container.
22
-type ExitStatus struct {
23
-	// The exit code with which the container exited.
24
-	ExitCode int
25
-}
26
-
27 21
 // UnmountIpcMount unmounts Ipc related mounts.
28 22
 // This is a NOOP on windows.
29 23
 func (container *Container) UnmountIpcMount(unmount func(pth string) error) error {
... ...
@@ -276,6 +276,7 @@ func (s *State) SetExitCode(ec int) {
276 276
 // SetRunning sets the state of the container to "running".
277 277
 func (s *State) SetRunning(pid int, initial bool) {
278 278
 	s.ErrorMsg = ""
279
+	s.Paused = false
279 280
 	s.Running = true
280 281
 	s.Restarting = false
281 282
 	if initial {
... ...
@@ -294,9 +295,14 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
294 294
 	s.Paused = false
295 295
 	s.Restarting = false
296 296
 	s.Pid = 0
297
-	s.FinishedAt = time.Now().UTC()
298
-	s.setFromExitStatus(exitStatus)
299
-	close(s.waitStop) // Fire waiters for stop
297
+	if exitStatus.ExitedAt.IsZero() {
298
+		s.FinishedAt = time.Now().UTC()
299
+	} else {
300
+		s.FinishedAt = exitStatus.ExitedAt
301
+	}
302
+	s.ExitCodeValue = exitStatus.ExitCode
303
+	s.OOMKilled = exitStatus.OOMKilled
304
+	close(s.waitStop) // fire waiters for stop
300 305
 	s.waitStop = make(chan struct{})
301 306
 }
302 307
 
... ...
@@ -310,8 +316,9 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
310 310
 	s.Paused = false
311 311
 	s.Pid = 0
312 312
 	s.FinishedAt = time.Now().UTC()
313
-	s.setFromExitStatus(exitStatus)
314
-	close(s.waitStop) // Fire waiters for stop
313
+	s.ExitCodeValue = exitStatus.ExitCode
314
+	s.OOMKilled = exitStatus.OOMKilled
315
+	close(s.waitStop) // fire waiters for stop
315 316
 	s.waitStop = make(chan struct{})
316 317
 }
317 318
 
318 319
deleted file mode 100644
... ...
@@ -1,10 +0,0 @@
1
-// +build linux freebsd
2
-
3
-package container
4
-
5
-// setFromExitStatus is a platform specific helper function to set the state
6
-// based on the ExitStatus structure.
7
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
8
-	s.ExitCodeValue = exitStatus.ExitCode
9
-	s.OOMKilled = exitStatus.OOMKilled
10
-}
11 1
deleted file mode 100644
... ...
@@ -1,7 +0,0 @@
1
-package container
2
-
3
-// setFromExitStatus is a platform specific helper function to set the state
4
-// based on the ExitStatus structure.
5
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
6
-	s.ExitCodeValue = exitStatus.ExitCode
7
-}
... ...
@@ -114,12 +114,12 @@ func (c *Config) CloseStreams() error {
114 114
 }
115 115
 
116 116
 // CopyToPipe connects streamconfig with a libcontainerd.IOPipe
117
-func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) {
117
+func (c *Config) CopyToPipe(iop *libcontainerd.IOPipe) {
118 118
 	copyFunc := func(w io.Writer, r io.ReadCloser) {
119 119
 		c.Add(1)
120 120
 		go func() {
121 121
 			if _, err := pools.Copy(w, r); err != nil {
122
-				logrus.Errorf("stream copy error: %+v", err)
122
+				logrus.Errorf("stream copy error: %v", err)
123 123
 			}
124 124
 			r.Close()
125 125
 			c.Done()
... ...
@@ -138,7 +138,7 @@ func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) {
138 138
 			go func() {
139 139
 				pools.Copy(iop.Stdin, stdin)
140 140
 				if err := iop.Stdin.Close(); err != nil {
141
-					logrus.Warnf("failed to close stdin: %+v", err)
141
+					logrus.Warnf("failed to close stdin: %v", err)
142 142
 				}
143 143
 			}()
144 144
 		}
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"encoding/json"
5 6
 	"fmt"
6 7
 	"io/ioutil"
... ...
@@ -17,7 +18,7 @@ var (
17 17
 )
18 18
 
19 19
 // getCheckpointDir verifies checkpoint directory for create,remove, list options and checks if checkpoint already exists
20
-func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID string, ctrCheckpointDir string, create bool) (string, error) {
20
+func getCheckpointDir(checkDir, checkpointID, ctrName, ctrID, ctrCheckpointDir string, create bool) (string, error) {
21 21
 	var checkpointDir string
22 22
 	var err2 error
23 23
 	if checkDir != "" {
... ...
@@ -32,7 +33,10 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin
32 32
 		case err == nil && stat.IsDir():
33 33
 			err2 = fmt.Errorf("checkpoint with name %s already exists for container %s", checkpointID, ctrName)
34 34
 		case err != nil && os.IsNotExist(err):
35
-			err2 = nil
35
+			err2 = os.MkdirAll(checkpointAbsDir, 0700)
36
+			if os.IsExist(err2) {
37
+				err2 = nil
38
+			}
36 39
 		case err != nil:
37 40
 			err2 = err
38 41
 		case err == nil:
... ...
@@ -48,7 +52,7 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin
48 48
 			err2 = fmt.Errorf("%s exists and is not a directory", checkpointAbsDir)
49 49
 		}
50 50
 	}
51
-	return checkpointDir, err2
51
+	return checkpointAbsDir, err2
52 52
 }
53 53
 
54 54
 // CheckpointCreate checkpoints the process running in a container with CRIU
... ...
@@ -62,6 +66,10 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
62 62
 		return fmt.Errorf("Container %s not running", name)
63 63
 	}
64 64
 
65
+	if container.Config.Tty {
66
+		return fmt.Errorf("checkpoint is not supported on containers with a tty")
67
+	}
68
+
65 69
 	if !validCheckpointNamePattern.MatchString(config.CheckpointID) {
66 70
 		return fmt.Errorf("Invalid checkpoint ID (%s), only %s are allowed", config.CheckpointID, validCheckpointNameChars)
67 71
 	}
... ...
@@ -71,8 +79,9 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
71 71
 		return fmt.Errorf("cannot checkpoint container %s: %s", name, err)
72 72
 	}
73 73
 
74
-	err = daemon.containerd.CreateCheckpoint(container.ID, config.CheckpointID, checkpointDir, config.Exit)
74
+	err = daemon.containerd.CreateCheckpoint(context.Background(), container.ID, checkpointDir, config.Exit)
75 75
 	if err != nil {
76
+		os.RemoveAll(checkpointDir)
76 77
 		return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
77 78
 	}
78 79
 
... ...
@@ -101,6 +101,7 @@ type CommonConfig struct {
101 101
 	RawLogs              bool                      `json:"raw-logs,omitempty"`
102 102
 	RootDeprecated       string                    `json:"graph,omitempty"`
103 103
 	Root                 string                    `json:"data-root,omitempty"`
104
+	ExecRoot             string                    `json:"exec-root,omitempty"`
104 105
 	SocketGroup          string                    `json:"group,omitempty"`
105 106
 	CorsHeaders          string                    `json:"api-cors-header,omitempty"`
106 107
 
... ...
@@ -172,6 +173,10 @@ type CommonConfig struct {
172 172
 	NodeGenericResources string `json:"node-generic-resources,omitempty"`
173 173
 	// NetworkControlPlaneMTU allows to specify the control plane MTU, this will allow to optimize the network use in some components
174 174
 	NetworkControlPlaneMTU int `json:"network-control-plane-mtu,omitempty"`
175
+
176
+	// ContainerdAddr is the address used to connect to containerd if we're
177
+	// not starting it ourselves
178
+	ContainerdAddr string `json:"containerd,omitempty"`
175 179
 }
176 180
 
177 181
 // IsValueSet returns true if a configuration value
... ...
@@ -11,8 +11,6 @@ import (
11 11
 // CommonUnixConfig defines configuration of a docker daemon that is
12 12
 // common across Unix platforms.
13 13
 type CommonUnixConfig struct {
14
-	ExecRoot          string                   `json:"exec-root,omitempty"`
15
-	ContainerdAddr    string                   `json:"containerd,omitempty"`
16 14
 	Runtimes          map[string]types.Runtime `json:"runtimes,omitempty"`
17 15
 	DefaultRuntime    string                   `json:"default-runtime,omitempty"`
18 16
 	DefaultInitBinary string                   `json:"default-init,omitempty"`
... ...
@@ -18,7 +18,7 @@ import (
18 18
 	"sync"
19 19
 	"time"
20 20
 
21
-	containerd "github.com/containerd/containerd/api/grpc/types"
21
+	"github.com/docker/docker/api/errdefs"
22 22
 	"github.com/docker/docker/api/types"
23 23
 	containertypes "github.com/docker/docker/api/types/container"
24 24
 	"github.com/docker/docker/api/types/swarm"
... ...
@@ -62,11 +62,10 @@ import (
62 62
 	"github.com/pkg/errors"
63 63
 )
64 64
 
65
-var (
66
-	// DefaultRuntimeBinary is the default runtime to be used by
67
-	// containerd if none is specified
68
-	DefaultRuntimeBinary = "docker-runc"
65
+// MainNamespace is the name of the namespace used for users' containers
66
+const MainNamespace = "moby"
69 67
 
68
+var (
70 69
 	errSystemNotSupported = errors.New("the Docker daemon is not supported on this platform")
71 70
 )
72 71
 
... ...
@@ -170,7 +169,7 @@ func (daemon *Daemon) restore() error {
170 170
 				continue
171 171
 			}
172 172
 			container.RWLayer = rwlayer
173
-			logrus.Debugf("Loaded container %v", container.ID)
173
+			logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning())
174 174
 
175 175
 			containers[container.ID] = container
176 176
 		} else {
... ...
@@ -209,8 +208,10 @@ func (daemon *Daemon) restore() error {
209 209
 		}
210 210
 	}
211 211
 
212
-	var wg sync.WaitGroup
213
-	var mapLock sync.Mutex
212
+	var (
213
+		wg      sync.WaitGroup
214
+		mapLock sync.Mutex
215
+	)
214 216
 	for _, c := range containers {
215 217
 		wg.Add(1)
216 218
 		go func(c *container.Container) {
... ...
@@ -221,11 +222,74 @@ func (daemon *Daemon) restore() error {
221 221
 			}
222 222
 
223 223
 			daemon.setStateCounter(c)
224
+
225
+			logrus.WithFields(logrus.Fields{
226
+				"container": c.ID,
227
+				"running":   c.IsRunning(),
228
+				"paused":    c.IsPaused(),
229
+			}).Debug("restoring container")
230
+
231
+			var (
232
+				err      error
233
+				alive    bool
234
+				ec       uint32
235
+				exitedAt time.Time
236
+			)
237
+
238
+			alive, _, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
239
+			if err != nil && !errdefs.IsNotFound(err) {
240
+				logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err)
241
+				return
242
+			}
243
+			if !alive {
244
+				ec, exitedAt, err = daemon.containerd.DeleteTask(context.Background(), c.ID)
245
+				if err != nil && !errdefs.IsNotFound(err) {
246
+					logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
247
+					return
248
+				}
249
+			}
250
+
224 251
 			if c.IsRunning() || c.IsPaused() {
225 252
 				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
226
-				if err := daemon.containerd.Restore(c.ID, c.InitializeStdio); err != nil {
227
-					logrus.Errorf("Failed to restore %s with containerd: %s", c.ID, err)
228
-					return
253
+
254
+				if c.IsPaused() && alive {
255
+					s, err := daemon.containerd.Status(context.Background(), c.ID)
256
+					if err != nil {
257
+						logrus.WithError(err).WithField("container", c.ID).
258
+							Errorf("Failed to get container status")
259
+					} else {
260
+						logrus.WithField("container", c.ID).WithField("state", s).
261
+							Info("restored container paused")
262
+						switch s {
263
+						case libcontainerd.StatusPaused, libcontainerd.StatusPausing:
264
+							// nothing to do
265
+						case libcontainerd.StatusStopped:
266
+							alive = false
267
+						case libcontainerd.StatusUnknown:
268
+							logrus.WithField("container", c.ID).
269
+								Error("Unknown status for container during restore")
270
+						default:
271
+							// running
272
+							c.Lock()
273
+							c.Paused = false
274
+							daemon.setStateCounter(c)
275
+							if err := c.CheckpointTo(daemon.containersReplica); err != nil {
276
+								logrus.WithError(err).WithField("container", c.ID).
277
+									Error("Failed to update stopped container state")
278
+							}
279
+							c.Unlock()
280
+						}
281
+					}
282
+				}
283
+
284
+				if !alive {
285
+					c.Lock()
286
+					c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
287
+					daemon.Cleanup(c)
288
+					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
289
+						logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err)
290
+					}
291
+					c.Unlock()
229 292
 				}
230 293
 
231 294
 				// we call Mount and then Unmount to get BaseFs of the container
... ...
@@ -253,11 +317,9 @@ func (daemon *Daemon) restore() error {
253 253
 					activeSandboxes[c.NetworkSettings.SandboxID] = options
254 254
 					mapLock.Unlock()
255 255
 				}
256
+			} else {
257
+				// get list of containers we need to restart
256 258
 
257
-			}
258
-			// fixme: only if not running
259
-			// get list of containers we need to restart
260
-			if !c.IsRunning() && !c.IsPaused() {
261 259
 				// Do not autostart containers which
262 260
 				// has endpoints in a swarm scope
263 261
 				// network yet since the cluster is
... ...
@@ -289,7 +351,7 @@ func (daemon *Daemon) restore() error {
289 289
 				c.RemovalInProgress = false
290 290
 				c.Dead = true
291 291
 				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
292
-					logrus.Errorf("Failed to update container %s state: %v", c.ID, err)
292
+					logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err)
293 293
 				}
294 294
 			}
295 295
 			c.Unlock()
... ...
@@ -559,6 +621,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
559 559
 
560 560
 	d := &Daemon{
561 561
 		configStore: config,
562
+		PluginStore: pluginStore,
562 563
 		startupDone: make(chan struct{}),
563 564
 	}
564 565
 	// Ensure the daemon is properly shutdown if there is a failure during
... ...
@@ -606,6 +669,16 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
606 606
 		return nil, err
607 607
 	}
608 608
 
609
+	// Create the directory where we'll store the runtime scripts (i.e. in
610
+	// order to support runtimeArgs)
611
+	daemonRuntimes := filepath.Join(config.Root, "runtimes")
612
+	if err := system.MkdirAll(daemonRuntimes, 0700, ""); err != nil && !os.IsExist(err) {
613
+		return nil, err
614
+	}
615
+	if err := d.loadRuntimes(); err != nil {
616
+		return nil, err
617
+	}
618
+
609 619
 	if runtime.GOOS == "windows" {
610 620
 		if err := system.MkdirAll(filepath.Join(config.Root, "credentialspecs"), 0, ""); err != nil && !os.IsExist(err) {
611 621
 			return nil, err
... ...
@@ -635,7 +708,6 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
635 635
 	}
636 636
 
637 637
 	d.RegistryService = registryService
638
-	d.PluginStore = pluginStore
639 638
 	logger.RegisterPluginGetter(d.PluginStore)
640 639
 
641 640
 	metricsSockPath, err := d.listenMetricsSock()
... ...
@@ -645,7 +717,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
645 645
 	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
646 646
 
647 647
 	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
648
-		return pluginexec.New(containerdRemote, m)
648
+		return pluginexec.New(getPluginExecRoot(config.Root), containerdRemote, m)
649 649
 	}
650 650
 
651 651
 	// Plugin system initialization should happen before restore. Do not change order.
... ...
@@ -802,13 +874,13 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
802 802
 	d.idMappings = idMappings
803 803
 	d.seccompEnabled = sysInfo.Seccomp
804 804
 	d.apparmorEnabled = sysInfo.AppArmor
805
+	d.containerdRemote = containerdRemote
805 806
 
806 807
 	d.linkIndex = newLinkIndex()
807
-	d.containerdRemote = containerdRemote
808 808
 
809 809
 	go d.execCommandGC()
810 810
 
811
-	d.containerd, err = containerdRemote.Client(d)
811
+	d.containerd, err = containerdRemote.NewClient(MainNamespace, d)
812 812
 	if err != nil {
813 813
 		return nil, err
814 814
 	}
... ...
@@ -1171,19 +1243,6 @@ func (daemon *Daemon) networkOptions(dconfig *config.Config, pg plugingetter.Plu
1171 1171
 	return options, nil
1172 1172
 }
1173 1173
 
1174
-func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry {
1175
-	out := make([]types.BlkioStatEntry, len(entries))
1176
-	for i, re := range entries {
1177
-		out[i] = types.BlkioStatEntry{
1178
-			Major: re.Major,
1179
-			Minor: re.Minor,
1180
-			Op:    re.Op,
1181
-			Value: re.Value,
1182
-		}
1183
-	}
1184
-	return out
1185
-}
1186
-
1187 1174
 // GetCluster returns the cluster
1188 1175
 func (daemon *Daemon) GetCluster() Cluster {
1189 1176
 	return daemon.cluster
... ...
@@ -5,6 +5,7 @@ package daemon
5 5
 import (
6 6
 	"bufio"
7 7
 	"bytes"
8
+	"context"
8 9
 	"fmt"
9 10
 	"io/ioutil"
10 11
 	"net"
... ...
@@ -16,6 +17,7 @@ import (
16 16
 	"strings"
17 17
 	"time"
18 18
 
19
+	containerd_cgroups "github.com/containerd/cgroups"
19 20
 	"github.com/docker/docker/api/types"
20 21
 	"github.com/docker/docker/api/types/blkiodev"
21 22
 	pblkiodev "github.com/docker/docker/api/types/blkiodev"
... ...
@@ -26,6 +28,7 @@ import (
26 26
 	"github.com/docker/docker/opts"
27 27
 	"github.com/docker/docker/pkg/containerfs"
28 28
 	"github.com/docker/docker/pkg/idtools"
29
+	"github.com/docker/docker/pkg/ioutils"
29 30
 	"github.com/docker/docker/pkg/parsers"
30 31
 	"github.com/docker/docker/pkg/parsers/kernel"
31 32
 	"github.com/docker/docker/pkg/sysinfo"
... ...
@@ -38,7 +41,6 @@ import (
38 38
 	"github.com/docker/libnetwork/netutils"
39 39
 	"github.com/docker/libnetwork/options"
40 40
 	lntypes "github.com/docker/libnetwork/types"
41
-	"github.com/golang/protobuf/ptypes"
42 41
 	"github.com/opencontainers/runc/libcontainer/cgroups"
43 42
 	rsystem "github.com/opencontainers/runc/libcontainer/system"
44 43
 	specs "github.com/opencontainers/runtime-spec/specs-go"
... ...
@@ -50,6 +52,14 @@ import (
50 50
 )
51 51
 
52 52
 const (
53
+	// DefaultShimBinary is the default shim to be used by containerd if none
54
+	// is specified
55
+	DefaultShimBinary = "docker-containerd-shim"
56
+
57
+	// DefaultRuntimeBinary is the default runtime to be used by
58
+	// containerd if none is specified
59
+	DefaultRuntimeBinary = "docker-runc"
60
+
53 61
 	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
54 62
 	linuxMinCPUShares = 2
55 63
 	linuxMaxCPUShares = 262144
... ...
@@ -63,6 +73,10 @@ const (
63 63
 	// constant for cgroup drivers
64 64
 	cgroupFsDriver      = "cgroupfs"
65 65
 	cgroupSystemdDriver = "systemd"
66
+
67
+	// DefaultRuntimeName is the default runtime to be used by
68
+	// containerd if none is specified
69
+	DefaultRuntimeName = "docker-runc"
66 70
 )
67 71
 
68 72
 type containerGetter interface {
... ...
@@ -623,6 +637,54 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
623 623
 	return warnings, nil
624 624
 }
625 625
 
626
+func (daemon *Daemon) loadRuntimes() error {
627
+	return daemon.initRuntimes(daemon.configStore.Runtimes)
628
+}
629
+
630
+func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) {
631
+	runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes")
632
+	// Remove old temp directory if any
633
+	os.RemoveAll(runtimeDir + "-old")
634
+	tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes")
635
+	if err != nil {
636
+		return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts")
637
+	}
638
+	defer func() {
639
+		if err != nil {
640
+			if err1 := os.RemoveAll(tmpDir); err1 != nil {
641
+				logrus.WithError(err1).WithField("dir", tmpDir).
642
+					Warnf("failed to remove tmp dir")
643
+			}
644
+			return
645
+		}
646
+
647
+		if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil {
648
+			return
649
+		}
650
+		if err = os.Rename(tmpDir, runtimeDir); err != nil {
651
+			err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start")
652
+			return
653
+		}
654
+		if err = os.RemoveAll(runtimeDir + "-old"); err != nil {
655
+			logrus.WithError(err).WithField("dir", tmpDir).
656
+				Warnf("failed to remove old runtimes dir")
657
+		}
658
+	}()
659
+
660
+	for name, rt := range runtimes {
661
+		if len(rt.Args) == 0 {
662
+			continue
663
+		}
664
+
665
+		script := filepath.Join(tmpDir, name)
666
+		content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " "))
667
+		if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil {
668
+			return err
669
+		}
670
+	}
671
+	return nil
672
+}
673
+
626 674
 // reloadPlatform updates configuration with platform specific options
627 675
 // and updates the passed attributes
628 676
 func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
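With libcontainerd.WithRuntimeArgs gone, runtimes configured with extra arguments are wrapped in generated shell scripts instead. As a hypothetical example, a daemon.json entry such as

	"runtimes": { "custom": { "path": "/usr/local/bin/my-runc", "runtimeArgs": ["--debug"] } }

would make initRuntimes write an executable <data-root>/runtimes/custom containing:

	#!/bin/sh
	/usr/local/bin/my-runc --debug $@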
... ...
@@ -631,9 +693,12 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
631 631
 	}
632 632
 
633 633
 	if conf.IsValueSet("runtimes") {
634
-		daemon.configStore.Runtimes = conf.Runtimes
635 634
 		// Always set the default one
636
-		daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
635
+		conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
636
+		if err := daemon.initRuntimes(conf.Runtimes); err != nil {
637
+			return err
638
+		}
639
+		daemon.configStore.Runtimes = conf.Runtimes
637 640
 	}
638 641
 
639 642
 	if conf.DefaultRuntime != "" {
... ...
@@ -692,7 +757,7 @@ func verifyDaemonSettings(conf *config.Config) error {
692 692
 	if conf.Runtimes == nil {
693 693
 		conf.Runtimes = make(map[string]types.Runtime)
694 694
 	}
695
-	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
695
+	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName}
696 696
 
697 697
 	return nil
698 698
 }
... ...
@@ -1214,11 +1279,24 @@ func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container
1214 1214
 	return daemon.Unmount(container)
1215 1215
 }
1216 1216
 
1217
+func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry {
1218
+	out := make([]types.BlkioStatEntry, len(entries))
1219
+	for i, re := range entries {
1220
+		out[i] = types.BlkioStatEntry{
1221
+			Major: re.Major,
1222
+			Minor: re.Minor,
1223
+			Op:    re.Op,
1224
+			Value: re.Value,
1225
+		}
1226
+	}
1227
+	return out
1228
+}
1229
+
1217 1230
 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
1218 1231
 	if !c.IsRunning() {
1219 1232
 		return nil, errNotRunning(c.ID)
1220 1233
 	}
1221
-	stats, err := daemon.containerd.Stats(c.ID)
1234
+	cs, err := daemon.containerd.Stats(context.Background(), c.ID)
1222 1235
 	if err != nil {
1223 1236
 		if strings.Contains(err.Error(), "container not found") {
1224 1237
 			return nil, containerNotFound(c.ID)
... ...
@@ -1226,54 +1304,98 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
1226 1226
 		return nil, err
1227 1227
 	}
1228 1228
 	s := &types.StatsJSON{}
1229
-	cgs := stats.CgroupStats
1230
-	if cgs != nil {
1229
+	s.Read = cs.Read
1230
+	stats := cs.Metrics
1231
+	if stats.Blkio != nil {
1231 1232
 		s.BlkioStats = types.BlkioStats{
1232
-			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
1233
-			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
1234
-			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
1235
-			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
1236
-			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
1237
-			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
1238
-			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
1239
-			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
1240
-		}
1241
-		cpu := cgs.CpuStats
1233
+			IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive),
1234
+			IoServicedRecursive:     copyBlkioEntry(stats.Blkio.IoServicedRecursive),
1235
+			IoQueuedRecursive:       copyBlkioEntry(stats.Blkio.IoQueuedRecursive),
1236
+			IoServiceTimeRecursive:  copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive),
1237
+			IoWaitTimeRecursive:     copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive),
1238
+			IoMergedRecursive:       copyBlkioEntry(stats.Blkio.IoMergedRecursive),
1239
+			IoTimeRecursive:         copyBlkioEntry(stats.Blkio.IoTimeRecursive),
1240
+			SectorsRecursive:        copyBlkioEntry(stats.Blkio.SectorsRecursive),
1241
+		}
1242
+	}
1243
+	if stats.CPU != nil {
1242 1244
 		s.CPUStats = types.CPUStats{
1243 1245
 			CPUUsage: types.CPUUsage{
1244
-				TotalUsage:        cpu.CpuUsage.TotalUsage,
1245
-				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
1246
-				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
1247
-				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
1246
+				TotalUsage:        stats.CPU.Usage.Total,
1247
+				PercpuUsage:       stats.CPU.Usage.PerCPU,
1248
+				UsageInKernelmode: stats.CPU.Usage.Kernel,
1249
+				UsageInUsermode:   stats.CPU.Usage.User,
1248 1250
 			},
1249 1251
 			ThrottlingData: types.ThrottlingData{
1250
-				Periods:          cpu.ThrottlingData.Periods,
1251
-				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
1252
-				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
1252
+				Periods:          stats.CPU.Throttling.Periods,
1253
+				ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods,
1254
+				ThrottledTime:    stats.CPU.Throttling.ThrottledTime,
1253 1255
 			},
1254 1256
 		}
1255
-		mem := cgs.MemoryStats.Usage
1256
-		s.MemoryStats = types.MemoryStats{
1257
-			Usage:    mem.Usage,
1258
-			MaxUsage: mem.MaxUsage,
1259
-			Stats:    cgs.MemoryStats.Stats,
1260
-			Failcnt:  mem.Failcnt,
1261
-			Limit:    mem.Limit,
1257
+	}
1258
+
1259
+	if stats.Memory != nil {
1260
+		raw := make(map[string]uint64)
1261
+		raw["cache"] = stats.Memory.Cache
1262
+		raw["rss"] = stats.Memory.RSS
1263
+		raw["rss_huge"] = stats.Memory.RSSHuge
1264
+		raw["mapped_file"] = stats.Memory.MappedFile
1265
+		raw["dirty"] = stats.Memory.Dirty
1266
+		raw["writeback"] = stats.Memory.Writeback
1267
+		raw["pgpgin"] = stats.Memory.PgPgIn
1268
+		raw["pgpgout"] = stats.Memory.PgPgOut
1269
+		raw["pgfault"] = stats.Memory.PgFault
1270
+		raw["pgmajfault"] = stats.Memory.PgMajFault
1271
+		raw["inactive_anon"] = stats.Memory.InactiveAnon
1272
+		raw["active_anon"] = stats.Memory.ActiveAnon
1273
+		raw["inactive_file"] = stats.Memory.InactiveFile
1274
+		raw["active_file"] = stats.Memory.ActiveFile
1275
+		raw["unevictable"] = stats.Memory.Unevictable
1276
+		raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit
1277
+		raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit
1278
+		raw["total_cache"] = stats.Memory.TotalCache
1279
+		raw["total_rss"] = stats.Memory.TotalRSS
1280
+		raw["total_rss_huge"] = stats.Memory.TotalRSSHuge
1281
+		raw["total_mapped_file"] = stats.Memory.TotalMappedFile
1282
+		raw["total_dirty"] = stats.Memory.TotalDirty
1283
+		raw["total_writeback"] = stats.Memory.TotalWriteback
1284
+		raw["total_pgpgin"] = stats.Memory.TotalPgPgIn
1285
+		raw["total_pgpgout"] = stats.Memory.TotalPgPgOut
1286
+		raw["total_pgfault"] = stats.Memory.TotalPgFault
1287
+		raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault
1288
+		raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon
1289
+		raw["total_active_anon"] = stats.Memory.TotalActiveAnon
1290
+		raw["total_inactive_file"] = stats.Memory.TotalInactiveFile
1291
+		raw["total_active_file"] = stats.Memory.TotalActiveFile
1292
+		raw["total_unevictable"] = stats.Memory.TotalUnevictable
1293
+
1294
+		if stats.Memory.Usage != nil {
1295
+			s.MemoryStats = types.MemoryStats{
1296
+				Stats:    raw,
1297
+				Usage:    stats.Memory.Usage.Usage,
1298
+				MaxUsage: stats.Memory.Usage.Max,
1299
+				Limit:    stats.Memory.Usage.Limit,
1300
+				Failcnt:  stats.Memory.Usage.Failcnt,
1301
+			}
1302
+		} else {
1303
+			s.MemoryStats = types.MemoryStats{
1304
+				Stats: raw,
1305
+			}
1262 1306
 		}
1307
+
1263 1308
 		// if the container does not set memory limit, use the machineMemory
1264
-		if mem.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
1309
+		if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
1265 1310
 			s.MemoryStats.Limit = daemon.machineMemory
1266 1311
 		}
1267
-		if cgs.PidsStats != nil {
1268
-			s.PidsStats = types.PidsStats{
1269
-				Current: cgs.PidsStats.Current,
1270
-			}
1271
-		}
1272 1312
 	}
1273
-	s.Read, err = ptypes.Timestamp(stats.Timestamp)
1274
-	if err != nil {
1275
-		return nil, err
1313
+
1314
+	if stats.Pids != nil {
1315
+		s.PidsStats = types.PidsStats{
1316
+			Current: stats.Pids.Current,
1317
+			Limit:   stats.Pids.Limit,
1318
+		}
1276 1319
 	}
1320
+
1277 1321
 	return s, nil
1278 1322
 }
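Stats now returns the read time plus a containerd cgroups Metrics snapshot rather than the old grpc CgroupStats. A minimal sketch of consuming it, using only fields referenced above (nil checks and error handling abbreviated):

	cs, err := daemon.containerd.Stats(context.Background(), c.ID)
	if err != nil {
		return nil, err
	}
	s := &types.StatsJSON{}
	s.Read = cs.Read // time the sample was taken
	if cs.Metrics.CPU != nil {
		s.CPUStats.CPUUsage.TotalUsage = cs.Metrics.CPU.Usage.Total
	}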
1279 1323
 
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"fmt"
5 6
 	"os"
6 7
 	"path/filepath"
... ...
@@ -532,7 +533,7 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
532 532
 	}
533 533
 
534 534
 	// Obtain the stats from HCS via libcontainerd
535
-	stats, err := daemon.containerd.Stats(c.ID)
535
+	stats, err := daemon.containerd.Stats(context.Background(), c.ID)
536 536
 	if err != nil {
537 537
 		if strings.Contains(err.Error(), "container not found") {
538 538
 			return nil, containerNotFound(c.ID)
... ...
@@ -542,49 +543,48 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
542 542
 
543 543
 	// Start with an empty structure
544 544
 	s := &types.StatsJSON{}
545
+	s.Stats.Read = stats.Read
546
+	s.Stats.NumProcs = platform.NumProcs()
545 547
 
546
-	// Populate the CPU/processor statistics
547
-	s.CPUStats = types.CPUStats{
548
-		CPUUsage: types.CPUUsage{
549
-			TotalUsage:        stats.Processor.TotalRuntime100ns,
550
-			UsageInKernelmode: stats.Processor.RuntimeKernel100ns,
551
-			UsageInUsermode:   stats.Processor.RuntimeKernel100ns,
552
-		},
553
-	}
554
-
555
-	// Populate the memory statistics
556
-	s.MemoryStats = types.MemoryStats{
557
-		Commit:            stats.Memory.UsageCommitBytes,
558
-		CommitPeak:        stats.Memory.UsageCommitPeakBytes,
559
-		PrivateWorkingSet: stats.Memory.UsagePrivateWorkingSetBytes,
560
-	}
561
-
562
-	// Populate the storage statistics
563
-	s.StorageStats = types.StorageStats{
564
-		ReadCountNormalized:  stats.Storage.ReadCountNormalized,
565
-		ReadSizeBytes:        stats.Storage.ReadSizeBytes,
566
-		WriteCountNormalized: stats.Storage.WriteCountNormalized,
567
-		WriteSizeBytes:       stats.Storage.WriteSizeBytes,
568
-	}
569
-
570
-	// Populate the network statistics
571
-	s.Networks = make(map[string]types.NetworkStats)
572
-
573
-	for _, nstats := range stats.Network {
574
-		s.Networks[nstats.EndpointId] = types.NetworkStats{
575
-			RxBytes:   nstats.BytesReceived,
576
-			RxPackets: nstats.PacketsReceived,
577
-			RxDropped: nstats.DroppedPacketsIncoming,
578
-			TxBytes:   nstats.BytesSent,
579
-			TxPackets: nstats.PacketsSent,
580
-			TxDropped: nstats.DroppedPacketsOutgoing,
548
+	if stats.HCSStats != nil {
549
+		hcss := stats.HCSStats
550
+		// Populate the CPU/processor statistics
551
+		s.CPUStats = types.CPUStats{
552
+			CPUUsage: types.CPUUsage{
553
+				TotalUsage:        hcss.Processor.TotalRuntime100ns,
554
+				UsageInKernelmode: hcss.Processor.RuntimeKernel100ns,
555
+				UsageInUsermode:   hcss.Processor.RuntimeKernel100ns,
556
+			},
581 557
 		}
582
-	}
583 558
 
584
-	// Set the timestamp
585
-	s.Stats.Read = stats.Timestamp
586
-	s.Stats.NumProcs = platform.NumProcs()
559
+		// Populate the memory statistics
560
+		s.MemoryStats = types.MemoryStats{
561
+			Commit:            hcss.Memory.UsageCommitBytes,
562
+			CommitPeak:        hcss.Memory.UsageCommitPeakBytes,
563
+			PrivateWorkingSet: hcss.Memory.UsagePrivateWorkingSetBytes,
564
+		}
587 565
 
566
+		// Populate the storage statistics
567
+		s.StorageStats = types.StorageStats{
568
+			ReadCountNormalized:  hcss.Storage.ReadCountNormalized,
569
+			ReadSizeBytes:        hcss.Storage.ReadSizeBytes,
570
+			WriteCountNormalized: hcss.Storage.WriteCountNormalized,
571
+			WriteSizeBytes:       hcss.Storage.WriteSizeBytes,
572
+		}
573
+
574
+		// Populate the network statistics
575
+		s.Networks = make(map[string]types.NetworkStats)
576
+		for _, nstats := range hcss.Network {
577
+			s.Networks[nstats.EndpointId] = types.NetworkStats{
578
+				RxBytes:   nstats.BytesReceived,
579
+				RxPackets: nstats.PacketsReceived,
580
+				RxDropped: nstats.DroppedPacketsIncoming,
581
+				TxBytes:   nstats.BytesSent,
582
+				TxPackets: nstats.PacketsSent,
583
+				TxDropped: nstats.DroppedPacketsOutgoing,
584
+			}
585
+		}
586
+	}
588 587
 	return s, nil
589 588
 }
590 589
 
... ...
@@ -664,3 +664,11 @@ func getRealPath(path string) (string, error) {
664 664
 	}
665 665
 	return fileutils.ReadSymlinkedDirectory(path)
666 666
 }
667
+
668
+func (daemon *Daemon) loadRuntimes() error {
669
+	return nil
670
+}
671
+
672
+func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {
673
+	return nil
674
+}
... ...
@@ -141,6 +141,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
141 141
 	}
142 142
 	container.SetRemoved()
143 143
 	stateCtr.del(container.ID)
144
+
144 145
 	daemon.LogContainerEvent(container, "destroy")
145 146
 	return nil
146 147
 }
... ...
@@ -64,6 +64,11 @@ func errExecPaused(id string) error {
64 64
 	return stateConflictError{cause}
65 65
 }
66 66
 
67
+func errNotPaused(id string) error {
68
+	cause := errors.Errorf("Container %s is already paused", id)
69
+	return stateConflictError{cause}
70
+}
71
+
67 72
 type nameConflictError struct {
68 73
 	id   string
69 74
 	name string
... ...
@@ -13,10 +13,10 @@ import (
13 13
 	"github.com/docker/docker/container"
14 14
 	"github.com/docker/docker/container/stream"
15 15
 	"github.com/docker/docker/daemon/exec"
16
-	"github.com/docker/docker/libcontainerd"
17 16
 	"github.com/docker/docker/pkg/pools"
18 17
 	"github.com/docker/docker/pkg/signal"
19 18
 	"github.com/docker/docker/pkg/term"
19
+	specs "github.com/opencontainers/runtime-spec/specs-go"
20 20
 	"github.com/pkg/errors"
21 21
 	"github.com/sirupsen/logrus"
22 22
 )
... ...
@@ -31,6 +31,14 @@ func (d *Daemon) registerExecCommand(container *container.Container, config *exe
31 31
 	d.execCommands.Add(config.ID, config)
32 32
 }
33 33
 
34
+func (d *Daemon) registerExecPidUnlocked(container *container.Container, config *exec.Config) {
35
+	logrus.Debugf("registering pid %v for exec %v", config.Pid, config.ID)
36
+	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
37
+	container.ExecCommands.SetPidUnlocked(config.ID, config.Pid)
38
+	// Storing execs in daemon for easy access via Engine API.
39
+	d.execCommands.SetPidUnlocked(config.ID, config.Pid)
40
+}
41
+
34 42
 // ExecExists looks up the exec instance and returns a bool if it exists or not.
35 43
 // It will also return the error produced by `getConfig`
36 44
 func (d *Daemon) ExecExists(name string) (bool, error) {
... ...
@@ -70,8 +78,8 @@ func (d *Daemon) getExecConfig(name string) (*exec.Config, error) {
70 70
 }
71 71
 
72 72
 func (d *Daemon) unregisterExecCommand(container *container.Container, execConfig *exec.Config) {
73
-	container.ExecCommands.Delete(execConfig.ID)
74
-	d.execCommands.Delete(execConfig.ID)
73
+	container.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
74
+	d.execCommands.Delete(execConfig.ID, execConfig.Pid)
75 75
 }
76 76
 
77 77
 func (d *Daemon) getActiveContainer(name string) (*container.Container, error) {
... ...
@@ -181,7 +189,7 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
181 181
 				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
182 182
 			}
183 183
 			ec.Unlock()
184
-			c.ExecCommands.Delete(ec.ID)
184
+			c.ExecCommands.Delete(ec.ID, ec.Pid)
185 185
 		}
186 186
 	}()
187 187
 
... ...
@@ -207,13 +215,17 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
207 207
 		ec.StreamConfig.NewNopInputPipe()
208 208
 	}
209 209
 
210
-	p := libcontainerd.Process{
210
+	p := &specs.Process{
211 211
 		Args:     append([]string{ec.Entrypoint}, ec.Args...),
212 212
 		Env:      ec.Env,
213 213
 		Terminal: ec.Tty,
214
+		Cwd:      c.Config.WorkingDir,
215
+	}
216
+	if p.Cwd == "" {
217
+		p.Cwd = "/"
214 218
 	}
215 219
 
216
-	if err := execSetPlatformOpt(c, ec, &p); err != nil {
220
+	if err := d.execSetPlatformOpt(c, ec, p); err != nil {
217 221
 		return err
218 222
 	}
219 223
 
... ...
@@ -231,22 +243,28 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
231 231
 	ec.StreamConfig.AttachStreams(&attachConfig)
232 232
 	attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig)
233 233
 
234
-	systemPid, err := d.containerd.AddProcess(ctx, c.ID, name, p, ec.InitializeStdio)
234
+	// Synchronize with libcontainerd event loop
235
+	ec.Lock()
236
+	c.ExecCommands.Lock()
237
+	systemPid, err := d.containerd.Exec(ctx, c.ID, ec.ID, p, cStdin != nil, ec.InitializeStdio)
235 238
 	if err != nil {
239
+		c.ExecCommands.Unlock()
240
+		ec.Unlock()
236 241
 		return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err)
237 242
 	}
238
-	ec.Lock()
239 243
 	ec.Pid = systemPid
244
+	d.registerExecPidUnlocked(c, ec)
245
+	c.ExecCommands.Unlock()
240 246
 	ec.Unlock()
241 247
 
242 248
 	select {
243 249
 	case <-ctx.Done():
244 250
 		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
245
-		d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"]))
251
+		d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"]))
246 252
 		select {
247 253
 		case <-time.After(termProcessTimeout * time.Second):
248 254
 			logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
249
-			d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"]))
255
+			d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"]))
250 256
 		case <-attachErr:
251 257
 			// TERM signal worked
252 258
 		}
... ...
@@ -273,7 +291,7 @@ func (d *Daemon) execCommandGC() {
273 273
 		for id, config := range d.execCommands.Commands() {
274 274
 			if config.CanRemove {
275 275
 				cleaned++
276
-				d.execCommands.Delete(id)
276
+				d.execCommands.Delete(id, config.Pid)
277 277
 			} else {
278 278
 				if _, exists := liveExecCommands[id]; !exists {
279 279
 					config.CanRemove = true
... ...
@@ -4,6 +4,7 @@ import (
4 4
 	"runtime"
5 5
 	"sync"
6 6
 
7
+	"github.com/containerd/containerd"
7 8
 	"github.com/docker/docker/container/stream"
8 9
 	"github.com/docker/docker/libcontainerd"
9 10
 	"github.com/docker/docker/pkg/stringid"
... ...
@@ -42,8 +43,26 @@ func NewConfig() *Config {
42 42
 	}
43 43
 }
44 44
 
45
+type cio struct {
46
+	containerd.IO
47
+
48
+	sc *stream.Config
49
+}
50
+
51
+func (i *cio) Close() error {
52
+	i.IO.Close()
53
+
54
+	return i.sc.CloseStreams()
55
+}
56
+
57
+func (i *cio) Wait() {
58
+	i.sc.Wait()
59
+
60
+	i.IO.Wait()
61
+}
62
+
45 63
 // InitializeStdio is called by libcontainerd to connect the stdio.
46
-func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error {
64
+func (c *Config) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) {
47 65
 	c.StreamConfig.CopyToPipe(iop)
48 66
 
49 67
 	if c.StreamConfig.Stdin() == nil && !c.Tty && runtime.GOOS == "windows" {
... ...
@@ -54,7 +73,7 @@ func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error {
54 54
 		}
55 55
 	}
56 56
 
57
-	return nil
57
+	return &cio{IO: iop, sc: c.StreamConfig}, nil
58 58
 }
59 59
 
60 60
 // CloseStreams closes the stdio streams for the exec
... ...
@@ -69,45 +88,66 @@ func (c *Config) SetExitCode(code int) {
69 69
 
70 70
 // Store keeps track of the exec configurations.
71 71
 type Store struct {
72
-	commands map[string]*Config
72
+	byID  map[string]*Config
73
+	byPid map[int]*Config
73 74
 	sync.RWMutex
74 75
 }
75 76
 
76 77
 // NewStore initializes a new exec store.
77 78
 func NewStore() *Store {
78
-	return &Store{commands: make(map[string]*Config)}
79
+	return &Store{
80
+		byID:  make(map[string]*Config),
81
+		byPid: make(map[int]*Config),
82
+	}
79 83
 }
80 84
 
81 85
 // Commands returns the exec configurations in the store.
82 86
 func (e *Store) Commands() map[string]*Config {
83 87
 	e.RLock()
84
-	commands := make(map[string]*Config, len(e.commands))
85
-	for id, config := range e.commands {
86
-		commands[id] = config
88
+	byID := make(map[string]*Config, len(e.byID))
89
+	for id, config := range e.byID {
90
+		byID[id] = config
87 91
 	}
88 92
 	e.RUnlock()
89
-	return commands
93
+	return byID
90 94
 }
91 95
 
92 96
 // Add adds a new exec configuration to the store.
93 97
 func (e *Store) Add(id string, Config *Config) {
94 98
 	e.Lock()
95
-	e.commands[id] = Config
99
+	e.byID[id] = Config
96 100
 	e.Unlock()
97 101
 }
98 102
 
103
+// SetPidUnlocked adds an association between a Pid and a config; it is not
104
+// synchronized with other operations.
105
+func (e *Store) SetPidUnlocked(id string, pid int) {
106
+	if config, ok := e.byID[id]; ok {
107
+		e.byPid[pid] = config
108
+	}
109
+}
110
+
99 111
 // Get returns an exec configuration by its id.
100 112
 func (e *Store) Get(id string) *Config {
101 113
 	e.RLock()
102
-	res := e.commands[id]
114
+	res := e.byID[id]
115
+	e.RUnlock()
116
+	return res
117
+}
118
+
119
+// ByPid returns an exec configuration by its pid.
120
+func (e *Store) ByPid(pid int) *Config {
121
+	e.RLock()
122
+	res := e.byPid[pid]
103 123
 	e.RUnlock()
104 124
 	return res
105 125
 }
106 126
 
107 127
 // Delete removes an exec configuration from the store.
108
-func (e *Store) Delete(id string) {
128
+func (e *Store) Delete(id string, pid int) {
109 129
 	e.Lock()
110
-	delete(e.commands, id)
130
+	delete(e.byPid, pid)
131
+	delete(e.byID, id)
111 132
 	e.Unlock()
112 133
 }
113 134
 
... ...
@@ -115,7 +155,7 @@ func (e *Store) Delete(id string) {
115 115
 func (e *Store) List() []string {
116 116
 	var IDs []string
117 117
 	e.RLock()
118
-	for id := range e.commands {
118
+	for id := range e.byID {
119 119
 		IDs = append(IDs, id)
120 120
 	}
121 121
 	e.RUnlock()
... ...
@@ -4,25 +4,30 @@ import (
4 4
 	"github.com/docker/docker/container"
5 5
 	"github.com/docker/docker/daemon/caps"
6 6
 	"github.com/docker/docker/daemon/exec"
7
-	"github.com/docker/docker/libcontainerd"
8 7
 	"github.com/opencontainers/runc/libcontainer/apparmor"
9 8
 	"github.com/opencontainers/runtime-spec/specs-go"
10 9
 )
11 10
 
12
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
11
+func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
13 12
 	if len(ec.User) > 0 {
14 13
 		uid, gid, additionalGids, err := getUser(c, ec.User)
15 14
 		if err != nil {
16 15
 			return err
17 16
 		}
18
-		p.User = &specs.User{
17
+		p.User = specs.User{
19 18
 			UID:            uid,
20 19
 			GID:            gid,
21 20
 			AdditionalGids: additionalGids,
22 21
 		}
23 22
 	}
24 23
 	if ec.Privileged {
25
-		p.Capabilities = caps.GetAllCapabilities()
24
+		if p.Capabilities == nil {
25
+			p.Capabilities = &specs.LinuxCapabilities{}
26
+		}
27
+		p.Capabilities.Bounding = caps.GetAllCapabilities()
28
+		p.Capabilities.Permitted = p.Capabilities.Bounding
29
+		p.Capabilities.Inheritable = p.Capabilities.Bounding
30
+		p.Capabilities.Effective = p.Capabilities.Bounding
26 31
 	}
27 32
 	if apparmor.IsEnabled() {
28 33
 		var appArmorProfile string
... ...
@@ -46,5 +51,6 @@ func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainer
46 46
 			}
47 47
 		}
48 48
 	}
49
+	daemon.setRlimits(&specs.Spec{Process: p}, c)
49 50
 	return nil
50 51
 }
... ...
@@ -3,9 +3,9 @@ package daemon
3 3
 import (
4 4
 	"github.com/docker/docker/container"
5 5
 	"github.com/docker/docker/daemon/exec"
6
-	"github.com/docker/docker/libcontainerd"
6
+	specs "github.com/opencontainers/runtime-spec/specs-go"
7 7
 )
8 8
 
9
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
9
+func (daemon *Daemon) execSetPlatformOpt(_ *container.Container, _ *exec.Config, _ *specs.Process) error {
10 10
 	return nil
11 11
 }
... ...
@@ -3,10 +3,10 @@ package daemon
3 3
 import (
4 4
 	"github.com/docker/docker/container"
5 5
 	"github.com/docker/docker/daemon/exec"
6
-	"github.com/docker/docker/libcontainerd"
6
+	specs "github.com/opencontainers/runtime-spec/specs-go"
7 7
 )
8 8
 
9
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
9
+func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
10 10
 	// Process arguments need to be escaped before sending to OCI.
11 11
 	if c.OS == "windows" {
12 12
 		p.Args = escapeArgs(p.Args)
... ...
@@ -3,7 +3,6 @@
3 3
 package daemon
4 4
 
5 5
 import (
6
-	"context"
7 6
 	"os/exec"
8 7
 	"strings"
9 8
 
... ...
@@ -28,16 +27,8 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
28 28
 	v.DefaultRuntime = daemon.configStore.GetDefaultRuntimeName()
29 29
 	v.InitBinary = daemon.configStore.GetInitPath()
30 30
 
31
-	v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
32
-	if sv, err := daemon.containerd.GetServerVersion(context.Background()); err == nil {
33
-		v.ContainerdCommit.ID = sv.Revision
34
-	} else {
35
-		logrus.Warnf("failed to retrieve containerd version: %v", err)
36
-		v.ContainerdCommit.ID = "N/A"
37
-	}
38
-
39 31
 	v.RuncCommit.Expected = dockerversion.RuncCommitID
40
-	defaultRuntimeBinary := daemon.configStore.GetRuntime(daemon.configStore.GetDefaultRuntimeName()).Path
32
+	defaultRuntimeBinary := daemon.configStore.GetRuntime(v.DefaultRuntime).Path
41 33
 	if rv, err := exec.Command(defaultRuntimeBinary, "--version").Output(); err == nil {
42 34
 		parts := strings.Split(strings.TrimSpace(string(rv)), "\n")
43 35
 		if len(parts) == 3 {
... ...
@@ -56,6 +47,24 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
56 56
 		v.RuncCommit.ID = "N/A"
57 57
 	}
58 58
 
59
+	v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
60
+	if rv, err := exec.Command("docker-containerd", "--version").Output(); err == nil {
61
+		parts := strings.Split(strings.TrimSpace(string(rv)), " ")
62
+		if len(parts) == 3 {
63
+			v.ContainerdCommit.ID = parts[2]
64
+		}
65
+		switch {
66
+		case v.ContainerdCommit.ID == "":
67
+			logrus.Warnf("failed to retrieve docker-containerd version: unknown output format: %s", string(rv))
68
+			v.ContainerdCommit.ID = "N/A"
69
+		case strings.HasSuffix(v.ContainerdCommit.ID, "-g"+v.ContainerdCommit.ID[len(v.ContainerdCommit.ID)-7:]):
70
+			v.ContainerdCommit.ID = v.ContainerdCommit.Expected
71
+		}
72
+	} else {
73
+		logrus.Warnf("failed to retrieve docker-containerd version: %v", err)
74
+		v.ContainerdCommit.ID = "N/A"
75
+	}
76
+
59 77
 	defaultInitBinary := daemon.configStore.GetInitPath()
60 78
 	if rv, err := exec.Command(defaultInitBinary, "--version").Output(); err == nil {
61 79
 		ver, err := parseInitVersion(string(rv))
... ...
@@ -9,6 +9,7 @@ import (
9 9
 	"time"
10 10
 
11 11
 	containerpkg "github.com/docker/docker/container"
12
+	"github.com/docker/docker/libcontainerd"
12 13
 	"github.com/docker/docker/pkg/signal"
13 14
 	"github.com/pkg/errors"
14 15
 	"github.com/sirupsen/logrus"
... ...
@@ -108,7 +109,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
108 108
 
109 109
 	if unpause {
110 110
 		// above kill signal will be sent once resume is finished
111
-		if err := daemon.containerd.Resume(container.ID); err != nil {
111
+		if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
112 112
 			logrus.Warn("Cannot unpause container %s: %s", container.ID, err)
113 113
 		}
114 114
 	}
... ...
@@ -177,5 +178,5 @@ func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container,
177 177
 }
178 178
 
179 179
 func (daemon *Daemon) kill(c *containerpkg.Container, sig int) error {
180
-	return daemon.containerd.Signal(c.ID, sig)
180
+	return daemon.containerd.SignalProcess(context.Background(), c.ID, libcontainerd.InitProcessName, sig)
181 181
 }
... ...
@@ -6,8 +6,8 @@ import (
6 6
 	"context"
7 7
 	"io"
8 8
 
9
+	"github.com/containerd/fifo"
9 10
 	"github.com/pkg/errors"
10
-	"github.com/tonistiigi/fifo"
11 11
 	"golang.org/x/sys/unix"
12 12
 )
13 13
 
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"errors"
5 6
 	"fmt"
6 7
 	"runtime"
... ...
@@ -25,15 +26,15 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
25 25
 	}
26 26
 }
27 27
 
28
-// StateChanged updates daemon state changes from containerd
29
-func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
30
-	c := daemon.containers.Get(id)
31
-	if c == nil {
28
+// ProcessEvent is called by libcontainerd whenever an event occurs
29
+func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error {
30
+	c, err := daemon.GetContainer(id)
31
+	if c == nil || err != nil {
32 32
 		return fmt.Errorf("no such container: %s", id)
33 33
 	}
34 34
 
35
-	switch e.State {
36
-	case libcontainerd.StateOOM:
35
+	switch e {
36
+	case libcontainerd.EventOOM:
37 37
 		// StateOOM is Linux specific and should never be hit on Windows
38 38
 		if runtime.GOOS == "windows" {
39 39
 			return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
... ...
@@ -43,63 +44,72 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
43 43
 			return err
44 44
 		}
45 45
 		daemon.LogContainerEvent(c, "oom")
46
-	case libcontainerd.StateExit:
46
+	case libcontainerd.EventExit:
47
+		if int(ei.Pid) == c.Pid {
48
+			_, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
49
+			if err != nil {
50
+				logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
51
+			}
47 52
 
48
-		c.Lock()
49
-		c.StreamConfig.Wait()
50
-		c.Reset(false)
51
-
52
-		// If daemon is being shutdown, don't let the container restart
53
-		restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
54
-		if err == nil && restart {
55
-			c.RestartCount++
56
-			c.SetRestarting(platformConstructExitStatus(e))
57
-		} else {
58
-			c.SetStopped(platformConstructExitStatus(e))
59
-			defer daemon.autoRemove(c)
60
-		}
53
+			c.Lock()
54
+			c.StreamConfig.Wait()
55
+			c.Reset(false)
61 56
 
62
-		// cancel healthcheck here, they will be automatically
63
-		// restarted if/when the container is started again
64
-		daemon.stopHealthchecks(c)
65
-		attributes := map[string]string{
66
-			"exitCode": strconv.Itoa(int(e.ExitCode)),
67
-		}
68
-		daemon.LogContainerEventWithAttributes(c, "die", attributes)
69
-		daemon.Cleanup(c)
70
-
71
-		if err == nil && restart {
72
-			go func() {
73
-				err := <-wait
74
-				if err == nil {
75
-					// daemon.netController is initialized when daemon is restoring containers.
76
-					// But containerStart will use daemon.netController segment.
77
-					// So to avoid panic at startup process, here must wait util daemon restore done.
78
-					daemon.waitForStartupDone()
79
-					if err = daemon.containerStart(c, "", "", false); err != nil {
80
-						logrus.Debugf("failed to restart container: %+v", err)
57
+			exitStatus := container.ExitStatus{
58
+				ExitCode:  int(ei.ExitCode),
59
+				ExitedAt:  ei.ExitedAt,
60
+				OOMKilled: ei.OOMKilled,
61
+			}
62
+			restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
63
+			if err == nil && restart {
64
+				c.RestartCount++
65
+				c.SetRestarting(&exitStatus)
66
+			} else {
67
+				c.SetStopped(&exitStatus)
68
+				defer daemon.autoRemove(c)
69
+			}
70
+
71
+			// cancel healthcheck here, they will be automatically
72
+			// restarted if/when the container is started again
73
+			daemon.stopHealthchecks(c)
74
+			attributes := map[string]string{
75
+				"exitCode": strconv.Itoa(int(ei.ExitCode)),
76
+			}
77
+			daemon.LogContainerEventWithAttributes(c, "die", attributes)
78
+			daemon.Cleanup(c)
79
+
80
+			if err == nil && restart {
81
+				go func() {
82
+					err := <-wait
83
+					if err == nil {
84
+						// daemon.netController is initialized when daemon is restoring containers.
85
+						// But containerStart will use daemon.netController.
86
+						// So to avoid a panic at startup, we must wait until the daemon restore is done.
87
+						daemon.waitForStartupDone()
88
+						if err = daemon.containerStart(c, "", "", false); err != nil {
89
+							logrus.Debugf("failed to restart container: %+v", err)
90
+						}
81 91
 					}
82
-				}
83
-				if err != nil {
84
-					c.SetStopped(platformConstructExitStatus(e))
85
-					defer daemon.autoRemove(c)
86
-					if err != restartmanager.ErrRestartCanceled {
87
-						logrus.Errorf("restartmanger wait error: %+v", err)
92
+					if err != nil {
93
+						c.SetStopped(&exitStatus)
94
+						defer daemon.autoRemove(c)
95
+						if err != restartmanager.ErrRestartCanceled {
96
+							logrus.Errorf("restartmanager wait error: %+v", err)
97
+						}
88 98
 					}
89
-				}
90
-			}()
91
-		}
92
-
93
-		daemon.setStateCounter(c)
99
+				}()
100
+			}
94 101
 
95
-		defer c.Unlock()
96
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
97
-			return err
102
+			daemon.setStateCounter(c)
103
+			defer c.Unlock()
104
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
105
+				return err
106
+			}
107
+			return daemon.postRunProcessing(c, ei)
98 108
 		}
99
-		return daemon.postRunProcessing(c, e)
100
-	case libcontainerd.StateExitProcess:
101
-		if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
102
-			ec := int(e.ExitCode)
109
+
110
+		if execConfig := c.ExecCommands.ByPid(int(ei.Pid)); execConfig != nil {
111
+			ec := int(ei.ExitCode)
103 112
 			execConfig.Lock()
104 113
 			defer execConfig.Unlock()
105 114
 			execConfig.ExitCode = &ec
... ...
@@ -111,42 +121,59 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
111 111
 
112 112
 			// remove the exec command from the container's store only and not the
113 113
 			// daemon's store so that the exec command can be inspected.
114
-			c.ExecCommands.Delete(execConfig.ID)
114
+			c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
115 115
 		} else {
116
-			logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
116
+			logrus.WithFields(logrus.Fields{
117
+				"container": c.ID,
118
+				"exec-pid":  ei.Pid,
119
+			}).Warnf("Ignoring Exit Event, no such exec command found")
117 120
 		}
118
-	case libcontainerd.StateStart, libcontainerd.StateRestore:
119
-		// Container is already locked in this case
120
-		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
121
-		c.HasBeenManuallyStopped = false
122
-		c.HasBeenStartedBefore = true
123
-		daemon.setStateCounter(c)
124
-
125
-		daemon.initHealthMonitor(c)
126
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
127
-			c.Reset(false)
128
-			return err
121
+	case libcontainerd.EventStart:
122
+		c.Lock()
123
+		defer c.Unlock()
124
+
125
+		// This is here to handle start not generated by docker
126
+		if !c.Running {
127
+			c.SetRunning(int(ei.Pid), false)
128
+			c.HasBeenManuallyStopped = false
129
+			c.HasBeenStartedBefore = true
130
+			daemon.setStateCounter(c)
131
+
132
+			daemon.initHealthMonitor(c)
133
+
134
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
135
+				return err
136
+			}
137
+			daemon.LogContainerEvent(c, "start")
129 138
 		}
130 139
 
131
-		daemon.LogContainerEvent(c, "start")
132
-	case libcontainerd.StatePause:
133
-		// Container is already locked in this case
134
-		c.Paused = true
135
-		daemon.setStateCounter(c)
136
-		daemon.updateHealthMonitor(c)
137
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
138
-			return err
140
+	case libcontainerd.EventPaused:
141
+		c.Lock()
142
+		defer c.Unlock()
143
+
144
+		if !c.Paused {
145
+			c.Paused = true
146
+			daemon.setStateCounter(c)
147
+			daemon.updateHealthMonitor(c)
148
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
149
+				return err
150
+			}
151
+			daemon.LogContainerEvent(c, "pause")
139 152
 		}
140
-		daemon.LogContainerEvent(c, "pause")
141
-	case libcontainerd.StateResume:
142
-		// Container is already locked in this case
143
-		c.Paused = false
144
-		daemon.setStateCounter(c)
145
-		daemon.updateHealthMonitor(c)
146
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
147
-			return err
153
+	case libcontainerd.EventResumed:
154
+		c.Lock()
155
+		defer c.Unlock()
156
+
157
+		if c.Paused {
158
+			c.Paused = false
159
+			daemon.setStateCounter(c)
160
+			daemon.updateHealthMonitor(c)
161
+
162
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
163
+				return err
164
+			}
165
+			daemon.LogContainerEvent(c, "unpause")
148 166
 		}
149
-		daemon.LogContainerEvent(c, "unpause")
150 167
 	}
151 168
 	return nil
152 169
 }
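
ProcessEvent replaces the old StateChanged callback: container exits (ei.Pid == c.Pid) and exec exits (resolved via ExecCommands.ByPid) now arrive through the same EventExit case instead of separate StateExit/StateExitProcess states. For orientation, a sketch of how the libcontainerd client is expected to deliver an exit event; variable names are illustrative and field types follow their use in this hunk:

    // Hypothetical delivery of an exit event from the libcontainerd event loop.
    ei := libcontainerd.EventInfo{
        Pid:      pid,      // PID of the exited process (container init or an exec)
        ExitCode: status,   // raw exit status reported by containerd
        ExitedAt: exitedAt, // timestamp of the exit
    }
    if err := daemon.ProcessEvent(containerID, libcontainerd.EventExit, ei); err != nil {
        logrus.WithError(err).WithField("container", containerID).Warn("failed to process exit event")
    }
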
... ...
@@ -5,15 +5,7 @@ import (
5 5
 	"github.com/docker/docker/libcontainerd"
6 6
 )
7 7
 
8
-// platformConstructExitStatus returns a platform specific exit status structure
9
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
10
-	return &container.ExitStatus{
11
-		ExitCode:  int(e.ExitCode),
12
-		OOMKilled: e.OOMKilled,
13
-	}
14
-}
15
-
16 8
 // postRunProcessing performs any processing needed on the container after it has stopped.
17
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
9
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
18 10
 	return nil
19 11
 }
... ...
@@ -5,14 +5,7 @@ import (
5 5
 	"github.com/docker/docker/libcontainerd"
6 6
 )
7 7
 
8
-// platformConstructExitStatus returns a platform specific exit status structure
9
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
10
-	return &container.ExitStatus{
11
-		ExitCode: int(e.ExitCode),
12
-	}
13
-}
14
-
15 8
 // postRunProcessing performs any processing needed on the container after it has stopped.
16
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
9
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
17 10
 	return nil
18 11
 }
... ...
@@ -1,40 +1,52 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
-	"fmt"
4
+	"context"
5 5
 
6 6
 	"github.com/docker/docker/container"
7 7
 	"github.com/docker/docker/libcontainerd"
8
+	"github.com/pkg/errors"
9
+	"github.com/sirupsen/logrus"
8 10
 )
9 11
 
10
-// platformConstructExitStatus returns a platform specific exit status structure
11
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
12
-	return &container.ExitStatus{
13
-		ExitCode: int(e.ExitCode),
14
-	}
15
-}
16
-
17
-// postRunProcessing perfoms any processing needed on the container after it has stopped.
18
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
19
-	if e.ExitCode == 0 && e.UpdatePending {
20
-		spec, err := daemon.createSpec(container)
12
+// postRunProcessing starts a servicing container if required
13
+func (daemon *Daemon) postRunProcessing(c *container.Container, ei libcontainerd.EventInfo) error {
14
+	if ei.ExitCode == 0 && ei.UpdatePending {
15
+		spec, err := daemon.createSpec(c)
21 16
 		if err != nil {
22 17
 			return err
23 18
 		}
24
-
25 19
 		// Turn on servicing
26 20
 		spec.Windows.Servicing = true
27 21
 
28
-		copts, err := daemon.getLibcontainerdCreateOptions(container)
22
+		copts, err := daemon.getLibcontainerdCreateOptions(c)
29 23
 		if err != nil {
30 24
 			return err
31 25
 		}
32 26
 
33
-		// Create a new servicing container, which will start, complete the update, and merge back the
34
-		// results if it succeeded, all as part of the below function call.
35
-		if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, container.InitializeStdio, copts...); err != nil {
36
-			container.SetExitCode(-1)
37
-			return fmt.Errorf("Post-run update servicing failed: %s", err)
27
+		// Create a new servicing container, which will start, complete the
28
+		// update, and merge back the results if it succeeded, all as part of
29
+		// the below function call.
30
+		ctx := context.Background()
31
+		svcID := c.ID + "_servicing"
32
+		logger := logrus.WithField("container", svcID)
33
+		if err := daemon.containerd.Create(ctx, svcID, spec, copts); err != nil {
34
+			c.SetExitCode(-1)
35
+			return errors.Wrap(err, "post-run update servicing failed")
36
+		}
37
+		_, err = daemon.containerd.Start(ctx, svcID, "", false, nil)
38
+		if err != nil {
39
+			logger.WithError(err).Warn("failed to run servicing container")
40
+			if err := daemon.containerd.Delete(ctx, svcID); err != nil {
41
+				logger.WithError(err).Warn("failed to delete servicing container")
42
+			}
43
+		} else {
44
+			if _, _, err := daemon.containerd.DeleteTask(ctx, svcID); err != nil {
45
+				logger.WithError(err).Warn("failed to delete servicing container task")
46
+			}
47
+			if err := daemon.containerd.Delete(ctx, svcID); err != nil {
48
+				logger.WithError(err).Warn("failed to delete servicing container")
49
+			}
38 50
 		}
39 51
 	}
40 52
 	return nil
... ...
@@ -156,7 +156,7 @@ func setDevices(s *specs.Spec, c *container.Container) error {
156 156
 	return nil
157 157
 }
158 158
 
159
-func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
159
+func (daemon *Daemon) setRlimits(s *specs.Spec, c *container.Container) error {
160 160
 	var rlimits []specs.POSIXRlimit
161 161
 
162 162
 	// We want to leave the original HostConfig alone so make a copy here
... ...
@@ -755,6 +755,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
755 755
 	if err := setResources(&s, c.HostConfig.Resources); err != nil {
756 756
 		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
757 757
 	}
758
+	s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj
758 759
 	s.Linux.Sysctl = c.HostConfig.Sysctls
759 760
 
760 761
 	p := s.Linux.CgroupsPath
... ...
@@ -763,11 +764,11 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
763 763
 		if err != nil {
764 764
 			return nil, err
765 765
 		}
766
-		p, _ = cgroups.GetOwnCgroup("cpu")
766
+		_, err = cgroups.GetOwnCgroup("cpu")
767 767
 		if err != nil {
768 768
 			return nil, err
769 769
 		}
770
-		p = filepath.Join(initPath, p)
770
+		p = filepath.Join(initPath, s.Linux.CgroupsPath)
771 771
 	}
772 772
 
773 773
 	// Clean path to guard against things like ../../../BAD
... ...
@@ -782,7 +783,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
782 782
 	if err := setDevices(&s, c); err != nil {
783 783
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
784 784
 	}
785
-	if err := setRlimits(daemon, &s, c); err != nil {
785
+	if err := daemon.setRlimits(&s, c); err != nil {
786 786
 		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
787 787
 	}
788 788
 	if err := setUser(&s, c); err != nil {
... ...
@@ -1,9 +1,11 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"fmt"
5 6
 
6 7
 	"github.com/docker/docker/container"
8
+	"github.com/sirupsen/logrus"
7 9
 )
8 10
 
9 11
 // ContainerPause pauses a container
... ...
@@ -33,7 +35,7 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
33 33
 
34 34
 	// We cannot Pause the container which is already paused
35 35
 	if container.Paused {
36
-		return fmt.Errorf("Container %s is already paused", container.ID)
36
+		return errNotPaused(container.ID)
37 37
 	}
38 38
 
39 39
 	// We cannot Pause the container which is restarting
... ...
@@ -41,9 +43,18 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
41 41
 		return errContainerIsRestarting(container.ID)
42 42
 	}
43 43
 
44
-	if err := daemon.containerd.Pause(container.ID); err != nil {
44
+	if err := daemon.containerd.Pause(context.Background(), container.ID); err != nil {
45 45
 		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
46 46
 	}
47 47
 
48
+	container.Paused = true
49
+	daemon.setStateCounter(container)
50
+	daemon.updateHealthMonitor(container)
51
+	daemon.LogContainerEvent(container, "pause")
52
+
53
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
54
+		logrus.WithError(err).Warn("could not save container to disk")
55
+	}
56
+
48 57
 	return nil
49 58
 }
... ...
@@ -6,7 +6,6 @@ import (
6 6
 
7 7
 	"github.com/docker/docker/daemon/config"
8 8
 	"github.com/docker/docker/daemon/discovery"
9
-	"github.com/docker/docker/libcontainerd"
10 9
 	"github.com/sirupsen/logrus"
11 10
 )
12 11
 
... ...
@@ -303,9 +302,6 @@ func (daemon *Daemon) reloadLiveRestore(conf *config.Config, attributes map[stri
303 303
 	// update corresponding configuration
304 304
 	if conf.IsValueSet("live-restore") {
305 305
 		daemon.configStore.LiveRestoreEnabled = conf.LiveRestoreEnabled
306
-		if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(conf.LiveRestoreEnabled)); err != nil {
307
-			return err
308
-		}
309 306
 	}
310 307
 
311 308
 	// prepare reload event attributes with updatable configurations
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"fmt"
5 6
 
6 7
 	"github.com/docker/docker/libcontainerd"
... ...
@@ -18,7 +19,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
18 18
 		return errNotRunning(container.ID)
19 19
 	}
20 20
 
21
-	if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil {
21
+	if err = daemon.containerd.ResizeTerminal(context.Background(), container.ID, libcontainerd.InitProcessName, width, height); err == nil {
22 22
 		attributes := map[string]string{
23 23
 			"height": fmt.Sprintf("%d", height),
24 24
 			"width":  fmt.Sprintf("%d", width),
... ...
@@ -36,5 +37,5 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error
36 36
 	if err != nil {
37 37
 		return err
38 38
 	}
39
-	return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height)
39
+	return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height)
40 40
 }
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"runtime"
5 6
 	"time"
6 7
 
... ...
@@ -113,6 +114,11 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
113 113
 		return stateConflictError{errors.New("container is marked for removal and cannot be started")}
114 114
 	}
115 115
 
116
+	if checkpointDir != "" {
117
+		// TODO(mlaventure): how would we support that?
118
+		return notAllowedError{errors.New("custom checkpointdir is not supported")}
119
+	}
120
+
116 121
 	// if we encounter an error during start we need to ensure that any other
117 122
 	// setup has been cleaned up properly
118 123
 	defer func() {
... ...
@@ -152,28 +158,56 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
152 152
 		return systemError{err}
153 153
 	}
154 154
 
155
-	createOptions, err := daemon.getLibcontainerdCreateOptions(container)
156
-	if err != nil {
157
-		return err
158
-	}
159
-
160 155
 	if resetRestartManager {
161 156
 		container.ResetRestartManager(true)
162 157
 	}
163 158
 
164
-	if checkpointDir == "" {
165
-		checkpointDir = container.CheckpointDir()
159
+	if daemon.saveApparmorConfig(container); err != nil {
160
+		return err
166 161
 	}
167 162
 
168
-	if daemon.saveApparmorConfig(container); err != nil {
163
+	if checkpoint != "" {
164
+		checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
165
+		if err != nil {
166
+			return err
167
+		}
168
+	}
169
+
170
+	createOptions, err := daemon.getLibcontainerdCreateOptions(container)
171
+	if err != nil {
169 172
 		return err
170 173
 	}
171 174
 
172
-	if err := daemon.containerd.Create(container.ID, checkpoint, checkpointDir, *spec, container.InitializeStdio, createOptions...); err != nil {
175
+	err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions)
176
+	if err != nil {
177
+		return translateContainerdStartErr(container.Path, container.SetExitCode, err)
178
+	}
179
+
180
+	// TODO(mlaventure): we need to specify checkpoint options here
181
+	pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
182
+		container.StreamConfig.Stdin() != nil || container.Config.Tty,
183
+		container.InitializeStdio)
184
+	if err != nil {
185
+		if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
186
+			logrus.WithError(err).WithField("container", container.ID).
187
+				Error("failed to delete failed start container")
188
+		}
173 189
 		return translateContainerdStartErr(container.Path, container.SetExitCode, err)
190
+	}
191
+
192
+	container.SetRunning(pid, true)
193
+	container.HasBeenManuallyStopped = false
194
+	container.HasBeenStartedBefore = true
195
+	daemon.setStateCounter(container)
196
+
197
+	daemon.initHealthMonitor(container)
174 198
 
199
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
200
+		logrus.WithError(err).WithField("container", container.ID).
201
+			Errorf("failed to store container")
175 202
 	}
176 203
 
204
+	daemon.LogContainerEvent(container, "start")
177 205
 	containerActions.WithValues("start").UpdateSince(start)
178 206
 
179 207
 	return nil
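
Start is now split into an explicit Create followed by Start because containerd 1.0 separates the static container object from its running task. Under the hood the new libcontainerd client (added later in this changeset) does roughly the following; a simplified sketch with error handling omitted:

    // Simplified view of what libcontainerd.Create/Start map to in containerd 1.0.
    ctr, _ := remote.NewContainer(ctx, id,
        containerd.WithSpec(ociSpec),
        containerd.WithRuntime("io.containerd.runtime.v1.linux", runtimeOptions))
    task, _ := ctr.NewTask(ctx, ioCreator) // wires up the stdio FIFOs
    _ = task.Start(ctx)                    // the process only runs from this point on

If Start fails, the container object is deleted again so a later retry does not hit the "id already in use" conflict from Create.
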
... ...
@@ -209,5 +243,10 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
209 209
 			logrus.Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err)
210 210
 		}
211 211
 	}
212
+
212 213
 	container.CancelAttachContext()
214
+
215
+	if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
216
+		logrus.Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err)
217
+	}
213 218
 }
... ...
@@ -3,29 +3,54 @@
3 3
 package daemon
4 4
 
5 5
 import (
6
+	"fmt"
7
+	"os/exec"
8
+	"path/filepath"
9
+
10
+	"github.com/containerd/containerd/linux/runcopts"
6 11
 	"github.com/docker/docker/container"
7
-	"github.com/docker/docker/libcontainerd"
8 12
 	"github.com/pkg/errors"
9 13
 )
10 14
 
11
-// getLibcontainerdCreateOptions callers must hold a lock on the container
12
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
13
-	createOptions := []libcontainerd.CreateOption{}
15
+func (daemon *Daemon) getRuntimeScript(container *container.Container) (string, error) {
16
+	name := container.HostConfig.Runtime
17
+	rt := daemon.configStore.GetRuntime(name)
18
+	if rt == nil {
19
+		return "", validationError{errors.Errorf("no such runtime '%s'", name)}
20
+	}
14 21
 
22
+	if len(rt.Args) > 0 {
23
+		// First check that the target exists, as using it in a script won't
24
+		// give us the right error
25
+		if _, err := exec.LookPath(rt.Path); err != nil {
26
+			return "", translateContainerdStartErr(container.Path, container.SetExitCode, err)
27
+		}
28
+		return filepath.Join(daemon.configStore.Root, "runtimes", name), nil
29
+	}
30
+	return rt.Path, nil
31
+}
32
+
33
+// getLibcontainerdCreateOptions callers must hold a lock on the container
34
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
15 35
 	// Ensure a runtime has been assigned to this container
16 36
 	if container.HostConfig.Runtime == "" {
17 37
 		container.HostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
18 38
 		container.CheckpointTo(daemon.containersReplica)
19 39
 	}
20 40
 
21
-	rt := daemon.configStore.GetRuntime(container.HostConfig.Runtime)
22
-	if rt == nil {
23
-		return nil, validationError{errors.Errorf("no such runtime '%s'", container.HostConfig.Runtime)}
41
+	path, err := daemon.getRuntimeScript(container)
42
+	if err != nil {
43
+		return nil, err
24 44
 	}
45
+	opts := &runcopts.RuncOptions{
46
+		Runtime: path,
47
+		RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,
48
+			fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
49
+	}
50
+
25 51
 	if UsingSystemd(daemon.configStore) {
26
-		rt.Args = append(rt.Args, "--systemd-cgroup=true")
52
+		opts.SystemdCgroup = true
27 53
 	}
28
-	createOptions = append(createOptions, libcontainerd.WithRuntime(rt.Path, rt.Args))
29 54
 
30
-	return createOptions, nil
55
+	return opts, nil
31 56
 }
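
Runtime selection is no longer passed as extra CLI arguments to the shim; it is handed to containerd's linux runtime plugin as runcopts.RuncOptions. For the stock setup this comes out roughly as below (the concrete paths depend on the configured runtime and exec-root, so treat them as illustrative):

    // Illustrative values for the default "runc" runtime on a typical Linux install.
    opts := &runcopts.RuncOptions{
        Runtime:       "docker-runc",                  // rt.Path of the default runtime
        RuntimeRoot:   "/var/run/docker/runtime-runc", // <exec-root>/runtime-<name>
        SystemdCgroup: false,                          // true when the daemon uses the systemd cgroup driver
    }

Runtimes configured with extra arguments get a small wrapper script under <data-root>/runtimes instead, since RuncOptions only carries a binary path.
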
... ...
@@ -3,12 +3,9 @@ package daemon
3 3
 import (
4 4
 	"github.com/Microsoft/opengcs/client"
5 5
 	"github.com/docker/docker/container"
6
-	"github.com/docker/docker/libcontainerd"
7 6
 )
8 7
 
9
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
10
-	createOptions := []libcontainerd.CreateOption{}
11
-
8
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
12 9
 	// LCOW options.
13 10
 	if container.OS == "linux" {
14 11
 		config := &client.Config{}
... ...
@@ -33,11 +30,9 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
33 33
 		if err := config.Validate(); err != nil {
34 34
 			return nil, err
35 35
 		}
36
-		lcowOpts := &libcontainerd.LCOWOption{
37
-			Config: config,
38
-		}
39
-		createOptions = append(createOptions, lcowOpts)
36
+
37
+		return config, nil
40 38
 	}
41 39
 
42
-	return createOptions, nil
40
+	return nil, nil
43 41
 }
... ...
@@ -3,6 +3,7 @@
3 3
 package daemon
4 4
 
5 5
 import (
6
+	"context"
6 7
 	"fmt"
7 8
 	"os/exec"
8 9
 	"regexp"
... ...
@@ -50,16 +51,16 @@ func appendProcess2ProcList(procList *container.ContainerTopOKBody, fields []str
50 50
 	procList.Processes = append(procList.Processes, process)
51 51
 }
52 52
 
53
-func hasPid(pids []int, pid int) bool {
54
-	for _, i := range pids {
55
-		if i == pid {
53
+func hasPid(procs []uint32, pid int) bool {
54
+	for _, p := range procs {
55
+		if int(p) == pid {
56 56
 			return true
57 57
 		}
58 58
 	}
59 59
 	return false
60 60
 }
61 61
 
62
-func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, error) {
62
+func parsePSOutput(output []byte, procs []uint32) (*container.ContainerTopOKBody, error) {
63 63
 	procList := &container.ContainerTopOKBody{}
64 64
 
65 65
 	lines := strings.Split(string(output), "\n")
... ...
@@ -101,7 +102,7 @@ func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, er
101 101
 			return nil, fmt.Errorf("Unexpected pid '%s': %s", fields[pidIndex], err)
102 102
 		}
103 103
 
104
-		if hasPid(pids, p) {
104
+		if hasPid(procs, p) {
105 105
 			preContainedPidFlag = true
106 106
 			appendProcess2ProcList(procList, fields)
107 107
 			continue
... ...
@@ -138,7 +139,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
138 138
 		return nil, errContainerIsRestarting(container.ID)
139 139
 	}
140 140
 
141
-	pids, err := daemon.containerd.GetPidsForContainer(container.ID)
141
+	procs, err := daemon.containerd.ListPids(context.Background(), container.ID)
142 142
 	if err != nil {
143 143
 		return nil, err
144 144
 	}
... ...
@@ -147,7 +148,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
147 147
 	if err != nil {
148 148
 		return nil, fmt.Errorf("Error running ps: %v", err)
149 149
 	}
150
-	procList, err := parsePSOutput(output, pids)
150
+	procList, err := parsePSOutput(output, procs)
151 151
 	if err != nil {
152 152
 		return nil, err
153 153
 	}
... ...
@@ -36,7 +36,7 @@ func TestContainerTopValidatePSArgs(t *testing.T) {
36 36
 func TestContainerTopParsePSOutput(t *testing.T) {
37 37
 	tests := []struct {
38 38
 		output      []byte
39
-		pids        []int
39
+		pids        []uint32
40 40
 		errExpected bool
41 41
 	}{
42 42
 		{[]byte(`  PID COMMAND
... ...
@@ -44,26 +44,26 @@ func TestContainerTopParsePSOutput(t *testing.T) {
44 44
    43 bar
45 45
 		- -
46 46
   100 baz
47
-`), []int{42, 43}, false},
47
+`), []uint32{42, 43}, false},
48 48
 		{[]byte(`  UID COMMAND
49 49
    42 foo
50 50
    43 bar
51 51
 		- -
52 52
   100 baz
53
-`), []int{42, 43}, true},
53
+`), []uint32{42, 43}, true},
54 54
 		// unicode space (U+2003, 0xe2 0x80 0x83)
55 55
 		{[]byte(` PID COMMAND
56 56
    42 foo
57 57
    43 bar
58 58
 		- -
59 59
   100 baz
60
-`), []int{42, 43}, true},
60
+`), []uint32{42, 43}, true},
61 61
 		// the first space is U+2003, the second one is ascii.
62 62
 		{[]byte(` PID COMMAND
63 63
    42 foo
64 64
    43 bar
65 65
   100 baz
66
-`), []int{42, 43}, true},
66
+`), []uint32{42, 43}, true},
67 67
 	}
68 68
 
69 69
 	for _, f := range tests {
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"errors"
5 6
 	"fmt"
6 7
 	"time"
... ...
@@ -34,7 +35,15 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
34 34
 		return nil, err
35 35
 	}
36 36
 
37
-	s, err := daemon.containerd.Summary(container.ID)
37
+	if !container.IsRunning() {
38
+		return nil, errNotRunning(container.ID)
39
+	}
40
+
41
+	if container.IsRestarting() {
42
+		return nil, errContainerIsRestarting(container.ID)
43
+	}
44
+
45
+	s, err := daemon.containerd.Summary(context.Background(), container.ID)
38 46
 	if err != nil {
39 47
 		return nil, err
40 48
 	}
... ...
@@ -49,5 +58,6 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
49 49
 			fmt.Sprintf("%02d:%02d:%02d.%03d", int(d.Hours()), int(d.Minutes())%60, int(d.Seconds())%60, int(d.Nanoseconds()/1000000)%1000),
50 50
 			units.HumanSize(float64(j.MemoryWorkingSetPrivateBytes))})
51 51
 	}
52
+
52 53
 	return procList, nil
53 54
 }
... ...
@@ -1,9 +1,11 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"fmt"
5 6
 
6 7
 	"github.com/docker/docker/container"
8
+	"github.com/sirupsen/logrus"
7 9
 )
8 10
 
9 11
 // ContainerUnpause unpauses a container
... ...
@@ -30,9 +32,18 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error {
30 30
 		return fmt.Errorf("Container %s is not paused", container.ID)
31 31
 	}
32 32
 
33
-	if err := daemon.containerd.Resume(container.ID); err != nil {
33
+	if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
34 34
 		return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
35 35
 	}
36 36
 
37
+	container.Paused = false
38
+	daemon.setStateCounter(container)
39
+	daemon.updateHealthMonitor(container)
40
+	daemon.LogContainerEvent(container, "unpause")
41
+
42
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
43
+		logrus.WithError(err).Warnf("could not save container to disk")
44
+	}
45
+
37 46
 	return nil
38 47
 }
... ...
@@ -1,6 +1,7 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"fmt"
5 6
 
6 7
 	"github.com/docker/docker/api/types/container"
... ...
@@ -76,7 +77,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
76 76
 	// If container is running (including paused), we need to update configs
77 77
 	// to the real world.
78 78
 	if container.IsRunning() && !container.IsRestarting() {
79
-		if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
79
+		if err := daemon.containerd.UpdateResources(context.Background(), container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
80 80
 			restoreConfig = true
81 81
 			// TODO: it would be nice if containerd responded with better errors here so we can classify this better.
82 82
 			return errCannotUpdate(container.ID, systemError{err})
... ...
@@ -7,26 +7,43 @@ import (
7 7
 
8 8
 	"github.com/docker/docker/api/types/container"
9 9
 	"github.com/docker/docker/libcontainerd"
10
+	specs "github.com/opencontainers/runtime-spec/specs-go"
10 11
 )
11 12
 
12
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
13
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
13 14
 	var r libcontainerd.Resources
14
-	r.BlkioWeight = uint64(resources.BlkioWeight)
15
-	r.CpuShares = uint64(resources.CPUShares)
15
+
16
+	r.BlockIO = &specs.LinuxBlockIO{
17
+		Weight: &resources.BlkioWeight,
18
+	}
19
+
20
+	shares := uint64(resources.CPUShares)
21
+	r.CPU = &specs.LinuxCPU{
22
+		Shares: &shares,
23
+		Cpus:   resources.CpusetCpus,
24
+		Mems:   resources.CpusetMems,
25
+	}
26
+
27
+	var (
28
+		period uint64
29
+		quota  int64
30
+	)
16 31
 	if resources.NanoCPUs != 0 {
17
-		r.CpuPeriod = uint64(100 * time.Millisecond / time.Microsecond)
18
-		r.CpuQuota = uint64(resources.NanoCPUs) * r.CpuPeriod / 1e9
19
-	} else {
20
-		r.CpuPeriod = uint64(resources.CPUPeriod)
21
-		r.CpuQuota = uint64(resources.CPUQuota)
32
+		period = uint64(100 * time.Millisecond / time.Microsecond)
33
+		quota = resources.NanoCPUs * int64(period) / 1e9
22 34
 	}
23
-	r.CpusetCpus = resources.CpusetCpus
24
-	r.CpusetMems = resources.CpusetMems
25
-	r.MemoryLimit = uint64(resources.Memory)
35
+	r.CPU.Period = &period
36
+	r.CPU.Quota = &quota
37
+
38
+	r.Memory = &specs.LinuxMemory{
39
+		Limit:       &resources.Memory,
40
+		Reservation: &resources.MemoryReservation,
41
+		Kernel:      &resources.KernelMemory,
42
+	}
43
+
26 44
 	if resources.MemorySwap > 0 {
27
-		r.MemorySwap = uint64(resources.MemorySwap)
45
+		r.Memory.Swap = &resources.MemorySwap
28 46
 	}
29
-	r.MemoryReservation = uint64(resources.MemoryReservation)
30
-	r.KernelMemoryLimit = uint64(resources.KernelMemory)
31
-	return r
47
+
48
+	return &r
32 49
 }
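
As a sanity check on the CPU math above: NanoCPUs expresses CPUs in billionths of a CPU, the period is pinned to 100ms (in microseconds), and the quota is scaled to match. A small standalone example, limiting a container to half a CPU:

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        // --cpus 0.5 on the CLI translates to NanoCPUs = 500000000.
        nanoCPUs := int64(500000000)

        period := uint64(100 * time.Millisecond / time.Microsecond) // 100000 µs
        quota := nanoCPUs * int64(period) / 1e9                     // 50000 µs

        // The task may consume 50ms of CPU time per 100ms period, i.e. half a CPU.
        fmt.Println(period, quota) // 100000 50000
    }
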
... ...
@@ -7,7 +7,7 @@ import (
7 7
 	"github.com/docker/docker/libcontainerd"
8 8
 )
9 9
 
10
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
11
-	var r libcontainerd.Resources
12
-	return r
10
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
11
+	// We don't support update, so do nothing
12
+	return nil
13 13
 }
... ...
@@ -17,6 +17,7 @@ const (
17 17
 	Version            string = "$VERSION"
18 18
 	BuildTime          string = "$BUILDTIME"
19 19
 	IAmStatic          string = "${IAMSTATIC:-true}"
20
+	ContainerdCommitID string = "${CONTAINERD_COMMIT}"
20 21
 )
21 22
 
22 23
 // AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen
... ...
@@ -31,9 +32,8 @@ package dockerversion
31 31
 // Default build-time variable for library-import.
32 32
 // This file is overridden on build with build-time informations.
33 33
 const (
34
-	ContainerdCommitID string = "${CONTAINERD_COMMIT}"
35
-	RuncCommitID       string = "${RUNC_COMMIT}"
36
-	InitCommitID       string = "${TINI_COMMIT}"
34
+	RuncCommitID string = "${RUNC_COMMIT}"
35
+	InitCommitID string = "${TINI_COMMIT}"
37 36
 )
38 37
 
39 38
 // AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen
... ...
@@ -222,7 +222,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error {
222 222
 		return errors.Wrapf(err, "[%s] could not find docker binary in $PATH", d.id)
223 223
 	}
224 224
 	args := append(d.GlobalFlags,
225
-		"--containerd", "/var/run/docker/libcontainerd/docker-containerd.sock",
225
+		"--containerd", "/var/run/docker/containerd/docker-containerd.sock",
226 226
 		"--data-root", d.Root,
227 227
 		"--exec-root", d.execRoot,
228 228
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.Folder),
... ...
@@ -457,6 +457,8 @@ out2:
457 457
 		return err
458 458
 	}
459 459
 
460
+	d.cmd.Wait()
461
+
460 462
 	if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.Folder)); err != nil {
461 463
 		return err
462 464
 	}
... ...
@@ -285,7 +285,7 @@ func (s *DockerSuite) TestAPIStatsNoStreamConnectedContainers(c *check.C) {
285 285
 	id2 := strings.TrimSpace(out2)
286 286
 	c.Assert(waitRun(id2), checker.IsNil)
287 287
 
288
-	ch := make(chan error)
288
+	ch := make(chan error, 1)
289 289
 	go func() {
290 290
 		resp, body, err := request.Get(fmt.Sprintf("/containers/%s/stats?stream=false", id2))
291 291
 		defer body.Close()
... ...
@@ -147,7 +147,10 @@ func (s *DockerSuite) TestAttachDisconnect(c *check.C) {
147 147
 	c.Assert(err, check.IsNil)
148 148
 	defer stdout.Close()
149 149
 	c.Assert(cmd.Start(), check.IsNil)
150
-	defer cmd.Process.Kill()
150
+	defer func() {
151
+		cmd.Process.Kill()
152
+		cmd.Wait()
153
+	}()
151 154
 
152 155
 	_, err = stdin.Write([]byte("hello\n"))
153 156
 	c.Assert(err, check.IsNil)
... ...
@@ -149,6 +149,11 @@ func (s *DockerSuite) TestBuildCancellationKillsSleep(c *check.C) {
149 149
 	if err := buildCmd.Start(); err != nil {
150 150
 		c.Fatalf("failed to run build: %s", err)
151 151
 	}
152
+	// always clean up
153
+	defer func() {
154
+		buildCmd.Process.Kill()
155
+		buildCmd.Wait()
156
+	}()
152 157
 
153 158
 	matchCID := regexp.MustCompile("Running in (.+)")
154 159
 	scanner := bufio.NewScanner(stdoutBuild)
... ...
@@ -28,6 +28,7 @@ import (
28 28
 	"github.com/docker/docker/api"
29 29
 	"github.com/docker/docker/api/types"
30 30
 	"github.com/docker/docker/client"
31
+	moby_daemon "github.com/docker/docker/daemon"
31 32
 	"github.com/docker/docker/integration-cli/checker"
32 33
 	"github.com/docker/docker/integration-cli/cli"
33 34
 	"github.com/docker/docker/integration-cli/daemon"
... ...
@@ -1448,7 +1449,8 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *chec
1448 1448
 	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
1449 1449
 
1450 1450
 	// kill the container
1451
-	icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id).Assert(c, icmd.Success)
1451
+	icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock",
1452
+		"--namespace", moby_daemon.MainNamespace, "tasks", "kill", id).Assert(c, icmd.Success)
1452 1453
 
1453 1454
 	// restart daemon.
1454 1455
 	d.Restart(c)
... ...
@@ -1987,7 +1989,6 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithNames(c *check.C) {
1987 1987
 
1988 1988
 // TestDaemonRestartWithKilledRunningContainer requires live restore of running containers
1989 1989
 func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) {
1990
-	// TODO(mlaventure): Not sure what would the exit code be on windows
1991 1990
 	testRequires(t, DaemonIsLinux)
1992 1991
 	s.d.StartWithBusybox(t)
1993 1992
 
... ...
@@ -2008,7 +2009,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
2008 2008
 	}
2009 2009
 
2010 2010
 	// kill the container
2011
-	icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", cid).Assert(t, icmd.Success)
2011
+	icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock",
2012
+		"--namespace", moby_daemon.MainNamespace, "tasks", "kill", cid).Assert(t, icmd.Success)
2012 2013
 
2013 2014
 	// Give time to containerd to process the command if we don't
2014 2015
 	// the exit event might be received after we do the inspect
... ...
@@ -2076,7 +2078,6 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
2076 2076
 
2077 2077
 // TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers.
2078 2078
 func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
2079
-	// TODO(mlaventure): Not sure what would the exit code be on windows
2080 2079
 	testRequires(t, DaemonIsLinux)
2081 2080
 	s.d.StartWithBusybox(t, "--live-restore")
2082 2081
 
... ...
@@ -2103,8 +2104,9 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che
2103 2103
 	// resume the container
2104 2104
 	result := icmd.RunCommand(
2105 2105
 		ctrBinary,
2106
-		"--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock",
2107
-		"containers", "resume", cid)
2106
+		"--address", "/var/run/docker/containerd/docker-containerd.sock",
2107
+		"--namespace", moby_daemon.MainNamespace,
2108
+		"tasks", "resume", cid)
2108 2109
 	result.Assert(t, icmd.Success)
2109 2110
 
2110 2111
 	// Give time to containerd to process the command if we don't
... ...
@@ -86,6 +86,7 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
86 86
 	// timeouts creating so many containers simultaneously. This is a due to
87 87
 	// a bug in the Windows platform. It will be fixed in a Windows Update.
88 88
 	numContainers := 17
89
+	eventPerContainer := 7 // create, attach, network connect, start, die, network disconnect, destroy
89 90
 	numConcurrentContainers := numContainers
90 91
 	if testEnv.DaemonPlatform() == "windows" {
91 92
 		numConcurrentContainers = 4
... ...
@@ -93,17 +94,19 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
93 93
 	sem := make(chan bool, numConcurrentContainers)
94 94
 	errChan := make(chan error, numContainers)
95 95
 
96
+	startTime := daemonUnixTime(c)
97
+
96 98
 	args := []string{"run", "--rm", "busybox", "true"}
97 99
 	for i := 0; i < numContainers; i++ {
98 100
 		sem <- true
99
-		go func() {
101
+		go func(i int) {
100 102
 			defer func() { <-sem }()
101 103
 			out, err := exec.Command(dockerBinary, args...).CombinedOutput()
102 104
 			if err != nil {
103 105
 				err = fmt.Errorf("%v: %s", err, string(out))
104 106
 			}
105 107
 			errChan <- err
106
-		}()
108
+		}(i)
107 109
 	}
108 110
 
109 111
 	// Wait for all goroutines to finish
... ...
@@ -116,10 +119,10 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
116 116
 		c.Assert(err, checker.IsNil, check.Commentf("%q failed with error", strings.Join(args, " ")))
117 117
 	}
118 118
 
119
-	out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
119
+	out, _ := dockerCmd(c, "events", "--since="+startTime, "--until", daemonUnixTime(c))
120 120
 	events := strings.Split(out, "\n")
121 121
 	nEvents := len(events) - 1
122
-	c.Assert(nEvents, checker.Equals, 256, check.Commentf("events should be limited to 256, but received %d", nEvents))
122
+	c.Assert(nEvents, checker.Equals, numContainers*eventPerContainer, check.Commentf("expected %d events, got %d", numContainers*eventPerContainer, nEvents))
123 123
 }
124 124
 
125 125
 func (s *DockerSuite) TestEventsContainerEvents(c *check.C) {
... ...
@@ -533,7 +536,10 @@ func (s *DockerSuite) TestEventsAttach(c *check.C) {
533 533
 	c.Assert(err, checker.IsNil)
534 534
 	defer stdout.Close()
535 535
 	c.Assert(cmd.Start(), checker.IsNil)
536
-	defer cmd.Process.Kill()
536
+	defer func() {
537
+		cmd.Process.Kill()
538
+		cmd.Wait()
539
+	}()
537 540
 
538 541
 	// Make sure we're done attaching by writing/reading some stuff
539 542
 	_, err = stdin.Write([]byte("hello\n"))
... ...
@@ -230,6 +230,7 @@ func (s *DockerSuite) TestLogsFollowSlowStdoutConsumer(c *check.C) {
230 230
 	stdout, err := logCmd.StdoutPipe()
231 231
 	c.Assert(err, checker.IsNil)
232 232
 	c.Assert(logCmd.Start(), checker.IsNil)
233
+	defer func() { go logCmd.Wait() }()
233 234
 
234 235
 	// First read slowly
235 236
 	bytes1, err := ConsumeWithSpeed(stdout, 10, 50*time.Millisecond, stopSlowRead)
... ...
@@ -1625,6 +1625,7 @@ func (s *DockerSuite) TestEmbeddedDNSInvalidInput(c *check.C) {
1625 1625
 func (s *DockerSuite) TestDockerNetworkConnectFailsNoInspectChange(c *check.C) {
1626 1626
 	dockerCmd(c, "run", "-d", "--name=bb", "busybox", "top")
1627 1627
 	c.Assert(waitRun("bb"), check.IsNil)
1628
+	defer dockerCmd(c, "stop", "bb")
1628 1629
 
1629 1630
 	ns0 := inspectField(c, "bb", "NetworkSettings.Networks.bridge")
1630 1631
 
... ...
@@ -2249,6 +2249,7 @@ func (s *DockerSuite) TestRunSlowStdoutConsumer(c *check.C) {
2249 2249
 	if err := cont.Start(); err != nil {
2250 2250
 		c.Fatal(err)
2251 2251
 	}
2252
+	defer func() { go cont.Wait() }()
2252 2253
 	n, err := ConsumeWithSpeed(stdout, 10000, 5*time.Millisecond, nil)
2253 2254
 	if err != nil {
2254 2255
 		c.Fatal(err)
... ...
@@ -206,8 +206,10 @@ func (s *DockerSuite) TestDeprecatedPostContainersStartWithLinksInHostConfigIdLi
206 206
 	testRequires(c, DaemonIsLinux)
207 207
 	name := "test-host-config-links"
208 208
 	out, _ := dockerCmd(c, "run", "--name", "link0", "-d", "busybox", "top")
209
+	defer dockerCmd(c, "stop", "link0")
209 210
 	id := strings.TrimSpace(out)
210 211
 	dockerCmd(c, "create", "--name", name, "--link", id, "busybox", "top")
212
+	defer dockerCmd(c, "stop", name)
211 213
 
212 214
 	hc := inspectFieldJSON(c, name, "HostConfig")
213 215
 	config := `{"HostConfig":` + hc + `}`
... ...
@@ -69,7 +69,7 @@ func (e *eventObserver) Start() error {
69 69
 // Stop stops the events command.
70 70
 func (e *eventObserver) Stop() {
71 71
 	e.command.Process.Kill()
72
-	e.command.Process.Release()
72
+	e.command.Wait()
73 73
 }
74 74
 
75 75
 // Match tries to match the events output with a given matcher.
... ...
@@ -1,6 +1,7 @@
1 1
 package service
2 2
 
3 3
 import (
4
+	"runtime"
4 5
 	"testing"
5 6
 	"time"
6 7
 
... ...
@@ -42,8 +43,15 @@ func TestCreateWithLBSandbox(t *testing.T) {
42 42
 	})
43 43
 	require.NoError(t, err)
44 44
 
45
+	pollSettings := func(config *poll.Settings) {
46
+		if runtime.GOARCH == "arm" {
47
+			config.Timeout = 30 * time.Second
48
+			config.Delay = 100 * time.Millisecond
49
+		}
50
+	}
51
+
45 52
 	serviceID := serviceResp.ID
46
-	poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances))
53
+	poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances), pollSettings)
47 54
 
48 55
 	_, _, err = client.ServiceInspectWithRaw(context.Background(), serviceID, types.ServiceInspectOptions{})
49 56
 	require.NoError(t, err)
... ...
@@ -55,7 +63,7 @@ func TestCreateWithLBSandbox(t *testing.T) {
55 55
 	err = client.ServiceRemove(context.Background(), serviceID)
56 56
 	require.NoError(t, err)
57 57
 
58
-	poll.WaitOn(t, serviceIsRemoved(client, serviceID))
58
+	poll.WaitOn(t, serviceIsRemoved(client, serviceID), pollSettings)
59 59
 	err = client.NetworkRemove(context.Background(), overlayID)
60 60
 	require.NoError(t, err)
61 61
 
62 62
deleted file mode 100644
... ...
@@ -1,46 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	"fmt"
5
-	"sync"
6
-
7
-	"github.com/docker/docker/pkg/locker"
8
-)
9
-
10
-// clientCommon contains the platform agnostic fields used in the client structure
11
-type clientCommon struct {
12
-	backend    Backend
13
-	containers map[string]*container
14
-	locker     *locker.Locker
15
-	mapMutex   sync.RWMutex // protects read/write operations from containers map
16
-}
17
-
18
-func (clnt *client) lock(containerID string) {
19
-	clnt.locker.Lock(containerID)
20
-}
21
-
22
-func (clnt *client) unlock(containerID string) {
23
-	clnt.locker.Unlock(containerID)
24
-}
25
-
26
-// must hold a lock for cont.containerID
27
-func (clnt *client) appendContainer(cont *container) {
28
-	clnt.mapMutex.Lock()
29
-	clnt.containers[cont.containerID] = cont
30
-	clnt.mapMutex.Unlock()
31
-}
32
-func (clnt *client) deleteContainer(containerID string) {
33
-	clnt.mapMutex.Lock()
34
-	delete(clnt.containers, containerID)
35
-	clnt.mapMutex.Unlock()
36
-}
37
-
38
-func (clnt *client) getContainer(containerID string) (*container, error) {
39
-	clnt.mapMutex.RLock()
40
-	container, ok := clnt.containers[containerID]
41
-	defer clnt.mapMutex.RUnlock()
42
-	if !ok {
43
-		return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error
44
-	}
45
-	return container, nil
46
-}
47 1
new file mode 100644
... ...
@@ -0,0 +1,802 @@
0
+// +build !windows
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"context"
6
+	"encoding/json"
7
+	"fmt"
8
+	"io"
9
+	"os"
10
+	"path/filepath"
11
+	"reflect"
12
+	"runtime"
13
+	"strings"
14
+	"sync"
15
+	"syscall"
16
+	"time"
17
+
18
+	"google.golang.org/grpc"
19
+
20
+	"github.com/containerd/containerd"
21
+	eventsapi "github.com/containerd/containerd/api/services/events/v1"
22
+	"github.com/containerd/containerd/api/types"
23
+	"github.com/containerd/containerd/archive"
24
+	"github.com/containerd/containerd/content"
25
+	"github.com/containerd/containerd/images"
26
+	"github.com/containerd/containerd/linux/runcopts"
27
+	"github.com/containerd/typeurl"
28
+	"github.com/docker/docker/pkg/ioutils"
29
+	"github.com/opencontainers/image-spec/specs-go/v1"
30
+	"github.com/opencontainers/runtime-spec/specs-go"
31
+	"github.com/pkg/errors"
32
+	"github.com/sirupsen/logrus"
33
+)
34
+
35
+// InitProcessName is the name given to the first process of a
36
+// container
37
+const InitProcessName = "init"
38
+
39
+type container struct {
40
+	sync.Mutex
41
+
42
+	bundleDir string
43
+	ctr       containerd.Container
44
+	task      containerd.Task
45
+	execs     map[string]containerd.Process
46
+	oomKilled bool
47
+}
48
+
49
+type client struct {
50
+	sync.RWMutex // protects containers map
51
+
52
+	remote   *containerd.Client
53
+	stateDir string
54
+	logger   *logrus.Entry
55
+
56
+	namespace  string
57
+	backend    Backend
58
+	eventQ     queue
59
+	containers map[string]*container
60
+}
61
+
62
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (alive bool, pid int, err error) {
63
+	c.Lock()
64
+	defer c.Unlock()
65
+
66
+	var cio containerd.IO
67
+	defer func() {
68
+		err = wrapError(err)
69
+	}()
70
+
71
+	ctr, err := c.remote.LoadContainer(ctx, id)
72
+	if err != nil {
73
+		return false, -1, errors.WithStack(err)
74
+	}
75
+
76
+	defer func() {
77
+		if err != nil && cio != nil {
78
+			cio.Cancel()
79
+			cio.Close()
80
+		}
81
+	}()
82
+
83
+	t, err := ctr.Task(ctx, func(fifos *containerd.FIFOSet) (containerd.IO, error) {
84
+		io, err := newIOPipe(fifos)
85
+		if err != nil {
86
+			return nil, err
87
+		}
88
+
89
+		cio, err = attachStdio(io)
90
+		return cio, err
91
+	})
92
+	if err != nil && !strings.Contains(err.Error(), "no running task found") {
93
+		return false, -1, err
94
+	}
95
+
96
+	if t != nil {
97
+		s, err := t.Status(ctx)
98
+		if err != nil {
99
+			return false, -1, err
100
+		}
101
+
102
+		alive = s.Status != containerd.Stopped
103
+		pid = int(t.Pid())
104
+	}
105
+	c.containers[id] = &container{
106
+		bundleDir: filepath.Join(c.stateDir, id),
107
+		ctr:       ctr,
108
+		task:      t,
109
+		// TODO(mlaventure): load execs
110
+	}
111
+
112
+	c.logger.WithFields(logrus.Fields{
113
+		"container": id,
114
+		"alive":     alive,
115
+		"pid":       pid,
116
+	}).Debug("restored container")
117
+
118
+	return alive, pid, nil
119
+}
120
+
121
+func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error {
122
+	if ctr := c.getContainer(id); ctr != nil {
123
+		return errors.WithStack(newConflictError("id already in use"))
124
+	}
125
+
126
+	bdir, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec)
127
+	if err != nil {
128
+		return wrapSystemError(errors.Wrap(err, "prepare bundle dir failed"))
129
+	}
130
+
131
+	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
132
+
133
+	cdCtr, err := c.remote.NewContainer(ctx, id,
134
+		containerd.WithSpec(ociSpec),
135
+		// TODO(mlaventure): when containerd supports lcow, revisit runtime value
136
+		containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
137
+	if err != nil {
138
+		return err
139
+	}
140
+
141
+	c.Lock()
142
+	c.containers[id] = &container{
143
+		bundleDir: bdir,
144
+		ctr:       cdCtr,
145
+	}
146
+	c.Unlock()
147
+
148
+	return nil
149
+}
150
+
151
+// Start creates and starts a task for the specified container id
152
+func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
153
+	ctr := c.getContainer(id)
154
+	switch {
155
+	case ctr == nil:
156
+		return -1, errors.WithStack(newNotFoundError("no such container"))
157
+	case ctr.task != nil:
158
+		return -1, errors.WithStack(newConflictError("container already started"))
159
+	}
160
+
161
+	var (
162
+		cp             *types.Descriptor
163
+		t              containerd.Task
164
+		cio            containerd.IO
165
+		err            error
166
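+		// closed once the task is registered in the containers map (or creation fails), so the stdin closer set up in createIO can safely look up the process and call CloseIO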
+		stdinCloseSync = make(chan struct{})
167
+	)
168
+
169
+	if checkpointDir != "" {
170
+		// write checkpoint to the content store
171
+		tar := archive.Diff(ctx, "", checkpointDir)
172
+		cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
173
+		// remove the checkpoint when we're done
174
+		defer func() {
175
+			if cp != nil {
176
+				err := c.remote.ContentStore().Delete(context.Background(), cp.Digest)
177
+				if err != nil {
178
+					c.logger.WithError(err).WithFields(logrus.Fields{
179
+						"ref":    checkpointDir,
180
+						"digest": cp.Digest,
181
+					}).Warnf("failed to delete temporary checkpoint entry")
182
+				}
183
+			}
184
+		}()
185
+		if err := tar.Close(); err != nil {
186
+			return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
187
+		}
188
+		if err != nil {
189
+			return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
190
+		}
191
+	}
192
+
193
+	spec, err := ctr.ctr.Spec(ctx)
194
+	if err != nil {
195
+		return -1, errors.Wrap(err, "failed to retrieve spec")
196
+	}
197
+	uid, gid := getSpecUser(spec)
198
+	t, err = ctr.ctr.NewTask(ctx,
199
+		func(id string) (containerd.IO, error) {
200
+			cio, err = c.createIO(ctr.bundleDir, id, InitProcessName, stdinCloseSync, withStdin, spec.Process.Terminal, attachStdio)
201
+			return cio, err
202
+		},
203
+		func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
204
+			info.Checkpoint = cp
205
+			info.Options = &runcopts.CreateOptions{
206
+				IoUid: uint32(uid),
207
+				IoGid: uint32(gid),
208
+			}
209
+			return nil
210
+		})
211
+	if err != nil {
212
+		close(stdinCloseSync)
213
+		if cio != nil {
214
+			cio.Cancel()
215
+			cio.Close()
216
+		}
217
+		return -1, err
218
+	}
219
+
220
+	c.Lock()
221
+	c.containers[id].task = t
222
+	c.Unlock()
223
+
224
+	// Signal c.createIO that it can call CloseIO
225
+	close(stdinCloseSync)
226
+
227
+	if err := t.Start(ctx); err != nil {
228
+		if _, err := t.Delete(ctx); err != nil {
229
+			c.logger.WithError(err).WithField("container", id).
230
+				Error("failed to delete task after failed start")
231
+		}
232
+		c.Lock()
233
+		c.containers[id].task = nil
234
+		c.Unlock()
235
+		return -1, err
236
+	}
237
+
238
+	return int(t.Pid()), nil
239
+}
240
+
241
+func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
242
+	ctr := c.getContainer(containerID)
243
+	switch {
244
+	case ctr == nil:
245
+		return -1, errors.WithStack(newNotFoundError("no such container"))
246
+	case ctr.task == nil:
247
+		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
248
+	case ctr.execs != nil && ctr.execs[processID] != nil:
249
+		return -1, errors.WithStack(newConflictError("id already in use"))
250
+	}
251
+
252
+	var (
253
+		p              containerd.Process
254
+		cio            containerd.IO
255
+		err            error
256
+		stdinCloseSync = make(chan struct{})
257
+	)
258
+	defer func() {
259
+		if err != nil {
260
+			if cio != nil {
261
+				cio.Cancel()
262
+				cio.Close()
263
+			}
264
+		}
265
+	}()
266
+
267
+	p, err = ctr.task.Exec(ctx, processID, spec, func(id string) (containerd.IO, error) {
268
+		cio, err = c.createIO(ctr.bundleDir, containerID, processID, stdinCloseSync, withStdin, spec.Terminal, attachStdio)
269
+		return cio, err
270
+	})
271
+	if err != nil {
272
+		close(stdinCloseSync)
273
+		if cio != nil {
274
+			cio.Cancel()
275
+			cio.Close()
276
+		}
277
+		return -1, err
278
+	}
279
+
280
+	ctr.Lock()
281
+	if ctr.execs == nil {
282
+		ctr.execs = make(map[string]containerd.Process)
283
+	}
284
+	ctr.execs[processID] = p
285
+	ctr.Unlock()
286
+
287
+	// Signal c.createIO that it can call CloseIO
288
+	close(stdinCloseSync)
289
+
290
+	if err = p.Start(ctx); err != nil {
291
+		p.Delete(context.Background())
292
+		ctr.Lock()
293
+		delete(ctr.execs, processID)
294
+		ctr.Unlock()
295
+		return -1, err
296
+	}
297
+
298
+	return int(p.Pid()), nil
299
+}
300
+
301
+func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error {
302
+	p, err := c.getProcess(containerID, processID)
303
+	if err != nil {
304
+		return err
305
+	}
306
+	return p.Kill(ctx, syscall.Signal(signal))
307
+}
308
+
309
+func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
310
+	p, err := c.getProcess(containerID, processID)
311
+	if err != nil {
312
+		return err
313
+	}
314
+
315
+	return p.Resize(ctx, uint32(width), uint32(height))
316
+}
317
+
318
+func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
319
+	p, err := c.getProcess(containerID, processID)
320
+	if err != nil {
321
+		return err
322
+	}
323
+
324
+	return p.CloseIO(ctx, containerd.WithStdinCloser)
325
+}
326
+
327
+func (c *client) Pause(ctx context.Context, containerID string) error {
328
+	p, err := c.getProcess(containerID, InitProcessName)
329
+	if err != nil {
330
+		return err
331
+	}
332
+
333
+	return p.(containerd.Task).Pause(ctx)
334
+}
335
+
336
+func (c *client) Resume(ctx context.Context, containerID string) error {
337
+	p, err := c.getProcess(containerID, InitProcessName)
338
+	if err != nil {
339
+		return err
340
+	}
341
+
342
+	return p.(containerd.Task).Resume(ctx)
343
+}
344
+
345
+func (c *client) Stats(ctx context.Context, containerID string) (*Stats, error) {
346
+	p, err := c.getProcess(containerID, InitProcessName)
347
+	if err != nil {
348
+		return nil, err
349
+	}
350
+
351
+	m, err := p.(containerd.Task).Metrics(ctx)
352
+	if err != nil {
353
+		return nil, err
354
+	}
355
+
356
+	v, err := typeurl.UnmarshalAny(m.Data)
357
+	if err != nil {
358
+		return nil, err
359
+	}
360
+	return interfaceToStats(m.Timestamp, v), nil
361
+}
362
+
363
+func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
364
+	p, err := c.getProcess(containerID, InitProcessName)
365
+	if err != nil {
366
+		return nil, err
367
+	}
368
+
369
+	pis, err := p.(containerd.Task).Pids(ctx)
370
+	if err != nil {
371
+		return nil, err
372
+	}
373
+
374
+	var pids []uint32
375
+	for _, i := range pis {
376
+		pids = append(pids, i.Pid)
377
+	}
378
+
379
+	return pids, nil
380
+}
381
+
382
+func (c *client) Summary(ctx context.Context, containerID string) ([]Summary, error) {
383
+	p, err := c.getProcess(containerID, InitProcessName)
384
+	if err != nil {
385
+		return nil, err
386
+	}
387
+
388
+	pis, err := p.(containerd.Task).Pids(ctx)
389
+	if err != nil {
390
+		return nil, err
391
+	}
392
+
393
+	var infos []Summary
394
+	for _, pi := range pis {
395
+		i, err := typeurl.UnmarshalAny(pi.Info)
396
+		if err != nil {
397
+			return nil, errors.Wrap(err, "unable to decode process details")
398
+		}
399
+		s, err := summaryFromInterface(i)
400
+		if err != nil {
401
+			return nil, err
402
+		}
403
+		infos = append(infos, *s)
404
+	}
405
+
406
+	return infos, nil
407
+}
408
+
409
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
410
+	p, err := c.getProcess(containerID, InitProcessName)
411
+	if err != nil {
412
+		return 255, time.Now(), nil
413
+	}
414
+
415
+	status, err := p.(containerd.Task).Delete(ctx)
416
+	if err != nil {
417
+		return 255, time.Now(), nil
418
+	}
419
+
420
+	c.Lock()
421
+	if ctr, ok := c.containers[containerID]; ok {
422
+		ctr.task = nil
423
+	}
424
+	c.Unlock()
425
+
426
+	return status.ExitCode(), status.ExitTime(), nil
427
+}
428
+
429
+func (c *client) Delete(ctx context.Context, containerID string) error {
430
+	ctr := c.getContainer(containerID)
431
+	if ctr == nil {
432
+		return errors.WithStack(newNotFoundError("no such container"))
433
+	}
434
+
435
+	if err := ctr.ctr.Delete(ctx); err != nil {
436
+		return err
437
+	}
438
+
439
+	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
440
+		if err := os.RemoveAll(ctr.bundleDir); err != nil {
441
+			c.logger.WithError(err).WithFields(logrus.Fields{
442
+				"container": containerID,
443
+				"bundle":    ctr.bundleDir,
444
+			}).Error("failed to remove state dir")
445
+		}
446
+	}
447
+
448
+	c.removeContainer(containerID)
449
+
450
+	return nil
451
+}
452
+
453
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
454
+	ctr := c.getContainer(containerID)
455
+	if ctr == nil {
456
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
457
+	}
458
+
459
+	s, err := ctr.task.Status(ctx)
460
+	if err != nil {
461
+		return StatusUnknown, err
462
+	}
463
+
464
+	return Status(s.Status), nil
465
+}
466
+
467
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
468
+	p, err := c.getProcess(containerID, InitProcessName)
469
+	if err != nil {
470
+		return err
471
+	}
472
+
473
+	img, err := p.(containerd.Task).Checkpoint(ctx)
474
+	if err != nil {
475
+		return err
476
+	}
477
+	// Whatever happens, delete the checkpoint from containerd
478
+	defer func() {
479
+		err := c.remote.ImageService().Delete(context.Background(), img.Name())
480
+		if err != nil {
481
+			c.logger.WithError(err).WithField("digest", img.Target().Digest).
482
+				Warnf("failed to delete checkpoint image")
483
+		}
484
+	}()
485
+
486
+	b, err := content.ReadBlob(ctx, c.remote.ContentStore(), img.Target().Digest)
487
+	if err != nil {
488
+		return wrapSystemError(errors.Wrapf(err, "failed to retrieve checkpoint data"))
489
+	}
490
+	var index v1.Index
491
+	if err := json.Unmarshal(b, &index); err != nil {
492
+		return wrapSystemError(errors.Wrapf(err, "failed to decode checkpoint data"))
493
+	}
494
+
495
+	var cpDesc *v1.Descriptor
496
+	for _, m := range index.Manifests {
497
+		if m.MediaType == images.MediaTypeContainerd1Checkpoint {
498
+			cpDesc = &m
499
+			break
500
+		}
501
+	}
502
+	if cpDesc == nil {
503
+		return wrapSystemError(errors.New("invalid checkpoint"))
504
+	}
505
+
506
+	rat, err := c.remote.ContentStore().ReaderAt(ctx, cpDesc.Digest)
507
+	if err != nil {
508
+		return wrapSystemError(errors.Wrapf(err, "failed to get checkpoint reader"))
509
+	}
510
+	defer rat.Close()
511
+	_, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat))
512
+	if err != nil {
513
+		return wrapSystemError(errors.Wrapf(err, "failed to apply checkpoint data"))
514
+	}
515
+
516
+	return err
517
+}
518
+
519
+func (c *client) getContainer(id string) *container {
520
+	c.RLock()
521
+	ctr := c.containers[id]
522
+	c.RUnlock()
523
+
524
+	return ctr
525
+}
526
+
527
+func (c *client) removeContainer(id string) {
528
+	c.Lock()
529
+	delete(c.containers, id)
530
+	c.Unlock()
531
+}
532
+
533
+func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
534
+	ctr := c.getContainer(containerID)
535
+	switch {
536
+	case ctr == nil:
537
+		return nil, errors.WithStack(newNotFoundError("no such container"))
538
+	case ctr.task == nil:
539
+		return nil, errors.WithStack(newNotFoundError("container is not running"))
540
+	case processID == InitProcessName:
541
+		return ctr.task, nil
542
+	default:
543
+		ctr.Lock()
544
+		defer ctr.Unlock()
545
+		if ctr.execs == nil {
546
+			return nil, errors.WithStack(newNotFoundError("no execs"))
547
+		}
548
+	}
549
+
550
+	p := ctr.execs[processID]
551
+	if p == nil {
552
+		return nil, errors.WithStack(newNotFoundError("no such exec"))
553
+	}
554
+
555
+	return p, nil
556
+}
557
+
558
+// createIO creates the io to be used by a process
559
+// The stdin closer is synchronized through stdinCloseSync because, at the time it runs, the process may not have been registered with the client yet
560
+func (c *client) createIO(bundleDir, containerID, processID string, stdinCloseSync chan struct{}, withStdin, withTerminal bool, attachStdio StdioCallback) (containerd.IO, error) {
561
+	fifos := newFIFOSet(bundleDir, containerID, processID, withStdin, withTerminal)
562
+	io, err := newIOPipe(fifos)
563
+	if err != nil {
564
+		return nil, err
565
+	}
566
+
567
+	if io.Stdin != nil {
568
+		var (
569
+			err       error
570
+			stdinOnce sync.Once
571
+		)
572
+		pipe := io.Stdin
573
+		io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
574
+			stdinOnce.Do(func() {
575
+				err = pipe.Close()
576
+				// Do the rest in a new routine to avoid a deadlock if the
577
+				// Exec/Start call failed.
578
+				go func() {
579
+					<-stdinCloseSync
580
+					p, err := c.getProcess(containerID, processID)
581
+					if err == nil {
582
+						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
583
+						if err != nil && strings.Contains(err.Error(), "transport is closing") {
584
+							err = nil
585
+						}
586
+					}
587
+				}()
588
+			})
589
+			return err
590
+		})
591
+	}
592
+
593
+	cio, err := attachStdio(io)
594
+	if err != nil {
595
+		io.Cancel()
596
+		io.Close()
597
+	}
598
+	return cio, err
599
+}
600
+
601
+func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
602
+	c.eventQ.append(ei.ContainerID, func() {
603
+		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
604
+		if err != nil {
605
+			c.logger.WithError(err).WithFields(logrus.Fields{
606
+				"container":  ei.ContainerID,
607
+				"event":      et,
608
+				"event-info": ei,
609
+			}).Error("failed to process event")
610
+		}
611
+
612
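+		// an exit event for an exec process: delete the process in containerd and drop it from the exec map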
+		if et == EventExit && ei.ProcessID != ei.ContainerID {
613
+			var p containerd.Process
614
+			ctr.Lock()
615
+			if ctr.execs != nil {
616
+				p = ctr.execs[ei.ProcessID]
617
+			}
618
+			ctr.Unlock()
619
+			if p == nil {
620
+				c.logger.WithError(errors.New("no such process")).
621
+					WithFields(logrus.Fields{
622
+						"container": ei.ContainerID,
623
+						"process":   ei.ProcessID,
624
+					}).Error("exit event")
625
+				return
626
+			}
627
+			_, err = p.Delete(context.Background())
628
+			if err != nil {
629
+				c.logger.WithError(err).WithFields(logrus.Fields{
630
+					"container": ei.ContainerID,
631
+					"process":   ei.ProcessID,
632
+				}).Warn("failed to delete process")
633
+			}
634
+			ctr.Lock()
635
+			delete(ctr.execs, ei.ProcessID)
636
+			ctr.Unlock()
637
+		}
638
+	})
639
+}
640
+
641
+func (c *client) processEventStream(ctx context.Context) {
642
+	var (
643
+		err         error
644
+		eventStream eventsapi.Events_SubscribeClient
645
+		ev          *eventsapi.Envelope
646
+		et          EventType
647
+		ei          EventInfo
648
+		ctr         *container
649
+	)
650
+	defer func() {
651
+		if err != nil {
652
+			select {
653
+			case <-ctx.Done():
654
+				c.logger.WithError(ctx.Err()).
655
+					Info("stopping event stream following graceful shutdown")
656
+			default:
657
+				go c.processEventStream(ctx)
658
+			}
659
+		}
660
+	}()
661
+
662
+	eventStream, err = c.remote.EventService().Subscribe(ctx, &eventsapi.SubscribeRequest{
663
+		Filters: []string{"namespace==" + c.namespace + ",topic~=/tasks/.+"},
664
+	}, grpc.FailFast(false))
665
+	if err != nil {
666
+		return
667
+	}
668
+
669
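+	// a TaskOOM event is recorded here and folded into the container state below, so the following exit event is also reported as OOM-killed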
+	var oomKilled bool
670
+	for {
671
+		ev, err = eventStream.Recv()
672
+		if err != nil {
673
+			c.logger.WithError(err).Error("failed to get event")
674
+			return
675
+		}
676
+
677
+		if ev.Event == nil {
678
+			c.logger.WithField("event", ev).Warn("invalid event")
679
+			continue
680
+		}
681
+
682
+		v, err := typeurl.UnmarshalAny(ev.Event)
683
+		if err != nil {
684
+			c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
685
+			continue
686
+		}
687
+
688
+		c.logger.WithField("topic", ev.Topic).Debug("event")
689
+
690
+		switch t := v.(type) {
691
+		case *eventsapi.TaskCreate:
692
+			et = EventCreate
693
+			ei = EventInfo{
694
+				ContainerID: t.ContainerID,
695
+				ProcessID:   t.ContainerID,
696
+				Pid:         t.Pid,
697
+			}
698
+		case *eventsapi.TaskStart:
699
+			et = EventStart
700
+			ei = EventInfo{
701
+				ContainerID: t.ContainerID,
702
+				ProcessID:   t.ContainerID,
703
+				Pid:         t.Pid,
704
+			}
705
+		case *eventsapi.TaskExit:
706
+			et = EventExit
707
+			ei = EventInfo{
708
+				ContainerID: t.ContainerID,
709
+				ProcessID:   t.ID,
710
+				Pid:         t.Pid,
711
+				ExitCode:    t.ExitStatus,
712
+				ExitedAt:    t.ExitedAt,
713
+			}
714
+		case *eventsapi.TaskOOM:
715
+			et = EventOOM
716
+			ei = EventInfo{
717
+				ContainerID: t.ContainerID,
718
+				OOMKilled:   true,
719
+			}
720
+			oomKilled = true
721
+		case *eventsapi.TaskExecAdded:
722
+			et = EventExecAdded
723
+			ei = EventInfo{
724
+				ContainerID: t.ContainerID,
725
+				ProcessID:   t.ExecID,
726
+			}
727
+		case *eventsapi.TaskExecStarted:
728
+			et = EventExecStarted
729
+			ei = EventInfo{
730
+				ContainerID: t.ContainerID,
731
+				ProcessID:   t.ExecID,
732
+				Pid:         t.Pid,
733
+			}
734
+		case *eventsapi.TaskPaused:
735
+			et = EventPaused
736
+			ei = EventInfo{
737
+				ContainerID: t.ContainerID,
738
+			}
739
+		case *eventsapi.TaskResumed:
740
+			et = EventResumed
741
+			ei = EventInfo{
742
+				ContainerID: t.ContainerID,
743
+			}
744
+		default:
745
+			c.logger.WithFields(logrus.Fields{
746
+				"topic": ev.Topic,
747
+				"type":  reflect.TypeOf(t),
748
+			}).Info("ignoring event")
749
+			continue
750
+		}
751
+
752
+		ctr = c.getContainer(ei.ContainerID)
753
+		if ctr == nil {
754
+			c.logger.WithField("container", ei.ContainerID).Warn("unknown container")
755
+			continue
756
+		}
757
+
758
+		if oomKilled {
759
+			ctr.oomKilled = true
760
+			oomKilled = false
761
+		}
762
+		ei.OOMKilled = ctr.oomKilled
763
+
764
+		c.processEvent(ctr, et, ei)
765
+	}
766
+}
767
+
768
+func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
769
+	writer, err := c.remote.ContentStore().Writer(ctx, ref, 0, "")
770
+	if err != nil {
771
+		return nil, err
772
+	}
773
+	defer writer.Close()
774
+	size, err := io.Copy(writer, r)
775
+	if err != nil {
776
+		return nil, err
777
+	}
778
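+	// label the blob as a GC root so containerd's garbage collector keeps it until it is consumed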
+	labels := map[string]string{
779
+		"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
780
+	}
781
+	if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil {
782
+		return nil, err
783
+	}
784
+	return &types.Descriptor{
785
+		MediaType: mediaType,
786
+		Digest:    writer.Digest(),
787
+		Size_:     size,
788
+	}, nil
789
+}
790
+
791
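+// wrapError converts containerd's well-known "not found" error strings into the daemon's typed not-found error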
+func wrapError(err error) error {
792
+	if err != nil {
793
+		msg := err.Error()
794
+		for _, s := range []string{"container does not exist", "not found", "no such container"} {
795
+			if strings.Contains(msg, s) {
796
+				return wrapNotFoundError(err)
797
+			}
798
+		}
799
+	}
800
+	return err
801
+}
0 802
new file mode 100644
... ...
@@ -0,0 +1,96 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"context"
4
+	"fmt"
5
+	"os"
6
+	"path/filepath"
7
+	"strings"
8
+
9
+	"github.com/containerd/containerd"
10
+	"github.com/docker/docker/pkg/idtools"
11
+	specs "github.com/opencontainers/runtime-spec/specs-go"
12
+)
13
+
14
+func summaryFromInterface(i interface{}) (*Summary, error) {
15
+	return &Summary{}, nil
16
+}
17
+
18
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
19
+	p, err := c.getProcess(containerID, InitProcessName)
20
+	if err != nil {
21
+		return err
22
+	}
23
+
24
+	// Go 1.8 does not accept the type alias here, which is why this
25
+	// conversion needs to be platform specific
26
+	return p.(containerd.Task).Update(ctx, containerd.WithResources((*specs.LinuxResources)(resources)))
27
+}
28
+
29
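+// hostIDFromMap returns the host id that the given container id maps to under the supplied id mappings, or 0 if no mapping covers it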
+func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
30
+	for _, m := range mp {
31
+		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
32
+			return int(m.HostID + id - m.ContainerID)
33
+		}
34
+	}
35
+	return 0
36
+}
37
+
38
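+// getSpecUser returns the host uid/gid that container root is remapped to when a user namespace is configured, and 0, 0 otherwise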
+func getSpecUser(ociSpec *specs.Spec) (int, int) {
39
+	var (
40
+		uid int
41
+		gid int
42
+	)
43
+
44
+	for _, ns := range ociSpec.Linux.Namespaces {
45
+		if ns.Type == specs.UserNamespace {
46
+			uid = hostIDFromMap(0, ociSpec.Linux.UIDMappings)
47
+			gid = hostIDFromMap(0, ociSpec.Linux.GIDMappings)
48
+			break
49
+		}
50
+	}
51
+
52
+	return uid, gid
53
+}
54
+
55
+func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
56
+	uid, gid := getSpecUser(ociSpec)
57
+	if uid == 0 && gid == 0 {
58
+		return bundleDir, idtools.MkdirAllAndChownNew(bundleDir, 0755, idtools.IDPair{0, 0})
59
+	}
60
+
61
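+	// with a remapped root, walk the bundle path component by component; any component that is missing or not world-searchable is replaced by a "<name>.<uid>.<gid>" directory owned by the remapped user, keeping the final bundle reachable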
+	p := string(filepath.Separator)
62
+	components := strings.Split(bundleDir, string(filepath.Separator))
63
+	for _, d := range components[1:] {
64
+		p = filepath.Join(p, d)
65
+		fi, err := os.Stat(p)
66
+		if err != nil && !os.IsNotExist(err) {
67
+			return "", err
68
+		}
69
+		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
70
+			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
71
+			if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
72
+				return "", err
73
+			}
74
+		}
75
+	}
76
+
77
+	return p, nil
78
+}
79
+
80
+func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet {
81
+	fifos := &containerd.FIFOSet{
82
+		Terminal: withTerminal,
83
+		Out:      filepath.Join(bundleDir, processID+"-stdout"),
84
+	}
85
+
86
+	if withStdin {
87
+		fifos.In = filepath.Join(bundleDir, processID+"-stdin")
88
+	}
89
+
90
+	if !fifos.Terminal {
91
+		fifos.Err = filepath.Join(bundleDir, processID+"-stderr")
92
+	}
93
+
94
+	return fifos
95
+}
0 96
new file mode 100644
... ...
@@ -0,0 +1,53 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"github.com/containerd/containerd"
6
+	"github.com/containerd/containerd/windows/hcsshimtypes"
7
+	specs "github.com/opencontainers/runtime-spec/specs-go"
8
+	"github.com/pkg/errors"
9
+)
10
+
11
+func summaryFromInterface(i interface{}) (*Summary, error) {
12
+	switch pd := i.(type) {
13
+	case *hcsshimtypes.ProcessDetails:
14
+		return &Summary{
15
+			CreateTimestamp:              pd.CreatedAt,
16
+			ImageName:                    pd.ImageName,
17
+			KernelTime100ns:              pd.KernelTime_100Ns,
18
+			MemoryCommitBytes:            pd.MemoryCommitBytes,
19
+			MemoryWorkingSetPrivateBytes: pd.MemoryWorkingSetPrivateBytes,
20
+			MemoryWorkingSetSharedBytes:  pd.MemoryWorkingSetSharedBytes,
21
+			ProcessId:                    pd.ProcessID,
22
+			UserTime100ns:                pd.UserTime_100Ns,
23
+		}, nil
24
+	default:
25
+		return nil, errors.Errorf("Unknown process details type %T", pd)
26
+	}
27
+}
28
+
29
+func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
30
+	return bundleDir, nil
31
+}
32
+
33
+func pipeName(containerID, processID, name string) string {
34
+	return fmt.Sprintf(`\\.\pipe\containerd-%s-%s-%s`, containerID, processID, name)
35
+}
36
+
37
+func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet {
38
+	fifos := &containerd.FIFOSet{
39
+		Terminal: withTerminal,
40
+		Out:      pipeName(containerID, processID, "stdout"),
41
+	}
42
+
43
+	if withStdin {
44
+		fifos.In = pipeName(containerID, processID, "stdin")
45
+	}
46
+
47
+	if !fifos.Terminal {
48
+		fifos.Err = pipeName(containerID, processID, "stderr")
49
+	}
50
+
51
+	return fifos
52
+}
0 53
deleted file mode 100644
... ...
@@ -1,616 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	"fmt"
5
-	"os"
6
-	"strings"
7
-	"sync"
8
-	"time"
9
-
10
-	containerd "github.com/containerd/containerd/api/grpc/types"
11
-	containerd_runtime_types "github.com/containerd/containerd/runtime"
12
-	"github.com/docker/docker/pkg/ioutils"
13
-	"github.com/docker/docker/pkg/mount"
14
-	"github.com/golang/protobuf/ptypes"
15
-	"github.com/golang/protobuf/ptypes/timestamp"
16
-	specs "github.com/opencontainers/runtime-spec/specs-go"
17
-	"github.com/sirupsen/logrus"
18
-	"golang.org/x/net/context"
19
-	"golang.org/x/sys/unix"
20
-)
21
-
22
-type client struct {
23
-	clientCommon
24
-
25
-	// Platform specific properties below here.
26
-	remote        *remote
27
-	q             queue
28
-	exitNotifiers map[string]*exitNotifier
29
-	liveRestore   bool
30
-}
31
-
32
-// GetServerVersion returns the connected server version information
33
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
34
-	resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
35
-	if err != nil {
36
-		return nil, err
37
-	}
38
-
39
-	sv := &ServerVersion{
40
-		GetServerVersionResponse: *resp,
41
-	}
42
-
43
-	return sv, nil
44
-}
45
-
46
-// AddProcess is the handler for adding a process to an already running
47
-// container. It's called through docker exec. It returns the system pid of the
48
-// exec'd process.
49
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (pid int, err error) {
50
-	clnt.lock(containerID)
51
-	defer clnt.unlock(containerID)
52
-	container, err := clnt.getContainer(containerID)
53
-	if err != nil {
54
-		return -1, err
55
-	}
56
-
57
-	spec, err := container.spec()
58
-	if err != nil {
59
-		return -1, err
60
-	}
61
-	sp := spec.Process
62
-	sp.Args = specp.Args
63
-	sp.Terminal = specp.Terminal
64
-	if len(specp.Env) > 0 {
65
-		sp.Env = specp.Env
66
-	}
67
-	if specp.Cwd != nil {
68
-		sp.Cwd = *specp.Cwd
69
-	}
70
-	if specp.User != nil {
71
-		sp.User = specs.User{
72
-			UID:            specp.User.UID,
73
-			GID:            specp.User.GID,
74
-			AdditionalGids: specp.User.AdditionalGids,
75
-		}
76
-	}
77
-	if specp.Capabilities != nil {
78
-		sp.Capabilities.Bounding = specp.Capabilities
79
-		sp.Capabilities.Effective = specp.Capabilities
80
-		sp.Capabilities.Inheritable = specp.Capabilities
81
-		sp.Capabilities.Permitted = specp.Capabilities
82
-	}
83
-
84
-	p := container.newProcess(processFriendlyName)
85
-
86
-	r := &containerd.AddProcessRequest{
87
-		Args:     sp.Args,
88
-		Cwd:      sp.Cwd,
89
-		Terminal: sp.Terminal,
90
-		Id:       containerID,
91
-		Env:      sp.Env,
92
-		User: &containerd.User{
93
-			Uid:            sp.User.UID,
94
-			Gid:            sp.User.GID,
95
-			AdditionalGids: sp.User.AdditionalGids,
96
-		},
97
-		Pid:             processFriendlyName,
98
-		Stdin:           p.fifo(unix.Stdin),
99
-		Stdout:          p.fifo(unix.Stdout),
100
-		Stderr:          p.fifo(unix.Stderr),
101
-		Capabilities:    sp.Capabilities.Effective,
102
-		ApparmorProfile: sp.ApparmorProfile,
103
-		SelinuxLabel:    sp.SelinuxLabel,
104
-		NoNewPrivileges: sp.NoNewPrivileges,
105
-		Rlimits:         convertRlimits(sp.Rlimits),
106
-	}
107
-
108
-	fifoCtx, cancel := context.WithCancel(context.Background())
109
-	defer func() {
110
-		if err != nil {
111
-			cancel()
112
-		}
113
-	}()
114
-
115
-	iopipe, err := p.openFifos(fifoCtx, sp.Terminal)
116
-	if err != nil {
117
-		return -1, err
118
-	}
119
-
120
-	resp, err := clnt.remote.apiClient.AddProcess(ctx, r)
121
-	if err != nil {
122
-		p.closeFifos(iopipe)
123
-		return -1, err
124
-	}
125
-
126
-	var stdinOnce sync.Once
127
-	stdin := iopipe.Stdin
128
-	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
129
-		var err error
130
-		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
131
-			err = stdin.Close()
132
-			if err2 := p.sendCloseStdin(); err == nil {
133
-				err = err2
134
-			}
135
-		})
136
-		return err
137
-	})
138
-
139
-	container.processes[processFriendlyName] = p
140
-
141
-	if err := attachStdio(*iopipe); err != nil {
142
-		p.closeFifos(iopipe)
143
-		return -1, err
144
-	}
145
-
146
-	return int(resp.SystemPid), nil
147
-}
148
-
149
-func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
150
-	clnt.lock(containerID)
151
-	defer clnt.unlock(containerID)
152
-	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
153
-		Id:     containerID,
154
-		Pid:    pid,
155
-		Signal: uint32(sig),
156
-	})
157
-	return err
158
-}
159
-
160
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
161
-	clnt.lock(containerID)
162
-	defer clnt.unlock(containerID)
163
-	if _, err := clnt.getContainer(containerID); err != nil {
164
-		return err
165
-	}
166
-	_, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
167
-		Id:     containerID,
168
-		Pid:    processFriendlyName,
169
-		Width:  uint32(width),
170
-		Height: uint32(height),
171
-	})
172
-	return err
173
-}
174
-
175
-func (clnt *client) Pause(containerID string) error {
176
-	return clnt.setState(containerID, StatePause)
177
-}
178
-
179
-func (clnt *client) setState(containerID, state string) error {
180
-	clnt.lock(containerID)
181
-	container, err := clnt.getContainer(containerID)
182
-	if err != nil {
183
-		clnt.unlock(containerID)
184
-		return err
185
-	}
186
-	if container.systemPid == 0 {
187
-		clnt.unlock(containerID)
188
-		return fmt.Errorf("No active process for container %s", containerID)
189
-	}
190
-	st := "running"
191
-	if state == StatePause {
192
-		st = "paused"
193
-	}
194
-	chstate := make(chan struct{})
195
-	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
196
-		Id:     containerID,
197
-		Pid:    InitFriendlyName,
198
-		Status: st,
199
-	})
200
-	if err != nil {
201
-		clnt.unlock(containerID)
202
-		return err
203
-	}
204
-	container.pauseMonitor.append(state, chstate)
205
-	clnt.unlock(containerID)
206
-	<-chstate
207
-	return nil
208
-}
209
-
210
-func (clnt *client) Resume(containerID string) error {
211
-	return clnt.setState(containerID, StateResume)
212
-}
213
-
214
-func (clnt *client) Stats(containerID string) (*Stats, error) {
215
-	resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID})
216
-	if err != nil {
217
-		return nil, err
218
-	}
219
-	return (*Stats)(resp), nil
220
-}
221
-
222
-// Take care of the old 1.11.0 behavior in case the version upgrade
223
-// happened without a clean daemon shutdown
224
-func (clnt *client) cleanupOldRootfs(containerID string) {
225
-	// Unmount and delete the bundle folder
226
-	if mts, err := mount.GetMounts(); err == nil {
227
-		for _, mts := range mts {
228
-			if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") {
229
-				if err := unix.Unmount(mts.Mountpoint, unix.MNT_DETACH); err == nil {
230
-					os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs"))
231
-				}
232
-				break
233
-			}
234
-		}
235
-	}
236
-}
237
-
238
-func (clnt *client) setExited(containerID string, exitCode uint32) error {
239
-	clnt.lock(containerID)
240
-	defer clnt.unlock(containerID)
241
-
242
-	err := clnt.backend.StateChanged(containerID, StateInfo{
243
-		CommonStateInfo: CommonStateInfo{
244
-			State:    StateExit,
245
-			ExitCode: exitCode,
246
-		}})
247
-
248
-	clnt.cleanupOldRootfs(containerID)
249
-
250
-	return err
251
-}
252
-
253
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
254
-	cont, err := clnt.getContainerdContainer(containerID)
255
-	if err != nil {
256
-		return nil, err
257
-	}
258
-	pids := make([]int, len(cont.Pids))
259
-	for i, p := range cont.Pids {
260
-		pids[i] = int(p)
261
-	}
262
-	return pids, nil
263
-}
264
-
265
-// Summary returns a summary of the processes running in a container.
266
-// This is a no-op on Linux.
267
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
268
-	return nil, nil
269
-}
270
-
271
-func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
272
-	resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
273
-	if err != nil {
274
-		return nil, err
275
-	}
276
-	for _, cont := range resp.Containers {
277
-		if cont.Id == containerID {
278
-			return cont, nil
279
-		}
280
-	}
281
-	return nil, fmt.Errorf("invalid state response")
282
-}
283
-
284
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
285
-	clnt.lock(containerID)
286
-	defer clnt.unlock(containerID)
287
-	container, err := clnt.getContainer(containerID)
288
-	if err != nil {
289
-		return err
290
-	}
291
-	if container.systemPid == 0 {
292
-		return fmt.Errorf("No active process for container %s", containerID)
293
-	}
294
-	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
295
-		Id:        containerID,
296
-		Pid:       InitFriendlyName,
297
-		Resources: (*containerd.UpdateResource)(&resources),
298
-	})
299
-	return err
300
-}
301
-
302
-func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
303
-	clnt.mapMutex.RLock()
304
-	defer clnt.mapMutex.RUnlock()
305
-	return clnt.exitNotifiers[containerID]
306
-}
307
-
308
-func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
309
-	clnt.mapMutex.Lock()
310
-	w, ok := clnt.exitNotifiers[containerID]
311
-	defer clnt.mapMutex.Unlock()
312
-	if !ok {
313
-		w = &exitNotifier{c: make(chan struct{}), client: clnt}
314
-		clnt.exitNotifiers[containerID] = w
315
-	}
316
-	return w
317
-}
318
-
319
-func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Event, attachStdio StdioCallback, options ...CreateOption) (err error) {
320
-	clnt.lock(cont.Id)
321
-	defer clnt.unlock(cont.Id)
322
-
323
-	logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status)
324
-
325
-	containerID := cont.Id
326
-	if _, err := clnt.getContainer(containerID); err == nil {
327
-		return fmt.Errorf("container %s is already active", containerID)
328
-	}
329
-
330
-	defer func() {
331
-		if err != nil {
332
-			clnt.deleteContainer(cont.Id)
333
-		}
334
-	}()
335
-
336
-	container := clnt.newContainer(cont.BundlePath, options...)
337
-	container.systemPid = systemPid(cont)
338
-
339
-	var terminal bool
340
-	for _, p := range cont.Processes {
341
-		if p.Pid == InitFriendlyName {
342
-			terminal = p.Terminal
343
-		}
344
-	}
345
-
346
-	fifoCtx, cancel := context.WithCancel(context.Background())
347
-	defer func() {
348
-		if err != nil {
349
-			cancel()
350
-		}
351
-	}()
352
-
353
-	iopipe, err := container.openFifos(fifoCtx, terminal)
354
-	if err != nil {
355
-		return err
356
-	}
357
-	var stdinOnce sync.Once
358
-	stdin := iopipe.Stdin
359
-	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
360
-		var err error
361
-		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
362
-			err = stdin.Close()
363
-		})
364
-		return err
365
-	})
366
-
367
-	if err := attachStdio(*iopipe); err != nil {
368
-		container.closeFifos(iopipe)
369
-		return err
370
-	}
371
-
372
-	clnt.appendContainer(container)
373
-
374
-	err = clnt.backend.StateChanged(containerID, StateInfo{
375
-		CommonStateInfo: CommonStateInfo{
376
-			State: StateRestore,
377
-			Pid:   container.systemPid,
378
-		}})
379
-
380
-	if err != nil {
381
-		container.closeFifos(iopipe)
382
-		return err
383
-	}
384
-
385
-	if lastEvent != nil {
386
-		// This should only be a pause or resume event
387
-		if lastEvent.Type == StatePause || lastEvent.Type == StateResume {
388
-			return clnt.backend.StateChanged(containerID, StateInfo{
389
-				CommonStateInfo: CommonStateInfo{
390
-					State: lastEvent.Type,
391
-					Pid:   container.systemPid,
392
-				}})
393
-		}
394
-
395
-		logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent)
396
-	}
397
-
398
-	return nil
399
-}
400
-
401
-func (clnt *client) getContainerLastEventSinceTime(id string, tsp *timestamp.Timestamp) (*containerd.Event, error) {
402
-	er := &containerd.EventsRequest{
403
-		Timestamp:  tsp,
404
-		StoredOnly: true,
405
-		Id:         id,
406
-	}
407
-	events, err := clnt.remote.apiClient.Events(context.Background(), er)
408
-	if err != nil {
409
-		logrus.Errorf("libcontainerd: failed to get container events stream for %s: %q", er.Id, err)
410
-		return nil, err
411
-	}
412
-
413
-	var ev *containerd.Event
414
-	for {
415
-		e, err := events.Recv()
416
-		if err != nil {
417
-			if err.Error() == "EOF" {
418
-				break
419
-			}
420
-			logrus.Errorf("libcontainerd: failed to get container event for %s: %q", id, err)
421
-			return nil, err
422
-		}
423
-		ev = e
424
-		logrus.Debugf("libcontainerd: received past event %#v", ev)
425
-	}
426
-
427
-	return ev, nil
428
-}
429
-
430
-func (clnt *client) getContainerLastEvent(id string) (*containerd.Event, error) {
431
-	ev, err := clnt.getContainerLastEventSinceTime(id, clnt.remote.restoreFromTimestamp)
432
-	if err == nil && ev == nil {
433
-		// If ev is nil and the container is running in containerd,
434
-		// we already consumed all the event of the
435
-		// container, included the "exit" one.
436
-		// Thus, we request all events containerd has in memory for
437
-		// this container in order to get the last one (which should
438
-		// be an exit event)
439
-		logrus.Warnf("libcontainerd: client is out of sync, restore was called on a fully synced container (%s).", id)
440
-		// Request all events since beginning of time
441
-		t := time.Unix(0, 0)
442
-		tsp, err := ptypes.TimestampProto(t)
443
-		if err != nil {
444
-			logrus.Errorf("libcontainerd: getLastEventSinceTime() failed to convert timestamp: %q", err)
445
-			return nil, err
446
-		}
447
-
448
-		return clnt.getContainerLastEventSinceTime(id, tsp)
449
-	}
450
-
451
-	return ev, err
452
-}
453
-
454
-func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
455
-	// Synchronize with live events
456
-	clnt.remote.Lock()
457
-	defer clnt.remote.Unlock()
458
-	// Check that containerd still knows this container.
459
-	//
460
-	// In the unlikely event that Restore for this container processes
461
-	// its past event before the main loop, the event will be
462
-	// processed twice. However, this is not an issue as all those
463
-	// events will do is change the state of the container to be
464
-	// exactly the same.
465
-	cont, err := clnt.getContainerdContainer(containerID)
466
-	// Get its last event
467
-	ev, eerr := clnt.getContainerLastEvent(containerID)
468
-	if err != nil || containerd_runtime_types.State(cont.Status) == containerd_runtime_types.Stopped {
469
-		if err != nil {
470
-			logrus.Warnf("libcontainerd: failed to retrieve container %s state: %v", containerID, err)
471
-		}
472
-		if ev != nil && (ev.Pid != InitFriendlyName || ev.Type != StateExit) {
473
-			// Wait a while for the exit event
474
-			timeout := time.NewTimer(10 * time.Second)
475
-			tick := time.NewTicker(100 * time.Millisecond)
476
-		stop:
477
-			for {
478
-				select {
479
-				case <-timeout.C:
480
-					break stop
481
-				case <-tick.C:
482
-					ev, eerr = clnt.getContainerLastEvent(containerID)
483
-					if eerr != nil {
484
-						break stop
485
-					}
486
-					if ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
487
-						break stop
488
-					}
489
-				}
490
-			}
491
-			timeout.Stop()
492
-			tick.Stop()
493
-		}
494
-
495
-		// get the exit status for this container, if we don't have
496
-		// one, indicate an error
497
-		ec := uint32(255)
498
-		if eerr == nil && ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
499
-			ec = ev.Status
500
-		}
501
-		clnt.setExited(containerID, ec)
502
-
503
-		return nil
504
-	}
505
-
506
-	// container is still alive
507
-	if clnt.liveRestore {
508
-		if err := clnt.restore(cont, ev, attachStdio, options...); err != nil {
509
-			logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err)
510
-		}
511
-		return nil
512
-	}
513
-
514
-	// Kill the container if liveRestore == false
515
-	w := clnt.getOrCreateExitNotifier(containerID)
516
-	clnt.lock(cont.Id)
517
-	container := clnt.newContainer(cont.BundlePath)
518
-	container.systemPid = systemPid(cont)
519
-	clnt.appendContainer(container)
520
-	clnt.unlock(cont.Id)
521
-
522
-	container.discardFifos()
523
-
524
-	if err := clnt.Signal(containerID, int(unix.SIGTERM)); err != nil {
525
-		logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err)
526
-	}
527
-
528
-	// Let the main loop handle the exit event
529
-	clnt.remote.Unlock()
530
-
531
-	if ev != nil && ev.Type == StatePause {
532
-		// resume container, it depends on the main loop, so we do it after Unlock()
533
-		logrus.Debugf("libcontainerd: %s was paused, resuming it so it can die", containerID)
534
-		if err := clnt.Resume(containerID); err != nil {
535
-			return fmt.Errorf("failed to resume container: %v", err)
536
-		}
537
-	}
538
-
539
-	select {
540
-	case <-time.After(10 * time.Second):
541
-		if err := clnt.Signal(containerID, int(unix.SIGKILL)); err != nil {
542
-			logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err)
543
-		}
544
-		select {
545
-		case <-time.After(2 * time.Second):
546
-		case <-w.wait():
547
-			// relock because of the defer
548
-			clnt.remote.Lock()
549
-			return nil
550
-		}
551
-	case <-w.wait():
552
-		// relock because of the defer
553
-		clnt.remote.Lock()
554
-		return nil
555
-	}
556
-	// relock because of the defer
557
-	clnt.remote.Lock()
558
-
559
-	clnt.deleteContainer(containerID)
560
-
561
-	return clnt.setExited(containerID, uint32(255))
562
-}
563
-
564
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
565
-	clnt.lock(containerID)
566
-	defer clnt.unlock(containerID)
567
-	if _, err := clnt.getContainer(containerID); err != nil {
568
-		return err
569
-	}
570
-
571
-	_, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{
572
-		Id: containerID,
573
-		Checkpoint: &containerd.Checkpoint{
574
-			Name:        checkpointID,
575
-			Exit:        exit,
576
-			Tcp:         true,
577
-			UnixSockets: true,
578
-			Shell:       false,
579
-			EmptyNS:     []string{"network"},
580
-		},
581
-		CheckpointDir: checkpointDir,
582
-	})
583
-	return err
584
-}
585
-
586
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
587
-	clnt.lock(containerID)
588
-	defer clnt.unlock(containerID)
589
-	if _, err := clnt.getContainer(containerID); err != nil {
590
-		return err
591
-	}
592
-
593
-	_, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{
594
-		Id:            containerID,
595
-		Name:          checkpointID,
596
-		CheckpointDir: checkpointDir,
597
-	})
598
-	return err
599
-}
600
-
601
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
602
-	clnt.lock(containerID)
603
-	defer clnt.unlock(containerID)
604
-	if _, err := clnt.getContainer(containerID); err != nil {
605
-		return nil, err
606
-	}
607
-
608
-	resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{
609
-		Id:            containerID,
610
-		CheckpointDir: checkpointDir,
611
-	})
612
-	if err != nil {
613
-		return nil, err
614
-	}
615
-	return (*Checkpoints)(resp), nil
616
-}
617 1
new file mode 100644
... ...
@@ -0,0 +1,1340 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"context"
4
+	"encoding/json"
5
+	"fmt"
6
+	"io"
7
+	"io/ioutil"
8
+	"os"
9
+	"path"
10
+	"path/filepath"
11
+	"regexp"
12
+	"strings"
13
+	"sync"
14
+	"syscall"
15
+	"time"
16
+
17
+	"github.com/Microsoft/hcsshim"
18
+	opengcs "github.com/Microsoft/opengcs/client"
19
+	"github.com/docker/docker/pkg/sysinfo"
20
+	"github.com/docker/docker/pkg/system"
21
+	specs "github.com/opencontainers/runtime-spec/specs-go"
22
+	"github.com/pkg/errors"
23
+	"github.com/sirupsen/logrus"
24
+	"golang.org/x/sys/windows"
25
+)
26
+
27
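+// InitProcessName is the name given to the first process of a container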
+const InitProcessName = "init"
28
+
29
+type process struct {
30
+	id         string
31
+	pid        int
32
+	hcsProcess hcsshim.Process
33
+}
34
+
35
+type container struct {
36
+	sync.Mutex
37
+
38
+	// The ociSpec is required, as client.Create() needs a spec, but can
39
+	// be called from the RestartManager context which does not otherwise
40
+	// have access to the Spec
41
+	ociSpec *specs.Spec
42
+
43
+	isWindows           bool
44
+	manualStopRequested bool
45
+	hcsContainer        hcsshim.Container
46
+
47
+	id            string
48
+	status        Status
49
+	exitedAt      time.Time
50
+	exitCode      uint32
51
+	waitCh        chan struct{}
52
+	init          *process
53
+	execs         map[string]*process
54
+	updatePending bool
55
+}
56
+
57
+// Win32 error codes that are used for various workarounds
58
+// These really should be ALL_CAPS to match Go's syscall library and standard
59
+// Win32 error conventions, but golint insists on CamelCase.
60
+const (
61
+	CoEClassstring     = syscall.Errno(0x800401F3) // Invalid class string
62
+	ErrorNoNetwork     = syscall.Errno(1222)       // The network is not present or not started
63
+	ErrorBadPathname   = syscall.Errno(161)        // The specified path is invalid
64
+	ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
65
+)
66
+
67
+// defaultOwner is a tag passed to HCS to allow it to differentiate between
68
+// container creator management stacks. We hard code "docker" in the case
69
+// of docker.
70
+const defaultOwner = "docker"
71
+
72
+// Create is the entrypoint to create a container from a spec.
73
+// The table below shows the fields required for HCS JSON calling parameters;
74
+// fields that are not populated are omitted.
75
+// +-----------------+--------------------------------------------+---------------------------------------------------+
76
+// |                 | Isolation=Process                          | Isolation=Hyper-V                                 |
77
+// +-----------------+--------------------------------------------+---------------------------------------------------+
78
+// | VolumePath      | \\?\\Volume{GUIDa}                         |                                                   |
79
+// | LayerFolderPath | %root%\windowsfilter\containerID           | %root%\windowsfilter\containerID (servicing only) |
80
+// | Layers[]        | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID        |
81
+// | HvRuntime       |                                            | ImagePath=%root%\BaseLayerID\UtilityVM            |
82
+// +-----------------+--------------------------------------------+---------------------------------------------------+
83
+//
84
+// Isolation=Process example:
85
+//
86
+// {
87
+//	"SystemType": "Container",
88
+//	"Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
89
+//	"Owner": "docker",
90
+//	"VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}",
91
+//	"IgnoreFlushesDuringBoot": true,
92
+//	"LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
93
+//	"Layers": [{
94
+//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
95
+//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
96
+//	}],
97
+//	"HostName": "5e0055c814a6",
98
+//	"MappedDirectories": [],
99
+//	"HvPartition": false,
100
+//	"EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"],
101
+//	"Servicing": false
102
+//}
103
+//
104
+// Isolation=Hyper-V example:
105
+//
106
+//{
107
+//	"SystemType": "Container",
108
+//	"Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d",
109
+//	"Owner": "docker",
110
+//	"IgnoreFlushesDuringBoot": true,
111
+//	"Layers": [{
112
+//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
113
+//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
114
+//	}],
115
+//	"HostName": "475c2c58933b",
116
+//	"MappedDirectories": [],
117
+//	"HvPartition": true,
118
+//	"EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"],
119
+//	"DNSSearchList": "a.com,b.com,c.com",
120
+//	"HvRuntime": {
121
+//		"ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
122
+//	},
123
+//	"Servicing": false
124
+//}
125
+func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeOptions interface{}) error {
126
+	if ctr := c.getContainer(id); ctr != nil {
127
+		return errors.WithStack(newConflictError("id already in use"))
128
+	}
129
+
130
+	// spec.Linux must be nil for Windows containers, but spec.Windows
131
+	// will be filled in regardless of container platform.  This is a
132
+	// temporary workaround due to LCOW requiring layer folder paths,
133
+	// which are stored under spec.Windows.
134
+	//
135
+	// TODO: @darrenstahlmsft fix this once the OCI spec is updated to
136
+	// support layer folder paths for LCOW
137
+	if spec.Linux == nil {
138
+		return c.createWindows(id, spec, runtimeOptions)
139
+	}
140
+	return c.createLinux(id, spec, runtimeOptions)
141
+}
142
+
143
+func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
144
+	logger := c.logger.WithField("container", id)
145
+	configuration := &hcsshim.ContainerConfig{
146
+		SystemType: "Container",
147
+		Name:       id,
148
+		Owner:      defaultOwner,
149
+		IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot,
150
+		HostName:                spec.Hostname,
151
+		HvPartition:             false,
152
+		Servicing:               spec.Windows.Servicing,
153
+	}
154
+
155
+	if spec.Windows.Resources != nil {
156
+		if spec.Windows.Resources.CPU != nil {
157
+			if spec.Windows.Resources.CPU.Count != nil {
158
+				// This check is being done here rather than in adaptContainerSettings
159
+				// because we don't want to update the HostConfig in case this container
160
+				// is moved to a host with more CPUs than this one.
161
+				cpuCount := *spec.Windows.Resources.CPU.Count
162
+				hostCPUCount := uint64(sysinfo.NumCPU())
163
+				if cpuCount > hostCPUCount {
164
+					c.logger.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount)
165
+					cpuCount = hostCPUCount
166
+				}
167
+				configuration.ProcessorCount = uint32(cpuCount)
168
+			}
169
+			if spec.Windows.Resources.CPU.Shares != nil {
170
+				configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares)
171
+			}
172
+			if spec.Windows.Resources.CPU.Maximum != nil {
173
+				configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
174
+			}
175
+		}
176
+		if spec.Windows.Resources.Memory != nil {
177
+			if spec.Windows.Resources.Memory.Limit != nil {
178
+				configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024
179
+			}
180
+		}
181
+		if spec.Windows.Resources.Storage != nil {
182
+			if spec.Windows.Resources.Storage.Bps != nil {
183
+				configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
184
+			}
185
+			if spec.Windows.Resources.Storage.Iops != nil {
186
+				configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
187
+			}
188
+		}
189
+	}
190
+
191
+	if spec.Windows.HyperV != nil {
192
+		configuration.HvPartition = true
193
+	}
194
+
195
+	if spec.Windows.Network != nil {
196
+		configuration.EndpointList = spec.Windows.Network.EndpointList
197
+		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
198
+		if spec.Windows.Network.DNSSearchList != nil {
199
+			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
200
+		}
201
+		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
202
+	}
203
+
204
+	if cs, ok := spec.Windows.CredentialSpec.(string); ok {
205
+		configuration.Credentials = cs
206
+	}
207
+
208
+	// We must have at least two layers in the spec, the bottom one being a
209
+	// base image, the top one being the RW layer.
210
+	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 {
211
+		return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
212
+	}
213
+
214
+	// Strip off the top-most layer as that's passed in separately to HCS
215
+	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
216
+	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
217
+
218
+	if configuration.HvPartition {
219
+		// We don't currently support setting the utility VM image explicitly.
220
+		// TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable.
221
+		if spec.Windows.HyperV.UtilityVMPath != "" {
222
+			return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
223
+		}
224
+
225
+		// Find the upper-most utility VM image.
226
+		var uvmImagePath string
227
+		for _, path := range layerFolders {
228
+			fullPath := filepath.Join(path, "UtilityVM")
229
+			_, err := os.Stat(fullPath)
230
+			if err == nil {
231
+				uvmImagePath = fullPath
232
+				break
233
+			}
234
+			if !os.IsNotExist(err) {
235
+				return err
236
+			}
237
+		}
238
+		if uvmImagePath == "" {
239
+			return errors.New("utility VM image could not be found")
240
+		}
241
+		configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
242
+
243
+		if spec.Root.Path != "" {
244
+			return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
245
+		}
246
+	} else {
247
+		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
248
+		if matched, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil || !matched {
249
+			return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
250
+		}
251
+		// HCS API requires the trailing backslash to be removed
252
+		configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
253
+	}
254
+
255
+	if spec.Root.Readonly {
256
+		return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
257
+	}
258
+
259
+	for _, layerPath := range layerFolders {
260
+		_, filename := filepath.Split(layerPath)
261
+		g, err := hcsshim.NameToGuid(filename)
262
+		if err != nil {
263
+			return err
264
+		}
265
+		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
266
+			ID:   g.ToString(),
267
+			Path: layerPath,
268
+		})
269
+	}
270
+
271
+	// Add the mounts (volumes, bind mounts etc) to the structure
272
+	var mds []hcsshim.MappedDir
273
+	var mps []hcsshim.MappedPipe
274
+	for _, mount := range spec.Mounts {
275
+		const pipePrefix = `\\.\pipe\`
276
+		if mount.Type != "" {
277
+			return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
278
+		}
279
+		if strings.HasPrefix(mount.Destination, pipePrefix) {
280
+			mp := hcsshim.MappedPipe{
281
+				HostPath:          mount.Source,
282
+				ContainerPipeName: mount.Destination[len(pipePrefix):],
283
+			}
284
+			mps = append(mps, mp)
285
+		} else {
286
+			md := hcsshim.MappedDir{
287
+				HostPath:      mount.Source,
288
+				ContainerPath: mount.Destination,
289
+				ReadOnly:      false,
290
+			}
291
+			for _, o := range mount.Options {
292
+				if strings.ToLower(o) == "ro" {
293
+					md.ReadOnly = true
294
+				}
295
+			}
296
+			mds = append(mds, md)
297
+		}
298
+	}
299
+	configuration.MappedDirectories = mds
300
+	if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM
301
+		return errors.New("named pipe mounts are not supported on this version of Windows")
302
+	}
303
+	configuration.MappedPipes = mps
304
+
305
+	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
306
+	if err != nil {
307
+		return err
308
+	}
309
+
310
+	// Construct a container object for calling start on it.
311
+	ctr := &container{
312
+		id:           id,
313
+		execs:        make(map[string]*process),
314
+		isWindows:    true,
315
+		ociSpec:      spec,
316
+		hcsContainer: hcsContainer,
317
+		status:       StatusCreated,
318
+		waitCh:       make(chan struct{}),
319
+	}
320
+
321
+	// Start the container. If this is a servicing container, this call
322
+	// will block until the container is done with the servicing
323
+	// execution.
324
+	logger.Debug("starting container")
325
+	if err = hcsContainer.Start(); err != nil {
326
+		c.logger.WithError(err).Error("failed to start container")
327
+		ctr.debugGCS()
328
+		if err := c.terminateContainer(ctr); err != nil {
329
+			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
330
+		} else {
331
+			c.logger.Debug("cleaned up after failed Start by calling Terminate")
332
+		}
333
+		return err
334
+	}
335
+	ctr.debugGCS()
336
+
337
+	c.Lock()
338
+	c.containers[id] = ctr
339
+	c.Unlock()
340
+
341
+	logger.Debug("createWindows() completed successfully")
342
+	return nil
343
+
344
+}
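The layer handling in createWindows() above follows a convention worth spelling out: the top-most LayerFolders entry is the read-write scratch layer handed to HCS as LayerFolderPath, and everything below it becomes a read-only Layer. Below is a minimal Go sketch of that split, not part of this change; the layer struct and name-derived IDs are illustrative stand-ins for hcsshim.Layer and hcsshim.NameToGuid.

package main

import (
	"fmt"
	"path/filepath"
)

// layer is an illustrative stand-in for hcsshim.Layer.
type layer struct{ ID, Path string }

// splitLayers mirrors the convention above: the top-most (last) LayerFolders entry is
// the RW scratch layer passed separately to HCS, the rest become read-only layers.
// The real code derives each ID from the folder name via hcsshim.NameToGuid; a plain
// string is used here for illustration.
func splitLayers(layerFolders []string) (scratch string, layers []layer, err error) {
	if len(layerFolders) < 2 {
		return "", nil, fmt.Errorf("at least two LayerFolders must be supplied")
	}
	scratch = layerFolders[len(layerFolders)-1]
	for _, p := range layerFolders[:len(layerFolders)-1] {
		_, name := filepath.Split(p) // folder name, i.e. the layer ID on Windows
		layers = append(layers, layer{ID: name, Path: p})
	}
	return scratch, layers, nil
}

func main() {
	scratch, layers, _ := splitLayers([]string{
		`C:\control\windowsfilter\baseLayerID`,
		`C:\control\windowsfilter\appLayerID`,
		`C:\control\windowsfilter\containerID`,
	})
	fmt.Println("LayerFolderPath:", scratch)
	fmt.Println("Layers:", layers)
}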
345
+
346
+func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interface{}) error {
347
+	logrus.Debugf("libcontainerd: createLinux(): containerId %s ", id)
348
+	logger := c.logger.WithField("container", id)
349
+
350
+	if runtimeOptions == nil {
351
+		return fmt.Errorf("lcow option must be supplied to the runtime")
352
+	}
353
+	lcowConfig, ok := runtimeOptions.(*opengcs.Config)
354
+	if !ok {
355
+		return fmt.Errorf("lcow option must be supplied to the runtime")
356
+	}
357
+
358
+	configuration := &hcsshim.ContainerConfig{
359
+		HvPartition:   true,
360
+		Name:          id,
361
+		SystemType:    "container",
362
+		ContainerType: "linux",
363
+		Owner:         defaultOwner,
364
+		TerminateOnLastHandleClosed: true,
365
+	}
366
+
367
+	if lcowConfig.ActualMode == opengcs.ModeActualVhdx {
368
+		configuration.HvRuntime = &hcsshim.HvRuntime{
369
+			ImagePath:          lcowConfig.Vhdx,
370
+			BootSource:         "Vhd",
371
+			WritableBootSource: false,
372
+		}
373
+	} else {
374
+		configuration.HvRuntime = &hcsshim.HvRuntime{
375
+			ImagePath:           lcowConfig.KirdPath,
376
+			LinuxKernelFile:     lcowConfig.KernelFile,
377
+			LinuxInitrdFile:     lcowConfig.InitrdFile,
378
+			LinuxBootParameters: lcowConfig.BootParameters,
379
+		}
380
+	}
381
+
382
+	if spec.Windows == nil {
383
+		return fmt.Errorf("spec.Windows must not be nil for LCOW containers")
384
+	}
385
+
386
+	// We must have at least one layer in the spec
387
+	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 {
388
+		return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime")
389
+	}
390
+
391
+	// Strip off the top-most layer as that's passed in separately to HCS
392
+	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
393
+	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
394
+
395
+	for _, layerPath := range layerFolders {
396
+		_, filename := filepath.Split(layerPath)
397
+		g, err := hcsshim.NameToGuid(filename)
398
+		if err != nil {
399
+			return err
400
+		}
401
+		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
402
+			ID:   g.ToString(),
403
+			Path: filepath.Join(layerPath, "layer.vhd"),
404
+		})
405
+	}
406
+
407
+	if spec.Windows.Network != nil {
408
+		configuration.EndpointList = spec.Windows.Network.EndpointList
409
+		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
410
+		if spec.Windows.Network.DNSSearchList != nil {
411
+			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
412
+		}
413
+		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
414
+	}
415
+
416
+	// Add the mounts (volumes, bind mounts etc) to the structure. We have to do
417
+	// some translation for both the mapped directories passed into HCS and in
418
+	// the spec.
419
+	//
420
+	// For HCS, we only pass in the mounts from the spec which are type "bind".
421
+	// Further, the "ContainerPath" field (which is a little mis-leadingly
422
+	// named when it applies to the utility VM rather than the container in the
423
+	// utility VM) is moved under /tmp/gcs/<ID>/binds, where this is passed
424
+	// by the caller through a 'uvmpath' option.
425
+	//
426
+	// We do similar translation for the mounts in the spec by stripping out
427
+	// the uvmpath option, and translating the Source path to the location in the
428
+	// utility VM calculated above.
429
+	//
430
+	// From inside the utility VM, you would see a 9p mount such as in the following
431
+	// where a host folder has been mapped to /target. The line with /tmp/gcs/<ID>/binds
432
+	// specifically:
433
+	//
434
+	//	/ # mount
435
+	//	rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934)
436
+	//	proc on /proc type proc (rw,relatime)
437
+	//	sysfs on /sys type sysfs (rw,relatime)
438
+	//	udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755)
439
+	//	tmpfs on /run type tmpfs (rw,relatime)
440
+	//	cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma)
441
+	//	mqueue on /dev/mqueue type mqueue (rw,relatime)
442
+	//	devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
443
+	//	/binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6)
444
+	//	/dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl)
445
+	//	/dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl)
446
+	//	overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work)
447
+	//
448
+	//  /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l
449
+	//	total 16
450
+	//	drwx------    3 0        0               60 Sep  7 18:54 binds
451
+	//	-rw-r--r--    1 0        0             3345 Sep  7 18:54 config.json
452
+	//	drwxr-xr-x   10 0        0             4096 Sep  6 17:26 layer0
453
+	//	drwxr-xr-x    1 0        0             4096 Sep  7 18:54 rootfs
454
+	//	drwxr-xr-x    5 0        0             4096 Sep  7 18:54 scratch
455
+	//
456
+	//	/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds
457
+	//	total 0
458
+	//	drwxrwxrwt    2 0        0             4096 Sep  7 16:51 target
459
+
460
+	mds := []hcsshim.MappedDir{}
461
+	specMounts := []specs.Mount{}
462
+	for _, mount := range spec.Mounts {
463
+		specMount := mount
464
+		if mount.Type == "bind" {
465
+			// Strip out the uvmpath from the options
466
+			updatedOptions := []string{}
467
+			uvmPath := ""
468
+			readonly := false
469
+			for _, opt := range mount.Options {
470
+				dropOption := false
471
+				elements := strings.SplitN(opt, "=", 2)
472
+				switch elements[0] {
473
+				case "uvmpath":
474
+					uvmPath = elements[1]
475
+					dropOption = true
476
+				case "rw":
477
+				case "ro":
478
+					readonly = true
479
+				case "rbind":
480
+				default:
481
+					return fmt.Errorf("unsupported option %q", opt)
482
+				}
483
+				if !dropOption {
484
+					updatedOptions = append(updatedOptions, opt)
485
+				}
486
+			}
487
+			mount.Options = updatedOptions
488
+			if uvmPath == "" {
489
+				return fmt.Errorf("no uvmpath for bind mount %+v", mount)
490
+			}
491
+			md := hcsshim.MappedDir{
492
+				HostPath:          mount.Source,
493
+				ContainerPath:     path.Join(uvmPath, mount.Destination),
494
+				CreateInUtilityVM: true,
495
+				ReadOnly:          readonly,
496
+			}
497
+			mds = append(mds, md)
498
+			specMount.Source = path.Join(uvmPath, mount.Destination)
499
+		}
500
+		specMounts = append(specMounts, specMount)
501
+	}
502
+	configuration.MappedDirectories = mds
503
+
504
+	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
505
+	if err != nil {
506
+		return err
507
+	}
508
+
509
+	spec.Mounts = specMounts
510
+
511
+	// Construct a container object for calling start on it.
512
+	ctr := &container{
513
+		id:           id,
514
+		execs:        make(map[string]*process),
515
+		isWindows:    false,
516
+		ociSpec:      spec,
517
+		hcsContainer: hcsContainer,
518
+		status:       StatusCreated,
519
+		waitCh:       make(chan struct{}),
520
+	}
521
+
522
+	// Start the container. If this is a servicing container, this call
523
+	// will block until the container is done with the servicing
524
+	// execution.
525
+	logger.Debug("starting container")
526
+	if err = hcsContainer.Start(); err != nil {
527
+		c.logger.WithError(err).Error("failed to start container")
528
+		ctr.debugGCS()
529
+		if err := c.terminateContainer(ctr); err != nil {
530
+			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
531
+		} else {
532
+			c.logger.Debug("cleaned up after failed Start by calling Terminate")
533
+		}
534
+		return err
535
+	}
536
+	ctr.debugGCS()
537
+
538
+	c.Lock()
539
+	c.containers[id] = ctr
540
+	c.Unlock()
541
+
542
+	c.eventQ.append(id, func() {
543
+		ei := EventInfo{
544
+			ContainerID: id,
545
+		}
546
+		c.logger.WithFields(logrus.Fields{
547
+			"container": ctr.id,
548
+			"event":     EventCreate,
549
+		}).Info("sending event")
550
+		err := c.backend.ProcessEvent(id, EventCreate, ei)
551
+		if err != nil {
552
+			c.logger.WithError(err).WithFields(logrus.Fields{
553
+				"container": id,
554
+				"event":     EventCreate,
555
+			}).Error("failed to process event")
556
+		}
557
+	})
558
+
559
+	logger.Debug("createLinux() completed successfully")
560
+	return nil
561
+}
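The uvmpath handling in the mount loop of createLinux() above is easiest to see with a concrete mount. Here is a minimal sketch, not part of this change; the mount type is a stand-in for the OCI runtime-spec type, and the uvmpath value is whatever the caller supplied.

package main

import (
	"fmt"
	"path"
	"strings"
)

// mount is a stand-in for the OCI runtime-spec Mount type.
type mount struct {
	Source      string
	Destination string
	Options     []string
}

// translateLCOWBind strips the 'uvmpath' option and rewrites the mount source to the
// bind location inside the utility VM, mirroring the loop in createLinux() above.
func translateLCOWBind(m mount) (hostPath string, translated mount, err error) {
	var kept []string
	uvmPath := ""
	for _, opt := range m.Options {
		if strings.HasPrefix(opt, "uvmpath=") {
			uvmPath = strings.TrimPrefix(opt, "uvmpath=")
			continue // dropped from the spec handed to the guest
		}
		kept = append(kept, opt)
	}
	if uvmPath == "" {
		return "", mount{}, fmt.Errorf("no uvmpath for bind mount %+v", m)
	}
	translated = m
	translated.Options = kept
	translated.Source = path.Join(uvmPath, m.Destination) // where the 9p share appears in the UVM
	return m.Source, translated, nil
}

func main() {
	host, translated, _ := translateLCOWBind(mount{
		Source:      `C:\some\host\folder`,
		Destination: "/target",
		Options:     []string{"rbind", "rw", "uvmpath=/tmp/gcs/<ID>/binds"},
	})
	fmt.Println(host, "->", translated.Source, translated.Options)
}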
562
+
563
+func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachStdio StdioCallback) (int, error) {
564
+	ctr := c.getContainer(id)
565
+	switch {
566
+	case ctr == nil:
567
+		return -1, errors.WithStack(newNotFoundError("no such container"))
568
+	case ctr.init != nil:
569
+		return -1, errors.WithStack(newConflictError("container already started"))
570
+	}
571
+
572
+	logger := c.logger.WithField("container", id)
573
+
574
+	// Note we always tell HCS to create stdout as it's required
575
+	// regardless of '-i' or '-t' options, so that docker can always grab
576
+	// the output through logs. We also tell HCS to always create stdin,
577
+	// even if it's not used - it will be closed shortly. Stderr is only
578
+	// created if we're not using -t.
579
+	var (
580
+		emulateConsole   bool
581
+		createStdErrPipe bool
582
+	)
583
+	if ctr.ociSpec.Process != nil {
584
+		emulateConsole = ctr.ociSpec.Process.Terminal
585
+		createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing
586
+	}
587
+
588
+	createProcessParms := &hcsshim.ProcessConfig{
589
+		EmulateConsole:   emulateConsole,
590
+		WorkingDirectory: ctr.ociSpec.Process.Cwd,
591
+		CreateStdInPipe:  !ctr.ociSpec.Windows.Servicing,
592
+		CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing,
593
+		CreateStdErrPipe: createStdErrPipe,
594
+	}
595
+
596
+	if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil {
597
+		createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height)
598
+		createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width)
599
+	}
600
+
601
+	// Configure the environment for the process
602
+	createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env)
603
+	if ctr.isWindows {
604
+		createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ")
605
+	} else {
606
+		createProcessParms.CommandArgs = ctr.ociSpec.Process.Args
607
+	}
608
+	createProcessParms.User = ctr.ociSpec.Process.User.Username
609
+
610
+	// LCOW requires the raw OCI spec passed through HCS and onwards to
611
+	// GCS for the utility VM.
612
+	if !ctr.isWindows {
613
+		ociBuf, err := json.Marshal(ctr.ociSpec)
614
+		if err != nil {
615
+			return -1, err
616
+		}
617
+		ociRaw := json.RawMessage(ociBuf)
618
+		createProcessParms.OCISpecification = &ociRaw
619
+	}
620
+
621
+	ctr.Lock()
622
+	defer ctr.Unlock()
623
+
624
+	// Start the command running in the container.
625
+	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
626
+	if err != nil {
627
+		logger.WithError(err).Error("CreateProcess() failed")
628
+		return -1, err
629
+	}
630
+	defer func() {
631
+		if err != nil {
632
+			if err := newProcess.Kill(); err != nil {
633
+				logger.WithError(err).Error("failed to kill process")
634
+			}
635
+			go func() {
636
+				if err := newProcess.Wait(); err != nil {
637
+					logger.WithError(err).Error("failed to wait for process")
638
+				}
639
+				if err := newProcess.Close(); err != nil {
640
+					logger.WithError(err).Error("failed to clean process resources")
641
+				}
642
+			}()
643
+		}
644
+	}()
645
+	p := &process{
646
+		hcsProcess: newProcess,
647
+		id:         InitProcessName,
648
+		pid:        newProcess.Pid(),
649
+	}
650
+	logger.WithField("pid", p.pid).Debug("init process started")
651
+
652
+	// If this is a servicing container, wait on the process synchronously here and
653
+	// if it succeeds, wait for it to cleanly shut down and merge into the parent container.
654
+	if ctr.ociSpec.Windows.Servicing {
655
+		// reapProcess takes the lock
656
+		ctr.Unlock()
657
+		defer ctr.Lock()
658
+		exitCode := c.reapProcess(ctr, p)
659
+
660
+		if exitCode != 0 {
661
+			return -1, errors.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.id, exitCode)
662
+		}
663
+
664
+		return p.pid, nil
665
+	}
666
+
667
+	var (
668
+		stdout, stderr io.ReadCloser
669
+		stdin          io.WriteCloser
670
+	)
671
+	stdin, stdout, stderr, err = newProcess.Stdio()
672
+	if err != nil {
673
+		logger.WithError(err).Error("failed to get stdio pipes")
674
+		return -1, err
675
+	}
676
+
677
+	iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal}
678
+	iopipe.Stdin = createStdInCloser(stdin, newProcess)
679
+
680
+	// Convert io.ReadClosers to io.Readers
681
+	if stdout != nil {
682
+		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
683
+	}
684
+	if stderr != nil {
685
+		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
686
+	}
687
+
688
+	_, err = attachStdio(iopipe)
689
+	if err != nil {
690
+		logger.WithError(err).Error("failed to attach stdio")
691
+		return -1, err
692
+	}
693
+	ctr.status = StatusRunning
694
+	ctr.init = p
695
+
696
+	// Spin up a go routine waiting for exit to handle cleanup
697
+	go c.reapProcess(ctr, p)
698
+
699
+	// Generate the associated event
700
+	c.eventQ.append(id, func() {
701
+		ei := EventInfo{
702
+			ContainerID: id,
703
+			ProcessID:   InitProcessName,
704
+			Pid:         uint32(p.pid),
705
+		}
706
+		c.logger.WithFields(logrus.Fields{
707
+			"container":  ctr.id,
708
+			"event":      EventStart,
709
+			"event-info": ei,
710
+		}).Info("sending event")
711
+		err := c.backend.ProcessEvent(ei.ContainerID, EventStart, ei)
712
+		if err != nil {
713
+			c.logger.WithError(err).WithFields(logrus.Fields{
714
+				"container":  id,
715
+				"event":      EventStart,
716
+				"event-info": ei,
717
+			}).Error("failed to process event")
718
+		}
719
+	})
720
+	logger.Debug("start() completed")
721
+	return p.pid, nil
722
+}
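One detail from Start() above: Windows Server Containers receive a single command line, while LCOW receives the argv slice as-is. A minimal sketch of that branch follows, not part of this change; processConfig is a stand-in for the relevant hcsshim.ProcessConfig fields.

package main

import (
	"fmt"
	"strings"
)

// processConfig is a stand-in for the relevant hcsshim.ProcessConfig fields.
type processConfig struct {
	CommandLine string   // Windows Server Containers: one command line string
	CommandArgs []string // LCOW: the argv slice is passed through to the guest
}

// buildCommand mirrors the isWindows branch in Start() and Exec() above.
func buildCommand(isWindows bool, args []string) processConfig {
	if isWindows {
		return processConfig{CommandLine: strings.Join(args, " ")}
	}
	return processConfig{CommandArgs: args}
}

func main() {
	fmt.Printf("%+v\n", buildCommand(true, []string{"cmd", "/c", "echo", "hello"}))
	fmt.Printf("%+v\n", buildCommand(false, []string{"/bin/sh", "-c", "echo hello"}))
}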
723
+
724
+// Exec adds a process in a running container
725
+func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
726
+	ctr := c.getContainer(containerID)
727
+	switch {
728
+	case ctr == nil:
729
+		return -1, errors.WithStack(newNotFoundError("no such container"))
730
+	case ctr.hcsContainer == nil:
731
+		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
732
+	case ctr.execs != nil && ctr.execs[processID] != nil:
733
+		return -1, errors.WithStack(newConflictError("id already in use"))
734
+	}
735
+	logger := c.logger.WithFields(logrus.Fields{
736
+		"container": containerID,
737
+		"exec":      processID,
738
+	})
739
+
740
+	// Note we always tell HCS to
741
+	// create stdout as it's required regardless of '-i' or '-t' options, so that
742
+	// docker can always grab the output through logs. We also tell HCS to always
743
+	// create stdin, even if it's not used - it will be closed shortly. Stderr
744
+	// is only created if we're not using -t.
745
+	createProcessParms := hcsshim.ProcessConfig{
746
+		CreateStdInPipe:  true,
747
+		CreateStdOutPipe: true,
748
+		CreateStdErrPipe: !spec.Terminal,
749
+	}
750
+	if spec.Terminal {
751
+		createProcessParms.EmulateConsole = true
752
+		if spec.ConsoleSize != nil {
753
+			createProcessParms.ConsoleSize[0] = uint(spec.ConsoleSize.Height)
754
+			createProcessParms.ConsoleSize[1] = uint(spec.ConsoleSize.Width)
755
+		}
756
+	}
757
+
758
+	// Take the working directory from the process being added if it is defined,
759
+	// otherwise fall back to that of the container's first (init) process.
760
+	if spec.Cwd != "" {
761
+		createProcessParms.WorkingDirectory = spec.Cwd
762
+	} else {
763
+		createProcessParms.WorkingDirectory = ctr.ociSpec.Process.Cwd
764
+	}
765
+
766
+	// Configure the environment for the process
767
+	createProcessParms.Environment = setupEnvironmentVariables(spec.Env)
768
+	if ctr.isWindows {
769
+		createProcessParms.CommandLine = strings.Join(spec.Args, " ")
770
+	} else {
771
+		createProcessParms.CommandArgs = spec.Args
772
+	}
773
+	createProcessParms.User = spec.User.Username
774
+
775
+	logger.Debugf("exec commandLine: %s", createProcessParms.CommandLine)
776
+
777
+	// Start the command running in the container.
778
+	var (
779
+		stdout, stderr io.ReadCloser
780
+		stdin          io.WriteCloser
781
+	)
782
+	newProcess, err := ctr.hcsContainer.CreateProcess(&createProcessParms)
783
+	if err != nil {
784
+		logger.WithError(err).Errorf("exec's CreateProcess() failed")
785
+		return -1, err
786
+	}
787
+	pid := newProcess.Pid()
788
+	defer func() {
789
+		if err != nil {
790
+			if err := newProcess.Kill(); err != nil {
791
+				logger.WithError(err).Error("failed to kill process")
792
+			}
793
+			go func() {
794
+				if err := newProcess.Wait(); err != nil {
795
+					logger.WithError(err).Error("failed to wait for process")
796
+				}
797
+				if err := newProcess.Close(); err != nil {
798
+					logger.WithError(err).Error("failed to clean process resources")
799
+				}
800
+			}()
801
+		}
802
+	}()
803
+
804
+	stdin, stdout, stderr, err = newProcess.Stdio()
805
+	if err != nil {
806
+		logger.WithError(err).Error("getting std pipes failed")
807
+		return -1, err
808
+	}
809
+
810
+	iopipe := &IOPipe{Terminal: spec.Terminal}
811
+	iopipe.Stdin = createStdInCloser(stdin, newProcess)
812
+
813
+	// Convert io.ReadClosers to io.Readers
814
+	if stdout != nil {
815
+		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
816
+	}
817
+	if stderr != nil {
818
+		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
819
+	}
820
+
821
+	// Tell the engine to attach streams back to the client
822
+	_, err = attachStdio(iopipe)
823
+	if err != nil {
824
+		return -1, err
825
+	}
826
+
827
+	p := &process{
828
+		id:         processID,
829
+		pid:        pid,
830
+		hcsProcess: newProcess,
831
+	}
832
+
833
+	// Add the process to the container's list of processes
834
+	ctr.Lock()
835
+	ctr.execs[processID] = p
836
+	ctr.Unlock()
837
+
838
+	// Spin up a go routine waiting for exit to handle cleanup
839
+	go c.reapProcess(ctr, p)
840
+
841
+	c.eventQ.append(ctr.id, func() {
842
+		ei := EventInfo{
843
+			ContainerID: ctr.id,
844
+			ProcessID:   p.id,
845
+			Pid:         uint32(p.pid),
846
+		}
847
+		c.logger.WithFields(logrus.Fields{
848
+			"container":  ctr.id,
849
+			"event":      EventExecAdded,
850
+			"event-info": ei,
851
+		}).Info("sending event")
852
+		err := c.backend.ProcessEvent(ctr.id, EventExecAdded, ei)
853
+		if err != nil {
854
+			c.logger.WithError(err).WithFields(logrus.Fields{
855
+				"container":  ctr.id,
856
+				"event":      EventExecAdded,
857
+				"event-info": ei,
858
+			}).Error("failed to process event")
859
+		}
860
+		err = c.backend.ProcessEvent(ctr.id, EventExecStarted, ei)
861
+		if err != nil {
862
+			c.logger.WithError(err).WithFields(logrus.Fields{
863
+				"container":  ctr.id,
864
+				"event":      EventExecStarted,
865
+				"event-info": ei,
866
+			}).Error("failed to process event")
867
+		}
868
+	})
869
+
870
+	return pid, nil
871
+}
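Both Start() and Exec() above rely on the same deferred cleanup: if anything fails after CreateProcess, the freshly created process is killed and then reaped in the background so its resources are released. A minimal sketch of that pattern, not part of this change; hcsProcess is a stand-in interface, not the hcsshim type.

package main

import (
	"errors"
	"fmt"
)

// hcsProcess is a stand-in interface for the handle returned by CreateProcess.
type hcsProcess interface {
	Kill() error
	Wait() error
	Close() error
}

// withCleanup mirrors the deferred cleanup in Start() and Exec() above: if any later
// step fails, the new process is killed and reaped in the background.
func withCleanup(p hcsProcess, attach func() error) (err error) {
	defer func() {
		if err != nil {
			if kerr := p.Kill(); kerr != nil {
				fmt.Println("failed to kill process:", kerr)
			}
			go func() {
				p.Wait()  // reap the exit
				p.Close() // release process resources
			}()
		}
	}()
	return attach()
}

type fakeProcess struct{}

func (fakeProcess) Kill() error  { return nil }
func (fakeProcess) Wait() error  { return nil }
func (fakeProcess) Close() error { return nil }

func main() {
	fmt.Println(withCleanup(fakeProcess{}, func() error { return errors.New("attach failed") }))
}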
872
+
873
+// Signal handles `docker stop` on Windows. While Linux has support for
874
+// the full range of signals, signals aren't really implemented on Windows.
875
+// We fake supporting regular stop and -9 to force kill.
876
+func (c *client) SignalProcess(_ context.Context, containerID, processID string, signal int) error {
877
+	ctr, p, err := c.getProcess(containerID, processID)
878
+	if err != nil {
879
+		return err
880
+	}
881
+
882
+	ctr.manualStopRequested = true
883
+
884
+	logger := c.logger.WithFields(logrus.Fields{
885
+		"container": containerID,
886
+		"process":   processID,
887
+		"pid":       p.pid,
888
+		"signal":    signal,
889
+	})
890
+	logger.Debug("Signal()")
891
+
892
+	if processID == InitProcessName {
893
+		if syscall.Signal(signal) == syscall.SIGKILL {
894
+			// Terminate the compute system
895
+			if err := ctr.hcsContainer.Terminate(); err != nil {
896
+				if !hcsshim.IsPending(err) {
897
+					logger.WithError(err).Error("failed to terminate hcsshim container")
898
+				}
899
+			}
900
+		} else {
901
+			// Shut down the container
902
+			if err := ctr.hcsContainer.Shutdown(); err != nil {
903
+				if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
904
+					// ignore errors
905
+					logger.WithError(err).Error("failed to shutdown hcsshim container")
906
+				}
907
+			}
908
+		}
909
+	} else {
910
+		return p.hcsProcess.Kill()
911
+	}
912
+
913
+	return nil
914
+}
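A minimal sketch of the signal mapping described above, not part of this change; the returned strings merely name which call SignalProcess() would make, and initProcessName is an illustrative constant standing in for InitProcessName.

package main

import (
	"fmt"
	"syscall"
)

const initProcessName = "init" // illustrative stand-in for InitProcessName

// signalAction returns which HCS call SignalProcess() above would make: exec'd
// processes are simply killed, SIGKILL on the init process terminates the compute
// system, and anything else requests a clean shutdown.
func signalAction(processID string, signal int) string {
	if processID != initProcessName {
		return "hcsProcess.Kill()"
	}
	if syscall.Signal(signal) == syscall.SIGKILL {
		return "hcsContainer.Terminate()"
	}
	return "hcsContainer.Shutdown()"
}

func main() {
	fmt.Println(signalAction(initProcessName, 9))  // hcsContainer.Terminate()
	fmt.Println(signalAction(initProcessName, 15)) // hcsContainer.Shutdown()
	fmt.Println(signalAction("exec-1", 9))         // hcsProcess.Kill()
}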
915
+
916
+// Resize handles a CLI event to resize an interactive docker run or docker
917
+// exec window.
918
+func (c *client) ResizeTerminal(_ context.Context, containerID, processID string, width, height int) error {
919
+	_, p, err := c.getProcess(containerID, processID)
920
+	if err != nil {
921
+		return err
922
+	}
923
+
924
+	c.logger.WithFields(logrus.Fields{
925
+		"container": containerID,
926
+		"process":   processID,
927
+		"height":    height,
928
+		"width":     width,
929
+		"pid":       p.pid,
930
+	}).Debug("resizing")
931
+	return p.hcsProcess.ResizeConsole(uint16(height), uint16(width))
932
+}
933
+
934
+func (c *client) CloseStdin(_ context.Context, containerID, processID string) error {
935
+	_, p, err := c.getProcess(containerID, processID)
936
+	if err != nil {
937
+		return err
938
+	}
939
+
940
+	return p.hcsProcess.CloseStdin()
941
+}
942
+
943
+// Pause handles pause requests for containers
944
+func (c *client) Pause(_ context.Context, containerID string) error {
945
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
946
+	if err != nil {
947
+		return err
948
+	}
949
+
950
+	if ctr.ociSpec.Windows.HyperV == nil {
951
+		return errors.New("cannot pause Windows Server Containers")
952
+	}
953
+
954
+	ctr.Lock()
955
+	defer ctr.Unlock()
956
+
957
+	if err = ctr.hcsContainer.Pause(); err != nil {
958
+		return err
959
+	}
960
+
961
+	ctr.status = StatusPaused
962
+
963
+	c.eventQ.append(containerID, func() {
964
+		err := c.backend.ProcessEvent(containerID, EventPaused, EventInfo{
965
+			ContainerID: containerID,
966
+			ProcessID:   InitProcessName,
967
+		})
968
+		c.logger.WithFields(logrus.Fields{
969
+			"container": ctr.id,
970
+			"event":     EventPaused,
971
+		}).Info("sending event")
972
+		if err != nil {
973
+			c.logger.WithError(err).WithFields(logrus.Fields{
974
+				"container": containerID,
975
+				"event":     EventPaused,
976
+			}).Error("failed to process event")
977
+		}
978
+	})
979
+
980
+	return nil
981
+}
982
+
983
+// Resume handles resume requests for containers
984
+func (c *client) Resume(_ context.Context, containerID string) error {
985
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
986
+	if err != nil {
987
+		return err
988
+	}
989
+
990
+	if ctr.ociSpec.Windows.HyperV == nil {
991
+		return errors.New("cannot resume Windows Server Containers")
992
+	}
993
+
994
+	ctr.Lock()
995
+	defer ctr.Unlock()
996
+
997
+	if err = ctr.hcsContainer.Resume(); err != nil {
998
+		return err
999
+	}
1000
+
1001
+	ctr.status = StatusRunning
1002
+
1003
+	c.eventQ.append(containerID, func() {
1004
+		err := c.backend.ProcessEvent(containerID, EventResumed, EventInfo{
1005
+			ContainerID: containerID,
1006
+			ProcessID:   InitProcessName,
1007
+		})
1008
+		c.logger.WithFields(logrus.Fields{
1009
+			"container": ctr.id,
1010
+			"event":     EventResumed,
1011
+		}).Info("sending event")
1012
+		if err != nil {
1013
+			c.logger.WithError(err).WithFields(logrus.Fields{
1014
+				"container": containerID,
1015
+				"event":     EventResumed,
1016
+			}).Error("failed to process event")
1017
+		}
1018
+	})
1019
+
1020
+	return nil
1021
+}
1022
+
1023
+// Stats handles stats requests for containers
1024
+func (c *client) Stats(_ context.Context, containerID string) (*Stats, error) {
1025
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
1026
+	if err != nil {
1027
+		return nil, err
1028
+	}
1029
+
1030
+	readAt := time.Now()
1031
+	s, err := ctr.hcsContainer.Statistics()
1032
+	if err != nil {
1033
+		return nil, err
1034
+	}
1035
+	return &Stats{
1036
+		Read:     readAt,
1037
+		HCSStats: &s,
1038
+	}, nil
1039
+}
1040
+
1041
+// Restore is the handler for restoring a container
1042
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (bool, int, error) {
1043
+	c.logger.WithField("container", id).Debug("restore()")
1044
+
1045
+	// TODO Windows: On RS1, a re-attach isn't possible.
1046
+	// However, there is a scenario in which there is an issue.
1047
+	// Consider a background container. The daemon dies unexpectedly.
1048
+	// HCS will still have the compute service alive and running.
1049
+	// For consistency, we call in to shoot it regardless of whether HCS knows about it.
1050
+	// We explicitly just log a warning if the terminate fails.
1051
+	// Then we tell the backend the container exited.
1052
+	if hc, err := hcsshim.OpenContainer(id); err == nil {
1053
+		const terminateTimeout = time.Minute * 2
1054
+		err := hc.Terminate()
1055
+
1056
+		if hcsshim.IsPending(err) {
1057
+			err = hc.WaitTimeout(terminateTimeout)
1058
+		} else if hcsshim.IsAlreadyStopped(err) {
1059
+			err = nil
1060
+		}
1061
+
1062
+		if err != nil {
1063
+			c.logger.WithField("container", id).WithError(err).Debug("terminate failed on restore")
1064
+			return false, -1, err
1065
+		}
1066
+	}
1067
+	return false, -1, nil
1068
+}
1069
+
1070
+// ListPids returns a list of process IDs running in a container.
1071
+// Not used on Windows.
1072
+func (c *client) ListPids(_ context.Context, _ string) ([]uint32, error) {
1073
+	return nil, errors.New("not implemented on Windows")
1074
+}
1075
+
1076
+// Summary returns a summary of the processes running in a container.
1077
+// This is present in Windows to support docker top. On Linux, the
1078
+// engine shells out to ps to get process information. On Windows, as
1079
+// the containers could be Hyper-V containers, they would not be
1080
+// visible on the container host. However, libcontainerd does have
1081
+// that information.
1082
+func (c *client) Summary(_ context.Context, containerID string) ([]Summary, error) {
1083
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
1084
+	if err != nil {
1085
+		return nil, err
1086
+	}
1087
+
1088
+	p, err := ctr.hcsContainer.ProcessList()
1089
+	if err != nil {
1090
+		return nil, err
1091
+	}
1092
+
1093
+	pl := make([]Summary, len(p))
1094
+	for i := range p {
1095
+		pl[i] = Summary(p[i])
1096
+	}
1097
+	return pl, nil
1098
+}
1099
+
1100
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
1101
+	ec := -1
1102
+	ctr := c.getContainer(containerID)
1103
+	if ctr == nil {
1104
+		return uint32(ec), time.Now(), errors.WithStack(newNotFoundError("no such container"))
1105
+	}
1106
+
1107
+	select {
1108
+	case <-ctx.Done():
1109
+		return uint32(ec), time.Now(), errors.WithStack(ctx.Err())
1110
+	case <-ctr.waitCh:
1111
+	default:
1112
+		return uint32(ec), time.Now(), errors.New("container is not stopped")
1113
+	}
1114
+
1115
+	ctr.Lock()
1116
+	defer ctr.Unlock()
1117
+	return ctr.exitCode, ctr.exitedAt, nil
1118
+}
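DeleteTask() above relies on a non-blocking select: a closed waitCh means the init process has already exited, otherwise the default case reports that the container is still running. A minimal, self-contained sketch of that check, not part of this change.

package main

import (
	"context"
	"errors"
	"fmt"
)

// exitStatusReady mirrors the select in DeleteTask() above: it only succeeds once the
// container's waitCh has been closed, i.e. the init process has exited.
func exitStatusReady(ctx context.Context, waitCh <-chan struct{}) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-waitCh:
		return nil // safe to read the stored exit code and exit time
	default:
		return errors.New("container is not stopped")
	}
}

func main() {
	waitCh := make(chan struct{})
	fmt.Println(exitStatusReady(context.Background(), waitCh)) // container is not stopped
	close(waitCh)
	fmt.Println(exitStatusReady(context.Background(), waitCh)) // <nil>
}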
1119
+
1120
+func (c *client) Delete(_ context.Context, containerID string) error {
1121
+	c.Lock()
1122
+	defer c.Unlock()
1123
+	ctr := c.containers[containerID]
1124
+	if ctr == nil {
1125
+		return errors.WithStack(newNotFoundError("no such container"))
1126
+	}
1127
+
1128
+	ctr.Lock()
1129
+	defer ctr.Unlock()
1130
+
1131
+	switch ctr.status {
1132
+	case StatusCreated:
1133
+		if err := c.shutdownContainer(ctr); err != nil {
1134
+			return err
1135
+		}
1136
+		fallthrough
1137
+	case StatusStopped:
1138
+		delete(c.containers, containerID)
1139
+		return nil
1140
+	}
1141
+
1142
+	return errors.WithStack(newInvalidParameterError("container is not stopped"))
1143
+}
1144
+
1145
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
1146
+	c.Lock()
1147
+	defer c.Unlock()
1148
+	ctr := c.containers[containerID]
1149
+	if ctr == nil {
1150
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
1151
+	}
1152
+
1153
+	ctr.Lock()
1154
+	defer ctr.Unlock()
1155
+	return ctr.status, nil
1156
+}
1157
+
1158
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
1159
+	// Updating resources isn't supported on Windows,
1160
+	// but we should return nil so that updating a container does not fail.
1161
+	return nil
1162
+}
1163
+
1164
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
1165
+	return errors.New("Windows: Containers do not support checkpoints")
1166
+}
1167
+
1168
+func (c *client) getContainer(id string) *container {
1169
+	c.Lock()
1170
+	ctr := c.containers[id]
1171
+	c.Unlock()
1172
+
1173
+	return ctr
1174
+}
1175
+
1176
+func (c *client) getProcess(containerID, processID string) (*container, *process, error) {
1177
+	ctr := c.getContainer(containerID)
1178
+	switch {
1179
+	case ctr == nil:
1180
+		return nil, nil, errors.WithStack(newNotFoundError("no such container"))
1181
+	case ctr.init == nil:
1182
+		return nil, nil, errors.WithStack(newNotFoundError("container is not running"))
1183
+	case processID == InitProcessName:
1184
+		return ctr, ctr.init, nil
1185
+	default:
1186
+		ctr.Lock()
1187
+		defer ctr.Unlock()
1188
+		if ctr.execs == nil {
1189
+			return nil, nil, errors.WithStack(newNotFoundError("no execs"))
1190
+		}
1191
+	}
1192
+
1193
+	p := ctr.execs[processID]
1194
+	if p == nil {
1195
+		return nil, nil, errors.WithStack(newNotFoundError("no such exec"))
1196
+	}
1197
+
1198
+	return ctr, p, nil
1199
+}
1200
+
1201
+func (c *client) shutdownContainer(ctr *container) error {
1202
+	const shutdownTimeout = time.Minute * 5
1203
+	err := ctr.hcsContainer.Shutdown()
1204
+
1205
+	if hcsshim.IsPending(err) {
1206
+		err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
1207
+	} else if hcsshim.IsAlreadyStopped(err) {
1208
+		err = nil
1209
+	}
1210
+
1211
+	if err != nil {
1212
+		c.logger.WithError(err).WithField("container", ctr.id).
1213
+			Debug("failed to shutdown container, terminating it")
1214
+		return c.terminateContainer(ctr)
1215
+	}
1216
+
1217
+	return nil
1218
+}
1219
+
1220
+func (c *client) terminateContainer(ctr *container) error {
1221
+	const terminateTimeout = time.Minute * 5
1222
+	err := ctr.hcsContainer.Terminate()
1223
+
1224
+	if hcsshim.IsPending(err) {
1225
+		err = ctr.hcsContainer.WaitTimeout(terminateTimeout)
1226
+	} else if hcsshim.IsAlreadyStopped(err) {
1227
+		err = nil
1228
+	}
1229
+
1230
+	if err != nil {
1231
+		c.logger.WithError(err).WithField("container", ctr.id).
1232
+			Debug("failed to terminate container")
1233
+		return err
1234
+	}
1235
+
1236
+	return nil
1237
+}
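shutdownContainer() and terminateContainer() above share the same shape: a pending HCS operation is waited on with a timeout, and an already-stopped result counts as success. A minimal sketch of that shape, not part of this change; computeSystem and the error predicates are stand-ins for the hcsshim container handle and hcsshim.IsPending/IsAlreadyStopped.

package main

import (
	"fmt"
	"time"
)

// computeSystem is a stand-in for the hcsshim container handle used above.
type computeSystem interface {
	Shutdown() error
	WaitTimeout(time.Duration) error
}

// stopWithTimeout mirrors shutdownContainer()/terminateContainer() above: a pending
// operation is waited on with a timeout and "already stopped" counts as success.
func stopWithTimeout(cs computeSystem, timeout time.Duration,
	isPending, isAlreadyStopped func(error) bool) error {
	err := cs.Shutdown()
	if isPending(err) {
		err = cs.WaitTimeout(timeout)
	} else if isAlreadyStopped(err) {
		err = nil
	}
	return err
}

type stoppedSystem struct{}

func (stoppedSystem) Shutdown() error                 { return fmt.Errorf("already stopped") }
func (stoppedSystem) WaitTimeout(time.Duration) error { return nil }

func main() {
	err := stopWithTimeout(stoppedSystem{}, 5*time.Minute,
		func(error) bool { return false }, // stand-in for hcsshim.IsPending
		func(error) bool { return true })  // stand-in for hcsshim.IsAlreadyStopped
	fmt.Println(err) // <nil>
}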
1238
+
1239
+func (c *client) reapProcess(ctr *container, p *process) int {
1240
+	logger := c.logger.WithFields(logrus.Fields{
1241
+		"container": ctr.id,
1242
+		"process":   p.id,
1243
+	})
1244
+
1245
+	// Block indefinitely for the process to exit.
1246
+	if err := p.hcsProcess.Wait(); err != nil {
1247
+		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
1248
+			logger.WithError(err).Warnf("Wait() failed (container may have been killed)")
1249
+		}
1250
+		// Fall through here, do not return. This ensures we attempt to
1251
+		// continue the shutdown in HCS and tell the docker engine that the
1252
+		// process/container has exited to avoid a container being dropped on
1253
+		// the floor.
1254
+	}
1255
+	exitedAt := time.Now()
1256
+
1257
+	exitCode, err := p.hcsProcess.ExitCode()
1258
+	if err != nil {
1259
+		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
1260
+			logger.WithError(err).Warnf("unable to get exit code for process")
1261
+		}
1262
+		// Since we got an error retrieving the exit code, make sure that the
1263
+		// code we return doesn't incorrectly indicate success.
1264
+		exitCode = -1
1265
+
1266
+		// Fall through here, do not return. This ensures we attempt to
1267
+		// continue the shutdown in HCS and tell the docker engine that the
1268
+		// process/container has exited to avoid a container being dropped on
1269
+		// the floor.
1270
+	}
1271
+
1272
+	if err := p.hcsProcess.Close(); err != nil {
1273
+		logger.WithError(err).Warnf("failed to cleanup hcs process resources")
1274
+	}
1275
+
1276
+	var pendingUpdates bool
1277
+	if p.id == InitProcessName {
1278
+		// Update container status
1279
+		ctr.Lock()
1280
+		ctr.status = StatusStopped
1281
+		ctr.exitedAt = exitedAt
1282
+		ctr.exitCode = uint32(exitCode)
1283
+		close(ctr.waitCh)
1284
+		ctr.Unlock()
1285
+
1286
+		// Handle any servicing
1287
+		if exitCode == 0 && ctr.isWindows && !ctr.ociSpec.Windows.Servicing {
1288
+			pendingUpdates, err = ctr.hcsContainer.HasPendingUpdates()
1289
+			logger.Infof("Pending updates: %v", pendingUpdates)
1290
+			if err != nil {
1291
+				logger.WithError(err).
1292
+					Warnf("failed to check for pending updates (container may have been killed)")
1293
+			}
1294
+		}
1295
+
1296
+		if err := c.shutdownContainer(ctr); err != nil {
1297
+			logger.WithError(err).Warn("failed to shutdown container")
1298
+		} else {
1299
+			logger.Debug("completed container shutdown")
1300
+		}
1301
+
1302
+		if err := ctr.hcsContainer.Close(); err != nil {
1303
+			logger.WithError(err).Error("failed to clean hcs container resources")
1304
+		}
1305
+	}
1306
+
1307
+	if !(ctr.isWindows && ctr.ociSpec.Windows.Servicing) {
1308
+		c.eventQ.append(ctr.id, func() {
1309
+			ei := EventInfo{
1310
+				ContainerID:   ctr.id,
1311
+				ProcessID:     p.id,
1312
+				Pid:           uint32(p.pid),
1313
+				ExitCode:      uint32(exitCode),
1314
+				ExitedAt:      exitedAt,
1315
+				UpdatePending: pendingUpdates,
1316
+			}
1317
+			c.logger.WithFields(logrus.Fields{
1318
+				"container":  ctr.id,
1319
+				"event":      EventExit,
1320
+				"event-info": ei,
1321
+			}).Info("sending event")
1322
+			err := c.backend.ProcessEvent(ctr.id, EventExit, ei)
1323
+			if err != nil {
1324
+				c.logger.WithError(err).WithFields(logrus.Fields{
1325
+					"container":  ctr.id,
1326
+					"event":      EventExit,
1327
+					"event-info": ei,
1328
+				}).Error("failed to process event")
1329
+			}
1330
+			if p.id != InitProcessName {
1331
+				ctr.Lock()
1332
+				delete(ctr.execs, p.id)
1333
+				ctr.Unlock()
1334
+			}
1335
+		})
1336
+	}
1337
+
1338
+	return exitCode
1339
+}
0 1340
deleted file mode 100644
... ...
@@ -1,104 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	containerd "github.com/containerd/containerd/api/grpc/types"
5
-	"golang.org/x/net/context"
6
-)
7
-
8
-type client struct {
9
-	clientCommon
10
-
11
-	// Platform specific properties below here.
12
-	remote        *remote
13
-	q             queue
14
-	exitNotifiers map[string]*exitNotifier
15
-	liveRestore   bool
16
-}
17
-
18
-// GetServerVersion returns the connected server version information
19
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
20
-	resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
21
-	if err != nil {
22
-		return nil, err
23
-	}
24
-
25
-	sv := &ServerVersion{
26
-		GetServerVersionResponse: *resp,
27
-	}
28
-
29
-	return sv, nil
30
-}
31
-
32
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (int, error) {
33
-	return -1, nil
34
-}
35
-
36
-func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
37
-	return nil
38
-}
39
-
40
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
41
-	return nil
42
-}
43
-
44
-func (clnt *client) Pause(containerID string) error {
45
-	return nil
46
-}
47
-
48
-func (clnt *client) Resume(containerID string) error {
49
-	return nil
50
-}
51
-
52
-func (clnt *client) Stats(containerID string) (*Stats, error) {
53
-	return nil, nil
54
-}
55
-
56
-func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
57
-	clnt.mapMutex.RLock()
58
-	defer clnt.mapMutex.RUnlock()
59
-	return clnt.exitNotifiers[containerID]
60
-}
61
-
62
-func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
63
-	clnt.mapMutex.Lock()
64
-	defer clnt.mapMutex.Unlock()
65
-	w, ok := clnt.exitNotifiers[containerID]
66
-	if !ok {
67
-		w = &exitNotifier{c: make(chan struct{}), client: clnt}
68
-		clnt.exitNotifiers[containerID] = w
69
-	}
70
-	return w
71
-}
72
-
73
-// Restore is the handler for restoring a container
74
-func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
75
-	return nil
76
-}
77
-
78
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
79
-	return nil, nil
80
-}
81
-
82
-// Summary returns a summary of the processes running in a container.
83
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
84
-	return nil, nil
85
-}
86
-
87
-// UpdateResources updates resources for a running container.
88
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
89
-	// Updating resource isn't supported on Solaris
90
-	// but we should return nil for enabling updating container
91
-	return nil
92
-}
93
-
94
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
95
-	return nil
96
-}
97
-
98
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
99
-	return nil
100
-}
101
-
102
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
103
-	return nil, nil
104
-}
105 1
deleted file mode 100644
... ...
@@ -1,141 +0,0 @@
1
-// +build linux solaris
2
-
3
-package libcontainerd
4
-
5
-import (
6
-	"encoding/json"
7
-	"fmt"
8
-	"os"
9
-	"path/filepath"
10
-	"strings"
11
-	"sync"
12
-
13
-	containerd "github.com/containerd/containerd/api/grpc/types"
14
-	"github.com/docker/docker/pkg/idtools"
15
-	specs "github.com/opencontainers/runtime-spec/specs-go"
16
-	"github.com/sirupsen/logrus"
17
-	"golang.org/x/net/context"
18
-)
19
-
20
-func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
21
-	root, err := filepath.Abs(clnt.remote.stateDir)
22
-	if err != nil {
23
-		return "", err
24
-	}
25
-	if uid == 0 && gid == 0 {
26
-		return root, nil
27
-	}
28
-	p := string(filepath.Separator)
29
-	for _, d := range strings.Split(root, string(filepath.Separator))[1:] {
30
-		p = filepath.Join(p, d)
31
-		fi, err := os.Stat(p)
32
-		if err != nil && !os.IsNotExist(err) {
33
-			return "", err
34
-		}
35
-		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
36
-			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
37
-			if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
38
-				return "", err
39
-			}
40
-		}
41
-	}
42
-	return p, nil
43
-}
44
-
45
-func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) (err error) {
46
-	clnt.lock(containerID)
47
-	defer clnt.unlock(containerID)
48
-
49
-	if _, err := clnt.getContainer(containerID); err == nil {
50
-		return fmt.Errorf("Container %s is already active", containerID)
51
-	}
52
-
53
-	uid, gid, err := getRootIDs(spec)
54
-	if err != nil {
55
-		return err
56
-	}
57
-	dir, err := clnt.prepareBundleDir(uid, gid)
58
-	if err != nil {
59
-		return err
60
-	}
61
-
62
-	container := clnt.newContainer(filepath.Join(dir, containerID), options...)
63
-	if err := container.clean(); err != nil {
64
-		return err
65
-	}
66
-
67
-	defer func() {
68
-		if err != nil {
69
-			container.clean()
70
-			clnt.deleteContainer(containerID)
71
-		}
72
-	}()
73
-
74
-	if err := idtools.MkdirAllAndChown(container.dir, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
75
-		return err
76
-	}
77
-
78
-	f, err := os.Create(filepath.Join(container.dir, configFilename))
79
-	if err != nil {
80
-		return err
81
-	}
82
-	defer f.Close()
83
-	if err := json.NewEncoder(f).Encode(spec); err != nil {
84
-		return err
85
-	}
86
-	return container.start(&spec, checkpoint, checkpointDir, attachStdio)
87
-}
88
-
89
-func (clnt *client) Signal(containerID string, sig int) error {
90
-	clnt.lock(containerID)
91
-	defer clnt.unlock(containerID)
92
-	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
93
-		Id:     containerID,
94
-		Pid:    InitFriendlyName,
95
-		Signal: uint32(sig),
96
-	})
97
-	return err
98
-}
99
-
100
-func (clnt *client) newContainer(dir string, options ...CreateOption) *container {
101
-	container := &container{
102
-		containerCommon: containerCommon{
103
-			process: process{
104
-				dir: dir,
105
-				processCommon: processCommon{
106
-					containerID:  filepath.Base(dir),
107
-					client:       clnt,
108
-					friendlyName: InitFriendlyName,
109
-				},
110
-			},
111
-			processes: make(map[string]*process),
112
-		},
113
-	}
114
-	for _, option := range options {
115
-		if err := option.Apply(container); err != nil {
116
-			logrus.Errorf("libcontainerd: newContainer(): %v", err)
117
-		}
118
-	}
119
-	return container
120
-}
121
-
122
-type exitNotifier struct {
123
-	id     string
124
-	client *client
125
-	c      chan struct{}
126
-	once   sync.Once
127
-}
128
-
129
-func (en *exitNotifier) close() {
130
-	en.once.Do(func() {
131
-		close(en.c)
132
-		en.client.mapMutex.Lock()
133
-		if en == en.client.exitNotifiers[en.id] {
134
-			delete(en.client.exitNotifiers, en.id)
135
-		}
136
-		en.client.mapMutex.Unlock()
137
-	})
138
-}
139
-func (en *exitNotifier) wait() <-chan struct{} {
140
-	return en.c
141
-}
142 1
deleted file mode 100644
... ...
@@ -1,886 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	"encoding/json"
5
-	"errors"
6
-	"fmt"
7
-	"io"
8
-	"io/ioutil"
9
-	"os"
10
-	"path"
11
-	"path/filepath"
12
-	"regexp"
13
-	"strings"
14
-	"syscall"
15
-	"time"
16
-
17
-	"golang.org/x/net/context"
18
-
19
-	"github.com/Microsoft/hcsshim"
20
-	opengcs "github.com/Microsoft/opengcs/client"
21
-	"github.com/docker/docker/pkg/sysinfo"
22
-	"github.com/docker/docker/pkg/system"
23
-	specs "github.com/opencontainers/runtime-spec/specs-go"
24
-	"github.com/sirupsen/logrus"
25
-)
26
-
27
-type client struct {
28
-	clientCommon
29
-
30
-	// Platform specific properties below here (none presently on Windows)
31
-}
32
-
33
-// Win32 error codes that are used for various workarounds
34
-// These really should be ALL_CAPS to match golangs syscall library and standard
35
-// Win32 error conventions, but golint insists on CamelCase.
36
-const (
37
-	CoEClassstring     = syscall.Errno(0x800401F3) // Invalid class string
38
-	ErrorNoNetwork     = syscall.Errno(1222)       // The network is not present or not started
39
-	ErrorBadPathname   = syscall.Errno(161)        // The specified path is invalid
40
-	ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
41
-)
42
-
43
-// defaultOwner is a tag passed to HCS to allow it to differentiate between
44
-// container creator management stacks. We hard code "docker" in the case
45
-// of docker.
46
-const defaultOwner = "docker"
47
-
48
-// Create is the entrypoint to create a container from a spec, and if successfully
49
-// created, start it too. Table below shows the fields required for HCS JSON calling parameters,
50
-// where if not populated, is omitted.
51
-// +-----------------+--------------------------------------------+---------------------------------------------------+
52
-// |                 | Isolation=Process                          | Isolation=Hyper-V                                 |
53
-// +-----------------+--------------------------------------------+---------------------------------------------------+
54
-// | VolumePath      | \\?\\Volume{GUIDa}                         |                                                   |
55
-// | LayerFolderPath | %root%\windowsfilter\containerID           | %root%\windowsfilter\containerID (servicing only) |
56
-// | Layers[]        | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID        |
57
-// | HvRuntime       |                                            | ImagePath=%root%\BaseLayerID\UtilityVM            |
58
-// +-----------------+--------------------------------------------+---------------------------------------------------+
59
-//
60
-// Isolation=Process example:
61
-//
62
-// {
63
-//	"SystemType": "Container",
64
-//	"Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
65
-//	"Owner": "docker",
66
-//	"VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}",
67
-//	"IgnoreFlushesDuringBoot": true,
68
-//	"LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
69
-//	"Layers": [{
70
-//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
71
-//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
72
-//	}],
73
-//	"HostName": "5e0055c814a6",
74
-//	"MappedDirectories": [],
75
-//	"HvPartition": false,
76
-//	"EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"],
77
-//	"Servicing": false
78
-//}
79
-//
80
-// Isolation=Hyper-V example:
81
-//
82
-//{
83
-//	"SystemType": "Container",
84
-//	"Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d",
85
-//	"Owner": "docker",
86
-//	"IgnoreFlushesDuringBoot": true,
87
-//	"Layers": [{
88
-//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
89
-//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
90
-//	}],
91
-//	"HostName": "475c2c58933b",
92
-//	"MappedDirectories": [],
93
-//	"HvPartition": true,
94
-//	"EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"],
95
-//	"DNSSearchList": "a.com,b.com,c.com",
96
-//	"HvRuntime": {
97
-//		"ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
98
-//	},
99
-//	"Servicing": false
100
-//}
101
-func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
102
-	clnt.lock(containerID)
103
-	defer clnt.unlock(containerID)
104
-	if b, err := json.Marshal(spec); err == nil {
105
-		logrus.Debugln("libcontainerd: client.Create() with spec", string(b))
106
-	}
107
-
108
-	// spec.Linux must be nil for Windows containers, but spec.Windows will be filled in regardless of container platform.
109
-	// This is a temporary workaround due to LCOW requiring layer folder paths, which are stored under spec.Windows.
110
-	// TODO: @darrenstahlmsft fix this once the OCI spec is updated to support layer folder paths for LCOW
111
-	if spec.Linux == nil {
112
-		return clnt.createWindows(containerID, checkpoint, checkpointDir, spec, attachStdio, options...)
113
-	}
114
-	return clnt.createLinux(containerID, checkpoint, checkpointDir, spec, attachStdio, options...)
115
-}
116
-
117
-func (clnt *client) createWindows(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
118
-	configuration := &hcsshim.ContainerConfig{
119
-		SystemType: "Container",
120
-		Name:       containerID,
121
-		Owner:      defaultOwner,
122
-		IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot,
123
-		HostName:                spec.Hostname,
124
-		HvPartition:             false,
125
-		Servicing:               spec.Windows.Servicing,
126
-	}
127
-
128
-	if spec.Windows.Resources != nil {
129
-		if spec.Windows.Resources.CPU != nil {
130
-			if spec.Windows.Resources.CPU.Count != nil {
131
-				// This check is being done here rather than in adaptContainerSettings
132
-				// because we don't want to update the HostConfig in case this container
133
-				// is moved to a host with more CPUs than this one.
134
-				cpuCount := *spec.Windows.Resources.CPU.Count
135
-				hostCPUCount := uint64(sysinfo.NumCPU())
136
-				if cpuCount > hostCPUCount {
137
-					logrus.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount)
138
-					cpuCount = hostCPUCount
139
-				}
140
-				configuration.ProcessorCount = uint32(cpuCount)
141
-			}
142
-			if spec.Windows.Resources.CPU.Shares != nil {
143
-				configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares)
144
-			}
145
-			if spec.Windows.Resources.CPU.Maximum != nil {
146
-				configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
147
-			}
148
-		}
149
-		if spec.Windows.Resources.Memory != nil {
150
-			if spec.Windows.Resources.Memory.Limit != nil {
151
-				configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024
152
-			}
153
-		}
154
-		if spec.Windows.Resources.Storage != nil {
155
-			if spec.Windows.Resources.Storage.Bps != nil {
156
-				configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
157
-			}
158
-			if spec.Windows.Resources.Storage.Iops != nil {
159
-				configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
160
-			}
161
-		}
162
-	}
163
-
164
-	if spec.Windows.HyperV != nil {
165
-		configuration.HvPartition = true
166
-	}
167
-
168
-	if spec.Windows.Network != nil {
169
-		configuration.EndpointList = spec.Windows.Network.EndpointList
170
-		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
171
-		if spec.Windows.Network.DNSSearchList != nil {
172
-			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
173
-		}
174
-		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
175
-	}
176
-
177
-	if cs, ok := spec.Windows.CredentialSpec.(string); ok {
178
-		configuration.Credentials = cs
179
-	}
180
-
181
-	// We must have least two layers in the spec, the bottom one being a base image,
182
-	// the top one being the RW layer.
183
-	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 {
184
-		return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
185
-	}
186
-
187
-	// Strip off the top-most layer as that's passed in separately to HCS
188
-	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
189
-	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
190
-
191
-	if configuration.HvPartition {
192
-		// We don't currently support setting the utility VM image explicitly.
193
-		// TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable.
194
-		if spec.Windows.HyperV.UtilityVMPath != "" {
195
-			return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
196
-		}
197
-
198
-		// Find the upper-most utility VM image.
199
-		var uvmImagePath string
200
-		for _, path := range layerFolders {
201
-			fullPath := filepath.Join(path, "UtilityVM")
202
-			_, err := os.Stat(fullPath)
203
-			if err == nil {
204
-				uvmImagePath = fullPath
205
-				break
206
-			}
207
-			if !os.IsNotExist(err) {
208
-				return err
209
-			}
210
-		}
211
-		if uvmImagePath == "" {
212
-			return errors.New("utility VM image could not be found")
213
-		}
214
-		configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
215
-
216
-		if spec.Root.Path != "" {
217
-			return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
218
-		}
219
-	} else {
220
-		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
221
-		if _, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil {
222
-			return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
223
-		}
224
-		// HCS API requires the trailing backslash to be removed
225
-		configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
226
-	}
227
-
228
-	if spec.Root.Readonly {
229
-		return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
230
-	}
231
-
232
-	for _, layerPath := range layerFolders {
233
-		_, filename := filepath.Split(layerPath)
234
-		g, err := hcsshim.NameToGuid(filename)
235
-		if err != nil {
236
-			return err
237
-		}
238
-		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
239
-			ID:   g.ToString(),
240
-			Path: layerPath,
241
-		})
242
-	}
243
-
244
-	// Add the mounts (volumes, bind mounts etc) to the structure
245
-	var mds []hcsshim.MappedDir
246
-	var mps []hcsshim.MappedPipe
247
-	for _, mount := range spec.Mounts {
248
-		const pipePrefix = `\\.\pipe\`
249
-		if mount.Type != "" {
250
-			return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
251
-		}
252
-		if strings.HasPrefix(mount.Destination, pipePrefix) {
253
-			mp := hcsshim.MappedPipe{
254
-				HostPath:          mount.Source,
255
-				ContainerPipeName: mount.Destination[len(pipePrefix):],
256
-			}
257
-			mps = append(mps, mp)
258
-		} else {
259
-			md := hcsshim.MappedDir{
260
-				HostPath:      mount.Source,
261
-				ContainerPath: mount.Destination,
262
-				ReadOnly:      false,
263
-			}
264
-			for _, o := range mount.Options {
265
-				if strings.ToLower(o) == "ro" {
266
-					md.ReadOnly = true
267
-				}
268
-			}
269
-			mds = append(mds, md)
270
-		}
271
-	}
272
-	configuration.MappedDirectories = mds
273
-	if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM
274
-		return errors.New("named pipe mounts are not supported on this version of Windows")
275
-	}
276
-	configuration.MappedPipes = mps
277
-
278
-	hcsContainer, err := hcsshim.CreateContainer(containerID, configuration)
279
-	if err != nil {
280
-		return err
281
-	}
282
-
283
-	// Construct a container object for calling start on it.
284
-	container := &container{
285
-		containerCommon: containerCommon{
286
-			process: process{
287
-				processCommon: processCommon{
288
-					containerID:  containerID,
289
-					client:       clnt,
290
-					friendlyName: InitFriendlyName,
291
-				},
292
-			},
293
-			processes: make(map[string]*process),
294
-		},
295
-		isWindows:    true,
296
-		ociSpec:      spec,
297
-		hcsContainer: hcsContainer,
298
-	}
299
-
300
-	container.options = options
301
-	for _, option := range options {
302
-		if err := option.Apply(container); err != nil {
303
-			logrus.Errorf("libcontainerd: %v", err)
304
-		}
305
-	}
306
-
307
-	// Call start, and if it fails, delete the container from our
308
-	// internal structure, start will keep HCS in sync by deleting the
309
-	// container there.
310
-	logrus.Debugf("libcontainerd: createWindows() id=%s, Calling start()", containerID)
311
-	if err := container.start(attachStdio); err != nil {
312
-		clnt.deleteContainer(containerID)
313
-		return err
314
-	}
315
-
316
-	logrus.Debugf("libcontainerd: createWindows() id=%s completed successfully", containerID)
317
-	return nil
318
-
319
-}
320
-
321
-func (clnt *client) createLinux(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
322
-	logrus.Debugf("libcontainerd: createLinux(): containerId %s ", containerID)
323
-
324
-	var lcowOpt *LCOWOption
325
-	for _, option := range options {
326
-		if lcow, ok := option.(*LCOWOption); ok {
327
-			lcowOpt = lcow
328
-		}
329
-	}
330
-	if lcowOpt == nil || lcowOpt.Config == nil {
331
-		return fmt.Errorf("lcow option must be supplied to the runtime")
332
-	}
333
-
334
-	configuration := &hcsshim.ContainerConfig{
335
-		HvPartition:   true,
336
-		Name:          containerID,
337
-		SystemType:    "container",
338
-		ContainerType: "linux",
339
-		Owner:         defaultOwner,
340
-		TerminateOnLastHandleClosed: true,
341
-	}
342
-
343
-	if lcowOpt.Config.ActualMode == opengcs.ModeActualVhdx {
344
-		configuration.HvRuntime = &hcsshim.HvRuntime{
345
-			ImagePath:          lcowOpt.Config.Vhdx,
346
-			BootSource:         "Vhd",
347
-			WritableBootSource: false,
348
-		}
349
-	} else {
350
-		configuration.HvRuntime = &hcsshim.HvRuntime{
351
-			ImagePath:           lcowOpt.Config.KirdPath,
352
-			LinuxKernelFile:     lcowOpt.Config.KernelFile,
353
-			LinuxInitrdFile:     lcowOpt.Config.InitrdFile,
354
-			LinuxBootParameters: lcowOpt.Config.BootParameters,
355
-		}
356
-	}
357
-
358
-	if spec.Windows == nil {
359
-		return fmt.Errorf("spec.Windows must not be nil for LCOW containers")
360
-	}
361
-
362
-	// We must have at least one layer in the spec
363
-	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 {
364
-		return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime")
365
-	}
366
-
367
-	// Strip off the top-most layer as that's passed in separately to HCS
368
-	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
369
-	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
370
-
371
-	for _, layerPath := range layerFolders {
372
-		_, filename := filepath.Split(layerPath)
373
-		g, err := hcsshim.NameToGuid(filename)
374
-		if err != nil {
375
-			return err
376
-		}
377
-		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
378
-			ID:   g.ToString(),
379
-			Path: filepath.Join(layerPath, "layer.vhd"),
380
-		})
381
-	}
382
-
383
-	if spec.Windows.Network != nil {
384
-		configuration.EndpointList = spec.Windows.Network.EndpointList
385
-		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
386
-		if spec.Windows.Network.DNSSearchList != nil {
387
-			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
388
-		}
389
-		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
390
-	}
391
-
392
-	// Add the mounts (volumes, bind mounts etc) to the structure. We have to do
393
-	// some translation for both the mapped directories passed into HCS and in
394
-	// the spec.
395
-	//
396
-	// For HCS, we only pass in the mounts from the spec which are type "bind".
397
-	// Further, the "ContainerPath" field (which is a little misleadingly
398
-	// named when it applies to the utility VM rather than the container in the
399
-	// utility VM) is moved to under /tmp/gcs/<ID>/binds, where this is passed
400
-	// by the caller through a 'uvmpath' option.
401
-	//
402
-	// We do similar translation for the mounts in the spec by stripping out
403
-	// the uvmpath option, and translating the Source path to the location in the
404
-	// utility VM calculated above.
405
-	//
406
-	// From inside the utility VM, you would see a 9p mount such as in the following
407
-	// where a host folder has been mapped to /target. The line with /tmp/gcs/<ID>/binds
408
-	// specifically:
409
-	//
410
-	//	/ # mount
411
-	//	rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934)
412
-	//	proc on /proc type proc (rw,relatime)
413
-	//	sysfs on /sys type sysfs (rw,relatime)
414
-	//	udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755)
415
-	//	tmpfs on /run type tmpfs (rw,relatime)
416
-	//	cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma)
417
-	//	mqueue on /dev/mqueue type mqueue (rw,relatime)
418
-	//	devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
419
-	//	/binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6)
420
-	//	/dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl)
421
-	//	/dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl)
422
-	//	overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work)
423
-	//
424
-	//  /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l
425
-	//	total 16
426
-	//	drwx------    3 0        0               60 Sep  7 18:54 binds
427
-	//	-rw-r--r--    1 0        0             3345 Sep  7 18:54 config.json
428
-	//	drwxr-xr-x   10 0        0             4096 Sep  6 17:26 layer0
429
-	//	drwxr-xr-x    1 0        0             4096 Sep  7 18:54 rootfs
430
-	//	drwxr-xr-x    5 0        0             4096 Sep  7 18:54 scratch
431
-	//
432
-	//	/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds
433
-	//	total 0
434
-	//	drwxrwxrwt    2 0        0             4096 Sep  7 16:51 target
435
-
436
-	mds := []hcsshim.MappedDir{}
437
-	specMounts := []specs.Mount{}
438
-	for _, mount := range spec.Mounts {
439
-		specMount := mount
440
-		if mount.Type == "bind" {
441
-			// Strip out the uvmpath from the options
442
-			updatedOptions := []string{}
443
-			uvmPath := ""
444
-			readonly := false
445
-			for _, opt := range mount.Options {
446
-				dropOption := false
447
-				elements := strings.SplitN(opt, "=", 2)
448
-				switch elements[0] {
449
-				case "uvmpath":
450
-					uvmPath = elements[1]
451
-					dropOption = true
452
-				case "rw":
453
-				case "ro":
454
-					readonly = true
455
-				case "rbind":
456
-				default:
457
-					return fmt.Errorf("unsupported option %q", opt)
458
-				}
459
-				if !dropOption {
460
-					updatedOptions = append(updatedOptions, opt)
461
-				}
462
-			}
463
-			mount.Options = updatedOptions
464
-			if uvmPath == "" {
465
-				return fmt.Errorf("no uvmpath for bind mount %+v", mount)
466
-			}
467
-			md := hcsshim.MappedDir{
468
-				HostPath:          mount.Source,
469
-				ContainerPath:     path.Join(uvmPath, mount.Destination),
470
-				CreateInUtilityVM: true,
471
-				ReadOnly:          readonly,
472
-			}
473
-			mds = append(mds, md)
474
-			specMount.Source = path.Join(uvmPath, mount.Destination)
475
-		}
476
-		specMounts = append(specMounts, specMount)
477
-	}
478
-	configuration.MappedDirectories = mds
479
-
480
-	hcsContainer, err := hcsshim.CreateContainer(containerID, configuration)
481
-	if err != nil {
482
-		return err
483
-	}
484
-
485
-	spec.Mounts = specMounts
486
-
487
-	// Construct a container object for calling start on it.
488
-	container := &container{
489
-		containerCommon: containerCommon{
490
-			process: process{
491
-				processCommon: processCommon{
492
-					containerID:  containerID,
493
-					client:       clnt,
494
-					friendlyName: InitFriendlyName,
495
-				},
496
-			},
497
-			processes: make(map[string]*process),
498
-		},
499
-		ociSpec:      spec,
500
-		hcsContainer: hcsContainer,
501
-	}
502
-
503
-	container.options = options
504
-	for _, option := range options {
505
-		if err := option.Apply(container); err != nil {
506
-			logrus.Errorf("libcontainerd: createLinux() %v", err)
507
-		}
508
-	}
509
-
510
-	// Call start, and if it fails, delete the container from our
511
-	// internal structure, start will keep HCS in sync by deleting the
512
-	// container there.
513
-	logrus.Debugf("libcontainerd: createLinux() id=%s, Calling start()", containerID)
514
-	if err := container.start(attachStdio); err != nil {
515
-		clnt.deleteContainer(containerID)
516
-		return err
517
-	}
518
-
519
-	logrus.Debugf("libcontainerd: createLinux() id=%s completed successfully", containerID)
520
-	return nil
521
-}
522
-
523
-// AddProcess is the handler for adding a process to an already running
524
-// container. It's called through docker exec. It returns the system pid of the
525
-// exec'd process.
526
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, procToAdd Process, attachStdio StdioCallback) (int, error) {
527
-	clnt.lock(containerID)
528
-	defer clnt.unlock(containerID)
529
-	container, err := clnt.getContainer(containerID)
530
-	if err != nil {
531
-		return -1, err
532
-	}
533
-
534
-	defer container.debugGCS()
535
-
536
-	// Note we always tell HCS to
537
-	// create stdout as it's required regardless of '-i' or '-t' options, so that
538
-	// docker can always grab the output through logs. We also tell HCS to always
539
-	// create stdin, even if it's not used - it will be closed shortly. Stderr
540
-	// is only created if we're not -t.
541
-	createProcessParms := hcsshim.ProcessConfig{
542
-		CreateStdInPipe:  true,
543
-		CreateStdOutPipe: true,
544
-		CreateStdErrPipe: !procToAdd.Terminal,
545
-	}
546
-	if procToAdd.Terminal {
547
-		createProcessParms.EmulateConsole = true
548
-		if procToAdd.ConsoleSize != nil {
549
-			createProcessParms.ConsoleSize[0] = uint(procToAdd.ConsoleSize.Height)
550
-			createProcessParms.ConsoleSize[1] = uint(procToAdd.ConsoleSize.Width)
551
-		}
552
-	}
553
-
554
-	// Take working directory from the process to add if it is defined,
555
-	// otherwise take from the first process.
556
-	if procToAdd.Cwd != "" {
557
-		createProcessParms.WorkingDirectory = procToAdd.Cwd
558
-	} else {
559
-		createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd
560
-	}
561
-
562
-	// Configure the environment for the process
563
-	createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env)
564
-	if container.isWindows {
565
-		createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ")
566
-	} else {
567
-		createProcessParms.CommandArgs = procToAdd.Args
568
-	}
569
-	createProcessParms.User = procToAdd.User.Username
570
-
571
-	logrus.Debugf("libcontainerd: commandLine: %s", createProcessParms.CommandLine)
572
-
573
-	// Start the command running in the container.
574
-	var stdout, stderr io.ReadCloser
575
-	var stdin io.WriteCloser
576
-	newProcess, err := container.hcsContainer.CreateProcess(&createProcessParms)
577
-	if err != nil {
578
-		logrus.Errorf("libcontainerd: AddProcess(%s) CreateProcess() failed %s", containerID, err)
579
-		return -1, err
580
-	}
581
-
582
-	pid := newProcess.Pid()
583
-
584
-	stdin, stdout, stderr, err = newProcess.Stdio()
585
-	if err != nil {
586
-		logrus.Errorf("libcontainerd: %s getting std pipes failed %s", containerID, err)
587
-		return -1, err
588
-	}
589
-
590
-	iopipe := &IOPipe{Terminal: procToAdd.Terminal}
591
-	iopipe.Stdin = createStdInCloser(stdin, newProcess)
592
-
593
-	// Convert io.ReadClosers to io.Readers
594
-	if stdout != nil {
595
-		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
596
-	}
597
-	if stderr != nil {
598
-		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
599
-	}
600
-
601
-	proc := &process{
602
-		processCommon: processCommon{
603
-			containerID:  containerID,
604
-			friendlyName: processFriendlyName,
605
-			client:       clnt,
606
-			systemPid:    uint32(pid),
607
-		},
608
-		hcsProcess: newProcess,
609
-	}
610
-
611
-	// Add the process to the container's list of processes
612
-	container.processes[processFriendlyName] = proc
613
-
614
-	// Tell the engine to attach streams back to the client
615
-	if err := attachStdio(*iopipe); err != nil {
616
-		return -1, err
617
-	}
618
-
619
-	// Spin up a goroutine waiting for exit to handle cleanup
620
-	go container.waitExit(proc, false)
621
-
622
-	return pid, nil
623
-}
624
-
625
-// Signal handles `docker stop` on Windows. While Linux has support for
626
-// the full range of signals, signals aren't really implemented on Windows.
627
-// We fake supporting regular stop and -9 to force kill.
628
-func (clnt *client) Signal(containerID string, sig int) error {
629
-	var (
630
-		cont *container
631
-		err  error
632
-	)
633
-
634
-	// Get the container as we need it to get the container handle.
635
-	clnt.lock(containerID)
636
-	defer clnt.unlock(containerID)
637
-	if cont, err = clnt.getContainer(containerID); err != nil {
638
-		return err
639
-	}
640
-
641
-	cont.manualStopRequested = true
642
-
643
-	logrus.Debugf("libcontainerd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid)
644
-
645
-	if syscall.Signal(sig) == syscall.SIGKILL {
646
-		// Terminate the compute system
647
-		if err := cont.hcsContainer.Terminate(); err != nil {
648
-			if !hcsshim.IsPending(err) {
649
-				logrus.Errorf("libcontainerd: failed to terminate %s - %q", containerID, err)
650
-			}
651
-		}
652
-	} else {
653
-		// Shut down the container
654
-		if err := cont.hcsContainer.Shutdown(); err != nil {
655
-			if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
656
-				// ignore errors
657
-				logrus.Warnf("libcontainerd: failed to shutdown container %s: %q", containerID, err)
658
-			}
659
-		}
660
-	}
661
-
662
-	return nil
663
-}
664
-
665
-// While Linux has support for the full range of signals, signals aren't really implemented on Windows.
666
-// We try to terminate the specified process whatever signal is requested.
667
-func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error {
668
-	clnt.lock(containerID)
669
-	defer clnt.unlock(containerID)
670
-	cont, err := clnt.getContainer(containerID)
671
-	if err != nil {
672
-		return err
673
-	}
674
-
675
-	for _, p := range cont.processes {
676
-		if p.friendlyName == processFriendlyName {
677
-			return p.hcsProcess.Kill()
678
-		}
679
-	}
680
-
681
-	return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID)
682
-}
683
-
684
-// Resize handles a CLI event to resize an interactive docker run or docker exec
685
-// window.
686
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
687
-	// Get the libcontainerd container object
688
-	clnt.lock(containerID)
689
-	defer clnt.unlock(containerID)
690
-	cont, err := clnt.getContainer(containerID)
691
-	if err != nil {
692
-		return err
693
-	}
694
-
695
-	h, w := uint16(height), uint16(width)
696
-
697
-	if processFriendlyName == InitFriendlyName {
698
-		logrus.Debugln("libcontainerd: resizing systemPID in", containerID, cont.process.systemPid)
699
-		return cont.process.hcsProcess.ResizeConsole(w, h)
700
-	}
701
-
702
-	for _, p := range cont.processes {
703
-		if p.friendlyName == processFriendlyName {
704
-			logrus.Debugln("libcontainerd: resizing exec'd process", containerID, p.systemPid)
705
-			return p.hcsProcess.ResizeConsole(w, h)
706
-		}
707
-	}
708
-
709
-	return fmt.Errorf("Resize could not find containerID %s to resize", containerID)
710
-
711
-}
712
-
713
-// Pause handles pause requests for containers
714
-func (clnt *client) Pause(containerID string) error {
715
-	unlockContainer := true
716
-	// Get the libcontainerd container object
717
-	clnt.lock(containerID)
718
-	defer func() {
719
-		if unlockContainer {
720
-			clnt.unlock(containerID)
721
-		}
722
-	}()
723
-	container, err := clnt.getContainer(containerID)
724
-	if err != nil {
725
-		return err
726
-	}
727
-
728
-	if container.ociSpec.Windows.HyperV == nil {
729
-		return errors.New("cannot pause Windows Server Containers")
730
-	}
731
-
732
-	err = container.hcsContainer.Pause()
733
-	if err != nil {
734
-		return err
735
-	}
736
-
737
-	// Unlock container before calling back into the daemon
738
-	unlockContainer = false
739
-	clnt.unlock(containerID)
740
-
741
-	return clnt.backend.StateChanged(containerID, StateInfo{
742
-		CommonStateInfo: CommonStateInfo{
743
-			State: StatePause,
744
-		}})
745
-}
746
-
747
-// Resume handles resume requests for containers
748
-func (clnt *client) Resume(containerID string) error {
749
-	unlockContainer := true
750
-	// Get the libcontainerd container object
751
-	clnt.lock(containerID)
752
-	defer func() {
753
-		if unlockContainer {
754
-			clnt.unlock(containerID)
755
-		}
756
-	}()
757
-	container, err := clnt.getContainer(containerID)
758
-	if err != nil {
759
-		return err
760
-	}
761
-
762
-	// This should never happen, since Windows Server Containers cannot be paused
763
-
764
-	if container.ociSpec.Windows.HyperV == nil {
765
-		return errors.New("cannot resume Windows Server Containers")
766
-	}
767
-
768
-	err = container.hcsContainer.Resume()
769
-	if err != nil {
770
-		return err
771
-	}
772
-
773
-	// Unlock container before calling back into the daemon
774
-	unlockContainer = false
775
-	clnt.unlock(containerID)
776
-
777
-	return clnt.backend.StateChanged(containerID, StateInfo{
778
-		CommonStateInfo: CommonStateInfo{
779
-			State: StateResume,
780
-		}})
781
-}
782
-
783
-// Stats handles stats requests for containers
784
-func (clnt *client) Stats(containerID string) (*Stats, error) {
785
-	// Get the libcontainerd container object
786
-	clnt.lock(containerID)
787
-	defer clnt.unlock(containerID)
788
-	container, err := clnt.getContainer(containerID)
789
-	if err != nil {
790
-		return nil, err
791
-	}
792
-	s, err := container.hcsContainer.Statistics()
793
-	if err != nil {
794
-		return nil, err
795
-	}
796
-	st := Stats(s)
797
-	return &st, nil
798
-}
799
-
800
-// Restore is the handler for restoring a container
801
-func (clnt *client) Restore(containerID string, _ StdioCallback, unusedOnWindows ...CreateOption) error {
802
-	logrus.Debugf("libcontainerd: Restore(%s)", containerID)
803
-
804
-	// TODO Windows: On RS1, a re-attach isn't possible.
805
-	// However, there is a scenario in which there is an issue.
806
-	// Consider a background container. The daemon dies unexpectedly.
807
-	// HCS will still have the compute service alive and running.
808
-	// For consistency, we call in to terminate it regardless of whether HCS knows about it.
809
-	// We explicitly just log a warning if the terminate fails.
810
-	// Then we tell the backend the container exited.
811
-	if hc, err := hcsshim.OpenContainer(containerID); err == nil {
812
-		const terminateTimeout = time.Minute * 2
813
-		err := hc.Terminate()
814
-
815
-		if hcsshim.IsPending(err) {
816
-			err = hc.WaitTimeout(terminateTimeout)
817
-		} else if hcsshim.IsAlreadyStopped(err) {
818
-			err = nil
819
-		}
820
-
821
-		if err != nil {
822
-			logrus.Warnf("libcontainerd: failed to terminate %s on restore - %q", containerID, err)
823
-			return err
824
-		}
825
-	}
826
-	return clnt.backend.StateChanged(containerID, StateInfo{
827
-		CommonStateInfo: CommonStateInfo{
828
-			State:    StateExit,
829
-			ExitCode: 1 << 31,
830
-		}})
831
-}
832
-
833
-// GetPidsForContainer returns a list of process IDs running in a container.
834
-// Not used on Windows.
835
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
836
-	return nil, errors.New("not implemented on Windows")
837
-}
838
-
839
-// Summary returns a summary of the processes running in a container.
840
-// This is present on Windows to support docker top. On Linux, the
841
-// engine shells out to ps to get process information. On Windows, as
842
-// the containers could be Hyper-V containers, they would not be
843
-// visible on the container host. However, libcontainerd does have
844
-// that information.
845
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
846
-
847
-	// Get the libcontainerd container object
848
-	clnt.lock(containerID)
849
-	defer clnt.unlock(containerID)
850
-	container, err := clnt.getContainer(containerID)
851
-	if err != nil {
852
-		return nil, err
853
-	}
854
-	p, err := container.hcsContainer.ProcessList()
855
-	if err != nil {
856
-		return nil, err
857
-	}
858
-	pl := make([]Summary, len(p))
859
-	for i := range p {
860
-		pl[i] = Summary(p[i])
861
-	}
862
-	return pl, nil
863
-}
864
-
865
-// UpdateResources updates resources for a running container.
866
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
867
-	// Updating resources isn't supported on Windows,
868
-	// but we return nil so that container update requests do not fail.
869
-	return nil
870
-}
871
-
872
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
873
-	return errors.New("Windows: Containers do not support checkpoints")
874
-}
875
-
876
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
877
-	return errors.New("Windows: Containers do not support checkpoints")
878
-}
879
-
880
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
881
-	return nil, errors.New("Windows: Containers do not support checkpoints")
882
-}
883
-
884
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
885
-	return &ServerVersion{}, nil
886
-}
887 1
deleted file mode 100644
... ...
@@ -1,13 +0,0 @@
1
-package libcontainerd
2
-
3
-const (
4
-	// InitFriendlyName is the name given in the lookup map of processes
5
-	// for the first process started in a container.
6
-	InitFriendlyName = "init"
7
-	configFilename   = "config.json"
8
-)
9
-
10
-type containerCommon struct {
11
-	process
12
-	processes map[string]*process
13
-}
14 1
deleted file mode 100644
... ...
@@ -1,246 +0,0 @@
1
-// +build linux solaris
2
-
3
-package libcontainerd
4
-
5
-import (
6
-	"encoding/json"
7
-	"io"
8
-	"io/ioutil"
9
-	"os"
10
-	"path/filepath"
11
-	"sync"
12
-	"time"
13
-
14
-	containerd "github.com/containerd/containerd/api/grpc/types"
15
-	"github.com/docker/docker/pkg/ioutils"
16
-	specs "github.com/opencontainers/runtime-spec/specs-go"
17
-	"github.com/sirupsen/logrus"
18
-	"github.com/tonistiigi/fifo"
19
-	"golang.org/x/net/context"
20
-	"golang.org/x/sys/unix"
21
-)
22
-
23
-type container struct {
24
-	containerCommon
25
-
26
-	// Platform specific fields are below here.
27
-	pauseMonitor
28
-	oom         bool
29
-	runtime     string
30
-	runtimeArgs []string
31
-}
32
-
33
-type runtime struct {
34
-	path string
35
-	args []string
36
-}
37
-
38
-// WithRuntime sets the runtime to be used for the created container
39
-func WithRuntime(path string, args []string) CreateOption {
40
-	return runtime{path, args}
41
-}
42
-
43
-func (rt runtime) Apply(p interface{}) error {
44
-	if pr, ok := p.(*container); ok {
45
-		pr.runtime = rt.path
46
-		pr.runtimeArgs = rt.args
47
-	}
48
-	return nil
49
-}
50
-
51
-func (ctr *container) clean() error {
52
-	if os.Getenv("LIBCONTAINERD_NOCLEAN") == "1" {
53
-		return nil
54
-	}
55
-	if _, err := os.Lstat(ctr.dir); err != nil {
56
-		if os.IsNotExist(err) {
57
-			return nil
58
-		}
59
-		return err
60
-	}
61
-
62
-	if err := os.RemoveAll(ctr.dir); err != nil {
63
-		return err
64
-	}
65
-	return nil
66
-}
67
-
68
-// cleanProcess removes the fifos used by an additional process.
69
-// Caller needs to lock container ID before calling this method.
70
-func (ctr *container) cleanProcess(id string) {
71
-	if p, ok := ctr.processes[id]; ok {
72
-		for _, i := range []int{unix.Stdin, unix.Stdout, unix.Stderr} {
73
-			if err := os.Remove(p.fifo(i)); err != nil && !os.IsNotExist(err) {
74
-				logrus.Warnf("libcontainerd: failed to remove %v for process %v: %v", p.fifo(i), id, err)
75
-			}
76
-		}
77
-	}
78
-	delete(ctr.processes, id)
79
-}
80
-
81
-func (ctr *container) spec() (*specs.Spec, error) {
82
-	var spec specs.Spec
83
-	dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename))
84
-	if err != nil {
85
-		return nil, err
86
-	}
87
-	if err := json.Unmarshal(dt, &spec); err != nil {
88
-		return nil, err
89
-	}
90
-	return &spec, nil
91
-}
92
-
93
-func (ctr *container) start(spec *specs.Spec, checkpoint, checkpointDir string, attachStdio StdioCallback) (err error) {
94
-	ctx, cancel := context.WithCancel(context.Background())
95
-	defer cancel()
96
-	ready := make(chan struct{})
97
-
98
-	fifoCtx, cancel := context.WithCancel(context.Background())
99
-	defer func() {
100
-		if err != nil {
101
-			cancel()
102
-		}
103
-	}()
104
-
105
-	iopipe, err := ctr.openFifos(fifoCtx, spec.Process.Terminal)
106
-	if err != nil {
107
-		return err
108
-	}
109
-
110
-	var stdinOnce sync.Once
111
-
112
-	// we need to delay stdin closure after container start or else "stdin close"
113
-	// event will be rejected by containerd.
114
-	// stdin closure happens in attachStdio
115
-	stdin := iopipe.Stdin
116
-	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
117
-		var err error
118
-		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
119
-			err = stdin.Close()
120
-			go func() {
121
-				select {
122
-				case <-ready:
123
-				case <-ctx.Done():
124
-				}
125
-				select {
126
-				case <-ready:
127
-					if err := ctr.sendCloseStdin(); err != nil {
128
-						logrus.Warnf("failed to close stdin: %+v", err)
129
-					}
130
-				default:
131
-				}
132
-			}()
133
-		})
134
-		return err
135
-	})
136
-
137
-	r := &containerd.CreateContainerRequest{
138
-		Id:            ctr.containerID,
139
-		BundlePath:    ctr.dir,
140
-		Stdin:         ctr.fifo(unix.Stdin),
141
-		Stdout:        ctr.fifo(unix.Stdout),
142
-		Stderr:        ctr.fifo(unix.Stderr),
143
-		Checkpoint:    checkpoint,
144
-		CheckpointDir: checkpointDir,
145
-		// check to see if we are running in ramdisk to disable pivot root
146
-		NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
147
-		Runtime:     ctr.runtime,
148
-		RuntimeArgs: ctr.runtimeArgs,
149
-	}
150
-	ctr.client.appendContainer(ctr)
151
-
152
-	if err := attachStdio(*iopipe); err != nil {
153
-		ctr.closeFifos(iopipe)
154
-		return err
155
-	}
156
-
157
-	resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r)
158
-	if err != nil {
159
-		ctr.closeFifos(iopipe)
160
-		return err
161
-	}
162
-	ctr.systemPid = systemPid(resp.Container)
163
-	close(ready)
164
-
165
-	return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{
166
-		CommonStateInfo: CommonStateInfo{
167
-			State: StateStart,
168
-			Pid:   ctr.systemPid,
169
-		}})
170
-
171
-}
172
-
173
-func (ctr *container) newProcess(friendlyName string) *process {
174
-	return &process{
175
-		dir: ctr.dir,
176
-		processCommon: processCommon{
177
-			containerID:  ctr.containerID,
178
-			friendlyName: friendlyName,
179
-			client:       ctr.client,
180
-		},
181
-	}
182
-}
183
-
184
-func (ctr *container) handleEvent(e *containerd.Event) error {
185
-	ctr.client.lock(ctr.containerID)
186
-	defer ctr.client.unlock(ctr.containerID)
187
-	switch e.Type {
188
-	case StateExit, StatePause, StateResume, StateOOM:
189
-		st := StateInfo{
190
-			CommonStateInfo: CommonStateInfo{
191
-				State:    e.Type,
192
-				ExitCode: e.Status,
193
-			},
194
-			OOMKilled: e.Type == StateExit && ctr.oom,
195
-		}
196
-		if e.Type == StateOOM {
197
-			ctr.oom = true
198
-		}
199
-		if e.Type == StateExit && e.Pid != InitFriendlyName {
200
-			st.ProcessID = e.Pid
201
-			st.State = StateExitProcess
202
-		}
203
-
204
-		// Remove process from list if we have exited
205
-		switch st.State {
206
-		case StateExit:
207
-			ctr.clean()
208
-			ctr.client.deleteContainer(e.Id)
209
-		case StateExitProcess:
210
-			ctr.cleanProcess(st.ProcessID)
211
-		}
212
-		ctr.client.q.append(e.Id, func() {
213
-			if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
214
-				logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err)
215
-			}
216
-			if e.Type == StatePause || e.Type == StateResume {
217
-				ctr.pauseMonitor.handle(e.Type)
218
-			}
219
-			if e.Type == StateExit {
220
-				if en := ctr.client.getExitNotifier(e.Id); en != nil {
221
-					en.close()
222
-				}
223
-			}
224
-		})
225
-
226
-	default:
227
-		logrus.Debugf("libcontainerd: event unhandled: %+v", e)
228
-	}
229
-	return nil
230
-}
231
-
232
-// discardFifos attempts to fully read the container fifos to unblock processes
233
-// that may be blocked on the writer side.
234
-func (ctr *container) discardFifos() {
235
-	ctx, _ := context.WithTimeout(context.Background(), 3*time.Second)
236
-	for _, i := range []int{unix.Stdout, unix.Stderr} {
237
-		f, err := fifo.OpenFifo(ctx, ctr.fifo(i), unix.O_RDONLY|unix.O_NONBLOCK, 0)
238
-		if err != nil {
239
-			logrus.Warnf("error opening fifo %v for discarding: %+v", f, err)
240
-			continue
241
-		}
242
-		go func() {
243
-			io.Copy(ioutil.Discard, f)
244
-		}()
245
-	}
246
-}
247 1
deleted file mode 100644
... ...
@@ -1,338 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	"encoding/json"
5
-	"fmt"
6
-	"io"
7
-	"io/ioutil"
8
-	"strings"
9
-	"time"
10
-
11
-	"github.com/Microsoft/hcsshim"
12
-	"github.com/opencontainers/runtime-spec/specs-go"
13
-	"github.com/sirupsen/logrus"
14
-	"golang.org/x/sys/windows"
15
-)
16
-
17
-type container struct {
18
-	containerCommon
19
-
20
-	// Platform specific fields are below here. There are none presently on Windows.
21
-	options []CreateOption
22
-
23
-	// The ociSpec is required, as client.Create() needs a spec,
24
-	// but can be called from the RestartManager context which does not
25
-	// otherwise have access to the Spec
26
-	ociSpec specs.Spec
27
-
28
-	isWindows           bool
29
-	manualStopRequested bool
30
-	hcsContainer        hcsshim.Container
31
-}
32
-
33
-func (ctr *container) newProcess(friendlyName string) *process {
34
-	return &process{
35
-		processCommon: processCommon{
36
-			containerID:  ctr.containerID,
37
-			friendlyName: friendlyName,
38
-			client:       ctr.client,
39
-		},
40
-	}
41
-}
42
-
43
-// start starts a created container.
44
-// Caller needs to lock container ID before calling this method.
45
-func (ctr *container) start(attachStdio StdioCallback) error {
46
-	var err error
47
-
48
-	// Start the container.  If this is a servicing container, this call will block
49
-	// until the container is done with the servicing execution.
50
-	logrus.Debugln("libcontainerd: starting container ", ctr.containerID)
51
-	if err = ctr.hcsContainer.Start(); err != nil {
52
-		logrus.Errorf("libcontainerd: failed to start container: %s", err)
53
-		ctr.debugGCS() // Before terminating!
54
-		if err := ctr.terminate(); err != nil {
55
-			logrus.Errorf("libcontainerd: failed to cleanup after a failed Start. %s", err)
56
-		} else {
57
-			logrus.Debugln("libcontainerd: cleaned up after failed Start by calling Terminate")
58
-		}
59
-		return err
60
-	}
61
-
62
-	defer ctr.debugGCS()
63
-
64
-	// Note we always tell HCS to
65
-	// create stdout as it's required regardless of '-i' or '-t' options, so that
66
-	// docker can always grab the output through logs. We also tell HCS to always
67
-	// create stdin, even if it's not used - it will be closed shortly. Stderr
68
-	// is only created if we're not -t.
69
-	var (
70
-		emulateConsole   bool
71
-		createStdErrPipe bool
72
-	)
73
-	if ctr.ociSpec.Process != nil {
74
-		emulateConsole = ctr.ociSpec.Process.Terminal
75
-		createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing
76
-	}
77
-
78
-	createProcessParms := &hcsshim.ProcessConfig{
79
-		EmulateConsole:   emulateConsole,
80
-		WorkingDirectory: ctr.ociSpec.Process.Cwd,
81
-		CreateStdInPipe:  !ctr.ociSpec.Windows.Servicing,
82
-		CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing,
83
-		CreateStdErrPipe: createStdErrPipe,
84
-	}
85
-
86
-	if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil {
87
-		createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height)
88
-		createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width)
89
-	}
90
-
91
-	// Configure the environment for the process
92
-	createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env)
93
-	if ctr.isWindows {
94
-		createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ")
95
-	} else {
96
-		createProcessParms.CommandArgs = ctr.ociSpec.Process.Args
97
-	}
98
-	createProcessParms.User = ctr.ociSpec.Process.User.Username
99
-
100
-	// LCOW requires the raw OCI spec passed through HCS and onwards to GCS for the utility VM.
101
-	if !ctr.isWindows {
102
-		ociBuf, err := json.Marshal(ctr.ociSpec)
103
-		if err != nil {
104
-			return err
105
-		}
106
-		ociRaw := json.RawMessage(ociBuf)
107
-		createProcessParms.OCISpecification = &ociRaw
108
-	}
109
-
110
-	// Start the command running in the container.
111
-	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
112
-	if err != nil {
113
-		logrus.Errorf("libcontainerd: CreateProcess() failed %s", err)
114
-		if err := ctr.terminate(); err != nil {
115
-			logrus.Errorf("libcontainerd: failed to cleanup after a failed CreateProcess. %s", err)
116
-		} else {
117
-			logrus.Debugln("libcontainerd: cleaned up after failed CreateProcess by calling Terminate")
118
-		}
119
-		return err
120
-	}
121
-
122
-	pid := newProcess.Pid()
123
-
124
-	// Save the hcs Process and PID
125
-	ctr.process.friendlyName = InitFriendlyName
126
-	ctr.process.hcsProcess = newProcess
127
-
128
-	// If this is a servicing container, wait on the process synchronously here and
129
-	// if it succeeds, wait for it to cleanly shut down and merge into the parent container.
130
-	if ctr.ociSpec.Windows.Servicing {
131
-		exitCode := ctr.waitProcessExitCode(&ctr.process)
132
-
133
-		if exitCode != 0 {
134
-			if err := ctr.terminate(); err != nil {
135
-				logrus.Warnf("libcontainerd: terminating servicing container %s failed: %s", ctr.containerID, err)
136
-			}
137
-			return fmt.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.containerID, exitCode)
138
-		}
139
-
140
-		return ctr.hcsContainer.WaitTimeout(time.Minute * 5)
141
-	}
142
-
143
-	var stdout, stderr io.ReadCloser
144
-	var stdin io.WriteCloser
145
-	stdin, stdout, stderr, err = newProcess.Stdio()
146
-	if err != nil {
147
-		logrus.Errorf("libcontainerd: failed to get stdio pipes: %s", err)
148
-		if err := ctr.terminate(); err != nil {
149
-			logrus.Errorf("libcontainerd: failed to cleanup after a failed Stdio. %s", err)
150
-		}
151
-		return err
152
-	}
153
-
154
-	iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal}
155
-
156
-	iopipe.Stdin = createStdInCloser(stdin, newProcess)
157
-
158
-	// Convert io.ReadClosers to io.Readers
159
-	if stdout != nil {
160
-		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
161
-	}
162
-	if stderr != nil {
163
-		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
164
-	}
165
-
166
-	// Save the PID
167
-	logrus.Debugf("libcontainerd: process started - PID %d", pid)
168
-	ctr.systemPid = uint32(pid)
169
-
170
-	// Spin up a goroutine waiting for exit to handle cleanup
171
-	go ctr.waitExit(&ctr.process, true)
172
-
173
-	ctr.client.appendContainer(ctr)
174
-
175
-	if err := attachStdio(*iopipe); err != nil {
176
-		// OK to return the error here, as waitExit will handle tear-down in HCS
177
-		return err
178
-	}
179
-
180
-	// Tell the docker engine that the container has started.
181
-	si := StateInfo{
182
-		CommonStateInfo: CommonStateInfo{
183
-			State: StateStart,
184
-			Pid:   ctr.systemPid, // Not sure this is needed? Double-check monitor.go in daemon BUGBUG @jhowardmsft
185
-		}}
186
-	logrus.Debugf("libcontainerd: start() completed OK, %+v", si)
187
-	return ctr.client.backend.StateChanged(ctr.containerID, si)
188
-
189
-}
190
-
191
-// waitProcessExitCode will wait for the given process to exit and return its error code.
192
-func (ctr *container) waitProcessExitCode(process *process) int {
193
-	// Block indefinitely for the process to exit.
194
-	err := process.hcsProcess.Wait()
195
-	if err != nil {
196
-		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
197
-			logrus.Warnf("libcontainerd: Wait() failed (container may have been killed): %s", err)
198
-		}
199
-		// Fall through here, do not return. This ensures we attempt to continue the
200
-		// shutdown in HCS and tell the docker engine that the process/container
201
-		// has exited to avoid a container being dropped on the floor.
202
-	}
203
-
204
-	exitCode, err := process.hcsProcess.ExitCode()
205
-	if err != nil {
206
-		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
207
-			logrus.Warnf("libcontainerd: unable to get exit code from container %s", ctr.containerID)
208
-		}
209
-		// Since we got an error retrieving the exit code, make sure that the code we return
210
-		// doesn't incorrectly indicate success.
211
-		exitCode = -1
212
-
213
-		// Fall through here, do not return. This ensures we attempt to continue the
214
-		// shutdown in HCS and tell the docker engine that the process/container
215
-		// has exited to avoid a container being dropped on the floor.
216
-	}
217
-
218
-	return exitCode
219
-}
220
-
221
-// waitExit runs as a goroutine waiting for the process to exit. It's
222
-// the Windows equivalent of the Linux containerd world, where events come in as
223
-// state change notifications from containerd.
224
-func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error {
225
-	logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid)
226
-
227
-	exitCode := ctr.waitProcessExitCode(process)
228
-	// Lock the container while removing the process/container from the list
229
-	ctr.client.lock(ctr.containerID)
230
-
231
-	if !isFirstProcessToStart {
232
-		ctr.cleanProcess(process.friendlyName)
233
-	} else {
234
-		ctr.client.deleteContainer(ctr.containerID)
235
-	}
236
-
237
-	// Unlock here so other threads are unblocked
238
-	ctr.client.unlock(ctr.containerID)
239
-
240
-	// Assume the container has exited
241
-	si := StateInfo{
242
-		CommonStateInfo: CommonStateInfo{
243
-			State:     StateExit,
244
-			ExitCode:  uint32(exitCode),
245
-			Pid:       process.systemPid,
246
-			ProcessID: process.friendlyName,
247
-		},
248
-		UpdatePending: false,
249
-	}
250
-
251
-	// But it could have been an exec'd process which exited
252
-	if !isFirstProcessToStart {
253
-		si.State = StateExitProcess
254
-	} else {
255
-		// Pending updates is only applicable for WCOW
256
-		if ctr.isWindows {
257
-			updatePending, err := ctr.hcsContainer.HasPendingUpdates()
258
-			if err != nil {
259
-				logrus.Warnf("libcontainerd: HasPendingUpdates() failed (container may have been killed): %s", err)
260
-			} else {
261
-				si.UpdatePending = updatePending
262
-			}
263
-		}
264
-
265
-		logrus.Debugf("libcontainerd: shutting down container %s", ctr.containerID)
266
-		if err := ctr.shutdown(); err != nil {
267
-			logrus.Debugf("libcontainerd: failed to shutdown container %s", ctr.containerID)
268
-		} else {
269
-			logrus.Debugf("libcontainerd: completed shutting down container %s", ctr.containerID)
270
-		}
271
-		if err := ctr.hcsContainer.Close(); err != nil {
272
-			logrus.Error(err)
273
-		}
274
-	}
275
-
276
-	if err := process.hcsProcess.Close(); err != nil {
277
-		logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err)
278
-	}
279
-
280
-	// Call into the backend to notify it of the state change.
281
-	logrus.Debugf("libcontainerd: waitExit() calling backend.StateChanged %+v", si)
282
-	if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
283
-		logrus.Error(err)
284
-	}
285
-
286
-	logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si)
287
-
288
-	return nil
289
-}
290
-
291
-// cleanProcess removes process from the map.
292
-// Caller needs to lock container ID before calling this method.
293
-func (ctr *container) cleanProcess(id string) {
294
-	delete(ctr.processes, id)
295
-}
296
-
297
-// shutdown shuts down the container in HCS
298
-// Caller needs to lock container ID before calling this method.
299
-func (ctr *container) shutdown() error {
300
-	const shutdownTimeout = time.Minute * 5
301
-	err := ctr.hcsContainer.Shutdown()
302
-	if hcsshim.IsPending(err) {
303
-		// Explicit timeout to avoid a (remote) possibility that shutdown hangs indefinitely.
304
-		err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
305
-	} else if hcsshim.IsAlreadyStopped(err) {
306
-		err = nil
307
-	}
308
-
309
-	if err != nil {
310
-		logrus.Debugf("libcontainerd: error shutting down container %s %v calling terminate", ctr.containerID, err)
311
-		if err := ctr.terminate(); err != nil {
312
-			return err
313
-		}
314
-		return err
315
-	}
316
-
317
-	return nil
318
-}
319
-
320
-// terminate terminates the container in HCS
321
-// Caller needs to lock container ID before calling this method.
322
-func (ctr *container) terminate() error {
323
-	const terminateTimeout = time.Minute * 5
324
-	err := ctr.hcsContainer.Terminate()
325
-
326
-	if hcsshim.IsPending(err) {
327
-		err = ctr.hcsContainer.WaitTimeout(terminateTimeout)
328
-	} else if hcsshim.IsAlreadyStopped(err) {
329
-		err = nil
330
-	}
331
-
332
-	if err != nil {
333
-		logrus.Debugf("libcontainerd: error terminating container %s %v", ctr.containerID, err)
334
-		return err
335
-	}
336
-
337
-	return nil
338
-}
339 1
new file mode 100644
... ...
@@ -0,0 +1,46 @@
0
+package libcontainerd
1
+
2
+import "errors"
3
+
4
+type liberr struct {
5
+	err error
6
+}
7
+
8
+func (e liberr) Error() string {
9
+	return e.err.Error()
10
+}
11
+
12
+func (e liberr) Cause() error {
13
+	return e.err
14
+}
15
+
16
+type notFoundErr struct {
17
+	liberr
18
+}
19
+
20
+func (notFoundErr) NotFound() {}
21
+
22
+func newNotFoundError(err string) error { return notFoundErr{liberr{errors.New(err)}} }
23
+func wrapNotFoundError(err error) error { return notFoundErr{liberr{err}} }
24
+
25
+type invalidParamErr struct {
26
+	liberr
27
+}
28
+
29
+func (invalidParamErr) InvalidParameter() {}
30
+
31
+func newInvalidParameterError(err string) error { return invalidParamErr{liberr{errors.New(err)}} }
32
+
33
+type conflictErr struct {
34
+	liberr
35
+}
36
+
37
+func (conflictErr) ConflictErr() {}
38
+
39
+func newConflictError(err string) error { return conflictErr{liberr{errors.New(err)}} }
40
+
41
+type sysErr struct {
42
+	liberr
43
+}
44
+
45
+func wrapSystemError(err error) error { return sysErr{liberr{err}} }
0 46
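The error helpers above classify failures through behavioural marker methods (NotFound, InvalidParameter, ConflictErr) and a Cause accessor rather than exported concrete types. A minimal sketch of how a caller could detect one of these classes purely via an interface assertion; the helper name isNotFound is illustrative and not part of this change:

// isNotFound reports whether err carries the NotFound marker method defined above.
func isNotFound(err error) bool {
	_, ok := err.(interface{ NotFound() })
	return ok
}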
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+package libcontainerd
1
+
2
+import "github.com/containerd/containerd"
3
+
4
+// Config returns the containerd.IOConfig of this pipe set
5
+func (p *IOPipe) Config() containerd.IOConfig {
6
+	return p.config
7
+}
8
+
9
+// Cancel aborts ongoing operations if they have not completed yet
10
+func (p *IOPipe) Cancel() {
11
+	p.cancel()
12
+}
13
+
14
+// Wait waits for io operations to finish; it is currently a no-op.
15
+func (p *IOPipe) Wait() {
16
+}
17
+
18
+// Close closes the underlying pipes
19
+func (p *IOPipe) Close() error {
20
+	p.cancel()
21
+
22
+	if p.Stdin != nil {
23
+		p.Stdin.Close()
24
+	}
25
+
26
+	if p.Stdout != nil {
27
+		p.Stdout.Close()
28
+	}
29
+
30
+	if p.Stderr != nil {
31
+		p.Stderr.Close()
32
+	}
33
+
34
+	return nil
35
+}
0 36
new file mode 100644
... ...
@@ -0,0 +1,60 @@
0
+// +build !windows
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"context"
6
+	"io"
7
+	"syscall"
8
+
9
+	"github.com/containerd/containerd"
10
+	"github.com/containerd/fifo"
11
+	"github.com/pkg/errors"
12
+)
13
+
14
+func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) {
15
+	var (
16
+		err         error
17
+		ctx, cancel = context.WithCancel(context.Background())
18
+		f           io.ReadWriteCloser
19
+		iop         = &IOPipe{
20
+			Terminal: fifos.Terminal,
21
+			cancel:   cancel,
22
+			config: containerd.IOConfig{
23
+				Terminal: fifos.Terminal,
24
+				Stdin:    fifos.In,
25
+				Stdout:   fifos.Out,
26
+				Stderr:   fifos.Err,
27
+			},
28
+		}
29
+	)
30
+	defer func() {
31
+		if err != nil {
32
+			cancel()
33
+			iop.Close()
34
+		}
35
+	}()
36
+
37
+	if fifos.In != "" {
38
+		if f, err = fifo.OpenFifo(ctx, fifos.In, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
39
+			return nil, errors.WithStack(err)
40
+		}
41
+		iop.Stdin = f
42
+	}
43
+
44
+	if fifos.Out != "" {
45
+		if f, err = fifo.OpenFifo(ctx, fifos.Out, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
46
+			return nil, errors.WithStack(err)
47
+		}
48
+		iop.Stdout = f
49
+	}
50
+
51
+	if fifos.Err != "" {
52
+		if f, err = fifo.OpenFifo(ctx, fifos.Err, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
53
+			return nil, errors.WithStack(err)
54
+		}
55
+		iop.Stderr = f
56
+	}
57
+
58
+	return iop, nil
59
+}
0 60
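A minimal sketch of the intended call pattern for this constructor on Unix, assuming it lives in the same package; the fifo paths below are placeholders, not values the daemon actually uses:

// openTaskIO is a hypothetical helper: it fills in a containerd.FIFOSet and
// lets newIOPipe open each fifo non-blockingly. Close() on the returned
// IOPipe cancels the open context and closes whichever pipes were opened.
func openTaskIO() (*IOPipe, error) {
	fifos := &containerd.FIFOSet{
		Terminal: false,
		In:       "/run/docker/fifo/init-stdin",
		Out:      "/run/docker/fifo/init-stdout",
		Err:      "/run/docker/fifo/init-stderr",
	}
	return newIOPipe(fifos)
}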
new file mode 100644
... ...
@@ -0,0 +1,138 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"context"
4
+	"io"
5
+	"net"
6
+	"sync"
7
+
8
+	winio "github.com/Microsoft/go-winio"
9
+	"github.com/containerd/containerd"
10
+	"github.com/pkg/errors"
11
+)
12
+
13
+type winpipe struct {
14
+	sync.Mutex
15
+
16
+	ctx      context.Context
17
+	listener net.Listener
18
+	readyCh  chan struct{}
19
+	readyErr error
20
+
21
+	client net.Conn
22
+}
23
+
24
+func newWinpipe(ctx context.Context, pipe string) (*winpipe, error) {
25
+	l, err := winio.ListenPipe(pipe, nil)
26
+	if err != nil {
27
+		return nil, errors.Wrapf(err, "%q pipe creation failed", pipe)
28
+	}
29
+	wp := &winpipe{
30
+		ctx:      ctx,
31
+		listener: l,
32
+		readyCh:  make(chan struct{}),
33
+	}
34
+	go func() {
35
+		go func() {
36
+			defer close(wp.readyCh)
37
+			defer wp.listener.Close()
38
+			c, err := wp.listener.Accept()
39
+			if err != nil {
40
+				wp.Lock()
41
+				if wp.readyErr == nil {
42
+					wp.readyErr = err
43
+				}
44
+				wp.Unlock()
45
+				return
46
+			}
47
+			wp.client = c
48
+		}()
49
+
50
+		select {
51
+		case <-wp.readyCh:
52
+		case <-ctx.Done():
53
+			wp.Lock()
54
+			if wp.readyErr == nil {
55
+				wp.listener.Close()
56
+				wp.readyErr = ctx.Err()
57
+			}
58
+			wp.Unlock()
59
+		}
60
+	}()
61
+
62
+	return wp, nil
63
+}
64
+
65
+func (wp *winpipe) Read(b []byte) (int, error) {
66
+	select {
67
+	case <-wp.ctx.Done():
68
+		return 0, wp.ctx.Err()
69
+	case <-wp.readyCh:
70
+		return wp.client.Read(b)
71
+	}
72
+}
73
+
74
+func (wp *winpipe) Write(b []byte) (int, error) {
75
+	select {
76
+	case <-wp.ctx.Done():
77
+		return 0, wp.ctx.Err()
78
+	case <-wp.readyCh:
79
+		return wp.client.Write(b)
80
+	}
81
+}
82
+
83
+func (wp *winpipe) Close() error {
84
+	select {
85
+	case <-wp.readyCh:
86
+		return wp.client.Close()
87
+	default:
88
+		return nil
89
+	}
90
+}
91
+
92
+func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) {
93
+	var (
94
+		err         error
95
+		ctx, cancel = context.WithCancel(context.Background())
96
+		p           io.ReadWriteCloser
97
+		iop         = &IOPipe{
98
+			Terminal: fifos.Terminal,
99
+			cancel:   cancel,
100
+			config: containerd.IOConfig{
101
+				Terminal: fifos.Terminal,
102
+				Stdin:    fifos.In,
103
+				Stdout:   fifos.Out,
104
+				Stderr:   fifos.Err,
105
+			},
106
+		}
107
+	)
108
+	defer func() {
109
+		if err != nil {
110
+			cancel()
111
+			iop.Close()
112
+		}
113
+	}()
114
+
115
+	if fifos.In != "" {
116
+		if p, err = newWinpipe(ctx, fifos.In); err != nil {
117
+			return nil, err
118
+		}
119
+		iop.Stdin = p
120
+	}
121
+
122
+	if fifos.Out != "" {
123
+		if p, err = newWinpipe(ctx, fifos.Out); err != nil {
124
+			return nil, err
125
+		}
126
+		iop.Stdout = p
127
+	}
128
+
129
+	if fifos.Err != "" {
130
+		if p, err = newWinpipe(ctx, fifos.Err); err != nil {
131
+			return nil, err
132
+		}
133
+		iop.Stderr = p
134
+	}
135
+
136
+	return iop, nil
137
+}
0 138
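On Windows the same constructor shape is backed by named pipes: newWinpipe starts listening immediately, while Read, Write and Close block on readyCh until a client connects or the context is cancelled. A small sketch of that behaviour, assuming a caller in the same package; the pipe name is a placeholder:

// readFromPipe is a hypothetical illustration: the Read call does not fail
// just because no client has connected yet; it waits for the Accept inside
// newWinpipe to complete, or returns the context error on cancellation.
func readFromPipe(ctx context.Context) ([]byte, error) {
	wp, err := newWinpipe(ctx, `\\.\pipe\docker-example-stdout`) // placeholder name
	if err != nil {
		return nil, err
	}
	defer wp.Close()

	buf := make([]byte, 4096)
	n, err := wp.Read(buf)
	if err != nil {
		return nil, err
	}
	return buf[:n], nil
}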
deleted file mode 100644
... ...
@@ -1,31 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	"fmt"
5
-	"os"
6
-	"strconv"
7
-
8
-	"github.com/opencontainers/runc/libcontainer/system"
9
-	"github.com/sirupsen/logrus"
10
-)
11
-
12
-func setOOMScore(pid, score int) error {
13
-	oomScoreAdjPath := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
14
-	f, err := os.OpenFile(oomScoreAdjPath, os.O_WRONLY, 0)
15
-	if err != nil {
16
-		return err
17
-	}
18
-	stringScore := strconv.Itoa(score)
19
-	_, err = f.WriteString(stringScore)
20
-	f.Close()
21
-	if os.IsPermission(err) {
22
-		// Setting oom_score_adj does not work in an
23
-		// unprivileged container. Ignore the error, but log
24
-		// it if we appear not to be in that situation.
25
-		if !system.RunningInUserNS() {
26
-			logrus.Debugf("Permission denied writing %q to %s", stringScore, oomScoreAdjPath)
27
-		}
28
-		return nil
29
-	}
30
-	return err
31
-}
32 1
deleted file mode 100644
... ...
@@ -1,5 +0,0 @@
1
-package libcontainerd
2
-
3
-func setOOMScore(pid, score int) error {
4
-	return nil
5
-}
6 1
deleted file mode 100644
... ...
@@ -1,42 +0,0 @@
1
-// +build !windows
2
-
3
-package libcontainerd
4
-
5
-import (
6
-	"sync"
7
-)
8
-
9
-// pauseMonitor is helper to get notifications from pause state changes.
10
-type pauseMonitor struct {
11
-	sync.Mutex
12
-	waiters map[string][]chan struct{}
13
-}
14
-
15
-func (m *pauseMonitor) handle(t string) {
16
-	m.Lock()
17
-	defer m.Unlock()
18
-	if m.waiters == nil {
19
-		return
20
-	}
21
-	q, ok := m.waiters[t]
22
-	if !ok {
23
-		return
24
-	}
25
-	if len(q) > 0 {
26
-		close(q[0])
27
-		m.waiters[t] = q[1:]
28
-	}
29
-}
30
-
31
-func (m *pauseMonitor) append(t string, waiter chan struct{}) {
32
-	m.Lock()
33
-	defer m.Unlock()
34
-	if m.waiters == nil {
35
-		m.waiters = make(map[string][]chan struct{})
36
-	}
37
-	_, ok := m.waiters[t]
38
-	if !ok {
39
-		m.waiters[t] = make([]chan struct{}, 0)
40
-	}
41
-	m.waiters[t] = append(m.waiters[t], waiter)
42
-}
43 1
deleted file mode 100644
... ...
@@ -1,18 +0,0 @@
1
-package libcontainerd
2
-
3
-// processCommon are the platform common fields as part of the process structure
4
-// which keeps the state for the main container process, as well as any exec
5
-// processes.
6
-type processCommon struct {
7
-	client *client
8
-
9
-	// containerID is the Container ID
10
-	containerID string
11
-
12
-	// friendlyName is an identifier for the process (or `InitFriendlyName`
13
-	// for the first process)
14
-	friendlyName string
15
-
16
-	// systemPid is the PID of the main container process
17
-	systemPid uint32
18
-}
19 1
deleted file mode 100644
... ...
@@ -1,107 +0,0 @@
1
-// +build linux solaris
2
-
3
-package libcontainerd
4
-
5
-import (
6
-	"io"
7
-	"io/ioutil"
8
-	"os"
9
-	"path/filepath"
10
-	goruntime "runtime"
11
-	"strings"
12
-
13
-	containerd "github.com/containerd/containerd/api/grpc/types"
14
-	"github.com/tonistiigi/fifo"
15
-	"golang.org/x/net/context"
16
-	"golang.org/x/sys/unix"
17
-)
18
-
19
-var fdNames = map[int]string{
20
-	unix.Stdin:  "stdin",
21
-	unix.Stdout: "stdout",
22
-	unix.Stderr: "stderr",
23
-}
24
-
25
-// process keeps the state for both main container process and exec process.
26
-type process struct {
27
-	processCommon
28
-
29
-	// Platform specific fields are below here.
30
-	dir string
31
-}
32
-
33
-func (p *process) openFifos(ctx context.Context, terminal bool) (pipe *IOPipe, err error) {
34
-	if err := os.MkdirAll(p.dir, 0700); err != nil {
35
-		return nil, err
36
-	}
37
-
38
-	io := &IOPipe{}
39
-
40
-	io.Stdin, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdin), unix.O_WRONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
41
-	if err != nil {
42
-		return nil, err
43
-	}
44
-
45
-	defer func() {
46
-		if err != nil {
47
-			io.Stdin.Close()
48
-		}
49
-	}()
50
-
51
-	io.Stdout, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdout), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
52
-	if err != nil {
53
-		return nil, err
54
-	}
55
-
56
-	defer func() {
57
-		if err != nil {
58
-			io.Stdout.Close()
59
-		}
60
-	}()
61
-
62
-	if goruntime.GOOS == "solaris" || !terminal {
63
-		// For Solaris, terminal handling is done exclusively by the runtime; therefore we make no distinction
64
-		// in the processing for terminal and !terminal cases.
65
-		io.Stderr, err = fifo.OpenFifo(ctx, p.fifo(unix.Stderr), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
66
-		if err != nil {
67
-			return nil, err
68
-		}
69
-		defer func() {
70
-			if err != nil {
71
-				io.Stderr.Close()
72
-			}
73
-		}()
74
-	} else {
75
-		io.Stderr = ioutil.NopCloser(emptyReader{})
76
-	}
77
-
78
-	return io, nil
79
-}
80
-
81
-func (p *process) sendCloseStdin() error {
82
-	_, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
83
-		Id:         p.containerID,
84
-		Pid:        p.friendlyName,
85
-		CloseStdin: true,
86
-	})
87
-	if err != nil && (strings.Contains(err.Error(), "container not found") || strings.Contains(err.Error(), "process not found")) {
88
-		return nil
89
-	}
90
-	return err
91
-}
92
-
93
-func (p *process) closeFifos(io *IOPipe) {
94
-	io.Stdin.Close()
95
-	io.Stdout.Close()
96
-	io.Stderr.Close()
97
-}
98
-
99
-type emptyReader struct{}
100
-
101
-func (r emptyReader) Read(b []byte) (int, error) {
102
-	return 0, io.EOF
103
-}
104
-
105
-func (p *process) fifo(index int) string {
106
-	return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index])
107
-}
... ...
@@ -8,14 +8,6 @@ import (
8 8
 	"github.com/docker/docker/pkg/ioutils"
9 9
 )
10 10
 
11
-// process keeps the state for both main container process and exec process.
12
-type process struct {
13
-	processCommon
14
-
15
-	// Platform specific fields are below here.
16
-	hcsProcess hcsshim.Process
17
-}
18
-
19 11
 type autoClosingReader struct {
20 12
 	io.ReadCloser
21 13
 	sync.Once
... ...
@@ -23,7 +15,7 @@ type autoClosingReader struct {
23 23
 
24 24
 func (r *autoClosingReader) Read(b []byte) (n int, err error) {
25 25
 	n, err = r.ReadCloser.Read(b)
26
-	if err == io.EOF {
26
+	if err != nil {
27 27
 		r.Once.Do(func() { r.ReadCloser.Close() })
28 28
 	}
29 29
 	return
... ...
@@ -46,3 +38,7 @@ func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) io.WriteClo
46 46
 		return nil
47 47
 	})
48 48
 }
49
+
50
+func (p *process) Cleanup() error {
51
+	return nil
52
+}
49 53
new file mode 100644
... ...
@@ -0,0 +1,35 @@
0
+package libcontainerd
1
+
2
+import "sync"
3
+
4
+type queue struct {
5
+	sync.Mutex
6
+	fns map[string]chan struct{}
7
+}
8
+
9
+func (q *queue) append(id string, f func()) {
10
+	q.Lock()
11
+	defer q.Unlock()
12
+
13
+	if q.fns == nil {
14
+		q.fns = make(map[string]chan struct{})
15
+	}
16
+
17
+	done := make(chan struct{})
18
+
19
+	fn, ok := q.fns[id]
20
+	q.fns[id] = done
21
+	go func() {
22
+		if ok {
23
+			<-fn
24
+		}
25
+		f()
26
+		close(done)
27
+
28
+		q.Lock()
29
+		if q.fns[id] == done {
30
+			delete(q.fns, id)
31
+		}
32
+		q.Unlock()
33
+	}()
34
+}
0 35
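queue serializes the callbacks appended under a single id while leaving different ids independent; the test that follows exercises the ordering for one id. A small sketch of the intended use, with illustrative ids and placeholder callbacks:

// handleEvents is a hypothetical caller: callbacks appended under the same id
// run strictly in append order, while a different id is serialized on its own
// and may interleave freely with "container-a".
func handleEvents() {
	var q queue
	q.append("container-a", func() { /* first event for container-a */ })
	q.append("container-a", func() { /* runs only after the previous callback */ })
	q.append("container-b", func() { /* unordered relative to container-a */ })
}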
new file mode 100644
... ...
@@ -0,0 +1,31 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"testing"
4
+	"time"
5
+
6
+	"github.com/stretchr/testify/require"
7
+)
8
+
9
+func TestSerialization(t *testing.T) {
10
+	var (
11
+		q             queue
12
+		serialization = 1
13
+	)
14
+
15
+	q.append("aaa", func() {
16
+		// simulate a long-running task
17
+		time.Sleep(10 * time.Millisecond)
18
+		require.EqualValues(t, serialization, 1)
19
+		serialization = 2
20
+	})
21
+	q.append("aaa", func() {
22
+		require.EqualValues(t, serialization, 2)
23
+		serialization = 3
24
+	})
25
+	q.append("aaa", func() {
26
+		require.EqualValues(t, serialization, 3)
27
+		serialization = 4
28
+	})
29
+	time.Sleep(20 * time.Millisecond)
30
+}
0 31
deleted file mode 100644
... ...
@@ -1,37 +0,0 @@
1
-// +build linux solaris
2
-
3
-package libcontainerd
4
-
5
-import "sync"
6
-
7
-type queue struct {
8
-	sync.Mutex
9
-	fns map[string]chan struct{}
10
-}
11
-
12
-func (q *queue) append(id string, f func()) {
13
-	q.Lock()
14
-	defer q.Unlock()
15
-
16
-	if q.fns == nil {
17
-		q.fns = make(map[string]chan struct{})
18
-	}
19
-
20
-	done := make(chan struct{})
21
-
22
-	fn, ok := q.fns[id]
23
-	q.fns[id] = done
24
-	go func() {
25
-		if ok {
26
-			<-fn
27
-		}
28
-		f()
29
-		close(done)
30
-
31
-		q.Lock()
32
-		if q.fns[id] == done {
33
-			delete(q.fns, id)
34
-		}
35
-		q.Unlock()
36
-	}()
37
-}
38 1
deleted file mode 100644
... ...
@@ -1,33 +0,0 @@
1
-// +build linux solaris
2
-
3
-package libcontainerd
4
-
5
-import (
6
-	"testing"
7
-	"time"
8
-
9
-	"github.com/stretchr/testify/require"
10
-)
11
-
12
-func TestSerialization(t *testing.T) {
13
-	var (
14
-		q             queue
15
-		serialization = 1
16
-	)
17
-
18
-	q.append("aaa", func() {
19
-		//simulate a long time task
20
-		time.Sleep(10 * time.Millisecond)
21
-		require.EqualValues(t, serialization, 1)
22
-		serialization = 2
23
-	})
24
-	q.append("aaa", func() {
25
-		require.EqualValues(t, serialization, 2)
26
-		serialization = 3
27
-	})
28
-	q.append("aaa", func() {
29
-		require.EqualValues(t, serialization, 3)
30
-		serialization = 4
31
-	})
32
-	time.Sleep(20 * time.Millisecond)
33
-}
34 1
deleted file mode 100644
... ...
@@ -1,20 +0,0 @@
1
-package libcontainerd
2
-
3
-// Remote on Linux defines the accesspoint to the containerd grpc API.
4
-// Remote on Windows is largely an unimplemented interface as there is
5
-// no remote containerd.
6
-type Remote interface {
7
-	// Client returns a new Client instance connected with given Backend.
8
-	Client(Backend) (Client, error)
9
-	// Cleanup stops containerd if it was started by libcontainerd.
10
-	// Note this is not used on Windows as there is no remote containerd.
11
-	Cleanup()
12
-	// UpdateOptions allows various remote options to be updated at runtime.
13
-	UpdateOptions(...RemoteOption) error
14
-}
15
-
16
-// RemoteOption allows to configure parameters of remotes.
17
-// This is unused on Windows.
18
-type RemoteOption interface {
19
-	Apply(Remote) error
20
-}
21 1
new file mode 100644
... ...
@@ -0,0 +1,317 @@
0
+// +build !windows
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"context"
6
+	"fmt"
7
+	"io"
8
+	"io/ioutil"
9
+	"os"
10
+	"os/exec"
11
+	"path/filepath"
12
+	"strconv"
13
+	"strings"
14
+	"sync"
15
+	"syscall"
16
+	"time"
17
+
18
+	"github.com/BurntSushi/toml"
19
+	"github.com/containerd/containerd"
20
+	"github.com/containerd/containerd/server"
21
+	"github.com/docker/docker/pkg/system"
22
+	"github.com/pkg/errors"
23
+	"github.com/sirupsen/logrus"
24
+)
25
+
26
+const (
27
+	maxConnectionRetryCount = 3
28
+	healthCheckTimeout      = 3 * time.Second
29
+	shutdownTimeout         = 15 * time.Second
30
+	configFile              = "containerd.toml"
31
+	binaryName              = "docker-containerd"
32
+	pidFile                 = "docker-containerd.pid"
33
+)
34
+
35
+type pluginConfigs struct {
36
+	Plugins map[string]interface{} `toml:"plugins"`
37
+}
38
+
39
+type remote struct {
40
+	sync.RWMutex
41
+	server.Config
42
+
43
+	daemonPid int
44
+	logger    *logrus.Entry
45
+
46
+	daemonWaitCh    chan struct{}
47
+	clients         []*client
48
+	shutdownContext context.Context
49
+	shutdownCancel  context.CancelFunc
50
+	shutdown        bool
51
+
52
+	// Options
53
+	startDaemon bool
54
+	rootDir     string
55
+	stateDir    string
56
+	snapshotter string
57
+	pluginConfs pluginConfigs
58
+}
59
+
60
+// New creates a fresh instance of libcontainerd remote.
61
+func New(rootDir, stateDir string, options ...RemoteOption) (rem Remote, err error) {
62
+	defer func() {
63
+		if err != nil {
64
+			err = errors.Wrap(err, "Failed to connect to containerd")
65
+		}
66
+	}()
67
+
68
+	r := &remote{
69
+		rootDir:  rootDir,
70
+		stateDir: stateDir,
71
+		Config: server.Config{
72
+			Root:  filepath.Join(rootDir, "daemon"),
73
+			State: filepath.Join(stateDir, "daemon"),
74
+		},
75
+		pluginConfs: pluginConfigs{make(map[string]interface{})},
76
+		daemonPid:   -1,
77
+		logger:      logrus.WithField("module", "libcontainerd"),
78
+	}
79
+	r.shutdownContext, r.shutdownCancel = context.WithCancel(context.Background())
80
+
81
+	rem = r
82
+	for _, option := range options {
83
+		if err = option.Apply(r); err != nil {
84
+			return
85
+		}
86
+	}
87
+	r.setDefaults()
88
+
89
+	if err = system.MkdirAll(stateDir, 0700, ""); err != nil {
90
+		return
91
+	}
92
+
93
+	if r.startDaemon {
94
+		os.Remove(r.GRPC.Address)
95
+		if err = r.startContainerd(); err != nil {
96
+			return
97
+		}
98
+		defer func() {
99
+			if err != nil {
100
+				r.Cleanup()
101
+			}
102
+		}()
103
+	}
104
+
105
+	// This client is only used to monitor the containerd connection
106
+	client, err := containerd.New(r.GRPC.Address)
107
+	if err != nil {
108
+		return
109
+	}
110
+	if _, err := client.Version(context.Background()); err != nil {
111
+		system.KillProcess(r.daemonPid)
112
+		return nil, errors.Wrapf(err, "unable to get containerd version")
113
+	}
114
+
115
+	go r.monitorConnection(client)
116
+
117
+	return r, nil
118
+}
119
+
120
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
121
+	c := &client{
122
+		stateDir:   r.stateDir,
123
+		logger:     r.logger.WithField("namespace", ns),
124
+		namespace:  ns,
125
+		backend:    b,
126
+		containers: make(map[string]*container),
127
+	}
128
+
129
+	rclient, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(ns))
130
+	if err != nil {
131
+		return nil, err
132
+	}
133
+	c.remote = rclient
134
+
135
+	go c.processEventStream(r.shutdownContext)
136
+
137
+	r.Lock()
138
+	r.clients = append(r.clients, c)
139
+	r.Unlock()
140
+	return c, nil
141
+}
142
+
143
+func (r *remote) Cleanup() {
144
+	if r.daemonPid != -1 {
145
+		r.shutdownCancel()
146
+		r.stopDaemon()
147
+	}
148
+
149
+	// cleanup some files
150
+	os.Remove(filepath.Join(r.stateDir, pidFile))
151
+
152
+	r.platformCleanup()
153
+}
154
+
155
+func (r *remote) getContainerdPid() (int, error) {
156
+	pidFile := filepath.Join(r.stateDir, pidFile)
157
+	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
158
+	if err != nil {
159
+		if os.IsNotExist(err) {
160
+			return -1, nil
161
+		}
162
+		return -1, err
163
+	}
164
+	defer f.Close()
165
+
166
+	b := make([]byte, 8)
167
+	n, err := f.Read(b)
168
+	if err != nil && err != io.EOF {
169
+		return -1, err
170
+	}
171
+
172
+	if n > 0 {
173
+		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
174
+		if err != nil {
175
+			return -1, err
176
+		}
177
+		if system.IsProcessAlive(int(pid)) {
178
+			return int(pid), nil
179
+		}
180
+	}
181
+
182
+	return -1, nil
183
+}
184
+
185
+func (r *remote) getContainerdConfig() (string, error) {
186
+	path := filepath.Join(r.stateDir, configFile)
187
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
188
+	if err != nil {
189
+		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
190
+	}
191
+	defer f.Close()
192
+
193
+	enc := toml.NewEncoder(f)
194
+	if err = enc.Encode(r.Config); err != nil {
195
+		return "", errors.Wrapf(err, "failed to encode general config")
196
+	}
197
+	if err = enc.Encode(r.pluginConfs); err != nil {
198
+		return "", errors.Wrapf(err, "failed to encode plugin configs")
199
+	}
200
+
201
+	return path, nil
202
+}
203
+
204
+func (r *remote) startContainerd() error {
205
+	pid, err := r.getContainerdPid()
206
+	if err != nil {
207
+		return err
208
+	}
209
+
210
+	if pid != -1 {
211
+		r.daemonPid = pid
212
+		logrus.WithField("pid", pid).
213
+			Infof("libcontainerd: %s is still running", binaryName)
214
+		return nil
215
+	}
216
+
217
+	configFile, err := r.getContainerdConfig()
218
+	if err != nil {
219
+		return err
220
+	}
221
+
222
+	args := []string{"--config", configFile}
223
+	cmd := exec.Command(binaryName, args...)
224
+	// redirect containerd logs to docker logs
225
+	cmd.Stdout = os.Stdout
226
+	cmd.Stderr = os.Stderr
227
+	cmd.SysProcAttr = containerdSysProcAttr()
228
+	// clear the NOTIFY_SOCKET from the env when starting containerd
229
+	cmd.Env = nil
230
+	for _, e := range os.Environ() {
231
+		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
232
+			cmd.Env = append(cmd.Env, e)
233
+		}
234
+	}
235
+	if err := cmd.Start(); err != nil {
236
+		return err
237
+	}
238
+
239
+	r.daemonWaitCh = make(chan struct{})
240
+	go func() {
241
+		// Reap our child when needed
242
+		if err := cmd.Wait(); err != nil {
243
+			r.logger.WithError(err).Errorf("containerd did not exit successfully")
244
+		}
245
+		close(r.daemonWaitCh)
246
+	}()
247
+
248
+	r.daemonPid = cmd.Process.Pid
249
+
250
+	err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
251
+	if err != nil {
252
+		system.KillProcess(r.daemonPid)
253
+		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
254
+	}
255
+
256
+	logrus.WithField("pid", r.daemonPid).
257
+		Infof("libcontainerd: started new %s process", binaryName)
258
+
259
+	return nil
260
+}
261
+
262
+func (r *remote) monitorConnection(client *containerd.Client) {
263
+	var transientFailureCount = 0
264
+
265
+	ticker := time.NewTicker(500 * time.Millisecond)
266
+	defer ticker.Stop()
267
+
268
+	for {
269
+		<-ticker.C
270
+		ctx, cancel := context.WithTimeout(r.shutdownContext, healthCheckTimeout)
271
+		_, err := client.IsServing(ctx)
272
+		cancel()
273
+		if err == nil {
274
+			transientFailureCount = 0
275
+			continue
276
+		}
277
+
278
+		select {
279
+		case <-r.shutdownContext.Done():
280
+			r.logger.Info("stopping healthcheck following graceful shutdown")
281
+			client.Close()
282
+			return
283
+		default:
284
+		}
285
+
286
+		r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
287
+
288
+		if r.daemonPid != -1 {
289
+			transientFailureCount++
290
+			if transientFailureCount >= maxConnectionRetryCount || !system.IsProcessAlive(r.daemonPid) {
291
+				transientFailureCount = 0
292
+				if system.IsProcessAlive(r.daemonPid) {
293
+					r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
294
+					// Try to get a stack trace
295
+					syscall.Kill(r.daemonPid, syscall.SIGUSR1)
296
+					<-time.After(100 * time.Millisecond)
297
+					system.KillProcess(r.daemonPid)
298
+				}
299
+				<-r.daemonWaitCh
300
+				var err error
301
+				client.Close()
302
+				os.Remove(r.GRPC.Address)
303
+				if err = r.startContainerd(); err != nil {
304
+					r.logger.WithError(err).Error("failed restarting containerd")
305
+				} else {
306
+					newClient, err := containerd.New(r.GRPC.Address)
307
+					if err != nil {
308
+						r.logger.WithError(err).Error("failed to connect to containerd")
309
+					} else {
310
+						client = newClient
311
+					}
312
+				}
313
+			}
314
+		}
315
+	}
316
+}
0 317
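For orientation, a minimal sketch of how a caller might wire up the remote defined above, assuming a value backend that satisfies the Backend interface; the paths, the "moby" namespace, and the helper name are illustrative assumptions, not part of this change.

package containerdwiring // hypothetical package, for illustration only

import (
	"github.com/docker/docker/libcontainerd"
)

// setupContainerd starts (or reuses) a docker-containerd process through the
// remote and returns a namespaced client that reports events to backend.
func setupContainerd(backend libcontainerd.Backend) (libcontainerd.Client, libcontainerd.Remote, error) {
	remote, err := libcontainerd.New(
		"/var/lib/docker/containerd", // rootDir (assumed path)
		"/var/run/docker/containerd", // stateDir (assumed path)
		libcontainerd.WithStartDaemon(true),
		libcontainerd.WithLogLevel("info"),
	)
	if err != nil {
		return nil, nil, err
	}
	// Each client is bound to a containerd namespace and receives its events.
	client, err := remote.NewClient("moby", backend)
	if err != nil {
		remote.Cleanup()
		return nil, nil, err
	}
	return client, remote, nil
}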
new file mode 100644
... ...
@@ -0,0 +1,141 @@
0
+// +build !windows
1
+
2
+package libcontainerd
3
+
4
+import "fmt"
5
+
6
+// WithRemoteAddr sets the external containerd socket to connect to.
7
+func WithRemoteAddr(addr string) RemoteOption {
8
+	return rpcAddr(addr)
9
+}
10
+
11
+type rpcAddr string
12
+
13
+func (a rpcAddr) Apply(r Remote) error {
14
+	if remote, ok := r.(*remote); ok {
15
+		remote.GRPC.Address = string(a)
16
+		return nil
17
+	}
18
+	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
19
+}
20
+
21
+// WithRemoteAddrUser sets the uid and gid to create the RPC address with
22
+func WithRemoteAddrUser(uid, gid int) RemoteOption {
23
+	return rpcUser{uid, gid}
24
+}
25
+
26
+type rpcUser struct {
27
+	uid int
28
+	gid int
29
+}
30
+
31
+func (u rpcUser) Apply(r Remote) error {
32
+	if remote, ok := r.(*remote); ok {
33
+		remote.GRPC.Uid = u.uid
34
+		remote.GRPC.Gid = u.gid
35
+		return nil
36
+	}
37
+	return fmt.Errorf("WithRemoteAddrUser option not supported for this remote")
38
+}
39
+
40
+// WithStartDaemon defines if libcontainerd should also run containerd daemon.
41
+func WithStartDaemon(start bool) RemoteOption {
42
+	return startDaemon(start)
43
+}
44
+
45
+type startDaemon bool
46
+
47
+func (s startDaemon) Apply(r Remote) error {
48
+	if remote, ok := r.(*remote); ok {
49
+		remote.startDaemon = bool(s)
50
+		return nil
51
+	}
52
+	return fmt.Errorf("WithStartDaemon option not supported for this remote")
53
+}
54
+
55
+// WithLogLevel defines which log level to start containerd with.
56
+// This only makes sense if WithStartDaemon() was set to true.
57
+func WithLogLevel(lvl string) RemoteOption {
58
+	return logLevel(lvl)
59
+}
60
+
61
+type logLevel string
62
+
63
+func (l logLevel) Apply(r Remote) error {
64
+	if remote, ok := r.(*remote); ok {
65
+		remote.Debug.Level = string(l)
66
+		return nil
67
+	}
68
+	return fmt.Errorf("WithLogLevel option not supported for this remote")
69
+}
70
+
71
+// WithDebugAddress defines at which location the debug GRPC connection
72
+// should be made
73
+func WithDebugAddress(addr string) RemoteOption {
74
+	return debugAddress(addr)
75
+}
76
+
77
+type debugAddress string
78
+
79
+func (d debugAddress) Apply(r Remote) error {
80
+	if remote, ok := r.(*remote); ok {
81
+		remote.Debug.Address = string(d)
82
+		return nil
83
+	}
84
+	return fmt.Errorf("WithDebugAddress option not supported for this remote")
85
+}
86
+
87
+// WithMetricsAddress defines at which location the metrics GRPC connection
88
+// should be made
89
+func WithMetricsAddress(addr string) RemoteOption {
90
+	return metricsAddress(addr)
91
+}
92
+
93
+type metricsAddress string
94
+
95
+func (m metricsAddress) Apply(r Remote) error {
96
+	if remote, ok := r.(*remote); ok {
97
+		remote.Metrics.Address = string(m)
98
+		return nil
99
+	}
100
+	return fmt.Errorf("WithMetricsAddress option not supported for this remote")
101
+}
102
+
103
+// WithSnapshotter defines which snapshotter driver should be used
104
+func WithSnapshotter(name string) RemoteOption {
105
+	return snapshotter(name)
106
+}
107
+
108
+type snapshotter string
109
+
110
+func (s snapshotter) Apply(r Remote) error {
111
+	if remote, ok := r.(*remote); ok {
112
+		remote.snapshotter = string(s)
113
+		return nil
114
+	}
115
+	return fmt.Errorf("WithSnapshotter option not supported for this remote")
116
+}
117
+
118
+// WithPlugin allows configuring a containerd plugin.
119
+// Configuration values passed need to be quoted if quotes are needed in
120
+// the toml format.
121
+func WithPlugin(name string, conf interface{}) RemoteOption {
122
+	return pluginConf{
123
+		name: name,
124
+		conf: conf,
125
+	}
126
+}
127
+
128
+type pluginConf struct {
129
+	// Name is the name of the plugin
130
+	name string
131
+	conf interface{}
132
+}
133
+
134
+func (p pluginConf) Apply(r Remote) error {
135
+	if remote, ok := r.(*remote); ok {
136
+		remote.pluginConfs.Plugins[p.name] = p.conf
137
+		return nil
138
+	}
139
+	return fmt.Errorf("WithPlugin option not supported for this remote")
140
+}
0 141
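A hypothetical use of WithPlugin, only to illustrate how a plugin configuration flows into the generated containerd.toml; the plugin name "linux" and the struct fields are assumptions for illustration, not values defined by this change.

package main // illustrative only

import (
	"fmt"

	"github.com/docker/docker/libcontainerd"
)

// linuxRuntimeConf is a hypothetical plugin configuration; the toml tags are
// what the TOML encoder used by the remote would pick up.
type linuxRuntimeConf struct {
	Shim    string `toml:"shim"`
	Runtime string `toml:"runtime"`
}

func main() {
	// The value is stored under the plugin name and later encoded into the
	// [plugins] section of containerd.toml when the config file is written.
	opt := libcontainerd.WithPlugin("linux", linuxRuntimeConf{
		Shim:    "docker-containerd-shim",
		Runtime: "runc",
	})
	fmt.Printf("remote option of type %T ready to pass to libcontainerd.New\n", opt)
}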
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+// +build linux solaris
1
+
2
+package libcontainerd
3
+
4
+import "fmt"
5
+
6
+// WithOOMScore defines the oom_score_adj to set for the containerd process.
7
+func WithOOMScore(score int) RemoteOption {
8
+	return oomScore(score)
9
+}
10
+
11
+type oomScore int
12
+
13
+func (o oomScore) Apply(r Remote) error {
14
+	if remote, ok := r.(*remote); ok {
15
+		remote.OOMScore = int(o)
16
+		return nil
17
+	}
18
+	return fmt.Errorf("WithOOMScore option not supported for this remote")
19
+}
20
+
21
+// WithSubreaper sets whether containerd should register itself as a
22
+// subreaper
23
+func WithSubreaper(reap bool) RemoteOption {
24
+	return subreaper(reap)
25
+}
26
+
27
+type subreaper bool
28
+
29
+func (s subreaper) Apply(r Remote) error {
30
+	if remote, ok := r.(*remote); ok {
31
+		remote.Subreaper = bool(s)
32
+		return nil
33
+	}
34
+	return fmt.Errorf("WithSubreaper option not supported for this remote")
35
+}
0 36
new file mode 100644
... ...
@@ -0,0 +1,56 @@
0
+// +build !windows
1
+
2
+package libcontainerd
3
+
4
+import "github.com/pkg/errors"
5
+
6
+// process represents the state for the main container process or an exec.
7
+type process struct {
8
+	// id is the logical name of the process
9
+	id string
10
+
11
+	// cid is the container id to which this process belongs
12
+	cid string
13
+
14
+	// pid is the identifier of the process
15
+	pid uint32
16
+
17
+	// io holds the io reader/writer associated with the process
18
+	io *IOPipe
19
+
20
+	// root is the state directory for the process
21
+	root string
22
+}
23
+
24
+func (p *process) ID() string {
25
+	return p.id
26
+}
27
+
28
+func (p *process) Pid() uint32 {
29
+	return p.pid
30
+}
31
+
32
+func (p *process) SetPid(pid uint32) error {
33
+	if p.pid != 0 {
34
+		return errors.Errorf("pid is already set to %d", pid)
35
+	}
36
+
37
+	p.pid = pid
38
+	return nil
39
+}
40
+
41
+func (p *process) IOPipe() *IOPipe {
42
+	return p.io
43
+}
44
+
45
+func (p *process) CloseIO() {
46
+	if p.io.Stdin != nil {
47
+		p.io.Stdin.Close()
48
+	}
49
+	if p.io.Stdout != nil {
50
+		p.io.Stdout.Close()
51
+	}
52
+	if p.io.Stderr != nil {
53
+		p.io.Stderr.Close()
54
+	}
55
+}
0 56
new file mode 100644
... ...
@@ -0,0 +1,61 @@
0
+// +build linux solaris
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"os"
6
+	"path/filepath"
7
+
8
+	"github.com/pkg/errors"
9
+	"golang.org/x/sys/unix"
10
+)
11
+
12
+var fdNames = map[int]string{
13
+	unix.Stdin:  "stdin",
14
+	unix.Stdout: "stdout",
15
+	unix.Stderr: "stderr",
16
+}
17
+
18
+func (p *process) pipeName(index int) string {
19
+	return filepath.Join(p.root, p.id+"-"+fdNames[index])
20
+}
21
+
22
+func (p *process) IOPaths() (string, string, string) {
23
+	var (
24
+		stdin  = p.pipeName(unix.Stdin)
25
+		stdout = p.pipeName(unix.Stdout)
26
+		stderr = p.pipeName(unix.Stderr)
27
+	)
28
+	// TODO: debug why we're having zombies when I don't unset those
29
+	if p.io.Stdin == nil {
30
+		stdin = ""
31
+	}
32
+	if p.io.Stderr == nil {
33
+		stderr = ""
34
+	}
35
+	return stdin, stdout, stderr
36
+}
37
+
38
+func (p *process) Cleanup() error {
39
+	var retErr error
40
+
41
+	// Ensure everything was closed
42
+	p.CloseIO()
43
+
44
+	for _, i := range [3]string{
45
+		p.pipeName(unix.Stdin),
46
+		p.pipeName(unix.Stdout),
47
+		p.pipeName(unix.Stderr),
48
+	} {
49
+		err := os.Remove(i)
50
+		if err != nil {
51
+			if retErr == nil {
52
+				retErr = errors.Wrapf(err, "failed to remove %s", i)
53
+			} else {
54
+				retErr = errors.Wrapf(retErr, "failed to remove %s", i)
55
+			}
56
+		}
57
+	}
58
+
59
+	return retErr
60
+}
0 61
new file mode 100644
... ...
@@ -0,0 +1,56 @@
0
+// +build linux solaris
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"os"
6
+	"path/filepath"
7
+	"syscall"
8
+	"time"
9
+
10
+	"github.com/docker/docker/pkg/system"
11
+)
12
+
13
+const (
14
+	sockFile      = "docker-containerd.sock"
15
+	debugSockFile = "docker-containerd-debug.sock"
16
+)
17
+
18
+func (r *remote) setDefaults() {
19
+	if r.GRPC.Address == "" {
20
+		r.GRPC.Address = filepath.Join(r.stateDir, sockFile)
21
+	}
22
+	if r.Debug.Address == "" {
23
+		r.Debug.Address = filepath.Join(r.stateDir, debugSockFile)
24
+	}
25
+	if r.Debug.Level == "" {
26
+		r.Debug.Level = "info"
27
+	}
28
+	if r.OOMScore == 0 {
29
+		r.OOMScore = -999
30
+	}
31
+	if r.snapshotter == "" {
32
+		r.snapshotter = "overlay"
33
+	}
34
+}
35
+
36
+func (r *remote) stopDaemon() {
37
+	// Ask the daemon to quit
38
+	syscall.Kill(r.daemonPid, syscall.SIGTERM)
39
+	// Wait up to 15secs for it to stop
40
+	for i := time.Duration(0); i < shutdownTimeout; i += time.Second {
41
+		if !system.IsProcessAlive(r.daemonPid) {
42
+			break
43
+		}
44
+		time.Sleep(time.Second)
45
+	}
46
+
47
+	if system.IsProcessAlive(r.daemonPid) {
48
+		r.logger.WithField("pid", r.daemonPid).Warn("daemon didn't stop within 15 secs, killing it")
49
+		syscall.Kill(r.daemonPid, syscall.SIGKILL)
50
+	}
51
+}
52
+
53
+func (r *remote) platformCleanup() {
54
+	os.Remove(filepath.Join(r.stateDir, sockFile))
55
+}
0 56
new file mode 100644
... ...
@@ -0,0 +1,50 @@
0
+// +build remote_daemon
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"os"
6
+)
7
+
8
+const (
9
+	grpcPipeName  = `\\.\pipe\docker-containerd-containerd`
10
+	debugPipeName = `\\.\pipe\docker-containerd-debug`
11
+)
12
+
13
+func (r *remote) setDefaults() {
14
+	if r.GRPC.Address == "" {
15
+		r.GRPC.Address = grpcPipeName
16
+	}
17
+	if r.Debug.Address == "" {
18
+		r.Debug.Address = debugPipeName
19
+	}
20
+	if r.Debug.Level == "" {
21
+		r.Debug.Level = "info"
22
+	}
23
+	if r.snapshotter == "" {
24
+		r.snapshotter = "naive" // TODO(mlaventure): switch to "windows" once implemented
25
+	}
26
+}
27
+
28
+func (r *remote) stopDaemon() {
29
+	p, err := os.FindProcess(r.daemonPid)
30
+	if err != nil {
31
+		r.logger.WithField("pid", r.daemonPid).Warn("could not find daemon process")
32
+		return
33
+	}
34
+
35
+	if err = p.Kill(); err != nil {
36
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("could not kill daemon process")
37
+		return
38
+	}
39
+
40
+	_, err = p.Wait()
41
+	if err != nil {
42
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("wait for daemon process")
43
+		return
44
+	}
45
+}
46
+
47
+func (r *remote) platformCleanup() {
48
+	// Nothing to do
49
+}
0 50
new file mode 100644
... ...
@@ -0,0 +1,59 @@
0
+// +build windows
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"sync"
6
+
7
+	"github.com/sirupsen/logrus"
8
+)
9
+
10
+type remote struct {
11
+	sync.RWMutex
12
+
13
+	logger  *logrus.Entry
14
+	clients []*client
15
+
16
+	// Options
17
+	rootDir  string
18
+	stateDir string
19
+}
20
+
21
+// New creates a fresh instance of libcontainerd remote.
22
+func New(rootDir, stateDir string, options ...RemoteOption) (Remote, error) {
23
+	return &remote{
24
+		logger:   logrus.WithField("module", "libcontainerd"),
25
+		rootDir:  rootDir,
26
+		stateDir: stateDir,
27
+	}, nil
28
+}
29
+
30
+type client struct {
31
+	sync.Mutex
32
+
33
+	rootDir    string
34
+	stateDir   string
35
+	backend    Backend
36
+	logger     *logrus.Entry
37
+	eventQ     queue
38
+	containers map[string]*container
39
+}
40
+
41
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
42
+	c := &client{
43
+		rootDir:    r.rootDir,
44
+		stateDir:   r.stateDir,
45
+		backend:    b,
46
+		logger:     r.logger.WithField("namespace", ns),
47
+		containers: make(map[string]*container),
48
+	}
49
+	r.Lock()
50
+	r.clients = append(r.clients, c)
51
+	r.Unlock()
52
+
53
+	return c, nil
54
+}
55
+
56
+func (r *remote) Cleanup() {
57
+	// Nothing to do
58
+}
0 59
deleted file mode 100644
... ...
@@ -1,565 +0,0 @@
1
-// +build linux solaris
2
-
3
-package libcontainerd
4
-
5
-import (
6
-	"fmt"
7
-	"io"
8
-	"io/ioutil"
9
-	"log"
10
-	"net"
11
-	"os"
12
-	"os/exec"
13
-	"path/filepath"
14
-	goruntime "runtime"
15
-	"strconv"
16
-	"strings"
17
-	"sync"
18
-	"time"
19
-
20
-	containerd "github.com/containerd/containerd/api/grpc/types"
21
-	"github.com/docker/docker/pkg/locker"
22
-	"github.com/docker/docker/pkg/system"
23
-	"github.com/golang/protobuf/ptypes"
24
-	"github.com/golang/protobuf/ptypes/timestamp"
25
-	"github.com/sirupsen/logrus"
26
-	"golang.org/x/net/context"
27
-	"golang.org/x/sys/unix"
28
-	"google.golang.org/grpc"
29
-	"google.golang.org/grpc/grpclog"
30
-	"google.golang.org/grpc/health/grpc_health_v1"
31
-	"google.golang.org/grpc/transport"
32
-)
33
-
34
-const (
35
-	maxConnectionRetryCount      = 3
36
-	containerdHealthCheckTimeout = 3 * time.Second
37
-	containerdShutdownTimeout    = 15 * time.Second
38
-	containerdBinary             = "docker-containerd"
39
-	containerdPidFilename        = "docker-containerd.pid"
40
-	containerdSockFilename       = "docker-containerd.sock"
41
-	containerdStateDir           = "containerd"
42
-	eventTimestampFilename       = "event.ts"
43
-)
44
-
45
-type remote struct {
46
-	sync.RWMutex
47
-	apiClient            containerd.APIClient
48
-	daemonPid            int
49
-	stateDir             string
50
-	rpcAddr              string
51
-	startDaemon          bool
52
-	closedManually       bool
53
-	debugLog             bool
54
-	rpcConn              *grpc.ClientConn
55
-	clients              []*client
56
-	eventTsPath          string
57
-	runtime              string
58
-	runtimeArgs          []string
59
-	daemonWaitCh         chan struct{}
60
-	liveRestore          bool
61
-	oomScore             int
62
-	restoreFromTimestamp *timestamp.Timestamp
63
-}
64
-
65
-// New creates a fresh instance of libcontainerd remote.
66
-func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
67
-	defer func() {
68
-		if err != nil {
69
-			err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specified the correct address. Got error: %v", err)
70
-		}
71
-	}()
72
-	r := &remote{
73
-		stateDir:    stateDir,
74
-		daemonPid:   -1,
75
-		eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
76
-	}
77
-	for _, option := range options {
78
-		if err := option.Apply(r); err != nil {
79
-			return nil, err
80
-		}
81
-	}
82
-
83
-	if err := system.MkdirAll(stateDir, 0700, ""); err != nil {
84
-		return nil, err
85
-	}
86
-
87
-	if r.rpcAddr == "" {
88
-		r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
89
-	}
90
-
91
-	if r.startDaemon {
92
-		if err := r.runContainerdDaemon(); err != nil {
93
-			return nil, err
94
-		}
95
-	}
96
-
97
-	// don't output the grpc reconnect logging
98
-	grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags))
99
-	dialOpts := []grpc.DialOption{
100
-		grpc.WithInsecure(),
101
-		grpc.WithBackoffMaxDelay(2 * time.Second),
102
-		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
103
-			return net.DialTimeout("unix", addr, timeout)
104
-		}),
105
-	}
106
-	conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
107
-	if err != nil {
108
-		return nil, fmt.Errorf("error connecting to containerd: %v", err)
109
-	}
110
-
111
-	r.rpcConn = conn
112
-	r.apiClient = containerd.NewAPIClient(conn)
113
-
114
-	// Get the timestamp to restore from
115
-	t := r.getLastEventTimestamp()
116
-	tsp, err := ptypes.TimestampProto(t)
117
-	if err != nil {
118
-		logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
119
-	}
120
-	r.restoreFromTimestamp = tsp
121
-
122
-	go r.handleConnectionChange()
123
-
124
-	if err := r.startEventsMonitor(); err != nil {
125
-		return nil, err
126
-	}
127
-
128
-	return r, nil
129
-}
130
-
131
-func (r *remote) UpdateOptions(options ...RemoteOption) error {
132
-	for _, option := range options {
133
-		if err := option.Apply(r); err != nil {
134
-			return err
135
-		}
136
-	}
137
-	return nil
138
-}
139
-
140
-func (r *remote) handleConnectionChange() {
141
-	var transientFailureCount = 0
142
-
143
-	ticker := time.NewTicker(500 * time.Millisecond)
144
-	defer ticker.Stop()
145
-	healthClient := grpc_health_v1.NewHealthClient(r.rpcConn)
146
-
147
-	for {
148
-		<-ticker.C
149
-		ctx, cancel := context.WithTimeout(context.Background(), containerdHealthCheckTimeout)
150
-		_, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{})
151
-		cancel()
152
-		if err == nil {
153
-			continue
154
-		}
155
-
156
-		logrus.Debugf("libcontainerd: containerd health check returned error: %v", err)
157
-
158
-		if r.daemonPid != -1 {
159
-			if r.closedManually {
160
-				// Well, we asked for it to stop, just return
161
-				return
162
-			}
163
-			// all other errors are transient
164
-			// Reset state to be notified of next failure
165
-			transientFailureCount++
166
-			if transientFailureCount >= maxConnectionRetryCount {
167
-				transientFailureCount = 0
168
-				if system.IsProcessAlive(r.daemonPid) {
169
-					system.KillProcess(r.daemonPid)
170
-				}
171
-				<-r.daemonWaitCh
172
-				if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
173
-					logrus.Errorf("libcontainerd: error restarting containerd: %v", err)
174
-				}
175
-				continue
176
-			}
177
-		}
178
-	}
179
-}
180
-
181
-func (r *remote) Cleanup() {
182
-	if r.daemonPid == -1 {
183
-		return
184
-	}
185
-	r.closedManually = true
186
-	r.rpcConn.Close()
187
-	// Ask the daemon to quit
188
-	unix.Kill(r.daemonPid, unix.SIGTERM)
189
-
190
-	// Wait up to 15secs for it to stop
191
-	for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
192
-		if !system.IsProcessAlive(r.daemonPid) {
193
-			break
194
-		}
195
-		time.Sleep(time.Second)
196
-	}
197
-
198
-	if system.IsProcessAlive(r.daemonPid) {
199
-		logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
200
-		unix.Kill(r.daemonPid, unix.SIGKILL)
201
-	}
202
-
203
-	// cleanup some files
204
-	os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
205
-	os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
206
-}
207
-
208
-func (r *remote) Client(b Backend) (Client, error) {
209
-	c := &client{
210
-		clientCommon: clientCommon{
211
-			backend:    b,
212
-			containers: make(map[string]*container),
213
-			locker:     locker.New(),
214
-		},
215
-		remote:        r,
216
-		exitNotifiers: make(map[string]*exitNotifier),
217
-		liveRestore:   r.liveRestore,
218
-	}
219
-
220
-	r.Lock()
221
-	r.clients = append(r.clients, c)
222
-	r.Unlock()
223
-	return c, nil
224
-}
225
-
226
-func (r *remote) updateEventTimestamp(t time.Time) {
227
-	f, err := os.OpenFile(r.eventTsPath, unix.O_CREAT|unix.O_WRONLY|unix.O_TRUNC, 0600)
228
-	if err != nil {
229
-		logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
230
-		return
231
-	}
232
-	defer f.Close()
233
-
234
-	b, err := t.MarshalText()
235
-	if err != nil {
236
-		logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
237
-		return
238
-	}
239
-
240
-	n, err := f.Write(b)
241
-	if err != nil || n != len(b) {
242
-		logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
243
-		f.Truncate(0)
244
-		return
245
-	}
246
-}
247
-
248
-func (r *remote) getLastEventTimestamp() time.Time {
249
-	t := time.Now()
250
-
251
-	fi, err := os.Stat(r.eventTsPath)
252
-	if os.IsNotExist(err) || fi.Size() == 0 {
253
-		return t
254
-	}
255
-
256
-	f, err := os.Open(r.eventTsPath)
257
-	if err != nil {
258
-		logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err)
259
-		return t
260
-	}
261
-	defer f.Close()
262
-
263
-	b := make([]byte, fi.Size())
264
-	n, err := f.Read(b)
265
-	if err != nil || n != len(b) {
266
-		logrus.Warnf("libcontainerd: Unable to read last event ts: %v", err)
267
-		return t
268
-	}
269
-
270
-	t.UnmarshalText(b)
271
-
272
-	return t
273
-}
274
-
275
-func (r *remote) startEventsMonitor() error {
276
-	// First, get past events
277
-	t := r.getLastEventTimestamp()
278
-	tsp, err := ptypes.TimestampProto(t)
279
-	if err != nil {
280
-		logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
281
-	}
282
-	er := &containerd.EventsRequest{
283
-		Timestamp: tsp,
284
-	}
285
-
286
-	var events containerd.API_EventsClient
287
-	for {
288
-		events, err = r.apiClient.Events(context.Background(), er, grpc.FailFast(false))
289
-		if err == nil {
290
-			break
291
-		}
292
-		logrus.Warnf("libcontainerd: failed to get events from containerd: %q", err)
293
-
294
-		if r.closedManually {
295
-			// ignore error if grpc remote connection is closed manually
296
-			return nil
297
-		}
298
-
299
-		<-time.After(100 * time.Millisecond)
300
-	}
301
-
302
-	go r.handleEventStream(events)
303
-	return nil
304
-}
305
-
306
-func (r *remote) handleEventStream(events containerd.API_EventsClient) {
307
-	for {
308
-		e, err := events.Recv()
309
-		if err != nil {
310
-			if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc &&
311
-				r.closedManually {
312
-				// ignore error if grpc remote connection is closed manually
313
-				return
314
-			}
315
-			logrus.Errorf("libcontainerd: failed to receive event from containerd: %v", err)
316
-			go r.startEventsMonitor()
317
-			return
318
-		}
319
-
320
-		logrus.Debugf("libcontainerd: received containerd event: %#v", e)
321
-
322
-		var container *container
323
-		var c *client
324
-		r.RLock()
325
-		for _, c = range r.clients {
326
-			container, err = c.getContainer(e.Id)
327
-			if err == nil {
328
-				break
329
-			}
330
-		}
331
-		r.RUnlock()
332
-		if container == nil {
333
-			logrus.Warnf("libcontainerd: unknown container %s", e.Id)
334
-			continue
335
-		}
336
-
337
-		if err := container.handleEvent(e); err != nil {
338
-			logrus.Errorf("libcontainerd: error processing state change for %s: %v", e.Id, err)
339
-		}
340
-
341
-		tsp, err := ptypes.Timestamp(e.Timestamp)
342
-		if err != nil {
343
-			logrus.Errorf("libcontainerd: failed to convert event timestamp: %q", err)
344
-			continue
345
-		}
346
-
347
-		r.updateEventTimestamp(tsp)
348
-	}
349
-}
350
-
351
-func (r *remote) runContainerdDaemon() error {
352
-	pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
353
-	f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
354
-	if err != nil {
355
-		return err
356
-	}
357
-	defer f.Close()
358
-
359
-	// File exist, check if the daemon is alive
360
-	b := make([]byte, 8)
361
-	n, err := f.Read(b)
362
-	if err != nil && err != io.EOF {
363
-		return err
364
-	}
365
-
366
-	if n > 0 {
367
-		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
368
-		if err != nil {
369
-			return err
370
-		}
371
-		if system.IsProcessAlive(int(pid)) {
372
-			logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid)
373
-			r.daemonPid = int(pid)
374
-			return nil
375
-		}
376
-	}
377
-
378
-	// rewind the file
379
-	_, err = f.Seek(0, os.SEEK_SET)
380
-	if err != nil {
381
-		return err
382
-	}
383
-
384
-	// Truncate it
385
-	err = f.Truncate(0)
386
-	if err != nil {
387
-		return err
388
-	}
389
-
390
-	// Start a new instance
391
-	args := []string{
392
-		"-l", fmt.Sprintf("unix://%s", r.rpcAddr),
393
-		"--metrics-interval=0",
394
-		"--start-timeout", "2m",
395
-		"--state-dir", filepath.Join(r.stateDir, containerdStateDir),
396
-	}
397
-	if goruntime.GOOS == "solaris" {
398
-		args = append(args, "--shim", "containerd-shim", "--runtime", "runc")
399
-	} else {
400
-		args = append(args, "--shim", "docker-containerd-shim")
401
-		if r.runtime != "" {
402
-			args = append(args, "--runtime")
403
-			args = append(args, r.runtime)
404
-		}
405
-	}
406
-	if r.debugLog {
407
-		args = append(args, "--debug")
408
-	}
409
-	if len(r.runtimeArgs) > 0 {
410
-		for _, v := range r.runtimeArgs {
411
-			args = append(args, "--runtime-args")
412
-			args = append(args, v)
413
-		}
414
-		logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args)
415
-	}
416
-
417
-	cmd := exec.Command(containerdBinary, args...)
418
-	// redirect containerd logs to docker logs
419
-	cmd.Stdout = os.Stdout
420
-	cmd.Stderr = os.Stderr
421
-	cmd.SysProcAttr = setSysProcAttr(true)
422
-	cmd.Env = nil
423
-	// clear the NOTIFY_SOCKET from the env when starting containerd
424
-	for _, e := range os.Environ() {
425
-		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
426
-			cmd.Env = append(cmd.Env, e)
427
-		}
428
-	}
429
-	if err := cmd.Start(); err != nil {
430
-		return err
431
-	}
432
-
433
-	// unless strictly necessary, do not add anything in between here
434
-	// as the reaper goroutine below needs to kick in as soon as possible
435
-	// and any "return" from code paths added here will defeat the reaper
436
-	// process.
437
-
438
-	r.daemonWaitCh = make(chan struct{})
439
-	go func() {
440
-		cmd.Wait()
441
-		close(r.daemonWaitCh)
442
-	}() // Reap our child when needed
443
-
444
-	logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid)
445
-	if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil {
446
-		system.KillProcess(cmd.Process.Pid)
447
-		return err
448
-	}
449
-	if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
450
-		system.KillProcess(cmd.Process.Pid)
451
-		return err
452
-	}
453
-
454
-	r.daemonPid = cmd.Process.Pid
455
-	return nil
456
-}
457
-
458
-// WithRemoteAddr sets the external containerd socket to connect to.
459
-func WithRemoteAddr(addr string) RemoteOption {
460
-	return rpcAddr(addr)
461
-}
462
-
463
-type rpcAddr string
464
-
465
-func (a rpcAddr) Apply(r Remote) error {
466
-	if remote, ok := r.(*remote); ok {
467
-		remote.rpcAddr = string(a)
468
-		return nil
469
-	}
470
-	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
471
-}
472
-
473
-// WithRuntimePath sets the path of the runtime to be used as the
474
-// default by containerd
475
-func WithRuntimePath(rt string) RemoteOption {
476
-	return runtimePath(rt)
477
-}
478
-
479
-type runtimePath string
480
-
481
-func (rt runtimePath) Apply(r Remote) error {
482
-	if remote, ok := r.(*remote); ok {
483
-		remote.runtime = string(rt)
484
-		return nil
485
-	}
486
-	return fmt.Errorf("WithRuntime option not supported for this remote")
487
-}
488
-
489
-// WithRuntimeArgs sets the list of runtime args passed to containerd
490
-func WithRuntimeArgs(args []string) RemoteOption {
491
-	return runtimeArgs(args)
492
-}
493
-
494
-type runtimeArgs []string
495
-
496
-func (rt runtimeArgs) Apply(r Remote) error {
497
-	if remote, ok := r.(*remote); ok {
498
-		remote.runtimeArgs = rt
499
-		return nil
500
-	}
501
-	return fmt.Errorf("WithRuntimeArgs option not supported for this remote")
502
-}
503
-
504
-// WithStartDaemon defines if libcontainerd should also run containerd daemon.
505
-func WithStartDaemon(start bool) RemoteOption {
506
-	return startDaemon(start)
507
-}
508
-
509
-type startDaemon bool
510
-
511
-func (s startDaemon) Apply(r Remote) error {
512
-	if remote, ok := r.(*remote); ok {
513
-		remote.startDaemon = bool(s)
514
-		return nil
515
-	}
516
-	return fmt.Errorf("WithStartDaemon option not supported for this remote")
517
-}
518
-
519
-// WithDebugLog defines if containerd debug logs will be enabled for daemon.
520
-func WithDebugLog(debug bool) RemoteOption {
521
-	return debugLog(debug)
522
-}
523
-
524
-type debugLog bool
525
-
526
-func (d debugLog) Apply(r Remote) error {
527
-	if remote, ok := r.(*remote); ok {
528
-		remote.debugLog = bool(d)
529
-		return nil
530
-	}
531
-	return fmt.Errorf("WithDebugLog option not supported for this remote")
532
-}
533
-
534
-// WithLiveRestore defines if containers are stopped on shutdown or restored.
535
-func WithLiveRestore(v bool) RemoteOption {
536
-	return liveRestore(v)
537
-}
538
-
539
-type liveRestore bool
540
-
541
-func (l liveRestore) Apply(r Remote) error {
542
-	if remote, ok := r.(*remote); ok {
543
-		remote.liveRestore = bool(l)
544
-		for _, c := range remote.clients {
545
-			c.liveRestore = bool(l)
546
-		}
547
-		return nil
548
-	}
549
-	return fmt.Errorf("WithLiveRestore option not supported for this remote")
550
-}
551
-
552
-// WithOOMScore defines the oom_score_adj to set for the containerd process.
553
-func WithOOMScore(score int) RemoteOption {
554
-	return oomScore(score)
555
-}
556
-
557
-type oomScore int
558
-
559
-func (o oomScore) Apply(r Remote) error {
560
-	if remote, ok := r.(*remote); ok {
561
-		remote.oomScore = int(o)
562
-		return nil
563
-	}
564
-	return fmt.Errorf("WithOOMScore option not supported for this remote")
565
-}
566 1
deleted file mode 100644
... ...
@@ -1,36 +0,0 @@
1
-package libcontainerd
2
-
3
-import "github.com/docker/docker/pkg/locker"
4
-
5
-type remote struct {
6
-}
7
-
8
-func (r *remote) Client(b Backend) (Client, error) {
9
-	c := &client{
10
-		clientCommon: clientCommon{
11
-			backend:    b,
12
-			containers: make(map[string]*container),
13
-			locker:     locker.New(),
14
-		},
15
-	}
16
-	return c, nil
17
-}
18
-
19
-// Cleanup is a no-op on Windows. It is here to implement the interface.
20
-func (r *remote) Cleanup() {
21
-}
22
-
23
-func (r *remote) UpdateOptions(opts ...RemoteOption) error {
24
-	return nil
25
-}
26
-
27
-// New creates a fresh instance of libcontainerd remote. On Windows,
28
-// this is not used as there is no remote containerd process.
29
-func New(_ string, _ ...RemoteOption) (Remote, error) {
30
-	return &remote{}, nil
31
-}
32
-
33
-// WithLiveRestore is a noop on windows.
34
-func WithLiveRestore(v bool) RemoteOption {
35
-	return nil
36
-}
... ...
@@ -1,64 +1,110 @@
1 1
 package libcontainerd
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"io"
6
+	"time"
5 7
 
6
-	containerd "github.com/containerd/containerd/api/grpc/types"
8
+	"github.com/containerd/containerd"
7 9
 	"github.com/opencontainers/runtime-spec/specs-go"
8
-	"golang.org/x/net/context"
9 10
 )
10 11
 
11
-// State constants used in state change reporting.
12
+// EventType represents a possible event from libcontainerd
13
+type EventType string
14
+
15
+// Event constants used when reporting events
16
+const (
17
+	EventUnknown     EventType = "unknown"
18
+	EventExit        EventType = "exit"
19
+	EventOOM         EventType = "oom"
20
+	EventCreate      EventType = "create"
21
+	EventStart       EventType = "start"
22
+	EventExecAdded   EventType = "exec-added"
23
+	EventExecStarted EventType = "exec-started"
24
+	EventPaused      EventType = "paused"
25
+	EventResumed     EventType = "resumed"
26
+)
27
+
28
+// Status represents the current status of a container
29
+type Status string
30
+
31
+// Possible container statuses
12 32
 const (
13
-	StateStart       = "start-container"
14
-	StatePause       = "pause"
15
-	StateResume      = "resume"
16
-	StateExit        = "exit"
17
-	StateRestore     = "restore"
18
-	StateExitProcess = "exit-process"
19
-	StateOOM         = "oom" // fake state
33
+	// Running indicates the process is currently executing
34
+	StatusRunning Status = "running"
35
+	// Created indicates the process has been created within containerd but the
36
+	// user's defined process has not started
37
+	StatusCreated Status = "created"
38
+	// Stopped indicates that the process has run and exited
39
+	StatusStopped Status = "stopped"
40
+	// Paused indicates that the process is currently paused
41
+	StatusPaused Status = "paused"
42
+	// Pausing indicates that the process is currently switching from a
43
+	// running state into a paused state
44
+	StatusPausing Status = "pausing"
45
+	// Unknown indicates that we could not determine the status from the runtime
46
+	StatusUnknown Status = "unknown"
20 47
 )
21 48
 
22
-// CommonStateInfo contains the state info common to all platforms.
23
-type CommonStateInfo struct { // FIXME: event?
24
-	State     string
25
-	Pid       uint32
26
-	ExitCode  uint32
27
-	ProcessID string
49
+// Remote on Linux defines the access point to the containerd grpc API.
50
+// Remote on Windows is largely an unimplemented interface as there is
51
+// no remote containerd.
52
+type Remote interface {
53
+	// NewClient returns a new Client instance connected with the given Backend.
54
+	NewClient(namespace string, backend Backend) (Client, error)
55
+	// Cleanup stops containerd if it was started by libcontainerd.
56
+	// Note this is not used on Windows as there is no remote containerd.
57
+	Cleanup()
58
+}
59
+
60
+// RemoteOption allows configuring parameters of remotes.
61
+// This is unused on Windows.
62
+type RemoteOption interface {
63
+	Apply(Remote) error
64
+}
65
+
66
+// EventInfo contains the event info
67
+type EventInfo struct {
68
+	ContainerID string
69
+	ProcessID   string
70
+	Pid         uint32
71
+	ExitCode    uint32
72
+	ExitedAt    time.Time
73
+	OOMKilled   bool
74
+	// Windows Only field
75
+	UpdatePending bool
28 76
 }
29 77
 
30 78
 // Backend defines callbacks that the client of the library needs to implement.
31 79
 type Backend interface {
32
-	StateChanged(containerID string, state StateInfo) error
80
+	ProcessEvent(containerID string, event EventType, ei EventInfo) error
33 81
 }
34 82
 
35 83
 // Client provides access to containerd features.
36 84
 type Client interface {
37
-	GetServerVersion(ctx context.Context) (*ServerVersion, error)
38
-	Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error
39
-	Signal(containerID string, sig int) error
40
-	SignalProcess(containerID string, processFriendlyName string, sig int) error
41
-	AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process, attachStdio StdioCallback) (int, error)
42
-	Resize(containerID, processFriendlyName string, width, height int) error
43
-	Pause(containerID string) error
44
-	Resume(containerID string) error
45
-	Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error
46
-	Stats(containerID string) (*Stats, error)
47
-	GetPidsForContainer(containerID string) ([]int, error)
48
-	Summary(containerID string) ([]Summary, error)
49
-	UpdateResources(containerID string, resources Resources) error
50
-	CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error
51
-	DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error
52
-	ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error)
53
-}
85
+	Restore(ctx context.Context, containerID string, attachStdio StdioCallback) (alive bool, pid int, err error)
86
+
87
+	Create(ctx context.Context, containerID string, spec *specs.Spec, runtimeOptions interface{}) error
88
+	Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio StdioCallback) (pid int, err error)
89
+	SignalProcess(ctx context.Context, containerID, processID string, signal int) error
90
+	Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error)
91
+	ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error
92
+	CloseStdin(ctx context.Context, containerID, processID string) error
93
+	Pause(ctx context.Context, containerID string) error
94
+	Resume(ctx context.Context, containerID string) error
95
+	Stats(ctx context.Context, containerID string) (*Stats, error)
96
+	ListPids(ctx context.Context, containerID string) ([]uint32, error)
97
+	Summary(ctx context.Context, containerID string) ([]Summary, error)
98
+	DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error)
99
+	Delete(ctx context.Context, containerID string) error
100
+	Status(ctx context.Context, containerID string) (Status, error)
54 101
 
55
-// CreateOption allows to configure parameters of container creation.
56
-type CreateOption interface {
57
-	Apply(interface{}) error
102
+	UpdateResources(ctx context.Context, containerID string, resources *Resources) error
103
+	CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error
58 104
 }
59 105
 
60 106
 // StdioCallback is called to connect a container or process stdio.
61
-type StdioCallback func(IOPipe) error
107
+type StdioCallback func(*IOPipe) (containerd.IO, error)
62 108
 
63 109
 // IOPipe contains the stdio streams.
64 110
 type IOPipe struct {
... ...
@@ -66,10 +112,12 @@ type IOPipe struct {
66 66
 	Stdout   io.ReadCloser
67 67
 	Stderr   io.ReadCloser
68 68
 	Terminal bool // Whether stderr is connected on Windows
69
+
70
+	cancel context.CancelFunc
71
+	config containerd.IOConfig
69 72
 }
70 73
 
71 74
 // ServerVersion contains version information as retrieved from the
72 75
 // server
73 76
 type ServerVersion struct {
74
-	containerd.GetServerVersionResponse
75 77
 }
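A minimal, hypothetical Backend implementation to illustrate the new ProcessEvent callback; the real backend is the Docker daemon, which updates container state rather than just logging.

package backendexample // hypothetical package, for illustration only

import (
	"github.com/sirupsen/logrus"

	"github.com/docker/docker/libcontainerd"
)

// loggingBackend only logs the events delivered by libcontainerd.
type loggingBackend struct{}

func (loggingBackend) ProcessEvent(containerID string, event libcontainerd.EventType, ei libcontainerd.EventInfo) error {
	switch event {
	case libcontainerd.EventExit:
		logrus.Infof("container %s: process %s exited with code %d at %s",
			containerID, ei.ProcessID, ei.ExitCode, ei.ExitedAt)
	case libcontainerd.EventOOM:
		logrus.Warnf("container %s: OOM event (killed=%v)", containerID, ei.OOMKilled)
	default:
		logrus.Debugf("container %s: event %s", containerID, event)
	}
	return nil
}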
... ...
@@ -1,49 +1,30 @@
1 1
 package libcontainerd
2 2
 
3 3
 import (
4
-	containerd "github.com/containerd/containerd/api/grpc/types"
5
-	"github.com/opencontainers/runtime-spec/specs-go"
6
-)
4
+	"time"
7 5
 
8
-// Process contains information to start a specific application inside the container.
9
-type Process struct {
10
-	// Terminal creates an interactive terminal for the container.
11
-	Terminal bool `json:"terminal"`
12
-	// User specifies user information for the process.
13
-	User *specs.User `json:"user"`
14
-	// Args specifies the binary and arguments for the application to execute.
15
-	Args []string `json:"args"`
16
-	// Env populates the process environment for the process.
17
-	Env []string `json:"env,omitempty"`
18
-	// Cwd is the current working directory for the process and must be
19
-	// relative to the container's root.
20
-	Cwd *string `json:"cwd"`
21
-	// Capabilities are linux capabilities that are kept for the container.
22
-	Capabilities []string `json:"capabilities,omitempty"`
23
-	// Rlimits specifies rlimit options to apply to the process.
24
-	Rlimits []specs.POSIXRlimit `json:"rlimits,omitempty"`
25
-	// ApparmorProfile specifies the apparmor profile for the container.
26
-	ApparmorProfile *string `json:"apparmorProfile,omitempty"`
27
-	// SelinuxLabel specifies the selinux context that the container process is run as.
28
-	SelinuxLabel *string `json:"selinuxLabel,omitempty"`
29
-}
6
+	"github.com/containerd/cgroups"
7
+	specs "github.com/opencontainers/runtime-spec/specs-go"
8
+)
30 9
 
31
-// StateInfo contains description about the new state container has entered.
32
-type StateInfo struct {
33
-	CommonStateInfo
10
+// Summary is not used on linux
11
+type Summary struct{}
34 12
 
35
-	// Platform specific StateInfo
36
-	OOMKilled bool
13
+// Stats holds metrics properties as returned by containerd
14
+type Stats struct {
15
+	Read    time.Time
16
+	Metrics *cgroups.Metrics
37 17
 }
38 18
 
39
-// Stats contains a stats properties from containerd.
40
-type Stats containerd.StatsResponse
41
-
42
-// Summary contains a container summary from containerd
43
-type Summary struct{}
19
+func interfaceToStats(read time.Time, v interface{}) *Stats {
20
+	return &Stats{
21
+		Metrics: v.(*cgroups.Metrics),
22
+		Read:    read,
23
+	}
24
+}
44 25
 
45
-// Resources defines updatable container resource values.
46
-type Resources containerd.UpdateResource
26
+// Resources defines updatable container resource values. TODO: it must match containerd upcoming API
27
+type Resources specs.LinuxResources
47 28
 
48 29
 // Checkpoints contains the details of a checkpoint
49
-type Checkpoints containerd.ListCheckpointResponse
30
+type Checkpoints struct{}
50 31
deleted file mode 100644
... ...
@@ -1,43 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	containerd "github.com/containerd/containerd/api/grpc/types"
5
-	"github.com/opencontainers/runtime-spec/specs-go"
6
-)
7
-
8
-// Process contains information to start a specific application inside the container.
9
-type Process struct {
10
-	// Terminal creates an interactive terminal for the container.
11
-	Terminal bool `json:"terminal"`
12
-	// User specifies user information for the process.
13
-	User *specs.User `json:"user"`
14
-	// Args specifies the binary and arguments for the application to execute.
15
-	Args []string `json:"args"`
16
-	// Env populates the process environment for the process.
17
-	Env []string `json:"env,omitempty"`
18
-	// Cwd is the current working directory for the process and must be
19
-	// relative to the container's root.
20
-	Cwd *string `json:"cwd"`
21
-	// Capabilities are linux capabilities that are kept for the container.
22
-	Capabilities []string `json:"capabilities,omitempty"`
23
-}
24
-
25
-// Stats contains a stats properties from containerd.
26
-type Stats struct{}
27
-
28
-// Summary contains a container summary from containerd
29
-type Summary struct{}
30
-
31
-// StateInfo contains description about the new state container has entered.
32
-type StateInfo struct {
33
-	CommonStateInfo
34
-
35
-	// Platform specific StateInfo
36
-	OOMKilled bool
37
-}
38
-
39
-// Resources defines updatable container resource values.
40
-type Resources struct{}
41
-
42
-// Checkpoints contains the details of a checkpoint
43
-type Checkpoints containerd.ListCheckpointResponse
... ...
@@ -1,27 +1,27 @@
1 1
 package libcontainerd
2 2
 
3 3
 import (
4
+	"time"
5
+
4 6
 	"github.com/Microsoft/hcsshim"
5 7
 	opengcs "github.com/Microsoft/opengcs/client"
6
-	"github.com/opencontainers/runtime-spec/specs-go"
7 8
 )
8 9
 
9
-// Process contains information to start a specific application inside the container.
10
-type Process specs.Process
11
-
12 10
 // Summary contains a ProcessList item from HCS to support `top`
13 11
 type Summary hcsshim.ProcessListItem
14 12
 
15
-// StateInfo contains description about the new state container has entered.
16
-type StateInfo struct {
17
-	CommonStateInfo
18
-
19
-	// Platform specific StateInfo
20
-	UpdatePending bool // Indicates that there are some update operations pending that should be completed by a servicing container.
13
+// Stats contains statistics from HCS
14
+type Stats struct {
15
+	Read     time.Time
16
+	HCSStats *hcsshim.Statistics
21 17
 }
22 18
 
23
-// Stats contains statistics from HCS
24
-type Stats hcsshim.Statistics
19
+func interfaceToStats(read time.Time, v interface{}) *Stats {
20
+	return &Stats{
21
+		HCSStats: v.(*hcsshim.Statistics),
22
+		Read:     read,
23
+	}
24
+}
25 25
 
26 26
 // Resources defines updatable container resource values.
27 27
 type Resources struct{}
... ...
@@ -1,63 +1,12 @@
1 1
 package libcontainerd
2 2
 
3
-import (
4
-	"syscall"
3
+import "syscall"
5 4
 
6
-	containerd "github.com/containerd/containerd/api/grpc/types"
7
-	"github.com/opencontainers/runtime-spec/specs-go"
8
-	"golang.org/x/sys/unix"
9
-)
10
-
11
-func getRootIDs(s specs.Spec) (int, int, error) {
12
-	var hasUserns bool
13
-	for _, ns := range s.Linux.Namespaces {
14
-		if ns.Type == specs.UserNamespace {
15
-			hasUserns = true
16
-			break
17
-		}
18
-	}
19
-	if !hasUserns {
20
-		return 0, 0, nil
21
-	}
22
-	uid := hostIDFromMap(0, s.Linux.UIDMappings)
23
-	gid := hostIDFromMap(0, s.Linux.GIDMappings)
24
-	return uid, gid, nil
25
-}
26
-
27
-func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
28
-	for _, m := range mp {
29
-		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
30
-			return int(m.HostID + id - m.ContainerID)
31
-		}
32
-	}
33
-	return 0
34
-}
35
-
36
-func systemPid(ctr *containerd.Container) uint32 {
37
-	var pid uint32
38
-	for _, p := range ctr.Processes {
39
-		if p.Pid == InitFriendlyName {
40
-			pid = p.SystemPid
41
-		}
42
-	}
43
-	return pid
44
-}
45
-
46
-func convertRlimits(sr []specs.POSIXRlimit) (cr []*containerd.Rlimit) {
47
-	for _, r := range sr {
48
-		cr = append(cr, &containerd.Rlimit{
49
-			Type: r.Type,
50
-			Hard: r.Hard,
51
-			Soft: r.Soft,
52
-		})
53
-	}
54
-	return
55
-}
56
-
57
-// setPDeathSig sets the parent death signal to SIGKILL
58
-func setSysProcAttr(sid bool) *syscall.SysProcAttr {
5
+// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
6
+// containerd
7
+func containerdSysProcAttr() *syscall.SysProcAttr {
59 8
 	return &syscall.SysProcAttr{
60
-		Setsid:    sid,
61
-		Pdeathsig: unix.SIGKILL,
9
+		Setsid:    true,
10
+		Pdeathsig: syscall.SIGKILL,
62 11
 	}
63 12
 }
64 13
deleted file mode 100644
... ...
@@ -1,27 +0,0 @@
1
-package libcontainerd
2
-
3
-import (
4
-	"syscall"
5
-
6
-	containerd "github.com/containerd/containerd/api/grpc/types"
7
-	"github.com/opencontainers/runtime-spec/specs-go"
8
-)
9
-
10
-func getRootIDs(s specs.Spec) (int, int, error) {
11
-	return 0, 0, nil
12
-}
13
-
14
-func systemPid(ctr *containerd.Container) uint32 {
15
-	var pid uint32
16
-	for _, p := range ctr.Processes {
17
-		if p.Pid == InitFriendlyName {
18
-			pid = p.SystemPid
19
-		}
20
-	}
21
-	return pid
22
-}
23
-
24
-// setPDeathSig sets the parent death signal to SIGKILL
25
-func setSysProcAttr(sid bool) *syscall.SysProcAttr {
26
-	return nil
27
-}
... ...
@@ -3,6 +3,8 @@ package libcontainerd
3 3
 import (
4 4
 	"strings"
5 5
 
6
+	"syscall"
7
+
6 8
 	opengcs "github.com/Microsoft/opengcs/client"
7 9
 )
8 10
 
... ...
@@ -36,3 +38,9 @@ func (c *container) debugGCS() {
36 36
 	}
37 37
 	cfg.DebugGCS()
38 38
 }
39
+
40
+// containerdSysProcAttr returns the SysProcAttr to use when exec'ing
41
+// containerd
42
+func containerdSysProcAttr() *syscall.SysProcAttr {
43
+	return nil
44
+}
... ...
@@ -69,8 +69,14 @@ func DefaultSolarisSpec() specs.Spec {
69 69
 func DefaultLinuxSpec() specs.Spec {
70 70
 	s := specs.Spec{
71 71
 		Version: specs.Version,
72
-		Process: &specs.Process{},
73
-		Root:    &specs.Root{},
72
+		Process: &specs.Process{
73
+			Capabilities: &specs.LinuxCapabilities{
74
+				Bounding:    defaultCapabilities(),
75
+				Permitted:   defaultCapabilities(),
76
+				Inheritable: defaultCapabilities(),
77
+				Effective:   defaultCapabilities(),
78
+			},
79
+		},
74 80
 	}
75 81
 	s.Mounts = []specs.Mount{
76 82
 		{
... ...
@@ -116,14 +122,6 @@ func DefaultLinuxSpec() specs.Spec {
116 116
 			Options:     []string{"nosuid", "noexec", "nodev", "mode=1777"},
117 117
 		},
118 118
 	}
119
-	s.Process = &specs.Process{
120
-		Capabilities: &specs.LinuxCapabilities{
121
-			Bounding:    defaultCapabilities(),
122
-			Permitted:   defaultCapabilities(),
123
-			Inheritable: defaultCapabilities(),
124
-			Effective:   defaultCapabilities(),
125
-		},
126
-	}
127 119
 
128 120
 	s.Linux = &specs.Linux{
129 121
 		MaskedPaths: []string{
... ...
@@ -48,9 +48,10 @@ func GetPluginGetter() plugingetter.PluginGetter {
48 48
 
49 49
 // authorizationPlugin is an internal adapter to docker plugin system
50 50
 type authorizationPlugin struct {
51
-	plugin *plugins.Client
52
-	name   string
53
-	once   sync.Once
51
+	initErr error
52
+	plugin  *plugins.Client
53
+	name    string
54
+	once    sync.Once
54 55
 }
55 56
 
56 57
 func newAuthorizationPlugin(name string) Plugin {
... ...
@@ -95,7 +96,6 @@ func (a *authorizationPlugin) AuthZResponse(authReq *Request) (*Response, error)
95 95
 // initPlugin initializes the authorization plugin if needed
96 96
 func (a *authorizationPlugin) initPlugin() error {
97 97
 	// Lazy loading of plugins
98
-	var err error
99 98
 	a.once.Do(func() {
100 99
 		if a.plugin == nil {
101 100
 			var plugin plugingetter.CompatPlugin
... ...
@@ -108,11 +108,11 @@ func (a *authorizationPlugin) initPlugin() error {
108 108
 				plugin, e = plugins.Get(a.name, AuthZApiImplements)
109 109
 			}
110 110
 			if e != nil {
111
-				err = e
111
+				a.initErr = e
112 112
 				return
113 113
 			}
114 114
 			a.plugin = plugin.Client()
115 115
 		}
116 116
 	})
117
-	return err
117
+	return a.initErr
118 118
 }
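
Caching the failure in a.initErr matters because sync.Once runs its closure at most once: with the old local err, a failed first lookup would leave a.plugin nil while every later call reported success. A minimal sketch of the corrected pattern, with hypothetical names:

package main

import (
	"errors"
	"fmt"
	"sync"
)

// lazyPlugin mimics authorizationPlugin's lazy init: the error from the
// one-time initialization is stored on the struct so every caller sees it.
type lazyPlugin struct {
	once    sync.Once
	initErr error
}

func (l *lazyPlugin) init() error {
	l.once.Do(func() {
		// Stand-in for the plugin lookup; pretend it fails.
		l.initErr = errors.New("plugin lookup failed")
	})
	return l.initErr
}

func main() {
	var l lazyPlugin
	fmt.Println(l.init()) // plugin lookup failed
	fmt.Println(l.init()) // still reports the cached failure, not nil
}
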
... ...
@@ -3,6 +3,8 @@ package mount
3 3
 import (
4 4
 	"sort"
5 5
 	"strings"
6
+
7
+	"github.com/sirupsen/logrus"
6 8
 )
7 9
 
8 10
 // GetMounts retrieves a list of mounts for the current running process.
... ...
@@ -74,12 +76,18 @@ func RecursiveUnmount(target string) error {
74 74
 		if !strings.HasPrefix(m.Mountpoint, target) {
75 75
 			continue
76 76
 		}
77
-		if err := Unmount(m.Mountpoint); err != nil && i == len(mounts)-1 {
77
+		logrus.Debugf("Trying to unmount %s", m.Mountpoint)
78
+		err = Unmount(m.Mountpoint)
79
+		if err != nil && i == len(mounts)-1 {
78 80
 			if mounted, err := Mounted(m.Mountpoint); err != nil || mounted {
79 81
 				return err
80 82
 			}
81 83
 			// Ignore errors for submounts and continue trying to unmount others
82 84
 			// The final unmount should fail if there are any submounts remaining
85
+		} else if err != nil {
86
+			logrus.Errorf("Failed to unmount %s: %v", m.Mountpoint, err)
87
+		} else if err == nil {
88
+			logrus.Debugf("Unmounted %s", m.Mountpoint)
83 89
 		}
84 90
 	}
85 91
 	return nil
86 92
new file mode 100644
... ...
@@ -0,0 +1,18 @@
0
+package system
1
+
2
+import "os"
3
+
4
+// IsProcessAlive returns true if the process with the given pid is running.
5
+func IsProcessAlive(pid int) bool {
6
+	_, err := os.FindProcess(pid)
7
+
8
+	return err == nil
9
+}
10
+
11
+// KillProcess force-stops a process.
12
+func KillProcess(pid int) {
13
+	p, err := os.FindProcess(pid)
14
+	if err == nil {
15
+		p.Kill()
16
+	}
17
+}
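
These helpers lean on os.FindProcess returning an error for a missing pid, which is the Windows behaviour (it opens a process handle); on Unix, FindProcess always succeeds, so the liveness probe there is typically a signal-0 check instead. A sketch of that Unix-side counterpart, shown only for contrast and not copied from any file in this diff:

package main

import (
	"fmt"
	"syscall"
)

// isProcessAlive probes a pid with signal 0: nothing is delivered, but the
// kernel still verifies the process exists (EPERM also implies existence).
func isProcessAlive(pid int) bool {
	err := syscall.Kill(pid, syscall.Signal(0))
	return err == nil || err == syscall.EPERM
}

func main() {
	fmt.Println(isProcessAlive(1)) // pid 1 exists on any running Linux system
}
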
... ...
@@ -26,7 +26,7 @@ func EnsureRemoveAll(dir string) error {
26 26
 
27 27
 	// track retries
28 28
 	exitOnErr := make(map[string]int)
29
-	maxRetry := 5
29
+	maxRetry := 50
30 30
 
31 31
 	// Attempt to unmount anything beneath this dir first
32 32
 	mount.RecursiveUnmount(dir)
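
Raising maxRetry from 5 to 50 gives EnsureRemoveAll a much larger budget for directories that stay busy while mounts are being torn down. The retry shape is roughly the loop below; the helper name and the stand-in remove function are illustrative, not the real implementation:

package main

import (
	"errors"
	"fmt"
)

// removeWithBudget keeps retrying a removal step until it succeeds or the
// retry budget is spent; the real EnsureRemoveAll also unmounts busy
// submounts between attempts.
func removeWithBudget(remove func() error, maxRetry int) error {
	var err error
	for i := 0; i < maxRetry; i++ {
		if err = remove(); err == nil {
			return nil
		}
	}
	return err
}

func main() {
	calls := 0
	flaky := func() error {
		calls++
		if calls < 3 {
			return errors.New("device or resource busy")
		}
		return nil
	}
	fmt.Println(removeWithBudget(flaky, 50)) // <nil> after a few busy attempts
}
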
... ...
@@ -1,22 +1,35 @@
1 1
 package containerd
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"io"
6
+	"path/filepath"
7
+	"sync"
5 8
 
9
+	"github.com/containerd/containerd"
10
+	"github.com/containerd/containerd/linux/runcopts"
11
+	"github.com/docker/docker/api/errdefs"
6 12
 	"github.com/docker/docker/libcontainerd"
7 13
 	"github.com/opencontainers/runtime-spec/specs-go"
8 14
 	"github.com/pkg/errors"
15
+	"github.com/sirupsen/logrus"
9 16
 )
10 17
 
18
+// PluginNamespace is the name used for the plugins namespace
19
+var PluginNamespace = "moby-plugins"
20
+
11 21
 // ExitHandler represents an object that is called when the exit event is received from containerd
12 22
 type ExitHandler interface {
13 23
 	HandleExitEvent(id string) error
14 24
 }
15 25
 
16 26
 // New creates a new containerd plugin executor
17
-func New(remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) {
18
-	e := &Executor{exitHandler: exitHandler}
19
-	client, err := remote.Client(e)
27
+func New(rootDir string, remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) {
28
+	e := &Executor{
29
+		rootDir:     rootDir,
30
+		exitHandler: exitHandler,
31
+	}
32
+	client, err := remote.NewClient(PluginNamespace, e)
20 33
 	if err != nil {
21 34
 		return nil, errors.Wrap(err, "error creating containerd exec client")
22 35
 	}
... ...
@@ -26,52 +39,108 @@ func New(remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error
26 26
 
27 27
 // Executor is the containerd client implementation of a plugin executor
28 28
 type Executor struct {
29
+	rootDir     string
29 30
 	client      libcontainerd.Client
30 31
 	exitHandler ExitHandler
31 32
 }
32 33
 
33 34
 // Create creates a new container
34 35
 func (e *Executor) Create(id string, spec specs.Spec, stdout, stderr io.WriteCloser) error {
35
-	return e.client.Create(id, "", "", spec, attachStreamsFunc(stdout, stderr))
36
+	opts := runcopts.RuncOptions{
37
+		RuntimeRoot: filepath.Join(e.rootDir, "runtime-root"),
38
+	}
39
+	ctx := context.Background()
40
+	err := e.client.Create(ctx, id, &spec, &opts)
41
+	if err != nil {
42
+		return err
43
+	}
44
+
45
+	_, err = e.client.Start(ctx, id, "", false, attachStreamsFunc(stdout, stderr))
46
+	return err
36 47
 }
37 48
 
38 49
 // Restore restores a container
39 50
 func (e *Executor) Restore(id string, stdout, stderr io.WriteCloser) error {
40
-	return e.client.Restore(id, attachStreamsFunc(stdout, stderr))
51
+	alive, _, err := e.client.Restore(context.Background(), id, attachStreamsFunc(stdout, stderr))
52
+	if err != nil && !errdefs.IsNotFound(err) {
53
+		return err
54
+	}
55
+	if !alive {
56
+		_, _, err = e.client.DeleteTask(context.Background(), id)
57
+		if err != nil && !errdefs.IsNotFound(err) {
58
+			logrus.WithError(err).Errorf("failed to delete container plugin %s task from containerd", id)
59
+			return err
60
+		}
61
+
62
+		err = e.client.Delete(context.Background(), id)
63
+		if err != nil && !errdefs.IsNotFound(err) {
64
+			logrus.WithError(err).Errorf("failed to delete container plugin %s from containerd", id)
65
+			return err
66
+		}
67
+	}
68
+	return nil
41 69
 }
42 70
 
43 71
 // IsRunning returns if the container with the given id is running
44 72
 func (e *Executor) IsRunning(id string) (bool, error) {
45
-	pids, err := e.client.GetPidsForContainer(id)
46
-	return len(pids) > 0, err
73
+	status, err := e.client.Status(context.Background(), id)
74
+	return status == libcontainerd.StatusRunning, err
47 75
 }
48 76
 
49 77
 // Signal sends the specified signal to the container
50 78
 func (e *Executor) Signal(id string, signal int) error {
51
-	return e.client.Signal(id, signal)
79
+	return e.client.SignalProcess(context.Background(), id, libcontainerd.InitProcessName, signal)
52 80
 }
53 81
 
54
-// StateChanged handles state changes from containerd
82
+// ProcessEvent handles events from containerd
55 83
 // All events are ignored except the exit event, which is sent off to the stored handler
56
-func (e *Executor) StateChanged(id string, event libcontainerd.StateInfo) error {
57
-	switch event.State {
58
-	case libcontainerd.StateExit:
59
-		return e.exitHandler.HandleExitEvent(id)
84
+func (e *Executor) ProcessEvent(id string, et libcontainerd.EventType, ei libcontainerd.EventInfo) error {
85
+	switch et {
86
+	case libcontainerd.EventExit:
87
+		// delete task and container
88
+		if _, _, err := e.client.DeleteTask(context.Background(), id); err != nil {
89
+			logrus.WithError(err).Errorf("failed to delete container plugin %s task from containerd", id)
90
+		}
91
+
92
+		if err := e.client.Delete(context.Background(), id); err != nil {
93
+			logrus.WithError(err).Errorf("failed to delete container plugin %s from containerd", id)
94
+		}
95
+		return e.exitHandler.HandleExitEvent(ei.ContainerID)
60 96
 	}
61 97
 	return nil
62 98
 }
63 99
 
64
-func attachStreamsFunc(stdout, stderr io.WriteCloser) func(libcontainerd.IOPipe) error {
65
-	return func(iop libcontainerd.IOPipe) error {
66
-		iop.Stdin.Close()
100
+type cio struct {
101
+	containerd.IO
102
+
103
+	wg sync.WaitGroup
104
+}
105
+
106
+func (c *cio) Wait() {
107
+	c.wg.Wait()
108
+	c.IO.Wait()
109
+}
110
+
111
+func attachStreamsFunc(stdout, stderr io.WriteCloser) libcontainerd.StdioCallback {
112
+	return func(iop *libcontainerd.IOPipe) (containerd.IO, error) {
113
+		if iop.Stdin != nil {
114
+			iop.Stdin.Close()
115
+			// closing stdin shouldn't be needed here; it should never be open
116
+			panic("plugin stdin shouldn't have been created!")
117
+		}
118
+
119
+		cio := &cio{IO: iop}
120
+		cio.wg.Add(2)
67 121
 		go func() {
68 122
 			io.Copy(stdout, iop.Stdout)
69 123
 			stdout.Close()
124
+			cio.wg.Done()
70 125
 		}()
71 126
 		go func() {
72 127
 			io.Copy(stderr, iop.Stderr)
73 128
 			stderr.Close()
129
+			cio.wg.Done()
74 130
 		}()
75
-		return nil
131
+		return cio, nil
76 132
 	}
77 133
 }
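
The cio wrapper above exists so that Wait returns only after both stream-copy goroutines have drained stdout and stderr; without the WaitGroup a caller could treat the task's IO as finished while plugin output is still in flight. A self-contained sketch of the same wrap-and-wait pattern, using illustrative names rather than the containerd types:

package main

import (
	"fmt"
	"io"
	"strings"
	"sync"
)

// baseIO stands in for containerd.IO: something with its own Wait.
type baseIO struct{}

func (baseIO) Wait() {}

// wrappedIO mirrors the cio type: Wait first waits for the copy goroutines,
// then for the underlying IO, so no output is lost.
type wrappedIO struct {
	baseIO
	wg sync.WaitGroup
}

func (w *wrappedIO) Wait() {
	w.wg.Wait()
	w.baseIO.Wait()
}

func main() {
	var out strings.Builder
	w := &wrappedIO{}
	w.wg.Add(1)
	go func() {
		io.Copy(&out, strings.NewReader("plugin output"))
		w.wg.Done()
	}()
	w.Wait()
	fmt.Println(out.String()) // everything copied is guaranteed to be here
}
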
... ...
@@ -23,7 +23,7 @@ import (
23 23
 	"golang.org/x/sys/unix"
24 24
 )
25 25
 
26
-func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error {
26
+func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) (err error) {
27 27
 	p.Rootfs = filepath.Join(pm.config.Root, p.PluginObj.ID, "rootfs")
28 28
 	if p.IsEnabled() && !force {
29 29
 		return errors.Wrap(enabledError(p.Name()), "plugin already enabled")
... ...
@@ -44,15 +44,15 @@ func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error {
44 44
 	if p.PropagatedMount != "" {
45 45
 		propRoot = filepath.Join(filepath.Dir(p.Rootfs), "propagated-mount")
46 46
 
47
-		if err := os.MkdirAll(propRoot, 0755); err != nil {
47
+		if err = os.MkdirAll(propRoot, 0755); err != nil {
48 48
 			logrus.Errorf("failed to create PropagatedMount directory at %s: %v", propRoot, err)
49 49
 		}
50 50
 
51
-		if err := mount.MakeRShared(propRoot); err != nil {
51
+		if err = mount.MakeRShared(propRoot); err != nil {
52 52
 			return errors.Wrap(err, "error setting up propagated mount dir")
53 53
 		}
54 54
 
55
-		if err := mount.Mount(propRoot, p.PropagatedMount, "none", "rbind"); err != nil {
55
+		if err = mount.Mount(propRoot, p.PropagatedMount, "none", "rbind"); err != nil {
56 56
 			return errors.Wrap(err, "error creating mount for propagated mount")
57 57
 		}
58 58
 	}
... ...
@@ -72,7 +72,6 @@ func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error {
72 72
 				logrus.Warnf("Could not unmount %s: %v", propRoot, err)
73 73
 			}
74 74
 		}
75
-		return errors.WithStack(err)
76 75
 	}
77 76
 
78 77
 	return pm.pluginPostStart(p, c)
... ...
@@ -159,6 +158,12 @@ func shutdownPlugin(p *v2.Plugin, c *controller, executor Executor) {
159 159
 			if err := executor.Signal(pluginID, int(unix.SIGKILL)); err != nil {
160 160
 				logrus.Errorf("Sending SIGKILL to plugin failed with error: %v", err)
161 161
 			}
162
+			select {
163
+			case <-c.exitChan:
164
+				logrus.Debug("SIGKILL plugin shutdown")
165
+			case <-time.After(time.Second * 10):
166
+				logrus.Debug("Force shutdown plugin FAILED")
167
+			}
162 168
 		}
163 169
 	}
164 170
 }