Replace execdrivers with containerd implementation

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@gmail.com>
Signed-off-by: Anusha Ragunathan <anusha@docker.com>

Tonis Tiigi authored on 2016/03/19 03:50:19
Showing 89 changed files
... ...
@@ -249,6 +249,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]

... ...
@@ -186,6 +186,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/tomlv github.com/BurntSushi/toml/cmd/tomlv \
 	&& rm -rf "$GOPATH"

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]

... ...
@@ -205,6 +205,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]

... ...
@@ -73,6 +73,24 @@ VOLUME /var/lib/docker
 WORKDIR /go/src/github.com/docker/docker
 ENV DOCKER_BUILDTAGS apparmor seccomp selinux

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]

... ...
@@ -197,6 +197,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]

... ...
@@ -176,6 +176,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]

... ...
@@ -29,6 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 		aufs-tools \
 	&& rm -rf /var/lib/apt/lists/*

+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 ENV AUTO_GOPATH 1
 WORKDIR /usr/src/docker
 COPY . /usr/src/docker
... ...
@@ -14,7 +14,6 @@ import (
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/signal"
-	"github.com/docker/docker/pkg/stringid"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/libnetwork/resolvconf/dns"
... ...
@@ -256,16 +255,6 @@ func (cli *DockerCli) CmdRun(args ...string) error {

 	// Attached mode
 	if *flAutoRemove {
-		// Warn user if they detached us
-		js, err := cli.client.ContainerInspect(context.Background(), createResponse.ID)
-		if err != nil {
-			return runStartContainerErr(err)
-		}
-		if js.State.Running == true || js.State.Paused == true {
-			fmt.Fprintf(cli.out, "Detached from %s, awaiting its termination in order to uphold \"--rm\".\n",
-				stringid.TruncateID(createResponse.ID))
-		}
-
 		// Autoremove: wait for the container to finish, retrieve
 		// the exit code and remove the container
 		if status, err = cli.client.ContainerWait(context.Background(), createResponse.ID); err != nil {
... ...
@@ -112,7 +112,9 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
 		if execStartCheck.Detach {
 			return err
 		}
+		stdout.Write([]byte(err.Error()))
 		logrus.Errorf("Error running exec in container: %v\n", err)
+		return err
 	}
 	return nil
 }
... ...
@@ -17,7 +17,6 @@ import (

 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/daemon/logger/jsonfilelog"
 	"github.com/docker/docker/daemon/network"
... ...
@@ -27,6 +26,7 @@ import (
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/signal"
 	"github.com/docker/docker/pkg/symlink"
+	"github.com/docker/docker/restartmanager"
 	"github.com/docker/docker/runconfig"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	"github.com/docker/docker/volume"
... ...
@@ -74,13 +74,12 @@ type CommonContainer struct {
 	HasBeenManuallyStopped bool // used for unless-stopped restart policy
 	MountPoints            map[string]*volume.MountPoint
 	HostConfig             *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable
-	Command                *execdriver.Command        `json:"-"`
-	monitor                *containerMonitor
-	ExecCommands           *exec.Store `json:"-"`
+	ExecCommands           *exec.Store                `json:"-"`
 	// logDriver for closing
-	LogDriver     logger.Logger  `json:"-"`
-	LogCopier     *logger.Copier `json:"-"`
-	attachContext *attachContext
+	LogDriver      logger.Logger  `json:"-"`
+	LogCopier      *logger.Copier `json:"-"`
+	restartManager restartmanager.RestartManager
+	attachContext  *attachContext
 }

 // NewBaseContainer creates a new container with its
... ...
@@ -276,19 +275,9 @@ func (container *Container) GetRootResourcePath(path string) (string, error) {
 // ExitOnNext signals to the monitor that it should not restart the container
 // after we send the kill signal.
 func (container *Container) ExitOnNext() {
-	container.monitor.ExitOnNext()
-}
-
-// Resize changes the TTY of the process running inside the container
-// to the given height and width. The container must be running.
-func (container *Container) Resize(h, w int) error {
-	if container.Command.ProcessConfig.Terminal == nil {
-		return fmt.Errorf("Container %s does not have a terminal ready", container.ID)
-	}
-	if err := container.Command.ProcessConfig.Terminal.Resize(h, w); err != nil {
-		return err
+	if container.restartManager != nil {
+		container.restartManager.Cancel()
 	}
-	return nil
 }

 // HostConfigPath returns the path to the container's JSON hostconfig
... ...
@@ -897,19 +886,33 @@ func (container *Container) BuildCreateEndpointOptions(n libnetwork.Network, epC

 // UpdateMonitor updates the monitor configuration for a running container
 func (container *Container) UpdateMonitor(restartPolicy containertypes.RestartPolicy) {
-	monitor := container.monitor
-	// No need to update monitor if container hasn't got one
-	// monitor will be generated correctly according to container
-	if monitor == nil {
-		return
+	type policySetter interface {
+		SetPolicy(containertypes.RestartPolicy)
+	}
+
+	if rm, ok := container.RestartManager(false).(policySetter); ok {
+		rm.SetPolicy(restartPolicy)
+	}
+}
+
+// FullHostname returns hostname and optional domain appended to it.
+func (container *Container) FullHostname() string {
+	fullHostname := container.Config.Hostname
+	if container.Config.Domainname != "" {
+		fullHostname = fmt.Sprintf("%s.%s", fullHostname, container.Config.Domainname)
 	}
+	return fullHostname
+}

-	monitor.mux.Lock()
-	// to check whether restart policy has changed.
-	if restartPolicy.Name != "" && !monitor.restartPolicy.IsSame(&restartPolicy) {
-		monitor.restartPolicy = restartPolicy
+// RestartManager returns the current restartmanager instance connected to container.
+func (container *Container) RestartManager(reset bool) restartmanager.RestartManager {
+	if reset {
+		container.RestartCount = 0
+	}
+	if container.restartManager == nil {
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
 	}
-	monitor.mux.Unlock()
+	return container.restartManager
 }

 type attachContext struct {
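
For illustration: the policySetter assertion in UpdateMonitor above lets the daemon change the restart policy without widening the restartmanager.RestartManager interface. A minimal sketch of the same optional-interface pattern, using illustrative names rather than the daemon's real types:

package main

import "fmt"

// Manager is the narrow interface most callers see.
type Manager interface {
	Cancel()
}

// manager additionally supports policy updates.
type manager struct{ policy string }

func (m *manager) Cancel()            {}
func (m *manager) SetPolicy(p string) { m.policy = p }

// updatePolicy mirrors UpdateMonitor: the policy is updated only if
// the concrete value happens to implement the optional method.
func updatePolicy(m Manager, p string) {
	type policySetter interface{ SetPolicy(string) }
	if ps, ok := m.(policySetter); ok {
		ps.SetPolicy(p)
	}
}

func main() {
	m := &manager{}
	updatePolicy(m, "on-failure")
	fmt.Println(m.policy) // on-failure
}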
... ...
@@ -11,7 +11,6 @@ import (
 	"syscall"

 	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/chrootarchive"
 	"github.com/docker/docker/pkg/symlink"
 	"github.com/docker/docker/pkg/system"
... ...
@@ -39,6 +38,15 @@ type Container struct {
 	NoNewPrivileges bool
 }

+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+	// The exit code with which the container exited.
+	ExitCode int
+
+	// Whether the container encountered an OOM.
+	OOMKilled bool
+}
+
 // CreateDaemonEnvironment returns the list of all environment variables given the list of
 // environment variables related to links.
 // Sets PATH, HOSTNAME and if container.Config.Tty is set: TERM.
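
The container-local ExitStatus replaces execdriver.ExitStatus now that the execdriver package is gone; setFromExitStatus (later in this diff) copies both fields into the container state. A tiny sketch of the convention, assuming the usual 128+signal encoding for an OOM kill:

package main

import "fmt"

// ExitStatus mirrors the struct added above.
type ExitStatus struct {
	ExitCode  int
	OOMKilled bool
}

func main() {
	// An OOM-killed process is typically SIGKILLed: 128 + 9 = 137.
	es := ExitStatus{ExitCode: 137, OOMKilled: true}
	fmt.Printf("exit=%d oom=%v\n", es.ExitCode, es.OOMKilled)
}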
... ...
@@ -57,7 +65,6 @@ func (container *Container) CreateDaemonEnvironment(linkedEnv []string) []string
 	// we need to replace the 'env' keys where they match and append anything
 	// else.
 	env = utils.ReplaceOrAppendEnvValues(env, container.Config.Env)
-
 	return env
 }

... ...
@@ -103,8 +110,8 @@ func appendNetworkMounts(container *Container, volumeMounts []volume.MountPoint)
 }

 // NetworkMounts returns the list of network mounts.
-func (container *Container) NetworkMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) NetworkMounts() []Mount {
+	var mounts []Mount
 	shared := container.HostConfig.NetworkMode.IsContainer()
 	if container.ResolvConfPath != "" {
 		if _, err := os.Stat(container.ResolvConfPath); err != nil {
... ...
@@ -115,7 +122,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.ResolvConfPath,
 				Destination: "/etc/resolv.conf",
 				Writable:    writable,
... ...
@@ -132,7 +139,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/hostname"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.HostnamePath,
 				Destination: "/etc/hostname",
 				Writable:    writable,
... ...
@@ -149,7 +156,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/hosts"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.HostsPath,
 				Destination: "/etc/hosts",
 				Writable:    writable,
... ...
@@ -224,37 +231,26 @@ func (container *Container) UnmountIpcMounts(unmount func(pth string) error) {
 }

 // IpcMounts returns the list of IPC mounts
-func (container *Container) IpcMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) IpcMounts() []Mount {
+	var mounts []Mount

 	if !container.HasMountFor("/dev/shm") {
 		label.SetFileLabel(container.ShmPath, container.MountLabel)
-		mounts = append(mounts, execdriver.Mount{
+		mounts = append(mounts, Mount{
 			Source:      container.ShmPath,
 			Destination: "/dev/shm",
 			Writable:    true,
 			Propagation: volume.DefaultPropagationMode,
 		})
 	}
-	return mounts
-}

-func updateCommand(c *execdriver.Command, resources containertypes.Resources) {
-	c.Resources.BlkioWeight = resources.BlkioWeight
-	c.Resources.CPUShares = resources.CPUShares
-	c.Resources.CPUPeriod = resources.CPUPeriod
-	c.Resources.CPUQuota = resources.CPUQuota
-	c.Resources.CpusetCpus = resources.CpusetCpus
-	c.Resources.CpusetMems = resources.CpusetMems
-	c.Resources.Memory = resources.Memory
-	c.Resources.MemorySwap = resources.MemorySwap
-	c.Resources.MemoryReservation = resources.MemoryReservation
-	c.Resources.KernelMemory = resources.KernelMemory
+	return mounts
 }

 // UpdateContainer updates configuration of a container.
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
 	container.Lock()
+	defer container.Unlock()

 	// update resources of container
 	resources := hostConfig.Resources
... ...
@@ -294,19 +290,8 @@ func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfi
 	if hostConfig.RestartPolicy.Name != "" {
 		container.HostConfig.RestartPolicy = hostConfig.RestartPolicy
 	}
-	container.Unlock()
-
-	// If container is not running, update hostConfig struct is enough,
-	// resources will be updated when the container is started again.
-	// If container is running (including paused), we need to update
-	// the command so we can update configs to the real world.
-	if container.IsRunning() {
-		container.Lock()
-		updateCommand(container.Command, *cResources)
-		container.Unlock()
-	}

-	if err := container.ToDiskLocking(); err != nil {
+	if err := container.ToDisk(); err != nil {
 		logrus.Errorf("Error saving updated container: %v", err)
 		return err
 	}
... ...
@@ -400,10 +385,10 @@ func copyOwnership(source, destination string) error {
 }

 // TmpfsMounts returns the list of tmpfs mounts
-func (container *Container) TmpfsMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) TmpfsMounts() []Mount {
+	var mounts []Mount
 	for dest, data := range container.HostConfig.Tmpfs {
-		mounts = append(mounts, execdriver.Mount{
+		mounts = append(mounts, Mount{
 			Source:      "tmpfs",
 			Destination: dest,
 			Data:        data,
... ...
@@ -5,7 +5,7 @@ import "sync"
 // memoryStore implements a Store in memory.
 type memoryStore struct {
 	s map[string]*Container
-	sync.Mutex
+	sync.RWMutex
 }

 // NewMemoryStore initializes a new memory store.
... ...
@@ -25,9 +25,9 @@ func (c *memoryStore) Add(id string, cont *Container) {

 // Get returns a container from the store by id.
 func (c *memoryStore) Get(id string) *Container {
-	c.Lock()
+	c.RLock()
 	res := c.s[id]
-	c.Unlock()
+	c.RUnlock()
 	return res
 }

... ...
@@ -42,26 +42,26 @@ func (c *memoryStore) Delete(id string) {
 // The containers are ordered by creation date.
 func (c *memoryStore) List() []*Container {
 	containers := new(History)
-	c.Lock()
+	c.RLock()
 	for _, cont := range c.s {
 		containers.Add(cont)
 	}
-	c.Unlock()
+	c.RUnlock()
 	containers.sort()
 	return *containers
 }

 // Size returns the number of containers in the store.
 func (c *memoryStore) Size() int {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 	return len(c.s)
 }

 // First returns the first container found in the store by a given filter.
 func (c *memoryStore) First(filter StoreFilter) *Container {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 	for _, cont := range c.s {
 		if filter(cont) {
 			return cont
... ...
@@ -72,9 +72,10 @@ func (c *memoryStore) First(filter StoreFilter) *Container {

 // ApplyAll calls the reducer function with every container in the store.
 // This operation is asynchronous in the memory store.
+// NOTE: Modifications to the store MUST NOT be done by the StoreReducer.
 func (c *memoryStore) ApplyAll(apply StoreReducer) {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()

 	wg := new(sync.WaitGroup)
 	for _, cont := range c.s {
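
With sync.RWMutex, the read-only paths (Get, List, Size, First, ApplyAll) can run concurrently while only writers serialize; the new NOTE exists because ApplyAll now holds just the read lock, so a reducer that wrote to the store would race. A minimal sketch of the read/write split, with illustrative types:

package main

import "sync"

type store struct {
	mu sync.RWMutex
	s  map[string]int
}

// Get takes only the read lock, so many Gets may run in parallel.
func (c *store) Get(id string) int {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.s[id]
}

// Add takes the write lock, excluding readers and other writers.
func (c *store) Add(id string, v int) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.s[id] = v
}

func main() {
	c := &store{s: map[string]int{}}
	c.Add("a", 1)
	_ = c.Get("a")
}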
... ...
@@ -1,24 +1,13 @@
 package container

 import (
-	"fmt"
-	"io"
-	"os/exec"
-	"strings"
-	"sync"
-	"syscall"
 	"time"

 	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/promise"
-	"github.com/docker/docker/pkg/stringid"
-	"github.com/docker/engine-api/types/container"
 )

 const (
-	defaultTimeIncrement = 100
-	loggerCloseTimeout   = 10 * time.Second
+	loggerCloseTimeout = 10 * time.Second
 )

 // supervisor defines the interface that a supervisor must implement
... ...
@@ -30,311 +19,13 @@ type supervisor interface {
 	// StartLogging starts the logging driver for the container
 	StartLogging(*Container) error
 	// Run starts a container
-	Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error)
+	Run(c *Container) error
 	// IsShuttingDown tells whether the supervisor is shutting down or not
 	IsShuttingDown() bool
 }

-// containerMonitor monitors the execution of a container's main process.
-// If a restart policy is specified for the container the monitor will ensure that the
-// process is restarted based on the rules of the policy.  When the container is finally stopped
-// the monitor will reset and cleanup any of the container resources such as networking allocations
-// and the rootfs
-type containerMonitor struct {
-	mux sync.Mutex
-
-	// supervisor keeps track of the container and the events it generates
-	supervisor supervisor
-
-	// container is the container being monitored
-	container *Container
-
-	// restartPolicy is the current policy being applied to the container monitor
-	restartPolicy container.RestartPolicy
-
-	// failureCount is the number of times the container has failed to
-	// start in a row
-	failureCount int
-
-	// shouldStop signals the monitor that the next time the container exits it is
-	// either because docker or the user asked for the container to be stopped
-	shouldStop bool
-
-	// startSignal is a channel that is closes after the container initially starts
-	startSignal chan struct{}
-
-	// stopChan is used to signal to the monitor whenever there is a wait for the
-	// next restart so that the timeIncrement is not honored and the user is not
-	// left waiting for nothing to happen during this time
-	stopChan chan struct{}
-
-	// timeIncrement is the amount of time to wait between restarts
-	// this is in milliseconds
-	timeIncrement int
-
-	// lastStartTime is the time which the monitor last exec'd the container's process
-	lastStartTime time.Time
-}
-
-// StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy
-// and starts the container's process.
-func (container *Container) StartMonitor(s supervisor) error {
-	container.monitor = &containerMonitor{
-		supervisor:    s,
-		container:     container,
-		restartPolicy: container.HostConfig.RestartPolicy,
-		timeIncrement: defaultTimeIncrement,
-		stopChan:      make(chan struct{}),
-		startSignal:   make(chan struct{}),
-	}
-
-	return container.monitor.wait()
-}
-
-// wait starts the container and wait until
-// we either receive an error from the initial start of the container's
-// process or until the process is running in the container
-func (m *containerMonitor) wait() error {
-	select {
-	case <-m.startSignal:
-	case err := <-promise.Go(m.start):
-		return err
-	}
-
-	return nil
-}
-
-// Stop signals to the container monitor that it should stop monitoring the container
-// for exits the next time the process dies
-func (m *containerMonitor) ExitOnNext() {
-	m.mux.Lock()
-
-	// we need to protect having a double close of the channel when stop is called
-	// twice or else we will get a panic
-	if !m.shouldStop {
-		m.shouldStop = true
-		close(m.stopChan)
-	}
-
-	m.mux.Unlock()
-}
-
-// Close closes the container's resources such as networking allocations and
-// unmounts the container's root filesystem
-func (m *containerMonitor) Close() error {
-	// Cleanup networking and mounts
-	m.supervisor.Cleanup(m.container)
-
-	if err := m.container.ToDisk(); err != nil {
-		logrus.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err)
-
-		return err
-	}
-
-	return nil
-}
-
-// Start starts the containers process and monitors it according to the restart policy
-func (m *containerMonitor) start() error {
-	var (
-		err        error
-		exitStatus execdriver.ExitStatus
-		// this variable indicates where we in execution flow:
-		// before Run or after
-		afterRun bool
-	)
-
-	// ensure that when the monitor finally exits we release the networking and unmount the rootfs
-	defer func() {
-		if afterRun {
-			m.container.Lock()
-			defer m.container.Unlock()
-			m.container.SetStopped(&exitStatus)
-		}
-		m.Close()
-	}()
-	// reset stopped flag
-	if m.container.HasBeenManuallyStopped {
-		m.container.HasBeenManuallyStopped = false
-	}
-
-	// reset the restart count
-	m.container.RestartCount = -1
-
-	for {
-		m.container.RestartCount++
-
-		if err := m.supervisor.StartLogging(m.container); err != nil {
-			m.resetContainer(false)
-
-			return err
-		}
-
-		pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin)
-
-		m.logEvent("start")
-
-		m.lastStartTime = time.Now()
-
-		if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil {
-			// if we receive an internal error from the initial start of a container then lets
-			// return it instead of entering the restart loop
-			// set to 127 for container cmd not found/does not exist)
-			if strings.Contains(err.Error(), "executable file not found") ||
-				strings.Contains(err.Error(), "no such file or directory") ||
-				strings.Contains(err.Error(), "system cannot find the file specified") {
-				if m.container.RestartCount == 0 {
-					m.container.ExitCode = 127
-					m.resetContainer(false)
-					return fmt.Errorf("Container command not found or does not exist.")
-				}
-			}
-			// set to 126 for container cmd can't be invoked errors
-			if strings.Contains(err.Error(), syscall.EACCES.Error()) {
-				if m.container.RestartCount == 0 {
-					m.container.ExitCode = 126
-					m.resetContainer(false)
-					return fmt.Errorf("Container command could not be invoked.")
-				}
-			}
-
-			if m.container.RestartCount == 0 {
-				m.container.ExitCode = -1
-				m.resetContainer(false)
-
-				return fmt.Errorf("Cannot start container %s: %v", m.container.ID, err)
-			}
-
-			logrus.Errorf("Error running container: %s", err)
-		}
-
-		// here container.Lock is already lost
-		afterRun = true
-
-		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
-
-		if m.shouldRestart(exitStatus.ExitCode) {
-			m.container.SetRestartingLocking(&exitStatus)
-			m.logEvent("die")
-			m.resetContainer(true)
-
-			// sleep with a small time increment between each restart to help avoid issues cased by quickly
-			// restarting the container because of some types of errors ( networking cut out, etc... )
-			m.waitForNextRestart()
-
-			// we need to check this before reentering the loop because the waitForNextRestart could have
-			// been terminated by a request from a user
-			if m.shouldStop {
-				return err
-			}
-			continue
-		}
-
-		m.logEvent("die")
-		m.resetContainer(true)
-		return err
-	}
-}
-
-// resetMonitor resets the stateful fields on the containerMonitor based on the
-// previous runs success or failure.  Regardless of success, if the container had
-// an execution time of more than 10s then reset the timer back to the default
-func (m *containerMonitor) resetMonitor(successful bool) {
-	executionTime := time.Now().Sub(m.lastStartTime).Seconds()
-
-	if executionTime > 10 {
-		m.timeIncrement = defaultTimeIncrement
-	} else {
-		// otherwise we need to increment the amount of time we wait before restarting
-		// the process.  We will build up by multiplying the increment by 2
-		m.timeIncrement *= 2
-	}
-
-	// the container exited successfully so we need to reset the failure counter
-	if successful {
-		m.failureCount = 0
-	} else {
-		m.failureCount++
-	}
-}
-
-// waitForNextRestart waits with the default time increment to restart the container unless
-// a user or docker asks for the container to be stopped
-func (m *containerMonitor) waitForNextRestart() {
-	select {
-	case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
-	case <-m.stopChan:
-	}
-}
-
-// shouldRestart checks the restart policy and applies the rules to determine if
-// the container's process should be restarted
-func (m *containerMonitor) shouldRestart(exitCode int) bool {
-	m.mux.Lock()
-	defer m.mux.Unlock()
-
-	// do not restart if the user or docker has requested that this container be stopped
-	if m.shouldStop {
-		m.container.HasBeenManuallyStopped = !m.supervisor.IsShuttingDown()
-		return false
-	}
-
-	switch {
-	case m.restartPolicy.IsAlways(), m.restartPolicy.IsUnlessStopped():
-		return true
-	case m.restartPolicy.IsOnFailure():
-		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
-		if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max {
-			logrus.Debugf("stopping restart of container %s because maximum failure could of %d has been reached",
-				stringid.TruncateID(m.container.ID), max)
-			return false
-		}
-
-		return exitCode != 0
-	}
-
-	return false
-}
-
-// callback ensures that the container's state is properly updated after we
-// received ack from the execution drivers
-func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-	go func() {
-		for range chOOM {
-			m.logEvent("oom")
-		}
-	}()
-
-	if processConfig.Tty {
-		// The callback is called after the process start()
-		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
-		// which we close here.
-		if c, ok := processConfig.Stdout.(io.Closer); ok {
-			c.Close()
-		}
-	}
-
-	m.container.SetRunning(pid)
-
-	// signal that the process has started
-	// close channel only if not closed
-	select {
-	case <-m.startSignal:
-	default:
-		close(m.startSignal)
-	}
-
-	if err := m.container.ToDiskLocking(); err != nil {
-		logrus.Errorf("Error saving container to disk: %v", err)
-	}
-	return nil
-}
-
-// resetContainer resets the container's IO and ensures that the command is able to be executed again
-// by copying the data into a new struct
-// if lock is true, then container locked during reset
-func (m *containerMonitor) resetContainer(lock bool) {
-	container := m.container
+// Reset puts a container into a state where it can be restarted again.
+func (container *Container) Reset(lock bool) {
 	if lock {
 		container.Lock()
 		defer container.Unlock()
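
The restart and backoff logic deleted here (defaultTimeIncrement, resetMonitor, waitForNextRestart) moves into the restartmanager package. A minimal sketch of the policy the old monitor implemented, with the constants copied from the removed code; the real restartmanager API may differ:

package main

import (
	"fmt"
	"time"
)

const defaultTimeIncrement = 100 * time.Millisecond

type backoff struct{ delay time.Duration }

// next doubles the wait after a short-lived run and resets to the
// default once a run survived more than ten seconds.
func (b *backoff) next(executionTime time.Duration) time.Duration {
	if executionTime > 10*time.Second || b.delay == 0 {
		b.delay = defaultTimeIncrement
	} else {
		b.delay *= 2
	}
	return b.delay
}

func main() {
	var b backoff
	fmt.Println(b.next(time.Second))      // 100ms
	fmt.Println(b.next(time.Second))      // 200ms
	fmt.Println(b.next(15 * time.Second)) // back to 100ms
}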
... ...
@@ -344,12 +35,6 @@ func (m *containerMonitor) resetContainer(lock bool) {
 		logrus.Errorf("%s: %s", container.ID, err)
 	}

-	if container.Command != nil && container.Command.ProcessConfig.Terminal != nil {
-		if err := container.Command.ProcessConfig.Terminal.Close(); err != nil {
-			logrus.Errorf("%s: Error closing terminal: %s", container.ID, err)
-		}
-	}
-
 	// Re-create a brand new stdin pipe once the container exited
 	if container.Config.OpenStdin {
 		container.NewInputPipes()
... ...
@@ -365,9 +50,6 @@ func (m *containerMonitor) resetContainer(lock bool) {
 			select {
 			case <-time.After(loggerCloseTimeout):
 				logrus.Warnf("Logger didn't exit in time: logs may be truncated")
-				container.LogCopier.Close()
-				// always waits for the LogCopier to finished before closing
-				<-exit
 			case <-exit:
 			}
 		}
... ...
@@ -375,22 +57,4 @@ func (m *containerMonitor) resetContainer(lock bool) {
 		container.LogCopier = nil
 		container.LogDriver = nil
 	}
-
-	c := container.Command.ProcessConfig.Cmd
-
-	container.Command.ProcessConfig.Cmd = exec.Cmd{
-		Stdin:       c.Stdin,
-		Stdout:      c.Stdout,
-		Stderr:      c.Stderr,
-		Path:        c.Path,
-		Env:         c.Env,
-		ExtraFiles:  c.ExtraFiles,
-		Args:        c.Args,
-		Dir:         c.Dir,
-		SysProcAttr: c.SysProcAttr,
-	}
-}
-
-func (m *containerMonitor) logEvent(action string) {
-	m.supervisor.LogContainerEvent(m.container, action)
 }
new file mode 100644
... ...
@@ -0,0 +1,12 @@
+// +build !windows
+
+package container
+
+// Mount contains information for a mount operation.
+type Mount struct {
+	Source      string `json:"source"`
+	Destination string `json:"destination"`
+	Writable    bool   `json:"writable"`
+	Data        string `json:"data"`
+	Propagation string `json:"mountpropagation"`
+}
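
container.Mount is the execdriver-free replacement for execdriver.Mount; the json tags describe how a mount is serialized when handed across to the new containerd-based layer. A quick serialization sketch, with the field set copied from the struct above and an illustrative source path:

package main

import (
	"encoding/json"
	"fmt"
)

type Mount struct {
	Source      string `json:"source"`
	Destination string `json:"destination"`
	Writable    bool   `json:"writable"`
	Data        string `json:"data"`
	Propagation string `json:"mountpropagation"`
}

func main() {
	m := Mount{
		Source:      "/var/lib/docker/containers/abc123/resolv.conf", // illustrative path
		Destination: "/etc/resolv.conf",
		Writable:    true,
	}
	b, _ := json.Marshal(m)
	fmt.Println(string(b))
}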
... ...
@@ -5,7 +5,6 @@ import (
 	"sync"
 	"time"

-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/go-units"
 )

... ...
@@ -179,28 +178,31 @@ func (s *State) getExitCode() int {
 }

 // SetRunning sets the state of the container to "running".
-func (s *State) SetRunning(pid int) {
+func (s *State) SetRunning(pid int, initial bool) {
 	s.Error = ""
 	s.Running = true
 	s.Paused = false
 	s.Restarting = false
 	s.ExitCode = 0
 	s.Pid = pid
-	s.StartedAt = time.Now().UTC()
+	if initial {
+		s.StartedAt = time.Now().UTC()
+	}
 	close(s.waitChan) // fire waiters for start
 	s.waitChan = make(chan struct{})
 }

 // SetStoppedLocking locks the container state and sets it to "stopped".
-func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetStoppedLocking(exitStatus *ExitStatus) {
 	s.Lock()
 	s.SetStopped(exitStatus)
 	s.Unlock()
 }

 // SetStopped sets the container state to "stopped" without locking.
-func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetStopped(exitStatus *ExitStatus) {
 	s.Running = false
+	s.Paused = false
 	s.Restarting = false
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
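
The new initial parameter keeps StartedAt stable across restarts: only the first transition to running stamps the start time, which is why the state tests later in this diff pass false. A simplified sketch of the semantics (not the real State type):

package main

import (
	"fmt"
	"time"
)

type State struct {
	Running   bool
	Pid       int
	StartedAt time.Time
}

// SetRunning stamps StartedAt only on the initial start; a restart
// (initial == false) preserves the original start time.
func (s *State) SetRunning(pid int, initial bool) {
	s.Running = true
	s.Pid = pid
	if initial {
		s.StartedAt = time.Now().UTC()
	}
}

func main() {
	var s State
	s.SetRunning(100, true)
	first := s.StartedAt
	s.SetRunning(101, false)              // restart: StartedAt unchanged
	fmt.Println(s.StartedAt.Equal(first)) // true
}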
... ...
@@ -211,7 +213,7 @@ func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {

 // SetRestartingLocking is used when docker handles the auto restart of containers when they are
 // in the middle of a stop and being restarted again
-func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetRestartingLocking(exitStatus *ExitStatus) {
 	s.Lock()
 	s.SetRestarting(exitStatus)
 	s.Unlock()
... ...
@@ -219,7 +221,7 @@ func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) {

 // SetRestarting sets the container state to "restarting".
 // It also sets the container PID to 0.
-func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetRestarting(exitStatus *ExitStatus) {
 	// we should consider the container running when it is restarting because of
 	// all the checks in docker around rm/stop/etc
 	s.Running = true
... ...
@@ -4,8 +4,6 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
-
-	"github.com/docker/docker/daemon/execdriver"
 )

 func TestStateRunStop(t *testing.T) {
... ...
@@ -19,7 +17,7 @@ func TestStateRunStop(t *testing.T) {
 			close(started)
 		}()
 		s.Lock()
-		s.SetRunning(i + 100)
+		s.SetRunning(i+100, false)
 		s.Unlock()

 		if !s.IsRunning() {
... ...
@@ -52,7 +50,7 @@ func TestStateRunStop(t *testing.T) {
 			atomic.StoreInt64(&exit, int64(exitCode))
 			close(stopped)
 		}()
-		s.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: i})
+		s.SetStoppedLocking(&ExitStatus{ExitCode: i})
 		if s.IsRunning() {
 			t.Fatal("State is running")
 		}
... ...
@@ -93,7 +91,7 @@ func TestStateTimeoutWait(t *testing.T) {
 	}

 	s.Lock()
-	s.SetRunning(49)
+	s.SetRunning(49, false)
 	s.Unlock()

 	stopped := make(chan struct{})
... ...
@@ -2,11 +2,9 @@

 package container

-import "github.com/docker/docker/daemon/execdriver"
-
 // setFromExitStatus is a platform specific helper function to set the state
 // based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *execdriver.ExitStatus) {
+func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
 	s.ExitCode = exitStatus.ExitCode
 	s.OOMKilled = exitStatus.OOMKilled
 }
new file mode 100644
... ...
@@ -0,0 +1,30 @@
+// +build linux
+
+package daemon
+
+import (
+	"github.com/Sirupsen/logrus"
+	aaprofile "github.com/docker/docker/profiles/apparmor"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+)
+
+// Define constants for native driver
+const (
+	defaultApparmorProfile = "docker-default"
+)
+
+func installDefaultAppArmorProfile() {
+	if apparmor.IsEnabled() {
+		if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
+			apparmorProfiles := []string{defaultApparmorProfile}
+
+			// Allow the daemon to run if loading failed, as the profiles may
+			// already be active (via another run, manually, or system startup)
+			for _, policy := range apparmorProfiles {
+				if err := aaprofile.IsLoaded(policy); err != nil {
+					logrus.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
+				}
+			}
+		}
+	}
+}
new file mode 100644
... ...
@@ -0,0 +1,6 @@
+// +build !linux
+
+package daemon
+
+func installDefaultAppArmorProfile() {
+}
new file mode 100644
... ...
@@ -0,0 +1,131 @@
+// +build !windows
+
+package caps
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/docker/docker/pkg/stringutils"
+	"github.com/syndtr/gocapability/capability"
+)
+
+var capabilityList Capabilities
+
+func init() {
+	last := capability.CAP_LAST_CAP
+	// hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap
+	if last == capability.Cap(63) {
+		last = capability.CAP_BLOCK_SUSPEND
+	}
+	for _, cap := range capability.List() {
+		if cap > last {
+			continue
+		}
+		capabilityList = append(capabilityList,
+			&CapabilityMapping{
+				Key:   "CAP_" + strings.ToUpper(cap.String()),
+				Value: cap,
+			},
+		)
+	}
+}
+
+type (
+	// CapabilityMapping maps a Linux capability name to its capability.Cap value.
+	// Capabilities is one of the security systems in the Linux Security Module (LSM)
+	// framework provided by the kernel.
+	// For more details on capabilities, see http://man7.org/linux/man-pages/man7/capabilities.7.html
+	CapabilityMapping struct {
+		Key   string         `json:"key,omitempty"`
+		Value capability.Cap `json:"value,omitempty"`
+	}
+	// Capabilities contains all CapabilityMapping
+	Capabilities []*CapabilityMapping
+)
+
+// String returns <key> of CapabilityMapping
+func (c *CapabilityMapping) String() string {
+	return c.Key
+}
+
+// GetCapability returns CapabilityMapping which contains specific key
+func GetCapability(key string) *CapabilityMapping {
+	for _, capp := range capabilityList {
+		if capp.Key == key {
+			cpy := *capp
+			return &cpy
+		}
+	}
+	return nil
+}
+
+// GetAllCapabilities returns all of the capabilities
+func GetAllCapabilities() []string {
+	output := make([]string, len(capabilityList))
+	for i, capability := range capabilityList {
+		output[i] = capability.String()
+	}
+	return output
+}
+
+// TweakCapabilities tweaks the capability set by adding or dropping capabilities
+// relative to the basic (default) set.
+func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
+	var (
+		newCaps []string
+		allCaps = GetAllCapabilities()
+	)
+
+	// FIXME(tonistiigi): docker format is without CAP_ prefix, oci is with prefix
+	// Currently they are mixed in here. We should do conversion in one place.
+
+	// look for invalid cap in the drop list
+	for _, cap := range drops {
+		if strings.ToLower(cap) == "all" {
+			continue
+		}
+
+		if !stringutils.InSlice(allCaps, "CAP_"+cap) {
+			return nil, fmt.Errorf("Unknown capability drop: %q", cap)
+		}
+	}
+
+	// handle --cap-add=all
+	if stringutils.InSlice(adds, "all") {
+		basics = allCaps
+	}
+
+	if !stringutils.InSlice(drops, "all") {
+		for _, cap := range basics {
+			// skip `all` already handled above
+			if strings.ToLower(cap) == "all" {
+				continue
+			}
+
+			// if we don't drop `all`, add back all the non-dropped caps
+			if !stringutils.InSlice(drops, cap[4:]) {
+				newCaps = append(newCaps, strings.ToUpper(cap))
+			}
+		}
+	}
+
+	for _, cap := range adds {
+		// skip `all` already handled above
+		if strings.ToLower(cap) == "all" {
+			continue
+		}
+
+		cap = "CAP_" + cap
+
+		if !stringutils.InSlice(allCaps, cap) {
+			return nil, fmt.Errorf("Unknown capability to add: %q", cap)
+		}
+
+		// add cap if not already in the list
+		if !stringutils.InSlice(newCaps, cap) {
+			newCaps = append(newCaps, strings.ToUpper(cap))
+		}
+	}
+	return newCaps, nil
+}
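
A condensed model of TweakCapabilities' add/drop semantics, minus the validation and the "all" handling on the add side (docker-style names without the CAP_ prefix for adds and drops, CAP_-prefixed basics, per the FIXME above):

package main

import (
	"fmt"
	"strings"
)

func contains(list []string, s string) bool {
	for _, v := range list {
		if strings.EqualFold(v, s) {
			return true
		}
	}
	return false
}

// tweak mirrors the core of TweakCapabilities: keep non-dropped
// basics (unless "all" is dropped), then append the adds.
func tweak(basics, adds, drops []string) []string {
	var out []string
	if !contains(drops, "all") {
		for _, c := range basics {
			if !contains(drops, strings.TrimPrefix(c, "CAP_")) {
				out = append(out, c)
			}
		}
	}
	for _, c := range adds {
		if !contains(out, "CAP_"+c) {
			out = append(out, "CAP_"+c)
		}
	}
	return out
}

func main() {
	basics := []string{"CAP_CHOWN", "CAP_NET_RAW"}
	fmt.Println(tweak(basics, []string{"SYS_ADMIN"}, []string{"NET_RAW"}))
	// [CAP_CHOWN CAP_SYS_ADMIN]
}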
... ...
@@ -115,7 +115,7 @@ func (config *Config) InstallCommonFlags(cmd *flag.FlagSet, usageFn func(string)
 	cmd.Var(opts.NewNamedListOptsRef("exec-opts", &config.ExecOptions, nil), []string{"-exec-opt"}, usageFn("Set exec driver options"))
 	cmd.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, defaultPidFile, usageFn("Path to use for daemon PID file"))
 	cmd.StringVar(&config.Root, []string{"g", "-graph"}, defaultGraph, usageFn("Root of the Docker runtime"))
-	cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, "/var/run/docker", usageFn("Root of the Docker execdriver"))
+	cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, defaultExecRoot, usageFn("Root of the Docker execdriver"))
 	cmd.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, usageFn("--restart on the daemon has been deprecated in favor of --restart policies on docker run"))
 	cmd.StringVar(&config.GraphDriver, []string{"s", "-storage-driver"}, "", usageFn("Storage driver to use"))
 	cmd.IntVar(&config.Mtu, []string{"#mtu", "-mtu"}, 0, usageFn("Set the containers network MTU"))
... ...
@@ -12,8 +12,9 @@ import (
 )

 var (
-	defaultPidFile = "/var/run/docker.pid"
-	defaultGraph   = "/var/lib/docker"
+	defaultPidFile  = "/var/run/docker.pid"
+	defaultGraph    = "/var/lib/docker"
+	defaultExecRoot = "/var/run/docker"
 )

 // Config defines the configuration of a docker daemon.
... ...
@@ -30,6 +31,7 @@ type Config struct {
 	RemappedRoot         string                   `json:"userns-remap,omitempty"`
 	CgroupParent         string                   `json:"cgroup-parent,omitempty"`
 	Ulimits              map[string]*units.Ulimit `json:"default-ulimits,omitempty"`
+	ContainerdAddr       string                   `json:"containerd,omitempty"`
 }

 // bridgeConfig stores all the bridge driver specific
... ...
@@ -80,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
 	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
+	cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerd socket"))

 	config.attachExperimentalFlags(cmd, usageFn)
 }
... ...
@@ -48,11 +48,10 @@ func (daemon *Daemon) buildSandboxOptions(container *container.Container, n libn
 		sboxOptions = append(sboxOptions, libnetwork.OptionUseDefaultSandbox())
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf"))
-	} else if daemon.execDriver.SupportsHooks() {
-		// OptionUseExternalKey is mandatory for userns support.
-		// But optional for non-userns support
-		sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 	}
+	// OptionUseExternalKey is mandatory for userns support.
+	// But optional for non-userns support
+	sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())

 	container.HostsPath, err = container.GetRootResourcePath("hosts")
 	if err != nil {
... ...
@@ -13,7 +13,6 @@ import (

 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/daemon/links"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/idtools"
... ...
@@ -22,13 +21,16 @@ import (
 	"github.com/docker/docker/runconfig"
 	containertypes "github.com/docker/engine-api/types/container"
 	networktypes "github.com/docker/engine-api/types/network"
-	"github.com/docker/go-units"
 	"github.com/docker/libnetwork"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/label"
+	"github.com/opencontainers/specs/specs-go"
 )

+func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
+func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
+
 func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
 	var env []string
 	children := daemon.children(container)
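
The u32Ptr and fmPtr helpers exist because the OCI runtime spec types use pointer fields to distinguish "unset" from a zero value. A small sketch of the pattern against an illustrative struct (not the real specs.Device):

package main

import (
	"fmt"
	"os"
)

func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }

// device is illustrative; pointer fields let an omitted value be
// told apart from an explicit 0 (e.g. UID 0 is root, not "unset").
type device struct {
	FileMode *os.FileMode
	UID      *uint32
}

func main() {
	d := device{FileMode: fmPtr(0666), UID: u32Ptr(0)}
	fmt.Printf("mode=%v uid=%d\n", *d.FileMode, *d.UID)
}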
... ...
@@ -64,220 +66,6 @@ func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]s
 	return env, nil
 }

-func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
-	var en *execdriver.Network
-	if !c.Config.NetworkDisabled {
-		en = &execdriver.Network{}
-		if !daemon.execDriver.SupportsHooks() || c.HostConfig.NetworkMode.IsHost() {
-			en.NamespacePath = c.NetworkSettings.SandboxKey
-		}
-
-		if c.HostConfig.NetworkMode.IsContainer() {
-			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
-			if err != nil {
-				return err
-			}
-			en.ContainerID = nc.ID
-		}
-	}
-
-	ipc := &execdriver.Ipc{}
-	var err error
-	c.ShmPath, err = c.ShmResourcePath()
-	if err != nil {
-		return err
-	}
-
-	if c.HostConfig.IpcMode.IsContainer() {
-		ic, err := daemon.getIpcContainer(c)
-		if err != nil {
-			return err
-		}
-		ipc.ContainerID = ic.ID
-		c.ShmPath = ic.ShmPath
-	} else {
-		ipc.HostIpc = c.HostConfig.IpcMode.IsHost()
-		if ipc.HostIpc {
-			if _, err := os.Stat("/dev/shm"); err != nil {
-				return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
-			}
-			c.ShmPath = "/dev/shm"
-		}
-	}
-
-	pid := &execdriver.Pid{}
-	pid.HostPid = c.HostConfig.PidMode.IsHost()
-
-	uts := &execdriver.UTS{
-		HostUTS: c.HostConfig.UTSMode.IsHost(),
-	}
-
-	// Build lists of devices allowed and created within the container.
-	var userSpecifiedDevices []*configs.Device
-	for _, deviceMapping := range c.HostConfig.Devices {
-		devs, err := getDevicesFromPath(deviceMapping)
-		if err != nil {
-			return err
-		}
-
-		userSpecifiedDevices = append(userSpecifiedDevices, devs...)
-	}
-
-	allowedDevices := mergeDevices(configs.DefaultAllowedDevices, userSpecifiedDevices)
-
-	autoCreatedDevices := mergeDevices(configs.DefaultAutoCreatedDevices, userSpecifiedDevices)
-
-	var rlimits []*units.Rlimit
-	ulimits := c.HostConfig.Ulimits
-
-	// Merge ulimits with daemon defaults
-	ulIdx := make(map[string]*units.Ulimit)
-	for _, ul := range ulimits {
-		ulIdx[ul.Name] = ul
-	}
-	for name, ul := range daemon.configStore.Ulimits {
-		if _, exists := ulIdx[name]; !exists {
-			ulimits = append(ulimits, ul)
-		}
-	}
-
-	weightDevices, err := getBlkioWeightDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	readBpsDevice, err := getBlkioReadBpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	writeBpsDevice, err := getBlkioWriteBpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	readIOpsDevice, err := getBlkioReadIOpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	writeIOpsDevice, err := getBlkioWriteIOpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	for _, limit := range ulimits {
-		rl, err := limit.GetRlimit()
-		if err != nil {
-			return err
-		}
-		rlimits = append(rlimits, rl)
-	}
-
-	resources := &execdriver.Resources{
-		CommonResources: execdriver.CommonResources{
-			Memory:            c.HostConfig.Memory,
-			MemoryReservation: c.HostConfig.MemoryReservation,
-			CPUShares:         c.HostConfig.CPUShares,
-			BlkioWeight:       c.HostConfig.BlkioWeight,
-		},
-		MemorySwap:                   c.HostConfig.MemorySwap,
-		KernelMemory:                 c.HostConfig.KernelMemory,
-		CpusetCpus:                   c.HostConfig.CpusetCpus,
-		CpusetMems:                   c.HostConfig.CpusetMems,
-		CPUPeriod:                    c.HostConfig.CPUPeriod,
-		CPUQuota:                     c.HostConfig.CPUQuota,
-		Rlimits:                      rlimits,
-		BlkioWeightDevice:            weightDevices,
-		BlkioThrottleReadBpsDevice:   readBpsDevice,
-		BlkioThrottleWriteBpsDevice:  writeBpsDevice,
-		BlkioThrottleReadIOpsDevice:  readIOpsDevice,
-		BlkioThrottleWriteIOpsDevice: writeIOpsDevice,
-		PidsLimit:                    c.HostConfig.PidsLimit,
-		MemorySwappiness:             -1,
-	}
-
-	if c.HostConfig.OomKillDisable != nil {
-		resources.OomKillDisable = *c.HostConfig.OomKillDisable
-	}
-	if c.HostConfig.MemorySwappiness != nil {
-		resources.MemorySwappiness = *c.HostConfig.MemorySwappiness
-	}
-
-	processConfig := execdriver.ProcessConfig{
-		CommonProcessConfig: execdriver.CommonProcessConfig{
-			Entrypoint: c.Path,
-			Arguments:  c.Args,
-			Tty:        c.Config.Tty,
-		},
-		Privileged: c.HostConfig.Privileged,
-		User:       c.Config.User,
-	}
-
-	processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
-	processConfig.Env = env
-
-	remappedRoot := &execdriver.User{}
-	if c.HostConfig.UsernsMode.IsPrivate() {
-		rootUID, rootGID := daemon.GetRemappedUIDGID()
-		if rootUID != 0 {
-			remappedRoot.UID = rootUID
-			remappedRoot.GID = rootGID
-		}
-	}
-
-	uidMap, gidMap := daemon.GetUIDGIDMaps()
-
-	if !daemon.seccompEnabled {
-		if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
-			return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.")
-		}
-		logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.")
-		c.SeccompProfile = "unconfined"
-	}
-
-	defaultCgroupParent := "/docker"
-	if daemon.configStore.CgroupParent != "" {
-		defaultCgroupParent = daemon.configStore.CgroupParent
-	} else if daemon.usingSystemd() {
-		defaultCgroupParent = "system.slice"
-	}
-	c.Command = &execdriver.Command{
-		CommonCommand: execdriver.CommonCommand{
-			ID:            c.ID,
-			MountLabel:    c.GetMountLabel(),
-			Network:       en,
-			ProcessConfig: processConfig,
-			ProcessLabel:  c.GetProcessLabel(),
-			Rootfs:        c.BaseFS,
-			Resources:     resources,
-			WorkingDir:    c.Config.WorkingDir,
-		},
-		AllowedDevices:     allowedDevices,
-		AppArmorProfile:    c.AppArmorProfile,
-		AutoCreatedDevices: autoCreatedDevices,
-		CapAdd:             c.HostConfig.CapAdd,
-		CapDrop:            c.HostConfig.CapDrop,
-		CgroupParent:       defaultCgroupParent,
-		GIDMapping:         gidMap,
-		GroupAdd:           c.HostConfig.GroupAdd,
-		Ipc:                ipc,
-		OomScoreAdj:        c.HostConfig.OomScoreAdj,
-		Pid:                pid,
-		ReadonlyRootfs:     c.HostConfig.ReadonlyRootfs,
-		RemappedRoot:       remappedRoot,
-		SeccompProfile:     c.SeccompProfile,
-		UIDMapping:         uidMap,
-		UTS:                uts,
-		NoNewPrivileges:    c.NoNewPrivileges,
-	}
-	if c.HostConfig.CgroupParent != "" {
-		c.Command.CgroupParent = c.HostConfig.CgroupParent
-	}
-
-	return nil
-}
-
 // getSize returns the real size & virtual size of the container.
 func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
 	var (
... ...
@@ -395,28 +183,49 @@ func (daemon *Daemon) getIpcContainer(container *container.Container) (*containe
395 395
 }
396 396
 
397 397
 func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
398
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
399
-	if !c.HasMountFor("/dev/shm") {
400
-		shmPath, err := c.ShmResourcePath()
398
+	var err error
399
+
400
+	c.ShmPath, err = c.ShmResourcePath()
401
+	if err != nil {
402
+		return err
403
+	}
404
+
405
+	if c.HostConfig.IpcMode.IsContainer() {
406
+		ic, err := daemon.getIpcContainer(c)
401 407
 		if err != nil {
402 408
 			return err
403 409
 		}
404
-
405
-		if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
406
-			return err
410
+		c.ShmPath = ic.ShmPath
411
+	} else if c.HostConfig.IpcMode.IsHost() {
412
+		if _, err := os.Stat("/dev/shm"); err != nil {
413
+			return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
407 414
 		}
415
+		c.ShmPath = "/dev/shm"
416
+	} else {
417
+		rootUID, rootGID := daemon.GetRemappedUIDGID()
418
+		if !c.HasMountFor("/dev/shm") {
419
+			shmPath, err := c.ShmResourcePath()
420
+			if err != nil {
421
+				return err
422
+			}
408 423
 
409
-		shmSize := container.DefaultSHMSize
410
-		if c.HostConfig.ShmSize != 0 {
411
-			shmSize = c.HostConfig.ShmSize
412
-		}
413
-		shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
414
-		if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
415
-			return fmt.Errorf("mounting shm tmpfs: %s", err)
416
-		}
417
-		if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
418
-			return err
424
+			if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
425
+				return err
426
+			}
427
+
428
+			shmSize := container.DefaultSHMSize
429
+			if c.HostConfig.ShmSize != 0 {
430
+				shmSize = c.HostConfig.ShmSize
431
+			}
432
+			shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
433
+			if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
434
+				return fmt.Errorf("mounting shm tmpfs: %s", err)
435
+			}
436
+			if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
437
+				return err
438
+			}
419 439
 		}
440
+
420 441
 	}
421 442
 
422 443
 	return nil
... ...
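For reference, the private-IPC branch above mounts a fresh tmpfs on the container's shm path with mode 1777 and a size taken from HostConfig.ShmSize. A minimal standalone sketch of how that option string is assembled, assuming the 64MB default that container.DefaultSHMSize carries:

package main

import (
	"fmt"
	"strconv"
)

// Assumed to mirror container.DefaultSHMSize (64MB).
const defaultSHMSize int64 = 67108864

// shmMountOptions rebuilds the "mode=1777,size=..." string used when
// mounting the per-container /dev/shm tmpfs above.
func shmMountOptions(shmSize int64) string {
	if shmSize == 0 {
		shmSize = defaultSHMSize
	}
	return "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
}

func main() {
	fmt.Println(shmMountOptions(0)) // mode=1777,size=67108864
}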
@@ -474,7 +283,19 @@ func killProcessDirectly(container *container.Container) error {
474 474
 	return nil
475 475
 }
476 476
 
477
-func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*configs.Device, err error) {
477
+func specDevice(d *configs.Device) specs.Device {
478
+	return specs.Device{
479
+		Type:     string(d.Type),
480
+		Path:     d.Path,
481
+		Major:    d.Major,
482
+		Minor:    d.Minor,
483
+		FileMode: fmPtr(int64(d.FileMode)),
484
+		UID:      u32Ptr(int64(d.Uid)),
485
+		GID:      u32Ptr(int64(d.Gid)),
486
+	}
487
+}
488
+
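specDevice leans on two pointer helpers, u32Ptr and fmPtr, that this excerpt does not show; a hedged sketch of what they presumably look like (their real definitions live elsewhere in this commit). The OCI spec uses pointer fields so an unset value stays distinguishable from zero:

package main

import (
	"fmt"
	"os"
)

// Presumed shape of the helpers used by specDevice.
func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }

func main() {
	fmt.Println(*u32Ptr(0), *fmPtr(0600))
}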
489
+func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, err error) {
478 490
 	resolvedPathOnHost := deviceMapping.PathOnHost
479 491
 
480 492
 	// check if it is a symbolic link
... ...
@@ -488,7 +309,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con
488 488
 	// if there was no error, return the device
489 489
 	if err == nil {
490 490
 		device.Path = deviceMapping.PathInContainer
491
-		return append(devs, device), nil
491
+		return append(devs, specDevice(device)), nil
492 492
 	}
493 493
 
494 494
 	// if the device is not a device node
... ...
@@ -508,7 +329,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con
508 508
 
509 509
 				// add the device to userSpecified devices
510 510
 				childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
511
-				devs = append(devs, childDevice)
511
+				devs = append(devs, specDevice(childDevice))
512 512
 
513 513
 				return nil
514 514
 			})
... ...
@@ -20,13 +20,12 @@ import (
20 20
 	"time"
21 21
 
22 22
 	"github.com/Sirupsen/logrus"
23
+	containerd "github.com/docker/containerd/api/grpc/types"
23 24
 	"github.com/docker/docker/api"
24 25
 	"github.com/docker/docker/builder"
25 26
 	"github.com/docker/docker/container"
26 27
 	"github.com/docker/docker/daemon/events"
27 28
 	"github.com/docker/docker/daemon/exec"
28
-	"github.com/docker/docker/daemon/execdriver"
29
-	"github.com/docker/docker/daemon/execdriver/execdrivers"
30 29
 	"github.com/docker/docker/errors"
31 30
 	"github.com/docker/engine-api/types"
32 31
 	containertypes "github.com/docker/engine-api/types/container"
... ...
@@ -46,12 +45,12 @@ import (
46 46
 	"github.com/docker/docker/image"
47 47
 	"github.com/docker/docker/image/tarexport"
48 48
 	"github.com/docker/docker/layer"
49
+	"github.com/docker/docker/libcontainerd"
49 50
 	"github.com/docker/docker/migrate/v1"
50 51
 	"github.com/docker/docker/pkg/archive"
51 52
 	"github.com/docker/docker/pkg/fileutils"
52 53
 	"github.com/docker/docker/pkg/graphdb"
53 54
 	"github.com/docker/docker/pkg/idtools"
54
-	"github.com/docker/docker/pkg/mount"
55 55
 	"github.com/docker/docker/pkg/namesgenerator"
56 56
 	"github.com/docker/docker/pkg/progress"
57 57
 	"github.com/docker/docker/pkg/registrar"
... ...
@@ -115,7 +114,6 @@ type Daemon struct {
115 115
 	trustKey                  libtrust.PrivateKey
116 116
 	idIndex                   *truncindex.TruncIndex
117 117
 	configStore               *Config
118
-	execDriver                execdriver.Driver
119 118
 	statsCollector            *statsCollector
120 119
 	defaultLogConfig          containertypes.LogConfig
121 120
 	RegistryService           *registry.Service
... ...
@@ -132,6 +130,8 @@ type Daemon struct {
132 132
 	imageStore                image.Store
133 133
 	nameIndex                 *registrar.Registrar
134 134
 	linkIndex                 *linkIndex
135
+	containerd                libcontainerd.Client
136
+	defaultIsolation          containertypes.Isolation // Default isolation mode on Windows
135 137
 }
136 138
 
137 139
 // GetContainer looks for a container using the provided information, which could be
... ...
@@ -220,36 +220,16 @@ func (daemon *Daemon) registerName(container *container.Container) error {
220 220
 }
221 221
 
222 222
 // Register makes a container object usable by the daemon as <container.ID>
223
-func (daemon *Daemon) Register(container *container.Container) error {
223
+func (daemon *Daemon) Register(c *container.Container) error {
224 224
 	// Attach to stdout and stderr
225
-	if container.Config.OpenStdin {
226
-		container.NewInputPipes()
225
+	if c.Config.OpenStdin {
226
+		c.NewInputPipes()
227 227
 	} else {
228
-		container.NewNopInputPipe()
228
+		c.NewNopInputPipe()
229 229
 	}
230 230
 
231
-	daemon.containers.Add(container.ID, container)
232
-	daemon.idIndex.Add(container.ID)
233
-
234
-	if container.IsRunning() {
235
-		logrus.Debugf("killing old running container %s", container.ID)
236
-		// Set exit code to 128 + SIGKILL (9) to properly represent unsuccessful exit
237
-		container.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: 137})
238
-		// use the current driver and ensure that the container is dead x.x
239
-		cmd := &execdriver.Command{
240
-			CommonCommand: execdriver.CommonCommand{
241
-				ID: container.ID,
242
-			},
243
-		}
244
-		daemon.execDriver.Terminate(cmd)
245
-
246
-		container.UnmountIpcMounts(mount.Unmount)
247
-
248
-		daemon.Unmount(container)
249
-		if err := container.ToDiskLocking(); err != nil {
250
-			logrus.Errorf("Error saving stopped state to disk: %v", err)
251
-		}
252
-	}
231
+	daemon.containers.Add(c.ID, c)
232
+	daemon.idIndex.Add(c.ID)
253 233
 
254 234
 	return nil
255 235
 }
... ...
@@ -307,17 +287,38 @@ func (daemon *Daemon) restore() error {
307 307
 			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
308 308
 			continue
309 309
 		}
310
+	}
311
+	var wg sync.WaitGroup
312
+	var mapLock sync.Mutex
313
+	for _, c := range containers {
314
+		wg.Add(1)
315
+		go func(c *container.Container) {
316
+			defer wg.Done()
317
+			if c.IsRunning() || c.IsPaused() {
318
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
319
+					logrus.Errorf("Failed to restore with containerd: %q", err)
320
+					return
321
+				}
322
+			}
323
+			// fixme: only if not running
324
+			// get list of containers we need to restart
325
+			if daemon.configStore.AutoRestart && !c.IsRunning() && !c.IsPaused() && c.ShouldRestart() {
326
+				mapLock.Lock()
327
+				restartContainers[c] = make(chan struct{})
328
+				mapLock.Unlock()
329
+			} else if !c.IsRunning() && !c.IsPaused() {
330
+				if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil {
331
+					daemon.cleanupMountsByID(mountid)
332
+				}
333
+			}
310 334
 
311
-		// get list of containers we need to restart
312
-		if daemon.configStore.AutoRestart && c.ShouldRestart() {
313
-			restartContainers[c] = make(chan struct{})
314
-		}
315
-
316
-		// if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated
317
-		if c.HostConfig != nil && c.HostConfig.Links == nil {
318
-			migrateLegacyLinks = true
319
-		}
335
+			// if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated
336
+			if c.HostConfig != nil && c.HostConfig.Links == nil {
337
+				migrateLegacyLinks = true
338
+			}
339
+		}(c)
320 340
 	}
341
+	wg.Wait()
321 342
 
322 343
 	// migrate any legacy links from sqlite
323 344
 	linkdbFile := filepath.Join(daemon.root, "linkgraph.db")
... ...
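The restore path now fans out one goroutine per container and funnels restart candidates into a shared map, which is why mapLock exists. The same pattern in isolation:

package main

import (
	"fmt"
	"sync"
)

func main() {
	ids := []string{"c1", "c2", "c3"}
	restartContainers := make(map[string]chan struct{})

	var wg sync.WaitGroup
	var mapLock sync.Mutex
	for _, id := range ids {
		wg.Add(1)
		go func(id string) {
			defer wg.Done()
			// Concurrent writes to the shared map must be serialized.
			mapLock.Lock()
			restartContainers[id] = make(chan struct{})
			mapLock.Unlock()
		}(id)
	}
	wg.Wait()
	fmt.Println(len(restartContainers), "containers queued for restart")
}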
@@ -599,7 +600,7 @@ func (daemon *Daemon) registerLink(parent, child *container.Container, alias str
599 599
 
600 600
 // NewDaemon sets up everything for the daemon to be able to service
601 601
 // requests from the webserver.
602
-func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemon, err error) {
602
+func NewDaemon(config *Config, registryService *registry.Service, containerdRemote libcontainerd.Remote) (daemon *Daemon, err error) {
603 603
 	setDefaultMtu(config)
604 604
 
605 605
 	// Ensure we have compatible and valid configuration options
... ...
@@ -659,7 +660,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
659 659
 	}
660 660
 	os.Setenv("TMPDIR", realTmp)
661 661
 
662
-	d := &Daemon{}
662
+	d := &Daemon{configStore: config}
663 663
 	// Ensure the daemon is properly shutdown if there is a failure during
664 664
 	// initialization
665 665
 	defer func() {
... ...
@@ -670,6 +671,11 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
670 670
 		}
671 671
 	}()
672 672
 
673
+	// Set the default isolation mode (only applicable on Windows)
674
+	if err := d.setDefaultIsolation(); err != nil {
675
+		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
676
+	}
677
+
673 678
 	// Verify logging driver type
674 679
 	if config.LogConfig.Type != "none" {
675 680
 		if _, err := logger.GetLogDriver(config.LogConfig.Type); err != nil {
... ...
@@ -682,6 +688,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
682 682
 		logrus.Warnf("Failed to configure golang's threads limit: %v", err)
683 683
 	}
684 684
 
685
+	installDefaultAppArmorProfile()
685 686
 	daemonRepo := filepath.Join(config.Root, "containers")
686 687
 	if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
687 688
 		return nil, err
... ...
@@ -781,11 +788,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
781 781
 		return nil, fmt.Errorf("Devices cgroup isn't mounted")
782 782
 	}
783 783
 
784
-	ed, err := execdrivers.NewDriver(config.ExecOptions, config.ExecRoot, config.Root, sysInfo)
785
-	if err != nil {
786
-		return nil, err
787
-	}
788
-
789 784
 	d.ID = trustKey.PublicKey().KeyID()
790 785
 	d.repository = daemonRepo
791 786
 	d.containers = container.NewMemoryStore()
... ...
@@ -794,8 +796,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
794 794
 	d.distributionMetadataStore = distributionMetadataStore
795 795
 	d.trustKey = trustKey
796 796
 	d.idIndex = truncindex.NewTruncIndex([]string{})
797
-	d.configStore = config
798
-	d.execDriver = ed
799 797
 	d.statsCollector = d.newStatsCollector(1 * time.Second)
800 798
 	d.defaultLogConfig = containertypes.LogConfig{
801 799
 		Type:   config.LogConfig.Type,
... ...
@@ -812,10 +812,12 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
812 812
 	d.nameIndex = registrar.NewRegistrar()
813 813
 	d.linkIndex = newLinkIndex()
814 814
 
815
-	if err := d.cleanupMounts(); err != nil {
815
+	go d.execCommandGC()
816
+
817
+	d.containerd, err = containerdRemote.Client(d)
818
+	if err != nil {
816 819
 		return nil, err
817 820
 	}
818
-	go d.execCommandGC()
819 821
 
820 822
 	if err := d.restore(); err != nil {
821 823
 		return nil, err
... ...
@@ -877,6 +879,9 @@ func (daemon *Daemon) Shutdown() error {
877 877
 				logrus.Errorf("Stop container error: %v", err)
878 878
 				return
879 879
 			}
880
+			if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil {
881
+				daemon.cleanupMountsByID(mountid)
882
+			}
880 883
 			logrus.Debugf("container stopped %s", c.ID)
881 884
 		})
882 885
 	}
... ...
@@ -923,29 +928,16 @@ func (daemon *Daemon) Mount(container *container.Container) error {
923 923
 }
924 924
 
925 925
 // Unmount unsets the container base filesystem
926
-func (daemon *Daemon) Unmount(container *container.Container) {
926
+func (daemon *Daemon) Unmount(container *container.Container) error {
927 927
 	if err := container.RWLayer.Unmount(); err != nil {
928 928
 		logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
929
+		return err
929 930
 	}
930
-}
931
-
932
-// Run uses the execution driver to run a given container
933
-func (daemon *Daemon) Run(c *container.Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) {
934
-	hooks := execdriver.Hooks{
935
-		Start: startCallback,
936
-	}
937
-	hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
938
-		return daemon.setNetworkNamespaceKey(c.ID, pid)
939
-	})
940
-	return daemon.execDriver.Run(c.Command, pipes, hooks)
931
+	return nil
941 932
 }
942 933
 
943 934
 func (daemon *Daemon) kill(c *container.Container, sig int) error {
944
-	return daemon.execDriver.Kill(c.Command, sig)
945
-}
946
-
947
-func (daemon *Daemon) stats(c *container.Container) (*execdriver.ResourceStats, error) {
948
-	return daemon.execDriver.Stats(c.ID)
935
+	return daemon.containerd.Signal(c.ID, sig)
949 936
 }
950 937
 
951 938
 func (daemon *Daemon) subscribeToContainerStats(c *container.Container) chan interface{} {
... ...
@@ -1322,12 +1314,6 @@ func (daemon *Daemon) GraphDriverName() string {
1322 1322
 	return daemon.layerStore.DriverName()
1323 1323
 }
1324 1324
 
1325
-// ExecutionDriver returns the currently used driver for creating and
1326
-// starting execs in a container.
1327
-func (daemon *Daemon) ExecutionDriver() execdriver.Driver {
1328
-	return daemon.execDriver
1329
-}
1330
-
1331 1325
 // GetUIDGIDMaps returns the current daemon's user namespace settings
1332 1326
 // for the full uid and gid maps which will be applied to containers
1333 1327
 // started in this instance.
... ...
@@ -1536,7 +1522,7 @@ func (daemon *Daemon) IsShuttingDown() bool {
1536 1536
 }
1537 1537
 
1538 1538
 // GetContainerStats collects all the stats published by a container
1539
-func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error) {
1539
+func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
1540 1540
 	stats, err := daemon.stats(container)
1541 1541
 	if err != nil {
1542 1542
 		return nil, err
... ...
@@ -1547,7 +1533,22 @@ func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdr
1547 1547
 	if nwStats, err = daemon.getNetworkStats(container); err != nil {
1548 1548
 		return nil, err
1549 1549
 	}
1550
-	stats.Interfaces = nwStats
1550
+
1551
+	stats.Networks = make(map[string]types.NetworkStats)
1552
+	for _, iface := range nwStats {
1553
+		// For API version >= 1.21, the original network data of each
1554
+		// interface is returned.
1555
+		stats.Networks[iface.Name] = types.NetworkStats{
1556
+			RxBytes:   iface.RxBytes,
1557
+			RxPackets: iface.RxPackets,
1558
+			RxErrors:  iface.RxErrors,
1559
+			RxDropped: iface.RxDropped,
1560
+			TxBytes:   iface.TxBytes,
1561
+			TxPackets: iface.TxPackets,
1562
+			TxErrors:  iface.TxErrors,
1563
+			TxDropped: iface.TxDropped,
1564
+		}
1565
+	}
1551 1566
 
1552 1567
 	return stats, nil
1553 1568
 }
... ...
@@ -1735,3 +1736,16 @@ func (daemon *Daemon) networkOptions(dconfig *Config) ([]nwconfig.Option, error)
1735 1735
 	options = append(options, driverOptions(dconfig)...)
1736 1736
 	return options, nil
1737 1737
 }
1738
+
1739
+func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry {
1740
+	out := make([]types.BlkioStatEntry, len(entries))
1741
+	for i, re := range entries {
1742
+		out[i] = types.BlkioStatEntry{
1743
+			Major: re.Major,
1744
+			Minor: re.Minor,
1745
+			Op:    re.Op,
1746
+			Value: re.Value,
1747
+		}
1748
+	}
1749
+	return out
1750
+}
... ...
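copyBlkioEntry is a deliberate field-by-field bridge: it keeps containerd's gRPC types from leaking into the engine-api surface. The pattern, reduced to stand-in types:

package main

import "fmt"

// Stand-ins for containerd's gRPC entry and the engine-api entry.
type grpcEntry struct {
	Major, Minor, Value uint64
	Op                  string
}

type apiEntry struct {
	Major, Minor, Value uint64
	Op                  string
}

func copyEntries(in []*grpcEntry) []apiEntry {
	out := make([]apiEntry, len(in))
	for i, e := range in {
		out[i] = apiEntry{Major: e.Major, Minor: e.Minor, Op: e.Op, Value: e.Value}
	}
	return out
}

func main() {
	fmt.Println(copyEntries([]*grpcEntry{{Major: 8, Minor: 0, Op: "Read", Value: 42}}))
}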
@@ -12,6 +12,64 @@ import (
12 12
 	"github.com/docker/docker/pkg/mount"
13 13
 )
14 14
 
15
+func (daemon *Daemon) cleanupMountsByID(id string) error {
16
+	logrus.Debugf("Cleaning up old mountid %s: start.", id)
17
+	f, err := os.Open("/proc/self/mountinfo")
18
+	if err != nil {
19
+		return err
20
+	}
21
+	defer f.Close()
22
+
23
+	return daemon.cleanupMountsFromReaderByID(f, id, mount.Unmount)
24
+}
25
+
26
+func (daemon *Daemon) cleanupMountsFromReaderByID(reader io.Reader, id string, unmount func(target string) error) error {
27
+	if daemon.root == "" {
28
+		return nil
29
+	}
30
+	var errors []string
31
+	mountRoot := ""
32
+	shmSuffix := "/" + id + "/shm"
33
+	mergedSuffix := "/" + id + "/merged"
34
+	sc := bufio.NewScanner(reader)
35
+	for sc.Scan() {
36
+		line := sc.Text()
37
+		fields := strings.Fields(line)
38
+		if strings.HasPrefix(fields[4], daemon.root) {
39
+			logrus.Debugf("Mount base: %v", fields[4])
40
+			mnt := fields[4]
41
+			if strings.HasSuffix(mnt, shmSuffix) || strings.HasSuffix(mnt, mergedSuffix) {
42
+				logrus.Debugf("Unmounting %v", mnt)
43
+				if err := unmount(mnt); err != nil {
44
+					logrus.Error(err)
45
+					errors = append(errors, err.Error())
46
+				}
47
+			} else if mountBase := filepath.Base(mnt); mountBase == id {
48
+				mountRoot = mnt
49
+			}
50
+		}
51
+	}
52
+
53
+	if mountRoot != "" {
54
+		logrus.Debugf("Unmounting %v", mountRoot)
55
+		if err := unmount(mountRoot); err != nil {
56
+			logrus.Error(err)
57
+			errors = append(errors, err.Error())
58
+		}
59
+	}
60
+
61
+	if err := sc.Err(); err != nil {
62
+		return err
63
+	}
64
+
65
+	if len(errors) > 0 {
66
+		return fmt.Errorf("Error cleaningup mounts:\n%v", strings.Join(errors, "\n"))
67
+	}
68
+
69
+	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: done.")
70
+	return nil
71
+}
72
+
15 73
 // cleanupMounts unmounts shm/mqueue mounts for old containers
16 74
 func (daemon *Daemon) cleanupMounts() error {
17 75
 	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: start.")
... ...
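Both cleanup variants key off fields[4] of /proc/self/mountinfo. Per proc(5), each line reads: mount ID, parent ID, major:minor, root, mount point, options, ... so the fifth whitespace-separated field is the mount point. A sketch against a fixture-style line (the container ID is shortened here for readability):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// A mountinfo-style line; fields[4] is the mount point.
	line := "116 160 0:107 / /var/lib/docker/containers/d045dc44/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k"
	fields := strings.Fields(line)
	fmt.Println(fields[4]) // /var/lib/docker/containers/d045dc44/shm
}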
@@ -25,7 +83,7 @@ func (daemon *Daemon) cleanupMounts() error {
25 25
 }
26 26
 
27 27
 func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(target string) error) error {
28
-	if daemon.repository == "" {
28
+	if daemon.root == "" {
29 29
 		return nil
30 30
 	}
31 31
 	sc := bufio.NewScanner(reader)
... ...
@@ -37,7 +95,7 @@ func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(tar
37 37
 			logrus.Debugf("Mount base: %v", fields[4])
38 38
 			mnt := fields[4]
39 39
 			mountBase := filepath.Base(mnt)
40
-			if mountBase == "mqueue" || mountBase == "shm" || mountBase == "merged" {
40
+			if mountBase == "shm" || mountBase == "merged" {
41 41
 				logrus.Debugf("Unmounting %v", mnt)
42 42
 				if err := unmount(mnt); err != nil {
43 43
 					logrus.Error(err)
... ...
@@ -7,53 +7,83 @@ import (
7 7
 	"testing"
8 8
 )
9 9
 
10
+const mountsFixture = `142 78 0:38 / / rw,relatime - aufs none rw,si=573b861da0b3a05b,dio
11
+143 142 0:60 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
12
+144 142 0:67 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
13
+145 144 0:78 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
14
+146 144 0:49 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
15
+147 142 0:84 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
16
+148 147 0:86 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
17
+149 148 0:22 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset
18
+150 148 0:25 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu
19
+151 148 0:27 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct
20
+152 148 0:28 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
21
+153 148 0:29 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
22
+154 148 0:30 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
23
+155 148 0:31 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
24
+156 148 0:32 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
25
+157 148 0:33 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
26
+158 148 0:35 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd
27
+159 142 8:4 /home/mlaventure/gopath /home/mlaventure/gopath rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
28
+160 142 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
29
+164 142 8:4 /home/mlaventure/gopath/src/github.com/docker/docker /go/src/github.com/docker/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
30
+165 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
31
+166 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
32
+167 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
33
+168 144 0:39 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
34
+169 144 0:12 /14 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
35
+83 147 0:10 / /sys/kernel/security rw,relatime - securityfs none rw
36
+89 142 0:87 / /tmp rw,relatime - tmpfs none rw
37
+97 142 0:60 / /run/docker/netns/default rw,nosuid,nodev,noexec,relatime - proc proc rw
38
+100 160 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data/aufs /var/lib/docker/aufs rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
39
+115 100 0:102 / /var/lib/docker/aufs/mnt/0ecda1c63e5b58b3d89ff380bf646c95cc980252cf0b52466d43619aec7c8432 rw,relatime - aufs none rw,si=573b861dbc01905b,dio
40
+116 160 0:107 / /var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
41
+118 142 0:102 / /run/docker/libcontainerd/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/rootfs rw,relatime - aufs none rw,si=573b861dbc01905b,dio
42
+242 142 0:60 / /run/docker/netns/c3664df2a0f7 rw,nosuid,nodev,noexec,relatime - proc proc rw
43
+120 100 0:122 / /var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d rw,relatime - aufs none rw,si=573b861eb147805b,dio
44
+171 142 0:122 / /run/docker/libcontainerd/e406ff6f3e18516d50e03dbca4de54767a69a403a6f7ec1edc2762812824521e/rootfs rw,relatime - aufs none rw,si=573b861eb147805b,dio
45
+310 142 0:60 / /run/docker/netns/71a18572176b rw,nosuid,nodev,noexec,relatime - proc proc rw
46
+`
47
+
10 48
 func TestCleanupMounts(t *testing.T) {
11
-	fixture := `230 138 0:60 / / rw,relatime - overlay overlay rw,lowerdir=/var/lib/docker/overlay/0ef9f93d5d365c1385b09d54bbee6afff3d92002c16f22eccb6e1549b2ff97d8/root,upperdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/upper,workdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/work
12
-231 230 0:56 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
13
-232 230 0:57 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
14
-233 232 0:58 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
15
-234 232 0:59 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
16
-235 232 0:55 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
17
-236 230 0:61 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
18
-237 236 0:62 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw
19
-238 237 0:21 /system.slice/docker.service /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
20
-239 237 0:23 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
21
-240 237 0:24 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children
22
-241 237 0:25 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
23
-242 237 0:26 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
24
-243 237 0:27 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct
25
-244 237 0:28 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
26
-245 237 0:29 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio
27
-246 237 0:30 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
28
-247 237 0:31 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
29
-248 230 253:1 /var/lib/docker/volumes/510cc41ac68c48bd4eac932e3e09711673876287abf1b185312cfbfe6261a111/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
30
-250 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
31
-251 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
32
-252 232 0:13 /1 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
33
-139 236 0:11 / /sys/kernel/security rw,relatime - securityfs none rw
34
-140 230 0:54 / /tmp rw,relatime - tmpfs none rw
35
-145 230 0:3 / /run/docker/netns/default rw - nsfs nsfs rw
36
-130 140 0:45 / /tmp/docker_recursive_mount_test312125472/tmpfs rw,relatime - tmpfs tmpfs rw
37
-131 230 0:3 / /run/docker/netns/47903e2e6701 rw - nsfs nsfs rw
38
-133 230 0:55 / /go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw`
49
+	d := &Daemon{
50
+		root: "/var/lib/docker/",
51
+	}
52
+
53
+	expected := "/var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm"
54
+	var unmounted int
55
+	unmount := func(target string) error {
56
+		if target == expected {
57
+			unmounted++
58
+		}
59
+		return nil
60
+	}
39 61
 
62
+	d.cleanupMountsFromReader(strings.NewReader(mountsFixture), unmount)
63
+
64
+	if unmounted != 1 {
65
+		t.Fatalf("Expected to unmount the shm (and the shm only)")
66
+	}
67
+}
68
+
69
+func TestCleanupMountsByID(t *testing.T) {
40 70
 	d := &Daemon{
41
-		repository: "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/",
71
+		root: "/var/lib/docker/",
42 72
 	}
43 73
 
44
-	expected := "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue"
45
-	var unmounted bool
74
+	expected := "/var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d"
75
+	var unmounted int
46 76
 	unmount := func(target string) error {
47 77
 		if target == expected {
48
-			unmounted = true
78
+			unmounted++
49 79
 		}
50 80
 		return nil
51 81
 	}
52 82
 
53
-	d.cleanupMountsFromReader(strings.NewReader(fixture), unmount)
83
+	d.cleanupMountsFromReaderByID(strings.NewReader(mountsFixture), "03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d", unmount)
54 84
 
55
-	if !unmounted {
56
-		t.Fatalf("Expected to unmount the mqueue")
85
+	if unmounted != 1 {
86
+		t.Fatalf("Expected to unmount the auf root (and that only)")
57 87
 	}
58 88
 }
59 89
 
... ...
@@ -13,6 +13,7 @@ import (
13 13
 	"strconv"
14 14
 	"strings"
15 15
 	"syscall"
16
+	"time"
16 17
 
17 18
 	"github.com/Sirupsen/logrus"
18 19
 	"github.com/docker/docker/container"
... ...
@@ -25,6 +26,7 @@ import (
25 25
 	"github.com/docker/docker/reference"
26 26
 	"github.com/docker/docker/runconfig"
27 27
 	runconfigopts "github.com/docker/docker/runconfig/opts"
28
+	"github.com/docker/engine-api/types"
28 29
 	pblkiodev "github.com/docker/engine-api/types/blkiodev"
29 30
 	containertypes "github.com/docker/engine-api/types/container"
30 31
 	"github.com/docker/libnetwork"
... ...
@@ -33,10 +35,10 @@ import (
33 33
 	"github.com/docker/libnetwork/ipamutils"
34 34
 	"github.com/docker/libnetwork/netlabel"
35 35
 	"github.com/docker/libnetwork/options"
36
-	"github.com/docker/libnetwork/types"
37
-	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
36
+	lntypes "github.com/docker/libnetwork/types"
38 37
 	"github.com/opencontainers/runc/libcontainer/label"
39 38
 	"github.com/opencontainers/runc/libcontainer/user"
39
+	"github.com/opencontainers/specs/specs-go"
40 40
 )
41 41
 
42 42
 const (
... ...
@@ -51,16 +53,81 @@ const (
51 51
 	defaultRemappedID  string = "dockremap"
52 52
 )
53 53
 
54
-func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
54
+func getMemoryResources(config containertypes.Resources) *specs.Memory {
55
+	memory := specs.Memory{}
56
+
57
+	if config.Memory > 0 {
58
+		limit := uint64(config.Memory)
59
+		memory.Limit = &limit
60
+	}
61
+
62
+	if config.MemoryReservation > 0 {
63
+		reservation := uint64(config.MemoryReservation)
64
+		memory.Reservation = &reservation
65
+	}
66
+
67
+	if config.MemorySwap != 0 {
68
+		swap := uint64(config.MemorySwap)
69
+		memory.Swap = &swap
70
+	}
71
+
72
+	if config.MemorySwappiness != nil {
73
+		swappiness := uint64(*config.MemorySwappiness)
74
+		memory.Swappiness = &swappiness
75
+	}
76
+
77
+	if config.KernelMemory != 0 {
78
+		kernelMemory := uint64(config.KernelMemory)
79
+		memory.Kernel = &kernelMemory
80
+	}
81
+
82
+	return &memory
83
+}
84
+
85
+func getCPUResources(config containertypes.Resources) *specs.CPU {
86
+	cpu := specs.CPU{}
87
+
88
+	if config.CPUShares != 0 {
89
+		shares := uint64(config.CPUShares)
90
+		cpu.Shares = &shares
91
+	}
92
+
93
+	if config.CpusetCpus != "" {
94
+		cpuset := config.CpusetCpus
95
+		cpu.Cpus = &cpuset
96
+	}
97
+
98
+	if config.CpusetMems != "" {
99
+		cpuset := config.CpusetMems
100
+		cpu.Mems = &cpuset
101
+	}
102
+
103
+	if config.CPUPeriod != 0 {
104
+		period := uint64(config.CPUPeriod)
105
+		cpu.Period = &period
106
+	}
107
+
108
+	if config.CPUQuota != 0 {
109
+		quota := uint64(config.CPUQuota)
110
+		cpu.Quota = &quota
111
+	}
112
+
113
+	return &cpu
114
+}
115
+
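getMemoryResources and getCPUResources share one convention: a spec field is only populated when the HostConfig value is meaningful, because the OCI structs use pointers to separate "unset" from zero. A compact illustration with a stand-in struct:

package main

import "fmt"

// Stand-in mirroring the pointer-field convention of specs.Memory.
type memory struct {
	Limit *uint64
}

func getMemory(configMemory int64) *memory {
	m := memory{}
	if configMemory > 0 {
		limit := uint64(configMemory)
		m.Limit = &limit // only set when configured; nil means "unset"
	}
	return &m
}

func main() {
	fmt.Println(getMemory(0).Limit == nil) // true: left unset
	fmt.Println(*getMemory(1 << 20).Limit) // 1048576
}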
116
+func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
55 117
 	var stat syscall.Stat_t
56
-	var blkioWeightDevices []*blkiodev.WeightDevice
118
+	var blkioWeightDevices []specs.WeightDevice
57 119
 
58 120
 	for _, weightDevice := range config.BlkioWeightDevice {
59 121
 		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
60 122
 			return nil, err
61 123
 		}
62
-		weightDevice := blkiodev.NewWeightDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), weightDevice.Weight, 0)
63
-		blkioWeightDevices = append(blkioWeightDevices, weightDevice)
124
+		weight := weightDevice.Weight
125
+		d := specs.WeightDevice{Weight: &weight}
126
+		d.Major = int64(stat.Rdev / 256)
127
+		d.Minor = int64(stat.Rdev % 256)
128
+		blkioWeightDevices = append(blkioWeightDevices, d)
64 129
 	}
65 130
 
66 131
 	return blkioWeightDevices, nil
... ...
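The Major/Minor derivation these helpers repeat is the classic Linux encoding for small device numbers: major = rdev/256, minor = rdev%256 (the kernel's full encoding is wider; golang.org/x/sys/unix ships Major/Minor helpers for the general case). A standalone sketch:

package main

import (
	"fmt"
	"syscall"
)

func main() {
	var stat syscall.Stat_t
	if err := syscall.Stat("/dev/null", &stat); err != nil {
		fmt.Println(err)
		return
	}
	// Valid for small device numbers only.
	major := int64(stat.Rdev / 256)
	minor := int64(stat.Rdev % 256)
	fmt.Printf("/dev/null = %d:%d\n", major, minor) // typically 1:3
}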
@@ -99,61 +166,73 @@ func parseSecurityOpt(container *container.Container, config *containertypes.Hos
99 99
 	return err
100 100
 }
101 101
 
102
-func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
103
-	var blkioReadIOpsDevice []*blkiodev.ThrottleDevice
102
+func getBlkioReadIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
103
+	var blkioReadIOpsDevice []specs.ThrottleDevice
104 104
 	var stat syscall.Stat_t
105 105
 
106 106
 	for _, iopsDevice := range config.BlkioDeviceReadIOps {
107 107
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
108 108
 			return nil, err
109 109
 		}
110
-		readIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate)
111
-		blkioReadIOpsDevice = append(blkioReadIOpsDevice, readIOpsDevice)
110
+		rate := iopsDevice.Rate
111
+		d := specs.ThrottleDevice{Rate: &rate}
112
+		d.Major = int64(stat.Rdev / 256)
113
+		d.Minor = int64(stat.Rdev % 256)
114
+		blkioReadIOpsDevice = append(blkioReadIOpsDevice, d)
112 115
 	}
113 116
 
114 117
 	return blkioReadIOpsDevice, nil
115 118
 }
116 119
 
117
-func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
118
-	var blkioWriteIOpsDevice []*blkiodev.ThrottleDevice
120
+func getBlkioWriteIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
121
+	var blkioWriteIOpsDevice []specs.ThrottleDevice
119 122
 	var stat syscall.Stat_t
120 123
 
121 124
 	for _, iopsDevice := range config.BlkioDeviceWriteIOps {
122 125
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
123 126
 			return nil, err
124 127
 		}
125
-		writeIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate)
126
-		blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, writeIOpsDevice)
128
+		rate := iopsDevice.Rate
129
+		d := specs.ThrottleDevice{Rate: &rate}
130
+		d.Major = int64(stat.Rdev / 256)
131
+		d.Minor = int64(stat.Rdev % 256)
132
+		blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, d)
127 133
 	}
128 134
 
129 135
 	return blkioWriteIOpsDevice, nil
130 136
 }
131 137
 
132
-func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
133
-	var blkioReadBpsDevice []*blkiodev.ThrottleDevice
138
+func getBlkioReadBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
139
+	var blkioReadBpsDevice []specs.ThrottleDevice
134 140
 	var stat syscall.Stat_t
135 141
 
136 142
 	for _, bpsDevice := range config.BlkioDeviceReadBps {
137 143
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
138 144
 			return nil, err
139 145
 		}
140
-		readBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate)
141
-		blkioReadBpsDevice = append(blkioReadBpsDevice, readBpsDevice)
146
+		rate := bpsDevice.Rate
147
+		d := specs.ThrottleDevice{Rate: &rate}
148
+		d.Major = int64(stat.Rdev / 256)
149
+		d.Minor = int64(stat.Rdev % 256)
150
+		blkioReadBpsDevice = append(blkioReadBpsDevice, d)
142 151
 	}
143 152
 
144 153
 	return blkioReadBpsDevice, nil
145 154
 }
146 155
 
147
-func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
148
-	var blkioWriteBpsDevice []*blkiodev.ThrottleDevice
156
+func getBlkioWriteBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
157
+	var blkioWriteBpsDevice []specs.ThrottleDevice
149 158
 	var stat syscall.Stat_t
150 159
 
151 160
 	for _, bpsDevice := range config.BlkioDeviceWriteBps {
152 161
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
153 162
 			return nil, err
154 163
 		}
155
-		writeBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate)
156
-		blkioWriteBpsDevice = append(blkioWriteBpsDevice, writeBpsDevice)
164
+		rate := bpsDevice.Rate
165
+		d := specs.ThrottleDevice{Rate: &rate}
166
+		d.Major = int64(stat.Rdev / 256)
167
+		d.Minor = int64(stat.Rdev % 256)
168
+		blkioWriteBpsDevice = append(blkioWriteBpsDevice, d)
157 169
 	}
158 170
 
159 171
 	return blkioWriteBpsDevice, nil
... ...
@@ -594,8 +673,8 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e
594 594
 
595 595
 	nw, nw6List, err := ipamutils.ElectInterfaceAddresses(bridgeName)
596 596
 	if err == nil {
597
-		ipamV4Conf.PreferredPool = types.GetIPNetCanonical(nw).String()
598
-		hip, _ := types.GetHostPartIP(nw.IP, nw.Mask)
597
+		ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
598
+		hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
599 599
 		if hip.IsGlobalUnicast() {
600 600
 			ipamV4Conf.Gateway = nw.IP.String()
601 601
 		}
... ...
@@ -947,11 +1026,69 @@ func (daemon *Daemon) conditionalMountOnStart(container *container.Container) er
947 947
 
948 948
 // conditionalUnmountOnCleanup is a platform specific helper function called
949 949
 // during the cleanup of a container to unmount.
950
-func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) {
951
-	daemon.Unmount(container)
950
+func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
951
+	return daemon.Unmount(container)
952 952
 }
953 953
 
954 954
 func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error {
955 955
 	// Unix has no custom images to register
956 956
 	return nil
957 957
 }
958
+
959
+func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
960
+	if !c.IsRunning() {
961
+		return nil, errNotRunning{c.ID}
962
+	}
963
+	stats, err := daemon.containerd.Stats(c.ID)
964
+	if err != nil {
965
+		return nil, err
966
+	}
967
+	s := &types.StatsJSON{}
968
+	cgs := stats.CgroupStats
969
+	if cgs != nil {
970
+		s.BlkioStats = types.BlkioStats{
971
+			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
972
+			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
973
+			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
974
+			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
975
+			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
976
+			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
977
+			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
978
+			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
979
+		}
980
+		cpu := cgs.CpuStats
981
+		s.CPUStats = types.CPUStats{
982
+			CPUUsage: types.CPUUsage{
983
+				TotalUsage:        cpu.CpuUsage.TotalUsage,
984
+				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
985
+				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
986
+				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
987
+			},
988
+			ThrottlingData: types.ThrottlingData{
989
+				Periods:          cpu.ThrottlingData.Periods,
990
+				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
991
+				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
992
+			},
993
+		}
994
+		mem := cgs.MemoryStats.Usage
995
+		s.MemoryStats = types.MemoryStats{
996
+			Usage:    mem.Usage,
997
+			MaxUsage: mem.MaxUsage,
998
+			Stats:    cgs.MemoryStats.Stats,
999
+			Failcnt:  mem.Failcnt,
1000
+		}
1001
+		if cgs.PidsStats != nil {
1002
+			s.PidsStats = types.PidsStats{
1003
+				Current: cgs.PidsStats.Current,
1004
+			}
1005
+		}
1006
+	}
1007
+	s.Read = time.Unix(int64(stats.Timestamp), 0)
1008
+	return s, nil
1009
+}
1010
+
1011
+// setDefaultIsolation determines the default isolation mode for the
1012
+// daemon to run in. This is only applicable on Windows.
1013
+func (daemon *Daemon) setDefaultIsolation() error {
1014
+	return nil
1015
+}
... ...
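setDefaultIsolation is deliberately a no-op in the Unix build. A hedged sketch of the Windows counterpart this stub pairs with — the defaultIsolation field matches the Daemon struct addition earlier in this diff; everything else is assumption, not code from this change:

// Hypothetical daemon_windows.go counterpart (assumption): default to
// process isolation when nothing is configured.
func (daemon *Daemon) setDefaultIsolation() error {
	daemon.defaultIsolation = containertypes.Isolation("process")
	return nil
}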
@@ -129,9 +129,6 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
129 129
 		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err)
130 130
 	}
131 131
 
132
-	if err = daemon.execDriver.Clean(container.ID); err != nil {
133
-		return fmt.Errorf("Unable to remove execdriver data for %s: %s", container.ID, err)
134
-	}
135 132
 	return nil
136 133
 }
137 134
 
... ...
@@ -11,10 +11,9 @@ import (
11 11
 	"github.com/Sirupsen/logrus"
12 12
 	"github.com/docker/docker/container"
13 13
 	"github.com/docker/docker/daemon/exec"
14
-	"github.com/docker/docker/daemon/execdriver"
15 14
 	"github.com/docker/docker/errors"
15
+	"github.com/docker/docker/libcontainerd"
16 16
 	"github.com/docker/docker/pkg/pools"
17
-	"github.com/docker/docker/pkg/promise"
18 17
 	"github.com/docker/docker/pkg/term"
19 18
 	"github.com/docker/engine-api/types"
20 19
 	"github.com/docker/engine-api/types/strslice"
... ...
@@ -106,33 +105,31 @@ func (d *Daemon) ContainerExecCreate(config *types.ExecConfig) (string, error) {
106 106
 		}
107 107
 	}
108 108
 
109
-	processConfig := &execdriver.ProcessConfig{
110
-		CommonProcessConfig: execdriver.CommonProcessConfig{
111
-			Tty:        config.Tty,
112
-			Entrypoint: entrypoint,
113
-			Arguments:  args,
114
-		},
115
-	}
116
-	setPlatformSpecificExecProcessConfig(config, container, processConfig)
117
-
118 109
 	execConfig := exec.NewConfig()
119 110
 	execConfig.OpenStdin = config.AttachStdin
120 111
 	execConfig.OpenStdout = config.AttachStdout
121 112
 	execConfig.OpenStderr = config.AttachStderr
122
-	execConfig.ProcessConfig = processConfig
123 113
 	execConfig.ContainerID = container.ID
124 114
 	execConfig.DetachKeys = keys
115
+	execConfig.Entrypoint = entrypoint
116
+	execConfig.Args = args
117
+	execConfig.Tty = config.Tty
118
+	execConfig.Privileged = config.Privileged
119
+	execConfig.User = config.User
120
+	if len(execConfig.User) == 0 {
121
+		execConfig.User = container.Config.User
122
+	}
125 123
 
126 124
 	d.registerExecCommand(container, execConfig)
127 125
 
128
-	d.LogContainerEvent(container, "exec_create: "+execConfig.ProcessConfig.Entrypoint+" "+strings.Join(execConfig.ProcessConfig.Arguments, " "))
126
+	d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "))
129 127
 
130 128
 	return execConfig.ID, nil
131 129
 }
132 130
 
133 131
 // ContainerExecStart starts a previously set up exec instance. The
134 132
 // std streams are set up.
135
-func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error {
133
+func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
136 134
 	var (
137 135
 		cStdin           io.ReadCloser
138 136
 		cStdout, cStderr io.Writer
... ...
@@ -155,11 +152,18 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
155 155
 		return fmt.Errorf("Error: Exec command %s is already running", ec.ID)
156 156
 	}
157 157
 	ec.Running = true
158
+	defer func() {
159
+		if err != nil {
160
+			ec.Running = false
161
+			exitCode := 126
162
+			ec.ExitCode = &exitCode
163
+		}
164
+	}()
158 165
 	ec.Unlock()
159 166
 
160 167
 	c := d.containers.Get(ec.ContainerID)
161 168
 	logrus.Debugf("starting exec command %s in container %s", ec.ID, c.ID)
162
-	d.LogContainerEvent(c, "exec_start: "+ec.ProcessConfig.Entrypoint+" "+strings.Join(ec.ProcessConfig.Arguments, " "))
169
+	d.LogContainerEvent(c, "exec_start: "+ec.Entrypoint+" "+strings.Join(ec.Args, " "))
163 170
 
164 171
 	if ec.OpenStdin && stdin != nil {
165 172
 		r, w := io.Pipe()
... ...
@@ -183,56 +187,26 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
183 183
 		ec.NewNopInputPipe()
184 184
 	}
185 185
 
186
-	attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.ProcessConfig.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
187
-
188
-	execErr := make(chan error)
189
-
190
-	// Note, the ExecConfig data will be removed when the container
191
-	// itself is deleted.  This allows us to query it (for things like
192
-	// the exitStatus) even after the cmd is done running.
193
-
194
-	go func() {
195
-		execErr <- d.containerExec(c, ec)
196
-	}()
186
+	p := libcontainerd.Process{
187
+		Args:     append([]string{ec.Entrypoint}, ec.Args...),
188
+		Terminal: ec.Tty,
189
+	}
197 190
 
198
-	select {
199
-	case err := <-attachErr:
200
-		if err != nil {
201
-			return fmt.Errorf("attach failed with error: %v", err)
202
-		}
191
+	if err := execSetPlatformOpt(c, ec, &p); err != nil {
203 192
 		return nil
204
-	case err := <-execErr:
205
-		if aErr := <-attachErr; aErr != nil && err == nil {
206
-			return fmt.Errorf("attach failed with error: %v", aErr)
207
-		}
208
-		if err == nil {
209
-			return nil
210
-		}
211
-
212
-		// Maybe the container stopped while we were trying to exec
213
-		if !c.IsRunning() {
214
-			return fmt.Errorf("container stopped while running exec: %s", c.ID)
215
-		}
216
-		return fmt.Errorf("Cannot run exec command %s in container %s: %s", ec.ID, c.ID, err)
217 193
 	}
218
-}
219 194
 
220
-// Exec calls the underlying exec driver to run
221
-func (d *Daemon) Exec(c *container.Container, execConfig *exec.Config, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (int, error) {
222
-	hooks := execdriver.Hooks{
223
-		Start: startCallback,
224
-	}
225
-	exitStatus, err := d.execDriver.Exec(c.Command, execConfig.ProcessConfig, pipes, hooks)
195
+	attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
226 196
 
227
-	// On err, make sure we don't leave ExitCode at zero
228
-	if err != nil && exitStatus == 0 {
229
-		exitStatus = 128
197
+	if err := d.containerd.AddProcess(c.ID, name, p); err != nil {
198
+		return err
230 199
 	}
231 200
 
232
-	execConfig.ExitCode = &exitStatus
233
-	execConfig.Running = false
234
-
235
-	return exitStatus, err
201
+	err = <-attachErr
202
+	if err != nil {
203
+		return fmt.Errorf("attach failed with error: %v", err)
204
+	}
205
+	return nil
236 206
 }
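ContainerExecStart now starts the process through containerd and then blocks on a single attach error channel, instead of racing separate exec and attach goroutines as before. The shape of that pattern in isolation (attachStreams stands in for the real stream plumbing):

package main

import (
	"errors"
	"fmt"
)

func attachStreams() <-chan error {
	ch := make(chan error, 1)
	go func() {
		// The real code copies stdio here; this stand-in just reports.
		ch <- errors.New("stream closed")
	}()
	return ch
}

func main() {
	attachErr := attachStreams()
	// ... the process would be started via containerd here ...
	if err := <-attachErr; err != nil {
		fmt.Println("attach failed with error:", err)
	}
}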
237 207
 
238 208
 // execCommandGC runs a ticker to clean up the daemon references
... ...
@@ -270,52 +244,3 @@ func (d *Daemon) containerExecIds() map[string]struct{} {
270 270
 	}
271 271
 	return ids
272 272
 }
273
-
274
-func (d *Daemon) containerExec(container *container.Container, ec *exec.Config) error {
275
-	container.Lock()
276
-	defer container.Unlock()
277
-
278
-	callback := func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
279
-		if processConfig.Tty {
280
-			// The callback is called after the process Start()
281
-			// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
282
-			// which we close here.
283
-			if c, ok := processConfig.Stdout.(io.Closer); ok {
284
-				c.Close()
285
-			}
286
-		}
287
-		ec.Close()
288
-		return nil
289
-	}
290
-
291
-	// We use a callback here instead of a goroutine and an chan for
292
-	// synchronization purposes
293
-	cErr := promise.Go(func() error { return d.monitorExec(container, ec, callback) })
294
-	return ec.Wait(cErr)
295
-}
296
-
297
-func (d *Daemon) monitorExec(container *container.Container, execConfig *exec.Config, callback execdriver.DriverCallback) error {
298
-	pipes := execdriver.NewPipes(execConfig.Stdin(), execConfig.Stdout(), execConfig.Stderr(), execConfig.OpenStdin)
299
-	exitCode, err := d.Exec(container, execConfig, pipes, callback)
300
-	if err != nil {
301
-		logrus.Errorf("Error running command in existing container %s: %s", container.ID, err)
302
-	}
303
-	logrus.Debugf("Exec task in container %s exited with code %d", container.ID, exitCode)
304
-
305
-	if err := execConfig.CloseStreams(); err != nil {
306
-		logrus.Errorf("%s: %s", container.ID, err)
307
-	}
308
-
309
-	if execConfig.ProcessConfig.Terminal != nil {
310
-		if err := execConfig.WaitResize(); err != nil {
311
-			logrus.Errorf("Error waiting for resize: %v", err)
312
-		}
313
-		if err := execConfig.ProcessConfig.Terminal.Close(); err != nil {
314
-			logrus.Errorf("Error closing terminal while running in container %s: %s", container.ID, err)
315
-		}
316
-	}
317
-	// remove the exec command from the container's store only and not the
318
-	// daemon's store so that the exec command can be inspected.
319
-	container.ExecCommands.Delete(execConfig.ID)
320
-	return err
321
-}
... ...
@@ -1,11 +1,8 @@
1 1
 package exec
2 2
 
3 3
 import (
4
-	"fmt"
5 4
 	"sync"
6
-	"time"
7 5
 
8
-	"github.com/docker/docker/daemon/execdriver"
9 6
 	"github.com/docker/docker/pkg/stringid"
10 7
 	"github.com/docker/docker/runconfig"
11 8
 )
... ...
@@ -16,22 +13,20 @@ import (
16 16
 type Config struct {
17 17
 	sync.Mutex
18 18
 	*runconfig.StreamConfig
19
-	ID            string
20
-	Running       bool
21
-	ExitCode      *int
22
-	ProcessConfig *execdriver.ProcessConfig
23
-	OpenStdin     bool
24
-	OpenStderr    bool
25
-	OpenStdout    bool
26
-	CanRemove     bool
27
-	ContainerID   string
28
-	DetachKeys    []byte
29
-
30
-	// waitStart will be closed immediately after the exec is really started.
31
-	waitStart chan struct{}
32
-
33
-	// waitResize will be closed after Resize is finished.
34
-	waitResize chan struct{}
19
+	ID          string
20
+	Running     bool
21
+	ExitCode    *int
22
+	OpenStdin   bool
23
+	OpenStderr  bool
24
+	OpenStdout  bool
25
+	CanRemove   bool
26
+	ContainerID string
27
+	DetachKeys  []byte
28
+	Entrypoint  string
29
+	Args        []string
30
+	Tty         bool
31
+	Privileged  bool
32
+	User        string
35 33
 }
36 34
 
37 35
 // NewConfig initializes a new exec configuration
... ...
@@ -39,8 +34,6 @@ func NewConfig() *Config {
39 39
 	return &Config{
40 40
 		ID:           stringid.GenerateNonCryptoID(),
41 41
 		StreamConfig: runconfig.NewStreamConfig(),
42
-		waitStart:    make(chan struct{}),
43
-		waitResize:   make(chan struct{}),
44 42
 	}
45 43
 }
46 44
 
... ...
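With ProcessConfig gone, the exec_create and exec_start events are both built from the flat Entrypoint and Args fields on exec.Config. The formatting, standalone (values hypothetical):

package main

import (
	"fmt"
	"strings"
)

func main() {
	entrypoint := "/bin/sh"
	args := []string{"-c", "echo hello"}
	// Mirrors the event strings logged in ContainerExecCreate/Start above.
	fmt.Println("exec_create: " + entrypoint + " " + strings.Join(args, " "))
}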
@@ -98,45 +91,3 @@ func (e *Store) List() []string {
98 98
 	e.RUnlock()
99 99
 	return IDs
100 100
 }
101
-
102
-// Wait waits until the exec process finishes or there is an error in the error channel.
103
-func (c *Config) Wait(cErr chan error) error {
104
-	// Exec should not return until the process is actually running
105
-	select {
106
-	case <-c.waitStart:
107
-	case err := <-cErr:
108
-		return err
109
-	}
110
-	return nil
111
-}
112
-
113
-// WaitResize waits until terminal resize finishes or time out.
114
-func (c *Config) WaitResize() error {
115
-	select {
116
-	case <-c.waitResize:
117
-	case <-time.After(time.Second):
118
-		return fmt.Errorf("Terminal resize for exec %s time out.", c.ID)
119
-	}
120
-	return nil
121
-}
122
-
123
-// Close closes the wait channel for the progress.
124
-func (c *Config) Close() {
125
-	close(c.waitStart)
126
-}
127
-
128
-// CloseResize closes the wait channel for resizing terminal.
129
-func (c *Config) CloseResize() {
130
-	close(c.waitResize)
131
-}
132
-
133
-// Resize changes the size of the terminal for the exec process.
134
-func (c *Config) Resize(h, w int) error {
135
-	defer c.CloseResize()
136
-	select {
137
-	case <-c.waitStart:
138
-	case <-time.After(time.Second):
139
-		return fmt.Errorf("Exec %s is not running, so it can not be resized.", c.ID)
140
-	}
141
-	return c.ProcessConfig.Terminal.Resize(h, w)
142
-}
143 101
new file mode 100644
... ...
@@ -0,0 +1,26 @@
0
+package daemon
1
+
2
+import (
3
+	"github.com/docker/docker/container"
4
+	"github.com/docker/docker/daemon/caps"
5
+	"github.com/docker/docker/daemon/exec"
6
+	"github.com/docker/docker/libcontainerd"
7
+)
8
+
9
+func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
10
+	if len(ec.User) > 0 {
11
+		uid, gid, additionalGids, err := getUser(c, ec.User)
12
+		if err != nil {
13
+			return err
14
+		}
15
+		p.User = &libcontainerd.User{
16
+			UID:            uid,
17
+			GID:            gid,
18
+			AdditionalGids: additionalGids,
19
+		}
20
+	}
21
+	if ec.Privileged {
22
+		p.Capabilities = caps.GetAllCapabilities()
23
+	}
24
+	return nil
25
+}
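A hedged usage sketch of execSetPlatformOpt as called from the exec-start path; identifiers beyond those in the new file (c, ec) come from the surrounding hunks, and the trailing comments describe expected behavior rather than verified output:

// Hypothetical call site, mirroring ContainerExecStart above.
p := libcontainerd.Process{
	Args:     append([]string{ec.Entrypoint}, ec.Args...),
	Terminal: ec.Tty,
}
if err := execSetPlatformOpt(c, ec, &p); err != nil {
	return err
}
// For ec.User == "1000:1000", p.User now carries the resolved uid/gid;
// for ec.Privileged, p.Capabilities holds the full capability set.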
0 26
deleted file mode 100644
... ...
@@ -1,21 +0,0 @@
1
-// +build linux freebsd
2
-
3
-package daemon
4
-
5
-import (
6
-	"github.com/docker/docker/container"
7
-	"github.com/docker/docker/daemon/execdriver"
8
-	"github.com/docker/engine-api/types"
9
-)
10
-
11
-// setPlatformSpecificExecProcessConfig sets platform-specific fields in the
12
-// ProcessConfig structure.
13
-func setPlatformSpecificExecProcessConfig(config *types.ExecConfig, container *container.Container, pc *execdriver.ProcessConfig) {
14
-	user := config.User
15
-	if len(user) == 0 {
16
-		user = container.Config.User
17
-	}
18
-
19
-	pc.User = user
20
-	pc.Privileged = config.Privileged
21
-}
... ...
@@ -84,7 +84,6 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) {
84 84
 		NFd:                fileutils.GetTotalUsedFds(),
85 85
 		NGoroutines:        runtime.NumGoroutine(),
86 86
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
87
-		ExecutionDriver:    daemon.ExecutionDriver().Name(),
88 87
 		LoggingDriver:      daemon.defaultLogConfig.Type,
89 88
 		CgroupDriver:       daemon.getCgroupDriver(),
90 89
 		NEventsListener:    daemon.EventsService.SubscribersCount(),
... ...
@@ -82,10 +82,10 @@ func addMountPoints(container *container.Container) []types.MountPoint {
82 82
 
83 83
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
84 84
 	return &backend.ExecProcessConfig{
85
-		Tty:        e.ProcessConfig.Tty,
86
-		Entrypoint: e.ProcessConfig.Entrypoint,
87
-		Arguments:  e.ProcessConfig.Arguments,
88
-		Privileged: &e.ProcessConfig.Privileged,
89
-		User:       e.ProcessConfig.User,
85
+		Tty:        e.Tty,
86
+		Entrypoint: e.Entrypoint,
87
+		Arguments:  e.Args,
88
+		Privileged: &e.Privileged,
89
+		User:       e.User,
90 90
 	}
91 91
 }
... ...
@@ -69,6 +69,10 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er
69 69
 
70 70
 	container.ExitOnNext()
71 71
 
72
+	if !daemon.IsShuttingDown() {
73
+		container.HasBeenManuallyStopped = true
74
+	}
75
+
72 76
 	// if the container is currently restarting we do not need to send the signal
73 77
 	// to the process.  Telling the monitor that it should exit on its next event
74 78
 	// loop is enough
75 79
new file mode 100644
... ...
@@ -0,0 +1,143 @@
0
+package daemon
1
+
2
+import (
3
+	"errors"
4
+	"fmt"
5
+	"io"
6
+	"runtime"
7
+	"strconv"
8
+
9
+	"github.com/Sirupsen/logrus"
10
+	"github.com/docker/docker/libcontainerd"
11
+	"github.com/docker/docker/runconfig"
12
+)
13
+
14
+// StateChanged handles state change notifications from containerd and updates the daemon accordingly
15
+func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
16
+	c := daemon.containers.Get(id)
17
+	if c == nil {
18
+		return fmt.Errorf("no such container: %s", id)
19
+	}
20
+
21
+	switch e.State {
22
+	case libcontainerd.StateOOM:
23
+		// StateOOM is Linux specific and should never be hit on Windows
24
+		if runtime.GOOS == "windows" {
25
+			return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
26
+		}
27
+		daemon.LogContainerEvent(c, "oom")
28
+	case libcontainerd.StateExit:
29
+		c.Lock()
30
+		defer c.Unlock()
31
+		c.Wait()
32
+		c.Reset(false)
33
+		c.SetStopped(platformConstructExitStatus(e))
34
+		attributes := map[string]string{
35
+			"exitCode": strconv.Itoa(int(e.ExitCode)),
36
+		}
37
+		daemon.LogContainerEventWithAttributes(c, "die", attributes)
38
+		daemon.Cleanup(c)
39
+		// FIXME: there is a race condition between two RUN instructions in a
40
+		// Dockerfile because they share the same runconfig and change the
41
+		// image. Must be fixed in builder/builder.go.
42
+		return c.ToDisk()
43
+	case libcontainerd.StateRestart:
44
+		c.Lock()
45
+		defer c.Unlock()
46
+		c.Reset(false)
47
+		c.RestartCount++
48
+		c.SetRestarting(platformConstructExitStatus(e))
49
+		attributes := map[string]string{
50
+			"exitCode": strconv.Itoa(int(e.ExitCode)),
51
+		}
52
+		daemon.LogContainerEventWithAttributes(c, "die", attributes)
53
+		return c.ToDisk()
54
+	case libcontainerd.StateExitProcess:
55
+		c.Lock()
56
+		defer c.Unlock()
57
+		if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
58
+			ec := int(e.ExitCode)
59
+			execConfig.ExitCode = &ec
60
+			execConfig.Running = false
61
+			execConfig.Wait()
62
+			if err := execConfig.CloseStreams(); err != nil {
63
+				logrus.Errorf("%s: %s", c.ID, err)
64
+			}
65
+
66
+			// remove the exec command from the container's store only and not the
67
+			// daemon's store so that the exec command can be inspected.
68
+			c.ExecCommands.Delete(execConfig.ID)
69
+		} else {
70
+			logrus.Warnf("Ignoring StateExitProcess for %v: no exec command found", e)
71
+		}
72
+	case libcontainerd.StateStart, libcontainerd.StateRestore:
73
+		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
74
+		c.HasBeenManuallyStopped = false
75
+		if err := c.ToDisk(); err != nil {
76
+			c.Reset(false)
77
+			return err
78
+		}
79
+	case libcontainerd.StatePause:
80
+		c.Paused = true
81
+		daemon.LogContainerEvent(c, "pause")
82
+	case libcontainerd.StateResume:
83
+		c.Paused = false
84
+		daemon.LogContainerEvent(c, "unpause")
85
+	}
86
+
87
+	return nil
88
+}
89
+
90
+// AttachStreams is called by libcontainerd to connect the stdio.
91
+func (daemon *Daemon) AttachStreams(id string, iop libcontainerd.IOPipe) error {
92
+	var s *runconfig.StreamConfig
93
+	c := daemon.containers.Get(id)
94
+	if c == nil {
95
+		ec, err := daemon.getExecConfig(id)
96
+		if err != nil {
97
+			return fmt.Errorf("no such exec/container: %s", id)
98
+		}
99
+		s = ec.StreamConfig
100
+	} else {
101
+		s = c.StreamConfig
102
+		if err := daemon.StartLogging(c); err != nil {
103
+			c.Reset(false)
104
+			return err
105
+		}
106
+	}
107
+
108
+	if stdin := s.Stdin(); stdin != nil {
109
+		if iop.Stdin != nil {
110
+			go func() {
111
+				io.Copy(iop.Stdin, stdin)
112
+				iop.Stdin.Close()
113
+			}()
114
+		}
115
+	} else {
116
+		if c != nil && !c.Config.Tty {
117
+			// no stdin attached and no tty: close containerd's stdin pipe (it stays open when a tty is attached).
118
+			if iop.Stdin != nil {
119
+				iop.Stdin.Close()
120
+			}
121
+		}
122
+	}
123
+
124
+	copy := func(w io.Writer, r io.Reader) {
125
+		s.Add(1)
126
+		go func() {
127
+			if _, err := io.Copy(w, r); err != nil {
128
+				logrus.Errorf("%v stream copy error: %v", id, err)
129
+			}
130
+			s.Done()
131
+		}()
132
+	}
133
+
134
+	if iop.Stdout != nil {
135
+		copy(s.Stdout(), iop.Stdout)
136
+	}
137
+	if iop.Stderr != nil {
138
+		copy(s.Stderr(), iop.Stderr)
139
+	}
140
+
141
+	return nil
142
+}
0 143
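The Add/Done pair inside the copy closure is matched by the Wait() calls in StateChanged above, so exit handling blocks until both output streams are fully drained. The same pattern in isolation, as a generic sketch (stdoutPipe and stderrPipe stand in for containerd's read ends; imports elided):

var wg sync.WaitGroup
pipe := func(w io.Writer, r io.Reader) {
	wg.Add(1)
	go func() {
		defer wg.Done()
		if _, err := io.Copy(w, r); err != nil {
			logrus.Errorf("stream copy error: %v", err)
		}
	}()
}
pipe(os.Stdout, stdoutPipe)
pipe(os.Stderr, stderrPipe)
wg.Wait() // all output drained before exit handling proceeds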
new file mode 100644
... ...
@@ -0,0 +1,14 @@
0
+package daemon
1
+
2
+import (
3
+	"github.com/docker/docker/container"
4
+	"github.com/docker/docker/libcontainerd"
5
+)
6
+
7
+// platformConstructExitStatus returns a platform-specific exit status structure
8
+func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
9
+	return &container.ExitStatus{
10
+		ExitCode:  int(e.ExitCode),
11
+		OOMKilled: e.OOMKilled,
12
+	}
13
+}
0 14
new file mode 100644
... ...
@@ -0,0 +1,652 @@
0
+package daemon
1
+
2
+import (
3
+	"fmt"
4
+	"io"
5
+	"os"
6
+	"path/filepath"
7
+	"strconv"
8
+	"strings"
9
+
10
+	"github.com/docker/docker/container"
11
+	"github.com/docker/docker/daemon/caps"
12
+	"github.com/docker/docker/libcontainerd"
13
+	"github.com/docker/docker/oci"
14
+	"github.com/docker/docker/pkg/idtools"
15
+	"github.com/docker/docker/pkg/mount"
16
+	"github.com/docker/docker/pkg/stringutils"
17
+	"github.com/docker/docker/pkg/symlink"
18
+	"github.com/docker/docker/volume"
19
+	containertypes "github.com/docker/engine-api/types/container"
20
+	"github.com/opencontainers/runc/libcontainer/apparmor"
21
+	"github.com/opencontainers/runc/libcontainer/devices"
22
+	"github.com/opencontainers/runc/libcontainer/user"
23
+	"github.com/opencontainers/specs/specs-go"
24
+)
25
+
26
+func setResources(s *specs.Spec, r containertypes.Resources) error {
27
+	weightDevices, err := getBlkioWeightDevices(r)
28
+	if err != nil {
29
+		return err
30
+	}
31
+	readBpsDevice, err := getBlkioReadBpsDevices(r)
32
+	if err != nil {
33
+		return err
34
+	}
35
+	writeBpsDevice, err := getBlkioWriteBpsDevices(r)
36
+	if err != nil {
37
+		return err
38
+	}
39
+	readIOpsDevice, err := getBlkioReadIOpsDevices(r)
40
+	if err != nil {
41
+		return err
42
+	}
43
+	writeIOpsDevice, err := getBlkioWriteIOpsDevices(r)
44
+	if err != nil {
45
+		return err
46
+	}
47
+
48
+	memoryRes := getMemoryResources(r)
49
+	cpuRes := getCPUResources(r)
50
+	blkioWeight := r.BlkioWeight
51
+
52
+	specResources := &specs.Resources{
53
+		Memory: memoryRes,
54
+		CPU:    cpuRes,
55
+		BlockIO: &specs.BlockIO{
56
+			Weight:                  &blkioWeight,
57
+			WeightDevice:            weightDevices,
58
+			ThrottleReadBpsDevice:   readBpsDevice,
59
+			ThrottleWriteBpsDevice:  writeBpsDevice,
60
+			ThrottleReadIOPSDevice:  readIOpsDevice,
61
+			ThrottleWriteIOPSDevice: writeIOpsDevice,
62
+		},
63
+		DisableOOMKiller: r.OomKillDisable,
64
+		Pids: &specs.Pids{
65
+			Limit: &r.PidsLimit,
66
+		},
67
+	}
68
+
69
+	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
70
+		specResources.Devices = s.Linux.Resources.Devices
71
+	}
72
+
73
+	s.Linux.Resources = specResources
74
+	return nil
75
+}
76
+
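setResources maps the engine-api Resources struct onto the OCI spec wholesale, preserving any device cgroup rules already present on the spec. A hedged call sketch with illustrative values:

r := containertypes.Resources{
	BlkioWeight: 500, // relative block IO weight (10..1000)
	PidsLimit:   100, // cap the container at 100 processes
}
if err := setResources(&s, r); err != nil {
	return err
}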
77
+func setDevices(s *specs.Spec, c *container.Container) error {
78
+	// Build lists of devices allowed and created within the container.
79
+	var devs []specs.Device
80
+	if c.HostConfig.Privileged {
81
+		hostDevices, err := devices.HostDevices()
82
+		if err != nil {
83
+			return err
84
+		}
85
+		for _, d := range hostDevices {
86
+			devs = append(devs, specDevice(d))
87
+		}
88
+	} else {
89
+		for _, deviceMapping := range c.HostConfig.Devices {
90
+			d, err := getDevicesFromPath(deviceMapping)
91
+			if err != nil {
92
+				return err
93
+			}
94
+
95
+			devs = append(devs, d...)
96
+		}
97
+	}
98
+
99
+	s.Linux.Devices = append(s.Linux.Devices, devs...)
100
+	return nil
101
+}
102
+
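In the privileged branch every host device node is exposed; otherwise only the explicit HostConfig.Devices mappings are converted. For example, docker run --device=/dev/fuse arrives here roughly as (a sketch of the engine-api type):

c.HostConfig.Devices = []containertypes.DeviceMapping{{
	PathOnHost:        "/dev/fuse",
	PathInContainer:   "/dev/fuse",
	CgroupPermissions: "rwm", // read, write, mknod
}}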
103
+func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
104
+	var rlimits []specs.Rlimit
105
+
106
+	ulimits := c.HostConfig.Ulimits
107
+	// Merge ulimits with daemon defaults
108
+	ulIdx := make(map[string]struct{})
109
+	for _, ul := range ulimits {
110
+		ulIdx[ul.Name] = struct{}{}
111
+	}
112
+	for name, ul := range daemon.configStore.Ulimits {
113
+		if _, exists := ulIdx[name]; !exists {
114
+			ulimits = append(ulimits, ul)
115
+		}
116
+	}
117
+
118
+	for _, ul := range ulimits {
119
+		rlimits = append(rlimits, specs.Rlimit{
120
+			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
121
+			Soft: uint64(ul.Soft),
122
+			Hard: uint64(ul.Hard),
123
+		})
124
+	}
125
+
126
+	s.Process.Rlimits = rlimits
127
+	return nil
128
+}
129
+
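Container-level ulimits mask daemon defaults of the same name, and each surviving entry is upper-cased and prefixed to form the OCI rlimit type. For example, --ulimit nofile=1024:2048 becomes the following spec entry (a sketch):

specs.Rlimit{
	Type: "RLIMIT_NOFILE", // "RLIMIT_" + strings.ToUpper("nofile")
	Soft: 1024,
	Hard: 2048,
}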
130
+func setUser(s *specs.Spec, c *container.Container) error {
131
+	uid, gid, additionalGids, err := getUser(c, c.Config.User)
132
+	if err != nil {
133
+		return err
134
+	}
135
+	s.Process.User.UID = uid
136
+	s.Process.User.GID = gid
137
+	s.Process.User.AdditionalGids = additionalGids
138
+	return nil
139
+}
140
+
141
+func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
142
+	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
143
+	if err != nil {
144
+		return nil, err
145
+	}
146
+	return os.Open(fp)
147
+}
148
+
149
+func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
150
+	passwdPath, err := user.GetPasswdPath()
151
+	if err != nil {
152
+		return 0, 0, nil, err
153
+	}
154
+	groupPath, err := user.GetGroupPath()
155
+	if err != nil {
156
+		return 0, 0, nil, err
157
+	}
158
+	passwdFile, err := readUserFile(c, passwdPath)
159
+	if err == nil {
160
+		defer passwdFile.Close()
161
+	}
162
+	groupFile, err := readUserFile(c, groupPath)
163
+	if err == nil {
164
+		defer groupFile.Close()
165
+	}
166
+
167
+	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
168
+	if err != nil {
169
+		return 0, 0, nil, err
170
+	}
171
+
172
+	// TODO: fix this double read with a change to the libcontainer/user pkg
173
+	groupFile, err = readUserFile(c, groupPath)
174
+	if err == nil {
175
+		defer groupFile.Close()
176
+	}
177
+	var addGroups []int
178
+	if len(c.HostConfig.GroupAdd) > 0 {
179
+		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
180
+		if err != nil {
181
+			return 0, 0, nil, err
182
+		}
183
+	}
184
+	uid := uint32(execUser.Uid)
185
+	gid := uint32(execUser.Gid)
186
+	sgids := append(execUser.Sgids, addGroups...)
187
+	var additionalGids []uint32
188
+	for _, g := range sgids {
189
+		additionalGids = append(additionalGids, uint32(g))
190
+	}
191
+	return uid, gid, additionalGids, nil
192
+}
193
+
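getUser resolves a "user[:group]" string against the container's own /etc/passwd and /etc/group (read via readUserFile, so lookups cannot escape the rootfs), then appends any HostConfig.GroupAdd groups. A hedged usage sketch mirroring setUser above, with a hypothetical user name:

uid, gid, extraGids, err := getUser(c, "www-data:www-data")
if err != nil {
	return err
}
s.Process.User = specs.User{
	UID:            uid,
	GID:            gid,
	AdditionalGids: extraGids,
}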
194
+func setNamespace(s *specs.Spec, ns specs.Namespace) {
195
+	for i, n := range s.Linux.Namespaces {
196
+		if n.Type == ns.Type {
197
+			s.Linux.Namespaces[i] = ns
198
+			return
199
+		}
200
+	}
201
+	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
202
+}
203
+
204
+func setCapabilities(s *specs.Spec, c *container.Container) error {
205
+	var caplist []string
206
+	var err error
207
+	if c.HostConfig.Privileged {
208
+		caplist = caps.GetAllCapabilities()
209
+	} else {
210
+		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
211
+		if err != nil {
212
+			return err
213
+		}
214
+	}
215
+	s.Process.Capabilities = caplist
216
+	return nil
217
+}
218
+
219
+func delNamespace(s *specs.Spec, nsType specs.NamespaceType) {
220
+	idx := -1
221
+	for i, n := range s.Linux.Namespaces {
222
+		if n.Type == nsType {
223
+			idx = i
224
+		}
225
+	}
226
+	if idx >= 0 {
227
+		s.Linux.Namespaces = append(s.Linux.Namespaces[:idx], s.Linux.Namespaces[idx+1:]...)
228
+	}
229
+}
230
+
231
+func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
232
+	// network
233
+	if !c.Config.NetworkDisabled {
234
+		ns := specs.Namespace{Type: "network"}
235
+		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
236
+		if parts[0] == "container" {
237
+			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
238
+			if err != nil {
239
+				return err
240
+			}
241
+			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
242
+		} else if c.HostConfig.NetworkMode.IsHost() {
243
+			ns.Path = c.NetworkSettings.SandboxKey
244
+		}
245
+		setNamespace(s, ns)
246
+	}
247
+	// ipc
248
+	if c.HostConfig.IpcMode.IsContainer() {
249
+		ns := specs.Namespace{Type: "ipc"}
250
+		ic, err := daemon.getIpcContainer(c)
251
+		if err != nil {
252
+			return err
253
+		}
254
+		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
255
+		setNamespace(s, ns)
256
+	} else if c.HostConfig.IpcMode.IsHost() {
257
+		delNamespace(s, specs.NamespaceType("ipc"))
258
+	} else {
259
+		ns := specs.Namespace{Type: "ipc"}
260
+		setNamespace(s, ns)
261
+	}
262
+	// pid
263
+	if c.HostConfig.PidMode.IsHost() {
264
+		delNamespace(s, specs.NamespaceType("pid"))
265
+	}
266
+	// uts
267
+	if c.HostConfig.UTSMode.IsHost() {
268
+		delNamespace(s, specs.NamespaceType("uts"))
269
+		s.Hostname = ""
270
+	}
271
+	// user
272
+	if c.HostConfig.UsernsMode.IsPrivate() {
273
+		uidMap, gidMap := daemon.GetUIDGIDMaps()
274
+		if uidMap != nil {
275
+			ns := specs.Namespace{Type: "user"}
276
+			setNamespace(s, ns)
277
+			s.Linux.UIDMappings = specMapping(uidMap)
278
+			s.Linux.GIDMappings = specMapping(gidMap)
279
+		}
280
+	}
281
+
282
+	return nil
283
+}
284
+
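setNamespaces either rewrites a namespace entry to point at another process's /proc/<pid>/ns/* file (container: and host network modes), deletes the entry outright (host ipc/pid/uts), or leaves the private default in place. For example, --net=container:<id> reduces to a call like the following, with a hypothetical pid:

setNamespace(s, specs.Namespace{
	Type: "network",
	Path: "/proc/4242/ns/net", // pid of the container owning the netns
})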
285
+func specMapping(s []idtools.IDMap) []specs.IDMapping {
286
+	var ids []specs.IDMapping
287
+	for _, item := range s {
288
+		ids = append(ids, specs.IDMapping{
289
+			HostID:      uint32(item.HostID),
290
+			ContainerID: uint32(item.ContainerID),
291
+			Size:        uint32(item.Size),
292
+		})
293
+	}
294
+	return ids
295
+}
296
+
297
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
298
+	for _, m := range mountinfo {
299
+		if m.Mountpoint == dir {
300
+			return m
301
+		}
302
+	}
303
+	return nil
304
+}
305
+
306
+// Get the source mount point of the directory passed in as an argument. Also
307
+// return its optional fields.
308
+func getSourceMount(source string) (string, string, error) {
309
+	// Ensure any symlinks are resolved.
310
+	sourcePath, err := filepath.EvalSymlinks(source)
311
+	if err != nil {
312
+		return "", "", err
313
+	}
314
+
315
+	mountinfos, err := mount.GetMounts()
316
+	if err != nil {
317
+		return "", "", err
318
+	}
319
+
320
+	mountinfo := getMountInfo(mountinfos, sourcePath)
321
+	if mountinfo != nil {
322
+		return sourcePath, mountinfo.Optional, nil
323
+	}
324
+
325
+	path := sourcePath
326
+	for {
327
+		path = filepath.Dir(path)
328
+
329
+		mountinfo = getMountInfo(mountinfos, path)
330
+		if mountinfo != nil {
331
+			return path, mountinfo.Optional, nil
332
+		}
333
+
334
+		if path == "/" {
335
+			break
336
+		}
337
+	}
338
+
339
+	// If we get here, we did not find a parent mount. Something is wrong.
340
+	return "", "", fmt.Errorf("Could not find source mount of %s", source)
341
+}
342
+
343
+// Ensure the mount point on which path is mounted is shared.
344
+func ensureShared(path string) error {
345
+	sharedMount := false
346
+
347
+	sourceMount, optionalOpts, err := getSourceMount(path)
348
+	if err != nil {
349
+		return err
350
+	}
351
+	// Make sure the source mount point is shared.
352
+	optsSplit := strings.Split(optionalOpts, " ")
353
+	for _, opt := range optsSplit {
354
+		if strings.HasPrefix(opt, "shared:") {
355
+			sharedMount = true
356
+			break
357
+		}
358
+	}
359
+
360
+	if !sharedMount {
361
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
362
+	}
363
+	return nil
364
+}
365
+
366
+// Ensure the mount point on which path is mounted is either shared or slave.
367
+func ensureSharedOrSlave(path string) error {
368
+	sharedMount := false
369
+	slaveMount := false
370
+
371
+	sourceMount, optionalOpts, err := getSourceMount(path)
372
+	if err != nil {
373
+		return err
374
+	}
375
+	// Make sure the source mount point is shared or a slave.
376
+	optsSplit := strings.Split(optionalOpts, " ")
377
+	for _, opt := range optsSplit {
378
+		if strings.HasPrefix(opt, "shared:") {
379
+			sharedMount = true
380
+			break
381
+		} else if strings.HasPrefix(opt, "master:") {
382
+			slaveMount = true
383
+			break
384
+		}
385
+	}
386
+
387
+	if !sharedMount && !slaveMount {
388
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
389
+	}
390
+	return nil
391
+}
392
+
393
+var (
394
+	mountPropagationMap = map[string]int{
395
+		"private":  mount.PRIVATE,
396
+		"rprivate": mount.RPRIVATE,
397
+		"shared":   mount.SHARED,
398
+		"rshared":  mount.RSHARED,
399
+		"slave":    mount.SLAVE,
400
+		"rslave":   mount.RSLAVE,
401
+	}
402
+
403
+	mountPropagationReverseMap = map[int]string{
404
+		mount.PRIVATE:  "private",
405
+		mount.RPRIVATE: "rprivate",
406
+		mount.SHARED:   "shared",
407
+		mount.RSHARED:  "rshared",
408
+		mount.SLAVE:    "slave",
409
+		mount.RSLAVE:   "rslave",
410
+	}
411
+)
412
+
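These two tables translate between the user-facing propagation strings and the pkg/mount flag constants; setMounts below uses the forward map to classify a request and the reverse map when emitting OCI mount options. A round-trip sketch:

pFlag := mountPropagationMap["rslave"]   // mount.RSLAVE
opt := mountPropagationReverseMap[pFlag] // "rslave" again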
413
+func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
414
+	userMounts := make(map[string]struct{})
415
+	for _, m := range mounts {
416
+		userMounts[m.Destination] = struct{}{}
417
+	}
418
+
419
+	// Filter out mounts that are overridden by user-supplied mounts
420
+	var defaultMounts []specs.Mount
421
+	_, mountDev := userMounts["/dev"]
422
+	for _, m := range s.Mounts {
423
+		if _, ok := userMounts[m.Destination]; !ok {
424
+			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
425
+				continue
426
+			}
427
+			defaultMounts = append(defaultMounts, m)
428
+		}
429
+	}
430
+
431
+	s.Mounts = defaultMounts
432
+	for _, m := range mounts {
433
+		for _, cm := range s.Mounts {
434
+			if cm.Destination == m.Destination {
435
+				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
436
+			}
437
+		}
438
+
439
+		if m.Source == "tmpfs" {
440
+			opt := []string{"noexec", "nosuid", "nodev", volume.DefaultPropagationMode}
441
+			if m.Data != "" {
442
+				opt = append(opt, strings.Split(m.Data, ",")...)
443
+			} else {
444
+				opt = append(opt, "size=65536k")
445
+			}
446
+
447
+			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: opt})
448
+			continue
449
+		}
450
+
451
+		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}
452
+
453
+		// Determine the RootfsPropagation property based on the volume's
454
+		// propagation mode. If a volume is shared, keep root propagation
455
+		// shared; this also works for slave and private volumes.
456
+		//
457
+		// For slave volumes, it can be either [r]shared/[r]slave.
458
+		//
459
+		// For private volumes any root propagation value should work.
460
+		pFlag := mountPropagationMap[m.Propagation]
461
+		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
462
+			if err := ensureShared(m.Source); err != nil {
463
+				return err
464
+			}
465
+			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
466
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
467
+				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
468
+			}
469
+		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
470
+			if err := ensureSharedOrSlave(m.Source); err != nil {
471
+				return err
472
+			}
473
+			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
474
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
475
+				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
476
+			}
477
+		}
478
+
479
+		opts := []string{"rbind"}
480
+		if !m.Writable {
481
+			opts = append(opts, "ro")
482
+		}
483
+		if pFlag != 0 {
484
+			opts = append(opts, mountPropagationReverseMap[pFlag])
485
+		}
486
+
487
+		mt.Options = opts
488
+		s.Mounts = append(s.Mounts, mt)
489
+	}
490
+
491
+	if s.Root.Readonly {
492
+		for i, m := range s.Mounts {
493
+			switch m.Destination {
494
+			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
495
+				continue
496
+			}
497
+			if _, ok := userMounts[m.Destination]; !ok {
498
+				if !stringutils.InSlice(m.Options, "ro") {
499
+					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
500
+				}
501
+			}
502
+		}
503
+	}
504
+
505
+	if c.HostConfig.Privileged {
506
+		if !s.Root.Readonly {
507
+			// clear readonly for /sys
508
+			for i := range s.Mounts {
509
+				if s.Mounts[i].Destination == "/sys" {
510
+					clearReadOnly(&s.Mounts[i])
511
+				}
512
+			}
513
+		}
514
+	}
515
+
516
+	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
517
+	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
518
+	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
519
+		for i, m := range s.Mounts {
520
+			if m.Type == "cgroup" {
521
+				clearReadOnly(&s.Mounts[i])
522
+			}
523
+		}
524
+	}
525
+
526
+	return nil
527
+}
528
+
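As a concrete example of the tmpfs branch in setMounts, a mount whose Data is "size=128m,mode=755" comes out as the following OCI mount, assuming volume.DefaultPropagationMode is "rprivate" (destination and values illustrative):

specs.Mount{
	Destination: "/run",
	Source:      "tmpfs",
	Type:        "tmpfs",
	Options: []string{"noexec", "nosuid", "nodev", "rprivate",
		"size=128m", "mode=755"},
}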
529
+func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
530
+	linkedEnv, err := daemon.setupLinkedContainers(c)
531
+	if err != nil {
532
+		return err
533
+	}
534
+	s.Root = specs.Root{
535
+		Path:     c.BaseFS,
536
+		Readonly: c.HostConfig.ReadonlyRootfs,
537
+	}
538
+	rootUID, rootGID := daemon.GetRemappedUIDGID()
539
+	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
540
+		return err
541
+	}
542
+	cwd := c.Config.WorkingDir
543
+	if len(cwd) == 0 {
544
+		cwd = "/"
545
+	}
546
+	s.Process.Args = append([]string{c.Path}, c.Args...)
547
+	s.Process.Cwd = cwd
548
+	s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
549
+	s.Process.Terminal = c.Config.Tty
550
+	s.Hostname = c.FullHostname()
551
+
552
+	return nil
553
+}
554
+
555
+func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) {
556
+	s := oci.DefaultSpec()
557
+	if err := daemon.populateCommonSpec(&s, c); err != nil {
558
+		return nil, err
559
+	}
560
+
561
+	var cgroupsPath string
562
+	if c.HostConfig.CgroupParent != "" {
563
+		cgroupsPath = filepath.Join(c.HostConfig.CgroupParent, c.ID)
564
+	} else {
565
+		defaultCgroupParent := "/docker"
566
+		if daemon.configStore.CgroupParent != "" {
567
+			defaultCgroupParent = daemon.configStore.CgroupParent
568
+		} else if daemon.usingSystemd() {
569
+			defaultCgroupParent = "system.slice"
570
+		}
571
+		cgroupsPath = filepath.Join(defaultCgroupParent, c.ID)
572
+	}
573
+	s.Linux.CgroupsPath = &cgroupsPath
574
+
575
+	if err := setResources(&s, c.HostConfig.Resources); err != nil {
576
+		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
577
+	}
578
+	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
579
+	if err := setDevices(&s, c); err != nil {
580
+		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
581
+	}
582
+	if err := setRlimits(daemon, &s, c); err != nil {
583
+		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
584
+	}
585
+	if err := setUser(&s, c); err != nil {
586
+		return nil, fmt.Errorf("linux spec user: %v", err)
587
+	}
588
+	if err := setNamespaces(daemon, &s, c); err != nil {
589
+		return nil, fmt.Errorf("linux spec namespaces: %v", err)
590
+	}
591
+	if err := setCapabilities(&s, c); err != nil {
592
+		return nil, fmt.Errorf("linux spec capabilities: %v", err)
593
+	}
594
+	if err := setSeccomp(daemon, &s, c); err != nil {
595
+		return nil, fmt.Errorf("linux seccomp: %v", err)
596
+	}
597
+
598
+	if err := daemon.setupIpcDirs(c); err != nil {
599
+		return nil, err
600
+	}
601
+
602
+	mounts, err := daemon.setupMounts(c)
603
+	if err != nil {
604
+		return nil, err
605
+	}
606
+	mounts = append(mounts, c.IpcMounts()...)
607
+	mounts = append(mounts, c.TmpfsMounts()...)
608
+	if err := setMounts(daemon, &s, c, mounts); err != nil {
609
+		return nil, fmt.Errorf("linux mounts: %v", err)
610
+	}
611
+
612
+	for _, ns := range s.Linux.Namespaces {
613
+		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
614
+			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
615
+			if err != nil {
616
+				return nil, err
617
+			}
618
+
619
+			s.Hooks = specs.Hooks{
620
+				Prestart: []specs.Hook{{
621
+					Path: target, // FIXME: cross-platform
622
+					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
623
+				}},
624
+			}
625
+		}
626
+	}
627
+
628
+	if apparmor.IsEnabled() {
629
+		appArmorProfile := "docker-default"
630
+		if c.HostConfig.Privileged {
631
+			appArmorProfile = "unconfined"
632
+		} else if len(c.AppArmorProfile) > 0 {
633
+			appArmorProfile = c.AppArmorProfile
634
+		}
635
+		s.Process.ApparmorProfile = appArmorProfile
636
+	}
637
+	s.Process.SelinuxLabel = c.GetProcessLabel()
638
+	s.Process.NoNewPrivileges = c.NoNewPrivileges
639
+
640
+	return (*libcontainerd.Spec)(&s), nil
641
+}
642
+
643
+func clearReadOnly(m *specs.Mount) {
644
+	var opt []string
645
+	for _, o := range m.Options {
646
+		if o != "ro" {
647
+			opt = append(opt, o)
648
+		}
649
+	}
650
+	m.Options = opt
651
+}
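clearReadOnly is the counterpart to the read-only pass in setMounts: it strips "ro" from a mount's option list and leaves everything else intact. A trivial behavioral sketch:

m := specs.Mount{Options: []string{"nosuid", "ro", "nodev"}}
clearReadOnly(&m)
// m.Options is now []string{"nosuid", "nodev"}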
... ...
@@ -41,10 +41,9 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
41 41
 		return errContainerIsRestarting(container.ID)
42 42
 	}
43 43
 
44
-	if err := daemon.execDriver.Pause(container.Command); err != nil {
44
+	if err := daemon.containerd.Pause(container.ID); err != nil {
45 45
 		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
46 46
 	}
47
-	container.Paused = true
48
-	daemon.LogContainerEvent(container, "pause")
47
+
49 48
 	return nil
50 49
 }
... ...
@@ -1,6 +1,10 @@
1 1
 package daemon
2 2
 
3
-import "fmt"
3
+import (
4
+	"fmt"
5
+
6
+	"github.com/docker/docker/libcontainerd"
7
+)
4 8
 
5 9
 // ContainerResize changes the size of the TTY of the process running
6 10
 // in the container with the given name to the given height and width.
... ...
@@ -14,7 +18,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
14 14
 		return errNotRunning{container.ID}
15 15
 	}
16 16
 
17
-	if err = container.Resize(height, width); err == nil {
17
+	if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil {
18 18
 		attributes := map[string]string{
19 19
 			"height": fmt.Sprintf("%d", height),
20 20
 			"width":  fmt.Sprintf("%d", width),
... ...
@@ -28,10 +32,9 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
28 28
 // running in the exec with the given name to the given height and
29 29
 // width.
30 30
 func (daemon *Daemon) ContainerExecResize(name string, height, width int) error {
31
-	ExecConfig, err := daemon.getExecConfig(name)
31
+	ec, err := daemon.getExecConfig(name)
32 32
 	if err != nil {
33 33
 		return err
34 34
 	}
35
-
36
-	return ExecConfig.Resize(height, width)
35
+	return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height)
37 36
 }
38 37
new file mode 100644
... ...
@@ -0,0 +1,1600 @@
0
+// +build linux,seccomp
1
+
2
+package daemon
3
+
4
+import (
5
+	"syscall"
6
+
7
+	"github.com/opencontainers/specs/specs-go"
8
+	libseccomp "github.com/seccomp/libseccomp-golang"
9
+)
10
+
11
+func arches() []specs.Arch {
12
+	var native, err = libseccomp.GetNativeArch()
13
+	if err != nil {
14
+		return []specs.Arch{}
15
+	}
16
+	var a = native.String()
17
+	switch a {
18
+	case "amd64":
19
+		return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
20
+	case "arm64":
21
+		return []specs.Arch{specs.ArchAARCH64, specs.ArchARM}
22
+	case "mips64":
23
+		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
24
+	case "mips64n32":
25
+		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
26
+	case "mipsel64":
27
+		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
28
+	case "mipsel64n32":
29
+		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
30
+	default:
31
+		return []specs.Arch{}
32
+	}
33
+}
34
+
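arches() widens the seccomp filter to the compat ABIs of the native architecture, so that, for instance, 32-bit binaries keep running on a 64-bit host; if the native architecture cannot be determined the list is left empty. Concretely, on amd64:

// arches() == []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
// so calls made through the x86 and x32 compat ABIs hit the same rules.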
35
+var defaultSeccompProfile = specs.Seccomp{
36
+	DefaultAction: specs.ActErrno,
37
+	Architectures: arches(),
38
+	Syscalls: []specs.Syscall{
39
+		{
40
+			Name:   "accept",
41
+			Action: specs.ActAllow,
42
+			Args:   []specs.Arg{},
43
+		},
44
+		{
45
+			Name:   "accept4",
46
+			Action: specs.ActAllow,
47
+			Args:   []specs.Arg{},
48
+		},
49
+		{
50
+			Name:   "access",
51
+			Action: specs.ActAllow,
52
+			Args:   []specs.Arg{},
53
+		},
54
+		{
55
+			Name:   "alarm",
56
+			Action: specs.ActAllow,
57
+			Args:   []specs.Arg{},
58
+		},
59
+		{
60
+			Name:   "arch_prctl",
61
+			Action: specs.ActAllow,
62
+			Args:   []specs.Arg{},
63
+		},
64
+		{
65
+			Name:   "bind",
66
+			Action: specs.ActAllow,
67
+			Args:   []specs.Arg{},
68
+		},
69
+		{
70
+			Name:   "brk",
71
+			Action: specs.ActAllow,
72
+			Args:   []specs.Arg{},
73
+		},
74
+		{
75
+			Name:   "capget",
76
+			Action: specs.ActAllow,
77
+			Args:   []specs.Arg{},
78
+		},
79
+		{
80
+			Name:   "capset",
81
+			Action: specs.ActAllow,
82
+			Args:   []specs.Arg{},
83
+		},
84
+		{
85
+			Name:   "chdir",
86
+			Action: specs.ActAllow,
87
+			Args:   []specs.Arg{},
88
+		},
89
+		{
90
+			Name:   "chmod",
91
+			Action: specs.ActAllow,
92
+			Args:   []specs.Arg{},
93
+		},
94
+		{
95
+			Name:   "chown",
96
+			Action: specs.ActAllow,
97
+			Args:   []specs.Arg{},
98
+		},
99
+		{
100
+			Name:   "chown32",
101
+			Action: specs.ActAllow,
102
+			Args:   []specs.Arg{},
103
+		},
104
+		{
105
+			Name:   "chroot",
106
+			Action: specs.ActAllow,
107
+			Args:   []specs.Arg{},
108
+		},
109
+		{
110
+			Name:   "clock_getres",
111
+			Action: specs.ActAllow,
112
+			Args:   []specs.Arg{},
113
+		},
114
+		{
115
+			Name:   "clock_gettime",
116
+			Action: specs.ActAllow,
117
+			Args:   []specs.Arg{},
118
+		},
119
+		{
120
+			Name:   "clock_nanosleep",
121
+			Action: specs.ActAllow,
122
+			Args:   []specs.Arg{},
123
+		},
124
+		{
125
+			Name:   "clone",
126
+			Action: specs.ActAllow,
127
+			Args: []specs.Arg{
128
+				{
129
+					Index:    0,
130
+					Value:    syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET,
131
+					ValueTwo: 0,
132
+					Op:       specs.OpMaskedEqual,
133
+				},
134
+			},
135
+		},
136
+		{
137
+			Name:   "close",
138
+			Action: specs.ActAllow,
139
+			Args:   []specs.Arg{},
140
+		},
141
+		{
142
+			Name:   "connect",
143
+			Action: specs.ActAllow,
144
+			Args:   []specs.Arg{},
145
+		},
146
+		{
147
+			Name:   "creat",
148
+			Action: specs.ActAllow,
149
+			Args:   []specs.Arg{},
150
+		},
151
+		{
152
+			Name:   "dup",
153
+			Action: specs.ActAllow,
154
+			Args:   []specs.Arg{},
155
+		},
156
+		{
157
+			Name:   "dup2",
158
+			Action: specs.ActAllow,
159
+			Args:   []specs.Arg{},
160
+		},
161
+		{
162
+			Name:   "dup3",
163
+			Action: specs.ActAllow,
164
+			Args:   []specs.Arg{},
165
+		},
166
+		{
167
+			Name:   "epoll_create",
168
+			Action: specs.ActAllow,
169
+			Args:   []specs.Arg{},
170
+		},
171
+		{
172
+			Name:   "epoll_create1",
173
+			Action: specs.ActAllow,
174
+			Args:   []specs.Arg{},
175
+		},
176
+		{
177
+			Name:   "epoll_ctl",
178
+			Action: specs.ActAllow,
179
+			Args:   []specs.Arg{},
180
+		},
181
+		{
182
+			Name:   "epoll_ctl_old",
183
+			Action: specs.ActAllow,
184
+			Args:   []specs.Arg{},
185
+		},
186
+		{
187
+			Name:   "epoll_pwait",
188
+			Action: specs.ActAllow,
189
+			Args:   []specs.Arg{},
190
+		},
191
+		{
192
+			Name:   "epoll_wait",
193
+			Action: specs.ActAllow,
194
+			Args:   []specs.Arg{},
195
+		},
196
+		{
197
+			Name:   "epoll_wait_old",
198
+			Action: specs.ActAllow,
199
+			Args:   []specs.Arg{},
200
+		},
201
+		{
202
+			Name:   "eventfd",
203
+			Action: specs.ActAllow,
204
+			Args:   []specs.Arg{},
205
+		},
206
+		{
207
+			Name:   "eventfd2",
208
+			Action: specs.ActAllow,
209
+			Args:   []specs.Arg{},
210
+		},
211
+		{
212
+			Name:   "execve",
213
+			Action: specs.ActAllow,
214
+			Args:   []specs.Arg{},
215
+		},
216
+		{
217
+			Name:   "execveat",
218
+			Action: specs.ActAllow,
219
+			Args:   []specs.Arg{},
220
+		},
221
+		{
222
+			Name:   "exit",
223
+			Action: specs.ActAllow,
224
+			Args:   []specs.Arg{},
225
+		},
226
+		{
227
+			Name:   "exit_group",
228
+			Action: specs.ActAllow,
229
+			Args:   []specs.Arg{},
230
+		},
231
+		{
232
+			Name:   "faccessat",
233
+			Action: specs.ActAllow,
234
+			Args:   []specs.Arg{},
235
+		},
236
+		{
237
+			Name:   "fadvise64",
238
+			Action: specs.ActAllow,
239
+			Args:   []specs.Arg{},
240
+		},
241
+		{
242
+			Name:   "fadvise64_64",
243
+			Action: specs.ActAllow,
244
+			Args:   []specs.Arg{},
245
+		},
246
+		{
247
+			Name:   "fallocate",
248
+			Action: specs.ActAllow,
249
+			Args:   []specs.Arg{},
250
+		},
251
+		{
252
+			Name:   "fanotify_init",
253
+			Action: specs.ActAllow,
254
+			Args:   []specs.Arg{},
255
+		},
256
+		{
257
+			Name:   "fanotify_mark",
258
+			Action: specs.ActAllow,
259
+			Args:   []specs.Arg{},
260
+		},
261
+		{
262
+			Name:   "fchdir",
263
+			Action: specs.ActAllow,
264
+			Args:   []specs.Arg{},
265
+		},
266
+		{
267
+			Name:   "fchmod",
268
+			Action: specs.ActAllow,
269
+			Args:   []specs.Arg{},
270
+		},
271
+		{
272
+			Name:   "fchmodat",
273
+			Action: specs.ActAllow,
274
+			Args:   []specs.Arg{},
275
+		},
276
+		{
277
+			Name:   "fchown",
278
+			Action: specs.ActAllow,
279
+			Args:   []specs.Arg{},
280
+		},
281
+		{
282
+			Name:   "fchown32",
283
+			Action: specs.ActAllow,
284
+			Args:   []specs.Arg{},
285
+		},
286
+		{
287
+			Name:   "fchownat",
288
+			Action: specs.ActAllow,
289
+			Args:   []specs.Arg{},
290
+		},
291
+		{
292
+			Name:   "fcntl",
293
+			Action: specs.ActAllow,
294
+			Args:   []specs.Arg{},
295
+		},
296
+		{
297
+			Name:   "fcntl64",
298
+			Action: specs.ActAllow,
299
+			Args:   []specs.Arg{},
300
+		},
301
+		{
302
+			Name:   "fdatasync",
303
+			Action: specs.ActAllow,
304
+			Args:   []specs.Arg{},
305
+		},
306
+		{
307
+			Name:   "fgetxattr",
308
+			Action: specs.ActAllow,
309
+			Args:   []specs.Arg{},
310
+		},
311
+		{
312
+			Name:   "flistxattr",
313
+			Action: specs.ActAllow,
314
+			Args:   []specs.Arg{},
315
+		},
316
+		{
317
+			Name:   "flock",
318
+			Action: specs.ActAllow,
319
+			Args:   []specs.Arg{},
320
+		},
321
+		{
322
+			Name:   "fork",
323
+			Action: specs.ActAllow,
324
+			Args:   []specs.Arg{},
325
+		},
326
+		{
327
+			Name:   "fremovexattr",
328
+			Action: specs.ActAllow,
329
+			Args:   []specs.Arg{},
330
+		},
331
+		{
332
+			Name:   "fsetxattr",
333
+			Action: specs.ActAllow,
334
+			Args:   []specs.Arg{},
335
+		},
336
+		{
337
+			Name:   "fstat",
338
+			Action: specs.ActAllow,
339
+			Args:   []specs.Arg{},
340
+		},
341
+		{
342
+			Name:   "fstat64",
343
+			Action: specs.ActAllow,
344
+			Args:   []specs.Arg{},
345
+		},
346
+		{
347
+			Name:   "fstatat64",
348
+			Action: specs.ActAllow,
349
+			Args:   []specs.Arg{},
350
+		},
351
+		{
352
+			Name:   "fstatfs",
353
+			Action: specs.ActAllow,
354
+			Args:   []specs.Arg{},
355
+		},
356
+		{
357
+			Name:   "fstatfs64",
358
+			Action: specs.ActAllow,
359
+			Args:   []specs.Arg{},
360
+		},
361
+		{
362
+			Name:   "fsync",
363
+			Action: specs.ActAllow,
364
+			Args:   []specs.Arg{},
365
+		},
366
+		{
367
+			Name:   "ftruncate",
368
+			Action: specs.ActAllow,
369
+			Args:   []specs.Arg{},
370
+		},
371
+		{
372
+			Name:   "ftruncate64",
373
+			Action: specs.ActAllow,
374
+			Args:   []specs.Arg{},
375
+		},
376
+		{
377
+			Name:   "futex",
378
+			Action: specs.ActAllow,
379
+			Args:   []specs.Arg{},
380
+		},
381
+		{
382
+			Name:   "futimesat",
383
+			Action: specs.ActAllow,
384
+			Args:   []specs.Arg{},
385
+		},
386
+		{
387
+			Name:   "getcpu",
388
+			Action: specs.ActAllow,
389
+			Args:   []specs.Arg{},
390
+		},
391
+		{
392
+			Name:   "getcwd",
393
+			Action: specs.ActAllow,
394
+			Args:   []specs.Arg{},
395
+		},
396
+		{
397
+			Name:   "getdents",
398
+			Action: specs.ActAllow,
399
+			Args:   []specs.Arg{},
400
+		},
401
+		{
402
+			Name:   "getdents64",
403
+			Action: specs.ActAllow,
404
+			Args:   []specs.Arg{},
405
+		},
406
+		{
407
+			Name:   "getegid",
408
+			Action: specs.ActAllow,
409
+			Args:   []specs.Arg{},
410
+		},
411
+		{
412
+			Name:   "getegid32",
413
+			Action: specs.ActAllow,
414
+			Args:   []specs.Arg{},
415
+		},
416
+		{
417
+			Name:   "geteuid",
418
+			Action: specs.ActAllow,
419
+			Args:   []specs.Arg{},
420
+		},
421
+		{
422
+			Name:   "geteuid32",
423
+			Action: specs.ActAllow,
424
+			Args:   []specs.Arg{},
425
+		},
426
+		{
427
+			Name:   "getgid",
428
+			Action: specs.ActAllow,
429
+			Args:   []specs.Arg{},
430
+		},
431
+		{
432
+			Name:   "getgid32",
433
+			Action: specs.ActAllow,
434
+			Args:   []specs.Arg{},
435
+		},
436
+		{
437
+			Name:   "getgroups",
438
+			Action: specs.ActAllow,
439
+			Args:   []specs.Arg{},
440
+		},
441
+		{
442
+			Name:   "getgroups32",
443
+			Action: specs.ActAllow,
444
+			Args:   []specs.Arg{},
445
+		},
446
+		{
447
+			Name:   "getitimer",
448
+			Action: specs.ActAllow,
449
+			Args:   []specs.Arg{},
450
+		},
451
+		{
452
+			Name:   "getpeername",
453
+			Action: specs.ActAllow,
454
+			Args:   []specs.Arg{},
455
+		},
456
+		{
457
+			Name:   "getpgid",
458
+			Action: specs.ActAllow,
459
+			Args:   []specs.Arg{},
460
+		},
461
+		{
462
+			Name:   "getpgrp",
463
+			Action: specs.ActAllow,
464
+			Args:   []specs.Arg{},
465
+		},
466
+		{
467
+			Name:   "getpid",
468
+			Action: specs.ActAllow,
469
+			Args:   []specs.Arg{},
470
+		},
471
+		{
472
+			Name:   "getppid",
473
+			Action: specs.ActAllow,
474
+			Args:   []specs.Arg{},
475
+		},
476
+		{
477
+			Name:   "getpriority",
478
+			Action: specs.ActAllow,
479
+			Args:   []specs.Arg{},
480
+		},
481
+		{
482
+			Name:   "getrandom",
483
+			Action: specs.ActAllow,
484
+			Args:   []specs.Arg{},
485
+		},
486
+		{
487
+			Name:   "getresgid",
488
+			Action: specs.ActAllow,
489
+			Args:   []specs.Arg{},
490
+		},
491
+		{
492
+			Name:   "getresgid32",
493
+			Action: specs.ActAllow,
494
+			Args:   []specs.Arg{},
495
+		},
496
+		{
497
+			Name:   "getresuid",
498
+			Action: specs.ActAllow,
499
+			Args:   []specs.Arg{},
500
+		},
501
+		{
502
+			Name:   "getresuid32",
503
+			Action: specs.ActAllow,
504
+			Args:   []specs.Arg{},
505
+		},
506
+		{
507
+			Name:   "getrlimit",
508
+			Action: specs.ActAllow,
509
+			Args:   []specs.Arg{},
510
+		},
511
+		{
512
+			Name:   "get_robust_list",
513
+			Action: specs.ActAllow,
514
+			Args:   []specs.Arg{},
515
+		},
516
+		{
517
+			Name:   "getrusage",
518
+			Action: specs.ActAllow,
519
+			Args:   []specs.Arg{},
520
+		},
521
+		{
522
+			Name:   "getsid",
523
+			Action: specs.ActAllow,
524
+			Args:   []specs.Arg{},
525
+		},
526
+		{
527
+			Name:   "getsockname",
528
+			Action: specs.ActAllow,
529
+			Args:   []specs.Arg{},
530
+		},
531
+		{
532
+			Name:   "getsockopt",
533
+			Action: specs.ActAllow,
534
+			Args:   []specs.Arg{},
535
+		},
536
+		{
537
+			Name:   "get_thread_area",
538
+			Action: specs.ActAllow,
539
+			Args:   []specs.Arg{},
540
+		},
541
+		{
542
+			Name:   "gettid",
543
+			Action: specs.ActAllow,
544
+			Args:   []specs.Arg{},
545
+		},
546
+		{
547
+			Name:   "gettimeofday",
548
+			Action: specs.ActAllow,
549
+			Args:   []specs.Arg{},
550
+		},
551
+		{
552
+			Name:   "getuid",
553
+			Action: specs.ActAllow,
554
+			Args:   []specs.Arg{},
555
+		},
556
+		{
557
+			Name:   "getuid32",
558
+			Action: specs.ActAllow,
559
+			Args:   []specs.Arg{},
560
+		},
561
+		{
562
+			Name:   "getxattr",
563
+			Action: specs.ActAllow,
564
+			Args:   []specs.Arg{},
565
+		},
566
+		{
567
+			Name:   "inotify_add_watch",
568
+			Action: specs.ActAllow,
569
+			Args:   []specs.Arg{},
570
+		},
571
+		{
572
+			Name:   "inotify_init",
573
+			Action: specs.ActAllow,
574
+			Args:   []specs.Arg{},
575
+		},
576
+		{
577
+			Name:   "inotify_init1",
578
+			Action: specs.ActAllow,
579
+			Args:   []specs.Arg{},
580
+		},
581
+		{
582
+			Name:   "inotify_rm_watch",
583
+			Action: specs.ActAllow,
584
+			Args:   []specs.Arg{},
585
+		},
586
+		{
587
+			Name:   "io_cancel",
588
+			Action: specs.ActAllow,
589
+			Args:   []specs.Arg{},
590
+		},
591
+		{
592
+			Name:   "ioctl",
593
+			Action: specs.ActAllow,
594
+			Args:   []specs.Arg{},
595
+		},
596
+		{
597
+			Name:   "io_destroy",
598
+			Action: specs.ActAllow,
599
+			Args:   []specs.Arg{},
600
+		},
601
+		{
602
+			Name:   "io_getevents",
603
+			Action: specs.ActAllow,
604
+			Args:   []specs.Arg{},
605
+		},
606
+		{
607
+			Name:   "ioprio_get",
608
+			Action: specs.ActAllow,
609
+			Args:   []specs.Arg{},
610
+		},
611
+		{
612
+			Name:   "ioprio_set",
613
+			Action: specs.ActAllow,
614
+			Args:   []specs.Arg{},
615
+		},
616
+		{
617
+			Name:   "io_setup",
618
+			Action: specs.ActAllow,
619
+			Args:   []specs.Arg{},
620
+		},
621
+		{
622
+			Name:   "io_submit",
623
+			Action: specs.ActAllow,
624
+			Args:   []specs.Arg{},
625
+		},
626
+		{
627
+			Name:   "kill",
628
+			Action: specs.ActAllow,
629
+			Args:   []specs.Arg{},
630
+		},
631
+		{
632
+			Name:   "lchown",
633
+			Action: specs.ActAllow,
634
+			Args:   []specs.Arg{},
635
+		},
636
+		{
637
+			Name:   "lchown32",
638
+			Action: specs.ActAllow,
639
+			Args:   []specs.Arg{},
640
+		},
641
+		{
642
+			Name:   "lgetxattr",
643
+			Action: specs.ActAllow,
644
+			Args:   []specs.Arg{},
645
+		},
646
+		{
647
+			Name:   "link",
648
+			Action: specs.ActAllow,
649
+			Args:   []specs.Arg{},
650
+		},
651
+		{
652
+			Name:   "linkat",
653
+			Action: specs.ActAllow,
654
+			Args:   []specs.Arg{},
655
+		},
656
+		{
657
+			Name:   "listen",
658
+			Action: specs.ActAllow,
659
+			Args:   []specs.Arg{},
660
+		},
661
+		{
662
+			Name:   "listxattr",
663
+			Action: specs.ActAllow,
664
+			Args:   []specs.Arg{},
665
+		},
666
+		{
667
+			Name:   "llistxattr",
668
+			Action: specs.ActAllow,
669
+			Args:   []specs.Arg{},
670
+		},
671
+		{
672
+			Name:   "_llseek",
673
+			Action: specs.ActAllow,
674
+			Args:   []specs.Arg{},
675
+		},
676
+		{
677
+			Name:   "lremovexattr",
678
+			Action: specs.ActAllow,
679
+			Args:   []specs.Arg{},
680
+		},
681
+		{
682
+			Name:   "lseek",
683
+			Action: specs.ActAllow,
684
+			Args:   []specs.Arg{},
685
+		},
686
+		{
687
+			Name:   "lsetxattr",
688
+			Action: specs.ActAllow,
689
+			Args:   []specs.Arg{},
690
+		},
691
+		{
692
+			Name:   "lstat",
693
+			Action: specs.ActAllow,
694
+			Args:   []specs.Arg{},
695
+		},
696
+		{
697
+			Name:   "lstat64",
698
+			Action: specs.ActAllow,
699
+			Args:   []specs.Arg{},
700
+		},
701
+		{
702
+			Name:   "madvise",
703
+			Action: specs.ActAllow,
704
+			Args:   []specs.Arg{},
705
+		},
706
+		{
707
+			Name:   "memfd_create",
708
+			Action: specs.ActAllow,
709
+			Args:   []specs.Arg{},
710
+		},
711
+		{
712
+			Name:   "mincore",
713
+			Action: specs.ActAllow,
714
+			Args:   []specs.Arg{},
715
+		},
716
+		{
717
+			Name:   "mkdir",
718
+			Action: specs.ActAllow,
719
+			Args:   []specs.Arg{},
720
+		},
721
+		{
722
+			Name:   "mkdirat",
723
+			Action: specs.ActAllow,
724
+			Args:   []specs.Arg{},
725
+		},
726
+		{
727
+			Name:   "mknod",
728
+			Action: specs.ActAllow,
729
+			Args:   []specs.Arg{},
730
+		},
731
+		{
732
+			Name:   "mknodat",
733
+			Action: specs.ActAllow,
734
+			Args:   []specs.Arg{},
735
+		},
736
+		{
737
+			Name:   "mlock",
738
+			Action: specs.ActAllow,
739
+			Args:   []specs.Arg{},
740
+		},
741
+		{
742
+			Name:   "mlockall",
743
+			Action: specs.ActAllow,
744
+			Args:   []specs.Arg{},
745
+		},
746
+		{
747
+			Name:   "mmap",
748
+			Action: specs.ActAllow,
749
+			Args:   []specs.Arg{},
750
+		},
751
+		{
752
+			Name:   "mmap2",
753
+			Action: specs.ActAllow,
754
+			Args:   []specs.Arg{},
755
+		},
756
+		{
757
+			Name:   "mprotect",
758
+			Action: specs.ActAllow,
759
+			Args:   []specs.Arg{},
760
+		},
761
+		{
762
+			Name:   "mq_getsetattr",
763
+			Action: specs.ActAllow,
764
+			Args:   []specs.Arg{},
765
+		},
766
+		{
767
+			Name:   "mq_notify",
768
+			Action: specs.ActAllow,
769
+			Args:   []specs.Arg{},
770
+		},
771
+		{
772
+			Name:   "mq_open",
773
+			Action: specs.ActAllow,
774
+			Args:   []specs.Arg{},
775
+		},
776
+		{
777
+			Name:   "mq_timedreceive",
778
+			Action: specs.ActAllow,
779
+			Args:   []specs.Arg{},
780
+		},
781
+		{
782
+			Name:   "mq_timedsend",
783
+			Action: specs.ActAllow,
784
+			Args:   []specs.Arg{},
785
+		},
786
+		{
787
+			Name:   "mq_unlink",
788
+			Action: specs.ActAllow,
789
+			Args:   []specs.Arg{},
790
+		},
791
+		{
792
+			Name:   "mremap",
793
+			Action: specs.ActAllow,
794
+			Args:   []specs.Arg{},
795
+		},
796
+		{
797
+			Name:   "msgctl",
798
+			Action: specs.ActAllow,
799
+			Args:   []specs.Arg{},
800
+		},
801
+		{
802
+			Name:   "msgget",
803
+			Action: specs.ActAllow,
804
+			Args:   []specs.Arg{},
805
+		},
806
+		{
807
+			Name:   "msgrcv",
808
+			Action: specs.ActAllow,
809
+			Args:   []specs.Arg{},
810
+		},
811
+		{
812
+			Name:   "msgsnd",
813
+			Action: specs.ActAllow,
814
+			Args:   []specs.Arg{},
815
+		},
816
+		{
817
+			Name:   "msync",
818
+			Action: specs.ActAllow,
819
+			Args:   []specs.Arg{},
820
+		},
821
+		{
822
+			Name:   "munlock",
823
+			Action: specs.ActAllow,
824
+			Args:   []specs.Arg{},
825
+		},
826
+		{
827
+			Name:   "munlockall",
828
+			Action: specs.ActAllow,
829
+			Args:   []specs.Arg{},
830
+		},
831
+		{
832
+			Name:   "munmap",
833
+			Action: specs.ActAllow,
834
+			Args:   []specs.Arg{},
835
+		},
836
+		{
837
+			Name:   "nanosleep",
838
+			Action: specs.ActAllow,
839
+			Args:   []specs.Arg{},
840
+		},
841
+		{
842
+			Name:   "newfstatat",
843
+			Action: specs.ActAllow,
844
+			Args:   []specs.Arg{},
845
+		},
846
+		{
847
+			Name:   "_newselect",
848
+			Action: specs.ActAllow,
849
+			Args:   []specs.Arg{},
850
+		},
851
+		{
852
+			Name:   "open",
853
+			Action: specs.ActAllow,
854
+			Args:   []specs.Arg{},
855
+		},
856
+		{
857
+			Name:   "openat",
858
+			Action: specs.ActAllow,
859
+			Args:   []specs.Arg{},
860
+		},
861
+		{
862
+			Name:   "pause",
863
+			Action: specs.ActAllow,
864
+			Args:   []specs.Arg{},
865
+		},
866
+		{
867
+			Name:   "pipe",
868
+			Action: specs.ActAllow,
869
+			Args:   []specs.Arg{},
870
+		},
871
+		{
872
+			Name:   "pipe2",
873
+			Action: specs.ActAllow,
874
+			Args:   []specs.Arg{},
875
+		},
876
+		{
877
+			Name:   "poll",
878
+			Action: specs.ActAllow,
879
+			Args:   []specs.Arg{},
880
+		},
881
+		{
882
+			Name:   "ppoll",
883
+			Action: specs.ActAllow,
884
+			Args:   []specs.Arg{},
885
+		},
886
+		{
887
+			Name:   "prctl",
888
+			Action: specs.ActAllow,
889
+			Args:   []specs.Arg{},
890
+		},
891
+		{
892
+			Name:   "pread64",
893
+			Action: specs.ActAllow,
894
+			Args:   []specs.Arg{},
895
+		},
896
+		{
897
+			Name:   "preadv",
898
+			Action: specs.ActAllow,
899
+			Args:   []specs.Arg{},
900
+		},
901
+		{
902
+			Name:   "prlimit64",
903
+			Action: specs.ActAllow,
904
+			Args:   []specs.Arg{},
905
+		},
906
+		{
907
+			Name:   "pselect6",
908
+			Action: specs.ActAllow,
909
+			Args:   []specs.Arg{},
910
+		},
911
+		{
912
+			Name:   "pwrite64",
913
+			Action: specs.ActAllow,
914
+			Args:   []specs.Arg{},
915
+		},
916
+		{
917
+			Name:   "pwritev",
918
+			Action: specs.ActAllow,
919
+			Args:   []specs.Arg{},
920
+		},
921
+		{
922
+			Name:   "read",
923
+			Action: specs.ActAllow,
924
+			Args:   []specs.Arg{},
925
+		},
926
+		{
927
+			Name:   "readahead",
928
+			Action: specs.ActAllow,
929
+			Args:   []specs.Arg{},
930
+		},
931
+		{
932
+			Name:   "readlink",
933
+			Action: specs.ActAllow,
934
+			Args:   []specs.Arg{},
935
+		},
936
+		{
937
+			Name:   "readlinkat",
938
+			Action: specs.ActAllow,
939
+			Args:   []specs.Arg{},
940
+		},
941
+		{
942
+			Name:   "readv",
943
+			Action: specs.ActAllow,
944
+			Args:   []specs.Arg{},
945
+		},
946
+		{
947
+			Name:   "recv",
948
+			Action: specs.ActAllow,
949
+			Args:   []specs.Arg{},
950
+		},
951
+		{
952
+			Name:   "recvfrom",
953
+			Action: specs.ActAllow,
954
+			Args:   []specs.Arg{},
955
+		},
956
+		{
957
+			Name:   "recvmmsg",
958
+			Action: specs.ActAllow,
959
+			Args:   []specs.Arg{},
960
+		},
961
+		{
962
+			Name:   "recvmsg",
963
+			Action: specs.ActAllow,
964
+			Args:   []specs.Arg{},
965
+		},
966
+		{
967
+			Name:   "remap_file_pages",
968
+			Action: specs.ActAllow,
969
+			Args:   []specs.Arg{},
970
+		},
971
+		{
972
+			Name:   "removexattr",
973
+			Action: specs.ActAllow,
974
+			Args:   []specs.Arg{},
975
+		},
976
+		{
977
+			Name:   "rename",
978
+			Action: specs.ActAllow,
979
+			Args:   []specs.Arg{},
980
+		},
981
+		{
982
+			Name:   "renameat",
983
+			Action: specs.ActAllow,
984
+			Args:   []specs.Arg{},
985
+		},
986
+		{
987
+			Name:   "renameat2",
988
+			Action: specs.ActAllow,
989
+			Args:   []specs.Arg{},
990
+		},
991
+		{
992
+			Name:   "rmdir",
993
+			Action: specs.ActAllow,
994
+			Args:   []specs.Arg{},
995
+		},
996
+		{
997
+			Name:   "rt_sigaction",
998
+			Action: specs.ActAllow,
999
+			Args:   []specs.Arg{},
1000
+		},
1001
+		{
1002
+			Name:   "rt_sigpending",
1003
+			Action: specs.ActAllow,
1004
+			Args:   []specs.Arg{},
1005
+		},
1006
+		{
1007
+			Name:   "rt_sigprocmask",
1008
+			Action: specs.ActAllow,
1009
+			Args:   []specs.Arg{},
1010
+		},
1011
+		{
1012
+			Name:   "rt_sigqueueinfo",
1013
+			Action: specs.ActAllow,
1014
+			Args:   []specs.Arg{},
1015
+		},
1016
+		{
1017
+			Name:   "rt_sigreturn",
1018
+			Action: specs.ActAllow,
1019
+			Args:   []specs.Arg{},
1020
+		},
1021
+		{
1022
+			Name:   "rt_sigsuspend",
1023
+			Action: specs.ActAllow,
1024
+			Args:   []specs.Arg{},
1025
+		},
1026
+		{
1027
+			Name:   "rt_sigtimedwait",
1028
+			Action: specs.ActAllow,
1029
+			Args:   []specs.Arg{},
1030
+		},
1031
+		{
1032
+			Name:   "rt_tgsigqueueinfo",
1033
+			Action: specs.ActAllow,
1034
+			Args:   []specs.Arg{},
1035
+		},
1036
+		{
1037
+			Name:   "sched_getaffinity",
1038
+			Action: specs.ActAllow,
1039
+			Args:   []specs.Arg{},
1040
+		},
1041
+		{
1042
+			Name:   "sched_getattr",
1043
+			Action: specs.ActAllow,
1044
+			Args:   []specs.Arg{},
1045
+		},
1046
+		{
1047
+			Name:   "sched_getparam",
1048
+			Action: specs.ActAllow,
1049
+			Args:   []specs.Arg{},
1050
+		},
1051
+		{
1052
+			Name:   "sched_get_priority_max",
1053
+			Action: specs.ActAllow,
1054
+			Args:   []specs.Arg{},
1055
+		},
1056
+		{
1057
+			Name:   "sched_get_priority_min",
1058
+			Action: specs.ActAllow,
1059
+			Args:   []specs.Arg{},
1060
+		},
1061
+		{
1062
+			Name:   "sched_getscheduler",
1063
+			Action: specs.ActAllow,
1064
+			Args:   []specs.Arg{},
1065
+		},
1066
+		{
1067
+			Name:   "sched_rr_get_interval",
1068
+			Action: specs.ActAllow,
1069
+			Args:   []specs.Arg{},
1070
+		},
1071
+		{
1072
+			Name:   "sched_setaffinity",
1073
+			Action: specs.ActAllow,
1074
+			Args:   []specs.Arg{},
1075
+		},
1076
+		{
1077
+			Name:   "sched_setattr",
1078
+			Action: specs.ActAllow,
1079
+			Args:   []specs.Arg{},
1080
+		},
1081
+		{
1082
+			Name:   "sched_setparam",
1083
+			Action: specs.ActAllow,
1084
+			Args:   []specs.Arg{},
1085
+		},
1086
+		{
1087
+			Name:   "sched_setscheduler",
1088
+			Action: specs.ActAllow,
1089
+			Args:   []specs.Arg{},
1090
+		},
1091
+		{
1092
+			Name:   "sched_yield",
1093
+			Action: specs.ActAllow,
1094
+			Args:   []specs.Arg{},
1095
+		},
1096
+		{
1097
+			Name:   "seccomp",
1098
+			Action: specs.ActAllow,
1099
+			Args:   []specs.Arg{},
1100
+		},
1101
+		{
1102
+			Name:   "select",
1103
+			Action: specs.ActAllow,
1104
+			Args:   []specs.Arg{},
1105
+		},
1106
+		{
1107
+			Name:   "semctl",
1108
+			Action: specs.ActAllow,
1109
+			Args:   []specs.Arg{},
1110
+		},
1111
+		{
1112
+			Name:   "semget",
1113
+			Action: specs.ActAllow,
1114
+			Args:   []specs.Arg{},
1115
+		},
1116
+		{
1117
+			Name:   "semop",
1118
+			Action: specs.ActAllow,
1119
+			Args:   []specs.Arg{},
1120
+		},
1121
+		{
1122
+			Name:   "semtimedop",
1123
+			Action: specs.ActAllow,
1124
+			Args:   []specs.Arg{},
1125
+		},
1126
+		{
1127
+			Name:   "send",
1128
+			Action: specs.ActAllow,
1129
+			Args:   []specs.Arg{},
1130
+		},
1131
+		{
1132
+			Name:   "sendfile",
1133
+			Action: specs.ActAllow,
1134
+			Args:   []specs.Arg{},
1135
+		},
1136
+		{
1137
+			Name:   "sendfile64",
1138
+			Action: specs.ActAllow,
1139
+			Args:   []specs.Arg{},
1140
+		},
1141
+		{
1142
+			Name:   "sendmmsg",
1143
+			Action: specs.ActAllow,
1144
+			Args:   []specs.Arg{},
1145
+		},
1146
+		{
1147
+			Name:   "sendmsg",
1148
+			Action: specs.ActAllow,
1149
+			Args:   []specs.Arg{},
1150
+		},
1151
+		{
1152
+			Name:   "sendto",
1153
+			Action: specs.ActAllow,
1154
+			Args:   []specs.Arg{},
1155
+		},
1156
+		{
1157
+			Name:   "setdomainname",
1158
+			Action: specs.ActAllow,
1159
+			Args:   []specs.Arg{},
1160
+		},
1161
+		{
1162
+			Name:   "setfsgid",
1163
+			Action: specs.ActAllow,
1164
+			Args:   []specs.Arg{},
1165
+		},
1166
+		{
1167
+			Name:   "setfsgid32",
1168
+			Action: specs.ActAllow,
1169
+			Args:   []specs.Arg{},
1170
+		},
1171
+		{
1172
+			Name:   "setfsuid",
1173
+			Action: specs.ActAllow,
1174
+			Args:   []specs.Arg{},
1175
+		},
1176
+		{
1177
+			Name:   "setfsuid32",
1178
+			Action: specs.ActAllow,
1179
+			Args:   []specs.Arg{},
1180
+		},
1181
+		{
1182
+			Name:   "setgid",
1183
+			Action: specs.ActAllow,
1184
+			Args:   []specs.Arg{},
1185
+		},
1186
+		{
1187
+			Name:   "setgid32",
1188
+			Action: specs.ActAllow,
1189
+			Args:   []specs.Arg{},
1190
+		},
1191
+		{
1192
+			Name:   "setgroups",
1193
+			Action: specs.ActAllow,
1194
+			Args:   []specs.Arg{},
1195
+		},
1196
+		{
1197
+			Name:   "setgroups32",
1198
+			Action: specs.ActAllow,
1199
+			Args:   []specs.Arg{},
1200
+		},
1201
+		{
1202
+			Name:   "sethostname",
1203
+			Action: specs.ActAllow,
1204
+			Args:   []specs.Arg{},
1205
+		},
1206
+		{
1207
+			Name:   "setitimer",
1208
+			Action: specs.ActAllow,
1209
+			Args:   []specs.Arg{},
1210
+		},
1211
+		{
1212
+			Name:   "setpgid",
1213
+			Action: specs.ActAllow,
1214
+			Args:   []specs.Arg{},
1215
+		},
1216
+		{
1217
+			Name:   "setpriority",
1218
+			Action: specs.ActAllow,
1219
+			Args:   []specs.Arg{},
1220
+		},
1221
+		{
1222
+			Name:   "setregid",
1223
+			Action: specs.ActAllow,
1224
+			Args:   []specs.Arg{},
1225
+		},
1226
+		{
1227
+			Name:   "setregid32",
1228
+			Action: specs.ActAllow,
1229
+			Args:   []specs.Arg{},
1230
+		},
1231
+		{
1232
+			Name:   "setresgid",
1233
+			Action: specs.ActAllow,
1234
+			Args:   []specs.Arg{},
1235
+		},
1236
+		{
1237
+			Name:   "setresgid32",
1238
+			Action: specs.ActAllow,
1239
+			Args:   []specs.Arg{},
1240
+		},
1241
+		{
1242
+			Name:   "setresuid",
1243
+			Action: specs.ActAllow,
1244
+			Args:   []specs.Arg{},
1245
+		},
1246
+		{
1247
+			Name:   "setresuid32",
1248
+			Action: specs.ActAllow,
1249
+			Args:   []specs.Arg{},
1250
+		},
1251
+		{
1252
+			Name:   "setreuid",
1253
+			Action: specs.ActAllow,
1254
+			Args:   []specs.Arg{},
1255
+		},
1256
+		{
1257
+			Name:   "setreuid32",
1258
+			Action: specs.ActAllow,
1259
+			Args:   []specs.Arg{},
1260
+		},
1261
+		{
1262
+			Name:   "setrlimit",
1263
+			Action: specs.ActAllow,
1264
+			Args:   []specs.Arg{},
1265
+		},
1266
+		{
1267
+			Name:   "set_robust_list",
1268
+			Action: specs.ActAllow,
1269
+			Args:   []specs.Arg{},
1270
+		},
1271
+		{
1272
+			Name:   "setsid",
1273
+			Action: specs.ActAllow,
1274
+			Args:   []specs.Arg{},
1275
+		},
1276
+		{
1277
+			Name:   "setsockopt",
1278
+			Action: specs.ActAllow,
1279
+			Args:   []specs.Arg{},
1280
+		},
1281
+		{
1282
+			Name:   "set_thread_area",
1283
+			Action: specs.ActAllow,
1284
+			Args:   []specs.Arg{},
1285
+		},
1286
+		{
1287
+			Name:   "set_tid_address",
1288
+			Action: specs.ActAllow,
1289
+			Args:   []specs.Arg{},
1290
+		},
1291
+		{
1292
+			Name:   "setuid",
1293
+			Action: specs.ActAllow,
1294
+			Args:   []specs.Arg{},
1295
+		},
1296
+		{
1297
+			Name:   "setuid32",
1298
+			Action: specs.ActAllow,
1299
+			Args:   []specs.Arg{},
1300
+		},
1301
+		{
1302
+			Name:   "setxattr",
1303
+			Action: specs.ActAllow,
1304
+			Args:   []specs.Arg{},
1305
+		},
1306
+		{
1307
+			Name:   "shmat",
1308
+			Action: specs.ActAllow,
1309
+			Args:   []specs.Arg{},
1310
+		},
1311
+		{
1312
+			Name:   "shmctl",
1313
+			Action: specs.ActAllow,
1314
+			Args:   []specs.Arg{},
1315
+		},
1316
+		{
1317
+			Name:   "shmdt",
1318
+			Action: specs.ActAllow,
1319
+			Args:   []specs.Arg{},
1320
+		},
1321
+		{
1322
+			Name:   "shmget",
1323
+			Action: specs.ActAllow,
1324
+			Args:   []specs.Arg{},
1325
+		},
1326
+		{
1327
+			Name:   "shutdown",
1328
+			Action: specs.ActAllow,
1329
+			Args:   []specs.Arg{},
1330
+		},
1331
+		{
1332
+			Name:   "sigaltstack",
1333
+			Action: specs.ActAllow,
1334
+			Args:   []specs.Arg{},
1335
+		},
1336
+		{
1337
+			Name:   "signalfd",
1338
+			Action: specs.ActAllow,
1339
+			Args:   []specs.Arg{},
1340
+		},
1341
+		{
1342
+			Name:   "signalfd4",
1343
+			Action: specs.ActAllow,
1344
+			Args:   []specs.Arg{},
1345
+		},
1346
+		{
1347
+			Name:   "sigreturn",
1348
+			Action: specs.ActAllow,
1349
+			Args:   []specs.Arg{},
1350
+		},
1351
+		{
1352
+			Name:   "socket",
1353
+			Action: specs.ActAllow,
1354
+			Args:   []specs.Arg{},
1355
+		},
1356
+		{
1357
+			Name:   "socketpair",
1358
+			Action: specs.ActAllow,
1359
+			Args:   []specs.Arg{},
1360
+		},
1361
+		{
1362
+			Name:   "splice",
1363
+			Action: specs.ActAllow,
1364
+			Args:   []specs.Arg{},
1365
+		},
1366
+		{
1367
+			Name:   "stat",
1368
+			Action: specs.ActAllow,
1369
+			Args:   []specs.Arg{},
1370
+		},
1371
+		{
1372
+			Name:   "stat64",
1373
+			Action: specs.ActAllow,
1374
+			Args:   []specs.Arg{},
1375
+		},
1376
+		{
1377
+			Name:   "statfs",
1378
+			Action: specs.ActAllow,
1379
+			Args:   []specs.Arg{},
1380
+		},
1381
+		{
1382
+			Name:   "statfs64",
1383
+			Action: specs.ActAllow,
1384
+			Args:   []specs.Arg{},
1385
+		},
1386
+		{
1387
+			Name:   "symlink",
1388
+			Action: specs.ActAllow,
1389
+			Args:   []specs.Arg{},
1390
+		},
1391
+		{
1392
+			Name:   "symlinkat",
1393
+			Action: specs.ActAllow,
1394
+			Args:   []specs.Arg{},
1395
+		},
1396
+		{
1397
+			Name:   "sync",
1398
+			Action: specs.ActAllow,
1399
+			Args:   []specs.Arg{},
1400
+		},
1401
+		{
1402
+			Name:   "sync_file_range",
1403
+			Action: specs.ActAllow,
1404
+			Args:   []specs.Arg{},
1405
+		},
1406
+		{
1407
+			Name:   "syncfs",
1408
+			Action: specs.ActAllow,
1409
+			Args:   []specs.Arg{},
1410
+		},
1411
+		{
1412
+			Name:   "sysinfo",
1413
+			Action: specs.ActAllow,
1414
+			Args:   []specs.Arg{},
1415
+		},
1416
+		{
1417
+			Name:   "syslog",
1418
+			Action: specs.ActAllow,
1419
+			Args:   []specs.Arg{},
1420
+		},
1421
+		{
1422
+			Name:   "tee",
1423
+			Action: specs.ActAllow,
1424
+			Args:   []specs.Arg{},
1425
+		},
1426
+		{
1427
+			Name:   "tgkill",
1428
+			Action: specs.ActAllow,
1429
+			Args:   []specs.Arg{},
1430
+		},
1431
+		{
1432
+			Name:   "time",
1433
+			Action: specs.ActAllow,
1434
+			Args:   []specs.Arg{},
1435
+		},
1436
+		{
1437
+			Name:   "timer_create",
1438
+			Action: specs.ActAllow,
1439
+			Args:   []specs.Arg{},
1440
+		},
1441
+		{
1442
+			Name:   "timer_delete",
1443
+			Action: specs.ActAllow,
1444
+			Args:   []specs.Arg{},
1445
+		},
1446
+		{
1447
+			Name:   "timerfd_create",
1448
+			Action: specs.ActAllow,
1449
+			Args:   []specs.Arg{},
1450
+		},
1451
+		{
1452
+			Name:   "timerfd_gettime",
1453
+			Action: specs.ActAllow,
1454
+			Args:   []specs.Arg{},
1455
+		},
1456
+		{
1457
+			Name:   "timerfd_settime",
1458
+			Action: specs.ActAllow,
1459
+			Args:   []specs.Arg{},
1460
+		},
1461
+		{
1462
+			Name:   "timer_getoverrun",
1463
+			Action: specs.ActAllow,
1464
+			Args:   []specs.Arg{},
1465
+		},
1466
+		{
1467
+			Name:   "timer_gettime",
1468
+			Action: specs.ActAllow,
1469
+			Args:   []specs.Arg{},
1470
+		},
1471
+		{
1472
+			Name:   "timer_settime",
1473
+			Action: specs.ActAllow,
1474
+			Args:   []specs.Arg{},
1475
+		},
1476
+		{
1477
+			Name:   "times",
1478
+			Action: specs.ActAllow,
1479
+			Args:   []specs.Arg{},
1480
+		},
1481
+		{
1482
+			Name:   "tkill",
1483
+			Action: specs.ActAllow,
1484
+			Args:   []specs.Arg{},
1485
+		},
1486
+		{
1487
+			Name:   "truncate",
1488
+			Action: specs.ActAllow,
1489
+			Args:   []specs.Arg{},
1490
+		},
1491
+		{
1492
+			Name:   "truncate64",
1493
+			Action: specs.ActAllow,
1494
+			Args:   []specs.Arg{},
1495
+		},
1496
+		{
1497
+			Name:   "ugetrlimit",
1498
+			Action: specs.ActAllow,
1499
+			Args:   []specs.Arg{},
1500
+		},
1501
+		{
1502
+			Name:   "umask",
1503
+			Action: specs.ActAllow,
1504
+			Args:   []specs.Arg{},
1505
+		},
1506
+		{
1507
+			Name:   "uname",
1508
+			Action: specs.ActAllow,
1509
+			Args:   []specs.Arg{},
1510
+		},
1511
+		{
1512
+			Name:   "unlink",
1513
+			Action: specs.ActAllow,
1514
+			Args:   []specs.Arg{},
1515
+		},
1516
+		{
1517
+			Name:   "unlinkat",
1518
+			Action: specs.ActAllow,
1519
+			Args:   []specs.Arg{},
1520
+		},
1521
+		{
1522
+			Name:   "utime",
1523
+			Action: specs.ActAllow,
1524
+			Args:   []specs.Arg{},
1525
+		},
1526
+		{
1527
+			Name:   "utimensat",
1528
+			Action: specs.ActAllow,
1529
+			Args:   []specs.Arg{},
1530
+		},
1531
+		{
1532
+			Name:   "utimes",
1533
+			Action: specs.ActAllow,
1534
+			Args:   []specs.Arg{},
1535
+		},
1536
+		{
1537
+			Name:   "vfork",
1538
+			Action: specs.ActAllow,
1539
+			Args:   []specs.Arg{},
1540
+		},
1541
+		{
1542
+			Name:   "vhangup",
1543
+			Action: specs.ActAllow,
1544
+			Args:   []specs.Arg{},
1545
+		},
1546
+		{
1547
+			Name:   "vmsplice",
1548
+			Action: specs.ActAllow,
1549
+			Args:   []specs.Arg{},
1550
+		},
1551
+		{
1552
+			Name:   "wait4",
1553
+			Action: specs.ActAllow,
1554
+			Args:   []specs.Arg{},
1555
+		},
1556
+		{
1557
+			Name:   "waitid",
1558
+			Action: specs.ActAllow,
1559
+			Args:   []specs.Arg{},
1560
+		},
1561
+		{
1562
+			Name:   "waitpid",
1563
+			Action: specs.ActAllow,
1564
+			Args:   []specs.Arg{},
1565
+		},
1566
+		{
1567
+			Name:   "write",
1568
+			Action: specs.ActAllow,
1569
+			Args:   []specs.Arg{},
1570
+		},
1571
+		{
1572
+			Name:   "writev",
1573
+			Action: specs.ActAllow,
1574
+			Args:   []specs.Arg{},
1575
+		},
1576
+		// i386 specific syscalls
1577
+		{
1578
+			Name:   "modify_ldt",
1579
+			Action: specs.ActAllow,
1580
+			Args:   []specs.Arg{},
1581
+		},
1582
+		// arm specific syscalls
1583
+		{
1584
+			Name:   "breakpoint",
1585
+			Action: specs.ActAllow,
1586
+			Args:   []specs.Arg{},
1587
+		},
1588
+		{
1589
+			Name:   "cacheflush",
1590
+			Action: specs.ActAllow,
1591
+			Args:   []specs.Arg{},
1592
+		},
1593
+		{
1594
+			Name:   "set_tls",
1595
+			Action: specs.ActAllow,
1596
+			Args:   []specs.Arg{},
1597
+		},
1598
+	},
1599
+}
0 1600
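
Every rule in the whitelist above is unconditional: Args is an empty slice, so the syscall is allowed regardless of its arguments. The specs.Arg type used throughout also supports per-argument filtering. A sketch of what a conditional rule would look like (the personality/PER_LINUX values are illustrative, not part of this commit's profile):

    package example

    import specs "github.com/opencontainers/specs/specs-go"

    // Hypothetical conditional rule: allow personality(2) only when its
    // first argument equals PER_LINUX (0x0). The default profile above
    // never does this; it always passes an empty Args slice.
    var personalityRule = specs.Syscall{
        Name:   "personality",
        Action: specs.ActAllow,
        Args: []specs.Arg{
            {
                Index: 0,
                Value: 0x0, // PER_LINUX, shown for illustration
                Op:    specs.OpEqualTo,
            },
        },
    }
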
new file mode 100644
... ...
@@ -0,0 +1,12 @@
0
+// +build !seccomp,!windows
1
+
2
+package daemon
3
+
4
+import (
5
+	"github.com/docker/docker/container"
6
+	"github.com/opencontainers/specs/specs-go"
7
+)
8
+
9
+func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
10
+	return nil
11
+}
0 12
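
This stub is what keeps the daemon compiling when seccomp support is excluded: its build constraint is the complement of the linux,seccomp file that follows, so exactly one definition of setSeccomp is ever built. A build-constraint map (the windows variant is assumed; it is not part of this section):

    // seccomp_disabled.go:  // +build !seccomp,!windows  -> no-op stub above
    // seccomp_linux.go:     // +build linux,seccomp      -> real implementation below
    // seccomp_windows.go:   // +build windows            -> assumed no-op, not shown here
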
new file mode 100644
... ...
@@ -0,0 +1,100 @@
0
+// +build linux,seccomp
1
+
2
+package daemon
3
+
4
+import (
5
+	"encoding/json"
6
+	"fmt"
7
+
8
+	"github.com/Sirupsen/logrus"
9
+	"github.com/docker/docker/container"
10
+	"github.com/docker/engine-api/types"
11
+	"github.com/opencontainers/specs/specs-go"
12
+)
13
+
14
+func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
15
+	var seccomp *specs.Seccomp
16
+	var err error
17
+
18
+	if c.HostConfig.Privileged {
19
+		return nil
20
+	}
21
+
22
+	if !daemon.seccompEnabled {
23
+		if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
24
+			return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.")
25
+		}
26
+		logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.")
27
+		c.SeccompProfile = "unconfined"
28
+	}
29
+	if c.SeccompProfile == "unconfined" {
30
+		return nil
31
+	}
32
+	if c.SeccompProfile != "" {
33
+		seccomp, err = loadSeccompProfile(c.SeccompProfile)
34
+		if err != nil {
35
+			return err
36
+		}
37
+	} else {
38
+		seccomp = &defaultSeccompProfile
39
+	}
40
+
41
+	rs.Linux.Seccomp = seccomp
42
+	return nil
43
+}
44
+
45
+func loadSeccompProfile(body string) (*specs.Seccomp, error) {
46
+	var config types.Seccomp
47
+	if err := json.Unmarshal([]byte(body), &config); err != nil {
48
+		return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err)
49
+	}
50
+
51
+	return setupSeccomp(&config)
52
+}
53
+
54
+func setupSeccomp(config *types.Seccomp) (newConfig *specs.Seccomp, err error) {
55
+	if config == nil {
56
+		return nil, nil
57
+	}
58
+
59
+	// No default action specified, no syscalls listed, assume seccomp disabled
60
+	if config.DefaultAction == "" && len(config.Syscalls) == 0 {
61
+		return nil, nil
62
+	}
63
+
64
+	newConfig = &specs.Seccomp{}
65
+
66
+	// if len(config.Architectures) == 0, libseccomp will figure out the architecture to use
67
+	if len(config.Architectures) > 0 {
68
+		// newConfig.Architectures = []string{}
69
+		for _, arch := range config.Architectures {
70
+			newConfig.Architectures = append(newConfig.Architectures, specs.Arch(arch))
71
+		}
72
+	}
73
+
74
+	newConfig.DefaultAction = specs.Action(config.DefaultAction)
75
+
76
+	// Loop through all syscall blocks and convert them to libcontainer format
77
+	for _, call := range config.Syscalls {
78
+		newCall := specs.Syscall{
79
+			Name:   call.Name,
80
+			Action: specs.Action(call.Action),
81
+		}
82
+
83
+		// Loop through all the arguments of the syscall and convert them
84
+		for _, arg := range call.Args {
85
+			newArg := specs.Arg{
86
+				Index:    arg.Index,
87
+				Value:    arg.Value,
88
+				ValueTwo: arg.ValueTwo,
89
+				Op:       specs.Operator(arg.Op),
90
+			}
91
+
92
+			newCall.Args = append(newCall.Args, newArg)
93
+		}
94
+
95
+		newConfig.Syscalls = append(newConfig.Syscalls, newCall)
96
+	}
97
+
98
+	return newConfig, nil
99
+}
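
loadSeccompProfile above decodes the JSON a user supplies through --security-opt seccomp:<file>, which is how a custom profile reaches c.SeccompProfile. A minimal profile in that shape, shown as a sketch (the field names follow the engine-api types.Seccomp struct this code unmarshals into; the syscall selection is illustrative):

    // Illustrative profile, not shipped with the commit: deny everything
    // by default and allow a handful of syscalls unconditionally.
    const exampleProfile = `{
        "defaultAction": "SCMP_ACT_ERRNO",
        "syscalls": [
            {"name": "read",       "action": "SCMP_ACT_ALLOW", "args": []},
            {"name": "write",      "action": "SCMP_ACT_ALLOW", "args": []},
            {"name": "exit_group", "action": "SCMP_ACT_ALLOW", "args": []}
        ]
    }`

    // Inside package daemon this would round-trip through the code above:
    //	seccomp, err := loadSeccompProfile(exampleProfile)
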
... ...
@@ -4,10 +4,13 @@ import (
4 4
 	"fmt"
5 5
 	"net/http"
6 6
 	"runtime"
7
+	"strings"
8
+	"syscall"
7 9
 
8 10
 	"github.com/Sirupsen/logrus"
9 11
 	"github.com/docker/docker/container"
10 12
 	"github.com/docker/docker/errors"
13
+	"github.com/docker/docker/libcontainerd"
11 14
 	"github.com/docker/docker/runconfig"
12 15
 	containertypes "github.com/docker/engine-api/types/container"
13 16
 )
... ...
@@ -122,44 +125,36 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
122 122
 	if err := daemon.initializeNetworking(container); err != nil {
123 123
 		return err
124 124
 	}
125
-	linkedEnv, err := daemon.setupLinkedContainers(container)
125
+
126
+	spec, err := daemon.createSpec(container)
126 127
 	if err != nil {
127 128
 		return err
128 129
 	}
129
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
130
-	if err := container.SetupWorkingDirectory(rootUID, rootGID); err != nil {
131
-		return err
132
-	}
133
-	env := container.CreateDaemonEnvironment(linkedEnv)
134
-	if err := daemon.populateCommand(container, env); err != nil {
135
-		return err
136
-	}
137 130
 
138
-	if !container.HostConfig.IpcMode.IsContainer() && !container.HostConfig.IpcMode.IsHost() {
139
-		if err := daemon.setupIpcDirs(container); err != nil {
140
-			return err
131
+	defer daemon.LogContainerEvent(container, "start") // this is logged even on error
132
+	if err := daemon.containerd.Create(container.ID, *spec, libcontainerd.WithRestartManager(container.RestartManager(true))); err != nil {
133
+		// if we receive an internal error from the initial start of a container then lets
134
+		// return it instead of entering the restart loop
135
+		// set to 127 for container cmd not found/does not exist
136
+		if strings.Contains(err.Error(), "executable file not found") ||
137
+			strings.Contains(err.Error(), "no such file or directory") ||
138
+			strings.Contains(err.Error(), "system cannot find the file specified") {
139
+			container.ExitCode = 127
140
+			err = fmt.Errorf("Container command not found or does not exist.")
141
+		}
142
+		// set to 126 for container cmd can't be invoked errors
143
+		if strings.Contains(err.Error(), syscall.EACCES.Error()) {
144
+			container.ExitCode = 126
145
+			err = fmt.Errorf("Container command could not be invoked.")
141 146
 		}
142
-	}
143 147
 
144
-	mounts, err := daemon.setupMounts(container)
145
-	if err != nil {
148
+		container.Reset(false)
146 149
 		return err
147 150
 	}
148
-	mounts = append(mounts, container.IpcMounts()...)
149
-	mounts = append(mounts, container.TmpfsMounts()...)
150 151
 
151
-	container.Command.Mounts = mounts
152
-	if err := daemon.waitForStart(container); err != nil {
153
-		return err
154
-	}
155
-	container.HasBeenStartedBefore = true
156 152
 	return nil
157 153
 }
158 154
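
containerStart now folds containerd's raw error text into the conventional shell exit codes: 127 when the command cannot be found, 126 when it exists but cannot be invoked. The same checks, pulled out as a standalone helper purely for readability (a sketch, not a function the commit defines):

    package daemon

    import (
        "fmt"
        "strings"
        "syscall"
    )

    // mapStartError mirrors the string matching in containerStart above.
    func mapStartError(err error) (exitCode int, mapped error) {
        msg := err.Error()
        switch {
        case strings.Contains(msg, "executable file not found"),
            strings.Contains(msg, "no such file or directory"),
            strings.Contains(msg, "system cannot find the file specified"):
            return 127, fmt.Errorf("Container command not found or does not exist.")
        case strings.Contains(msg, syscall.EACCES.Error()):
            return 126, fmt.Errorf("Container command could not be invoked.")
        }
        return 0, err
    }
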
 
159
-func (daemon *Daemon) waitForStart(container *container.Container) error {
160
-	return container.StartMonitor(daemon)
161
-}
162
-
163 155
 // Cleanup releases any network resources allocated to the container along with any rules
164 156
 // around how containers are linked together.  It also unmounts the container's root filesystem.
165 157
 func (daemon *Daemon) Cleanup(container *container.Container) {
... ...
@@ -167,7 +162,13 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
167 167
 
168 168
 	container.UnmountIpcMounts(detachMounted)
169 169
 
170
-	daemon.conditionalUnmountOnCleanup(container)
170
+	if err := daemon.conditionalUnmountOnCleanup(container); err != nil {
171
+		// FIXME: remove once reference counting for graphdrivers has been refactored
172
+		// Ensure that all the mounts are gone
173
+		if mountid, err := daemon.layerStore.GetMountID(container.ID); err == nil {
174
+			daemon.cleanupMountsByID(mountid)
175
+		}
176
+	}
171 177
 
172 178
 	for _, eConfig := range container.ExecCommands.Commands() {
173 179
 		daemon.unregisterExecCommand(container, eConfig)
... ...
@@ -6,7 +6,6 @@ import (
6 6
 	"runtime"
7 7
 
8 8
 	"github.com/docker/docker/api/types/backend"
9
-	"github.com/docker/docker/daemon/execdriver"
10 9
 	"github.com/docker/docker/pkg/ioutils"
11 10
 	"github.com/docker/docker/pkg/version"
12 11
 	"github.com/docker/engine-api/types"
... ...
@@ -42,12 +41,9 @@ func (daemon *Daemon) ContainerStats(prefixOrName string, config *backend.Contai
42 42
 
43 43
 	var preCPUStats types.CPUStats
44 44
 	getStatJSON := func(v interface{}) *types.StatsJSON {
45
-		update := v.(*execdriver.ResourceStats)
46
-		ss := convertStatsToAPITypes(update.Stats)
45
+		ss := v.(*types.StatsJSON)
47 46
 		ss.PreCPUStats = preCPUStats
48
-		ss.MemoryStats.Limit = uint64(update.MemoryLimit)
49
-		ss.Read = update.Read
50
-		ss.CPUStats.SystemUsage = update.SystemUsage
47
+		// ss.MemoryStats.Limit = uint64(update.MemoryLimit)
51 48
 		preCPUStats = ss.CPUStats
52 49
 		return ss
53 50
 	}
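
getStatJSON carries the previous sample's CPUStats forward as PreCPUStats so API consumers can compute a CPU percentage across two samples; the memory-limit assignment is left commented out, apparently pending equivalent data from containerd. The usual client-side calculation that PreCPUStats enables (a sketch of the docker CLI's formula, not code from this commit):

    package example

    import "github.com/docker/engine-api/types"

    // cpuPercent computes CPU usage between the current sample and the
    // PreCPUStats snapshot stored by getStatJSON.
    func cpuPercent(v *types.StatsJSON) float64 {
        cpuDelta := float64(v.CPUStats.CPUUsage.TotalUsage) - float64(v.PreCPUStats.CPUUsage.TotalUsage)
        sysDelta := float64(v.CPUStats.SystemUsage) - float64(v.PreCPUStats.SystemUsage)
        if cpuDelta > 0.0 && sysDelta > 0.0 {
            return (cpuDelta / sysDelta) * float64(len(v.CPUStats.CPUUsage.PercpuUsage)) * 100.0
        }
        return 0.0
    }
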
... ...
@@ -13,14 +13,14 @@ import (
13 13
 
14 14
 	"github.com/Sirupsen/logrus"
15 15
 	"github.com/docker/docker/container"
16
-	"github.com/docker/docker/daemon/execdriver"
17 16
 	"github.com/docker/docker/pkg/pubsub"
17
+	"github.com/docker/engine-api/types"
18 18
 	"github.com/opencontainers/runc/libcontainer/system"
19 19
 )
20 20
 
21 21
 type statsSupervisor interface {
22 22
 	// GetContainerStats collects all the stats related to a container
23
-	GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error)
23
+	GetContainerStats(container *container.Container) (*types.StatsJSON, error)
24 24
 }
25 25
 
26 26
 // newStatsCollector returns a new statsCollector that collects
... ...
@@ -120,12 +120,13 @@ func (s *statsCollector) run() {
120 120
 		for _, pair := range pairs {
121 121
 			stats, err := s.supervisor.GetContainerStats(pair.container)
122 122
 			if err != nil {
123
-				if err != execdriver.ErrNotRunning {
123
+				if _, ok := err.(errNotRunning); !ok {
124 124
 					logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
125 125
 				}
126 126
 				continue
127 127
 			}
128
-			stats.SystemUsage = systemUsage
128
+			// FIXME: move to containerd
129
+			stats.CPUStats.SystemUsage = systemUsage
129 130
 
130 131
 			pair.publisher.Publish(stats)
131 132
 		}
132 133
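
The collector now distinguishes the "container not running" case from real failures with a type assertion instead of comparing against the old execdriver sentinel value. errNotRunning is defined elsewhere in package daemon and does not appear in this diff; an assumed sketch of its shape, shown only so the assertion reads concretely:

    // Assumed definition; the real one lives elsewhere in package daemon.
    type errNotRunning struct {
        containerID string
    }

    func (e errNotRunning) Error() string {
        return fmt.Sprintf("Container %s is not running", e.containerID)
    }
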
deleted file mode 100644
... ...
@@ -1,84 +0,0 @@
1
-package daemon
2
-
3
-import (
4
-	"github.com/docker/engine-api/types"
5
-	"github.com/opencontainers/runc/libcontainer"
6
-	"github.com/opencontainers/runc/libcontainer/cgroups"
7
-)
8
-
9
-// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
10
-// structs. This is done to preserve API compatibility and versioning.
11
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
12
-	s := &types.StatsJSON{}
13
-	if ls.Interfaces != nil {
14
-		s.Networks = make(map[string]types.NetworkStats)
15
-		for _, iface := range ls.Interfaces {
16
-			// For API Version >= 1.21, the original data of network will
17
-			// be returned.
18
-			s.Networks[iface.Name] = types.NetworkStats{
19
-				RxBytes:   iface.RxBytes,
20
-				RxPackets: iface.RxPackets,
21
-				RxErrors:  iface.RxErrors,
22
-				RxDropped: iface.RxDropped,
23
-				TxBytes:   iface.TxBytes,
24
-				TxPackets: iface.TxPackets,
25
-				TxErrors:  iface.TxErrors,
26
-				TxDropped: iface.TxDropped,
27
-			}
28
-		}
29
-	}
30
-
31
-	cs := ls.CgroupStats
32
-	if cs != nil {
33
-		s.BlkioStats = types.BlkioStats{
34
-			IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
35
-			IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
36
-			IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
37
-			IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
38
-			IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
39
-			IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
40
-			IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
41
-			SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
42
-		}
43
-		cpu := cs.CpuStats
44
-		s.CPUStats = types.CPUStats{
45
-			CPUUsage: types.CPUUsage{
46
-				TotalUsage:        cpu.CpuUsage.TotalUsage,
47
-				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
48
-				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
49
-				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
50
-			},
51
-			ThrottlingData: types.ThrottlingData{
52
-				Periods:          cpu.ThrottlingData.Periods,
53
-				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
54
-				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
55
-			},
56
-		}
57
-		mem := cs.MemoryStats
58
-		s.MemoryStats = types.MemoryStats{
59
-			Usage:    mem.Usage.Usage,
60
-			MaxUsage: mem.Usage.MaxUsage,
61
-			Stats:    mem.Stats,
62
-			Failcnt:  mem.Usage.Failcnt,
63
-		}
64
-		pids := cs.PidsStats
65
-		s.PidsStats = types.PidsStats{
66
-			Current: pids.Current,
67
-		}
68
-	}
69
-
70
-	return s
71
-}
72
-
73
-func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []types.BlkioStatEntry {
74
-	out := make([]types.BlkioStatEntry, len(entries))
75
-	for i, re := range entries {
76
-		out[i] = types.BlkioStatEntry{
77
-			Major: re.Major,
78
-			Minor: re.Minor,
79
-			Op:    re.Op,
80
-			Value: re.Value,
81
-		}
82
-	}
83
-	return out
84
-}
85 1
deleted file mode 100644
... ...
@@ -1,14 +0,0 @@
1
-package daemon
2
-
3
-import (
4
-	"github.com/docker/engine-api/types"
5
-	"github.com/opencontainers/runc/libcontainer"
6
-)
7
-
8
-// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
9
-// structs. This is done to preserve API compatibility and versioning.
10
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
11
-	// TODO Windows. Refactor accordingly to fill in stats.
12
-	s := &types.StatsJSON{}
13
-	return s
14
-}
... ...
@@ -33,7 +33,8 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*types.Container
33 33
 	if container.IsRestarting() {
34 34
 		return nil, errContainerIsRestarting(container.ID)
35 35
 	}
36
-	pids, err := daemon.ExecutionDriver().GetPidsForContainer(container.ID)
36
+
37
+	pids, err := daemon.containerd.GetPidsForContainer(container.ID)
37 38
 	if err != nil {
38 39
 		return nil, err
39 40
 	}
... ...
@@ -35,11 +35,9 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error {
35 35
 		return fmt.Errorf("Container %s is not paused", container.ID)
36 36
 	}
37 37
 
38
-	if err := daemon.execDriver.Unpause(container.Command); err != nil {
38
+	if err := daemon.containerd.Resume(container.ID); err != nil {
39 39
 		return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
40 40
 	}
41 41
 
42
-	container.Paused = false
43
-	daemon.LogContainerEvent(container, "unpause")
44 42
 	return nil
45 43
 }
... ...
@@ -84,7 +84,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
84 84
 	// If container is running (including paused), we need to update configs
85 85
 	// to the real world.
86 86
 	if container.IsRunning() && !container.IsRestarting() {
87
-		if err := daemon.execDriver.Update(container.Command); err != nil {
87
+		if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
88 88
 			restoreConfig = true
89 89
 			return errCannotUpdate(container.ID, err)
90 90
 		}
91 91
new file mode 100644
... ...
@@ -0,0 +1,25 @@
0
+// +build linux
1
+
2
+package daemon
3
+
4
+import (
5
+	"github.com/docker/docker/libcontainerd"
6
+	"github.com/docker/engine-api/types/container"
7
+)
8
+
9
+func toContainerdResources(resources container.Resources) libcontainerd.Resources {
10
+	var r libcontainerd.Resources
11
+	r.BlkioWeight = uint32(resources.BlkioWeight)
12
+	r.CpuShares = uint32(resources.CPUShares)
13
+	r.CpuPeriod = uint32(resources.CPUPeriod)
14
+	r.CpuQuota = uint32(resources.CPUQuota)
15
+	r.CpusetCpus = resources.CpusetCpus
16
+	r.CpusetMems = resources.CpusetMems
17
+	r.MemoryLimit = uint32(resources.Memory)
18
+	if resources.MemorySwap > 0 {
19
+		r.MemorySwap = uint32(resources.MemorySwap)
20
+	}
21
+	r.MemoryReservation = uint32(resources.MemoryReservation)
22
+	r.KernelMemoryLimit = uint32(resources.KernelMemory)
23
+	return r
24
+}
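
Note the narrowing here: the engine-api resource fields are int64 while libcontainerd.Resources uses uint32 (hence the casts), so a memory limit above roughly 4GiB, or a negative sentinel such as MemorySwap == -1 meaning unlimited, would wrap if cast directly. Only MemorySwap is guarded above; a saturating conversion would cover the rest (a sketch, not part of the commit):

    // clampToUint32 sketches a wrap-safe narrowing for the casts above.
    func clampToUint32(v int64) uint32 {
        if v < 0 {
            return 0
        }
        if v > int64(^uint32(0)) {
            return ^uint32(0) // saturate at MaxUint32 instead of wrapping
        }
        return uint32(v)
    }
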
... ...
@@ -8,7 +8,6 @@ import (
8 8
 	"strings"
9 9
 
10 10
 	"github.com/docker/docker/container"
11
-	"github.com/docker/docker/daemon/execdriver"
12 11
 	"github.com/docker/docker/volume"
13 12
 	"github.com/docker/engine-api/types"
14 13
 	containertypes "github.com/docker/engine-api/types/container"
... ...
@@ -21,7 +20,7 @@ var (
21 21
 	ErrVolumeReadonly = errors.New("mounted volume is marked read-only")
22 22
 )
23 23
 
24
-type mounts []execdriver.Mount
24
+type mounts []container.Mount
25 25
 
26 26
 // volumeToAPIType converts a volume.Volume to the type used by the remote API
27 27
 func volumeToAPIType(v volume.Volume) *types.Volume {
... ...
@@ -8,25 +8,24 @@ import (
8 8
 	"strconv"
9 9
 
10 10
 	"github.com/docker/docker/container"
11
-	"github.com/docker/docker/daemon/execdriver"
12 11
 	"github.com/docker/docker/volume"
13 12
 )
14 13
 
15 14
 // setupMounts iterates through each of the mount points for a container and
16 15
 // calls Setup() on each. It also looks to see if it is a network mount such as
17 16
 // /etc/resolv.conf, and if it is not, appends it to the array of mounts.
18
-func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.Mount, error) {
19
-	var mounts []execdriver.Mount
20
-	for _, m := range container.MountPoints {
21
-		if err := daemon.lazyInitializeVolume(container.ID, m); err != nil {
17
+func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) {
18
+	var mounts []container.Mount
19
+	for _, m := range c.MountPoints {
20
+		if err := daemon.lazyInitializeVolume(c.ID, m); err != nil {
22 21
 			return nil, err
23 22
 		}
24 23
 		path, err := m.Setup()
25 24
 		if err != nil {
26 25
 			return nil, err
27 26
 		}
28
-		if !container.TrySetNetworkMount(m.Destination, path) {
29
-			mnt := execdriver.Mount{
27
+		if !c.TrySetNetworkMount(m.Destination, path) {
28
+			mnt := container.Mount{
30 29
 				Source:      path,
31 30
 				Destination: m.Destination,
32 31
 				Writable:    m.RW,
... ...
@@ -35,7 +34,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
35 35
 			if m.Volume != nil {
36 36
 				attributes := map[string]string{
37 37
 					"driver":      m.Volume.DriverName(),
38
-					"container":   container.ID,
38
+					"container":   c.ID,
39 39
 					"destination": m.Destination,
40 40
 					"read/write":  strconv.FormatBool(m.RW),
41 41
 					"propagation": m.Propagation,
... ...
@@ -47,7 +46,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
47 47
 	}
48 48
 
49 49
 	mounts = sortMounts(mounts)
50
-	netMounts := container.NetworkMounts()
50
+	netMounts := c.NetworkMounts()
51 51
 	// if we are going to mount any of the network files from container
52 52
 	// metadata, the ownership must be set properly for potential container
53 53
 	// remapped root (user namespaces)
... ...
@@ -63,7 +62,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
63 63
 // sortMounts sorts an array of mounts in lexicographic order. This ensures that
64 64
 // when mounting, the mounts don't shadow other mounts. For example, if mounting
65 65
 // /etc and /etc/resolv.conf, /etc/resolv.conf must not be mounted first.
66
-func sortMounts(m []execdriver.Mount) []execdriver.Mount {
66
+func sortMounts(m []container.Mount) []container.Mount {
67 67
 	sort.Sort(mounts(m))
68 68
 	return m
69 69
 }
... ...
@@ -112,12 +112,13 @@ func (ls *mockLayerStore) CreateRWLayer(string, layer.ChainID, string, layer.Mou
112 112
 
113 113
 func (ls *mockLayerStore) GetRWLayer(string) (layer.RWLayer, error) {
114 114
 	return nil, errors.New("not implemented")
115
-
116 115
 }
117 116
 
118 117
 func (ls *mockLayerStore) ReleaseRWLayer(layer.RWLayer) ([]layer.Metadata, error) {
119 118
 	return nil, errors.New("not implemented")
120
-
119
+}
120
+func (ls *mockLayerStore) GetMountID(string) (string, error) {
121
+	return "", errors.New("not implemented")
121 122
 }
122 123
 
123 124
 func (ls *mockLayerStore) Cleanup() error {
... ...
@@ -29,6 +29,7 @@ import (
29 29
 	"github.com/docker/docker/daemon/logger"
30 30
 	"github.com/docker/docker/docker/listeners"
31 31
 	"github.com/docker/docker/dockerversion"
32
+	"github.com/docker/docker/libcontainerd"
32 33
 	"github.com/docker/docker/opts"
33 34
 	"github.com/docker/docker/pkg/jsonlog"
34 35
 	flag "github.com/docker/docker/pkg/mflag"
... ...
@@ -264,7 +265,13 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
264 264
 	cli.TrustKeyPath = commonFlags.TrustKey
265 265
 
266 266
 	registryService := registry.NewService(cli.Config.ServiceOptions)
267
-	d, err := daemon.NewDaemon(cli.Config, registryService)
267
+
268
+	containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.ExecRoot, "libcontainerd"), cli.getPlatformRemoteOptions()...)
269
+	if err != nil {
270
+		logrus.Fatal(err)
271
+	}
272
+
273
+	d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote)
268 274
 	if err != nil {
269 275
 		if pfile != nil {
270 276
 			if err := pfile.Remove(); err != nil {
... ...
@@ -279,7 +286,6 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
279 279
 	logrus.WithFields(logrus.Fields{
280 280
 		"version":     dockerversion.Version,
281 281
 		"commit":      dockerversion.GitCommit,
282
-		"execdriver":  d.ExecutionDriver().Name(),
283 282
 		"graphdriver": d.GraphDriverName(),
284 283
 	}).Info("Docker daemon")
285 284
 
... ...
@@ -330,6 +336,7 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
330 330
 	// Wait for serve API to complete
331 331
 	errAPI := <-serveAPIWait
332 332
 	shutdownDaemon(d, 15)
333
+	containerdRemote.Cleanup()
333 334
 	if errAPI != nil {
334 335
 		if pfile != nil {
335 336
 			if err := pfile.Remove(); err != nil {
... ...
@@ -11,10 +11,9 @@ import (
11 11
 	"github.com/Sirupsen/logrus"
12 12
 	apiserver "github.com/docker/docker/api/server"
13 13
 	"github.com/docker/docker/daemon"
14
+	"github.com/docker/docker/libcontainerd"
14 15
 	"github.com/docker/docker/pkg/mflag"
15 16
 	"github.com/docker/docker/pkg/system"
16
-
17
-	_ "github.com/docker/docker/daemon/execdriver/native"
18 17
 )
19 18
 
20 19
 const defaultDaemonConfigFile = "/etc/docker/daemon.json"
... ...
@@ -65,3 +64,15 @@ func setupConfigReloadTrap(configFile string, flags *mflag.FlagSet, reload func(
65 65
 		}
66 66
 	}()
67 67
 }
68
+
69
+func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
70
+	opts := []libcontainerd.RemoteOption{
71
+		libcontainerd.WithDebugLog(cli.Config.Debug),
72
+	}
73
+	if cli.Config.ContainerdAddr != "" {
74
+		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
75
+	} else {
76
+		opts = append(opts, libcontainerd.WithStartDaemon(true))
77
+	}
78
+	return opts
79
+}
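
The remote is configured through functional options: WithDebugLog always, then either WithRemoteAddr when an external --containerd address is given, or WithStartDaemon(true) so libcontainerd launches its own containerd. The option machinery itself is not in this section; an assumed sketch of the shape such options typically take:

    // Assumed shapes; the real RemoteOption definitions live in
    // package libcontainerd and may differ.
    type remoteConfig struct {
        debugLog    bool
        remoteAddr  string
        startDaemon bool
    }

    // RemoteOption mutates the remote's configuration before it starts.
    type RemoteOption interface {
        Apply(*remoteConfig) error
    }

    type withStartDaemon bool

    func (w withStartDaemon) Apply(cfg *remoteConfig) error {
        cfg.startDaemon = bool(w)
        return nil
    }

    // WithStartDaemon tells the remote to spawn its own containerd.
    func WithStartDaemon(start bool) RemoteOption { return withStartDaemon(start) }
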
... ...
@@ -142,6 +142,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error {
142 142
 
143 143
 	args := append(d.GlobalFlags,
144 144
 		d.Command,
145
+		"--containerd", "/var/run/docker/libcontainerd/containerd.sock",
145 146
 		"--graph", d.root,
146 147
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.folder),
147 148
 		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
... ...
@@ -245,6 +246,29 @@ func (d *Daemon) StartWithBusybox(arg ...string) error {
245 245
 	return d.LoadBusybox()
246 246
 }
247 247
 
248
+// Kill will send a SIGKILL to the daemon
249
+func (d *Daemon) Kill() error {
250
+	if d.cmd == nil || d.wait == nil {
251
+		return errors.New("daemon not started")
252
+	}
253
+
254
+	defer func() {
255
+		d.logFile.Close()
256
+		d.cmd = nil
257
+	}()
258
+
259
+	if err := d.cmd.Process.Kill(); err != nil {
260
+		d.c.Logf("Could not kill daemon: %v", err)
261
+		return err
262
+	}
263
+
264
+	if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.folder)); err != nil {
265
+		return err
266
+	}
267
+
268
+	return nil
269
+}
270
+
248 271
 // Stop will send a SIGINT every second and wait for the daemon to stop.
249 272
 // If it times out, a SIGKILL is sent.
250 273
 // Stop will not delete the daemon directory. If a purged daemon is needed,
... ...
@@ -300,6 +324,10 @@ out2:
300 300
 		return err
301 301
 	}
302 302
 
303
+	if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.folder)); err != nil {
304
+		return err
305
+	}
306
+
303 307
 	return nil
304 308
 }
305 309
 
306 310
new file mode 100644
... ...
@@ -0,0 +1,150 @@
0
+// +build daemon,!windows,experimental
1
+
2
+package main
3
+
4
+import (
5
+	"os/exec"
6
+	"strings"
7
+	"time"
8
+
9
+	"github.com/go-check/check"
10
+)
11
+
12
+// TestDaemonRestartWithKilledRunningContainer requires live restore of running containers
13
+func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) {
14
+	// TODO(mlaventure): Not sure what would the exit code be on windows
15
+	testRequires(t, DaemonIsLinux)
16
+	if err := s.d.StartWithBusybox(); err != nil {
17
+		t.Fatal(err)
18
+	}
19
+
20
+	cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top")
21
+	defer s.d.Stop()
22
+	if err != nil {
23
+		t.Fatal(cid, err)
24
+	}
25
+	cid = strings.TrimSpace(cid)
26
+
27
+	// Kill the daemon
28
+	if err := s.d.Kill(); err != nil {
29
+		t.Fatal(err)
30
+	}
31
+
32
+	// kill the container
33
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "kill", cid)
34
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
35
+		t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid)
36
+	}
37
+
38
+	// Give containerd time to process the command; if we don't,
39
+	// the exit event might be received after we do the inspect
40
+	time.Sleep(3 * time.Second)
41
+
42
+	// restart the daemon
43
+	if err := s.d.Start(); err != nil {
44
+		t.Fatal(err)
45
+	}
46
+
47
+	// Check that we've got the correct exit code
48
+	out, err := s.d.Cmd("inspect", "-f", "{{.State.ExitCode}}", cid)
49
+	t.Assert(err, check.IsNil)
50
+
51
+	out = strings.TrimSpace(out)
52
+	if out != "143" {
53
+		t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "143", out, cid)
54
+	}
55
+
56
+}
57
+
58
+// TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
59
+func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
60
+	if err := s.d.StartWithBusybox(); err != nil {
61
+		t.Fatal(err)
62
+	}
63
+
64
+	cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top")
65
+	defer s.d.Stop()
66
+	if err != nil {
67
+		t.Fatal(cid, err)
68
+	}
69
+	cid = strings.TrimSpace(cid)
70
+
71
+	// Kill the daemon
72
+	if err := s.d.Kill(); err != nil {
73
+		t.Fatal(err)
74
+	}
75
+
76
+	// pause the container
77
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "pause", cid)
78
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
79
+		t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid)
80
+	}
81
+
82
+	// Give containerd time to process the command; if we don't,
83
+	// the pause event might be received after we do the inspect
84
+	time.Sleep(3 * time.Second)
85
+
86
+	// restart the daemon
87
+	if err := s.d.Start(); err != nil {
88
+		t.Fatal(err)
89
+	}
90
+
91
+	// Check that we've got the correct status
92
+	out, err := s.d.Cmd("inspect", "-f", "{{.State.Status}}", cid)
93
+	t.Assert(err, check.IsNil)
94
+
95
+	out = strings.TrimSpace(out)
96
+	if out != "paused" {
97
+		t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "paused", out, cid)
98
+	}
99
+}
100
+
101
+// TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers.
102
+func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
103
+	// TODO(mlaventure): Not sure what would the exit code be on windows
104
+	testRequires(t, DaemonIsLinux)
105
+	if err := s.d.StartWithBusybox(); err != nil {
106
+		t.Fatal(err)
107
+	}
108
+
109
+	cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top")
110
+	defer s.d.Stop()
111
+	if err != nil {
112
+		t.Fatal(cid, err)
113
+	}
114
+	cid = strings.TrimSpace(cid)
115
+
116
+	// pause the container
117
+	if _, err := s.d.Cmd("pause", cid); err != nil {
118
+		t.Fatal(cid, err)
119
+	}
120
+
121
+	// Kill the daemon
122
+	if err := s.d.Kill(); err != nil {
123
+		t.Fatal(err)
124
+	}
125
+
126
+	// resume the container
127
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "resume", cid)
128
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
129
+		t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid)
130
+	}
131
+
132
+	// Give containerd time to process the command; if we don't,
133
+	// the resume event might be received after we do the inspect
134
+	time.Sleep(3 * time.Second)
135
+
136
+	// restart the daemon
137
+	if err := s.d.Start(); err != nil {
138
+		t.Fatal(err)
139
+	}
140
+
141
+	// Check that we've got the correct status
142
+	out, err := s.d.Cmd("inspect", "-f", "{{.State.Status}}", cid)
143
+	t.Assert(err, check.IsNil)
144
+
145
+	out = strings.TrimSpace(out)
146
+	if out != "running" {
147
+		t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "running", out, cid)
148
+	}
149
+}
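
All three tests above absorb containerd's asynchronous event delivery with a fixed three-second sleep, which trades wall-clock time for residual flakiness. A polling helper would remove the guesswork; a sketch against the suite's Daemon helper (assumes the integration-cli package context, not part of the commit):

    // waitForInspect polls docker inspect until the template yields the
    // expected value or the timeout expires.
    func waitForInspect(d *Daemon, format, cid, want string, timeout time.Duration) error {
        deadline := time.Now().Add(timeout)
        for time.Now().Before(deadline) {
            out, err := d.Cmd("inspect", "-f", format, cid)
            if err == nil && strings.TrimSpace(out) == want {
                return nil
            }
            time.Sleep(100 * time.Millisecond)
        }
        return fmt.Errorf("timed out waiting for container %s to report %q", cid, want)
    }
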
... ...
@@ -1507,7 +1507,18 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
1507 1507
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
1508 1508
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
1509 1509
 	id := strings.TrimSpace(out)
1510
-	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
1510
+	c.Assert(s.d.Kill(), check.IsNil)
1511
+
1512
+	// kill the container
1513
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "kill", id)
1514
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
1515
+		c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id)
1516
+	}
1517
+
1518
+	// Give containerd time to process the command; if we don't,
1519
+	// the exit event might be received after we do the inspect
1520
+	time.Sleep(3 * time.Second)
1521
+
1511 1522
 	c.Assert(s.d.Start(), check.IsNil)
1512 1523
 	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
1513 1524
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
... ...
@@ -1840,6 +1851,7 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) {
1840 1840
 // Test daemon restart with container links + auto restart
1841 1841
 func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
1842 1842
 	d := NewDaemon(c)
1843
+	defer d.Stop()
1843 1844
 	err := d.StartWithBusybox()
1844 1845
 	c.Assert(err, checker.IsNil)
1845 1846
 
... ...
@@ -8,7 +8,6 @@ import (
8 8
 	"net/http"
9 9
 	"os"
10 10
 	"os/exec"
11
-	"path/filepath"
12 11
 	"reflect"
13 12
 	"sort"
14 13
 	"strings"
... ...
@@ -375,57 +374,6 @@ func (s *DockerSuite) TestLinksPingLinkedContainersOnRename(c *check.C) {
375 375
 	dockerCmd(c, "exec", "container2", "ping", "-c", "1", "alias1", "-W", "1")
376 376
 }
377 377
 
378
-func (s *DockerSuite) TestExecDir(c *check.C) {
379
-	// TODO Windows CI. This requires some work to port as it uses execDriverPath
380
-	// which is currently (and incorrectly) hard coded as a string assuming
381
-	// the daemon is running Linux :(
382
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
383
-
384
-	out, _ := runSleepingContainer(c, "-d")
385
-	id := strings.TrimSpace(out)
386
-
387
-	execDir := filepath.Join(execDriverPath, id)
388
-	stateFile := filepath.Join(execDir, "state.json")
389
-
390
-	{
391
-		fi, err := os.Stat(execDir)
392
-		c.Assert(err, checker.IsNil)
393
-		if !fi.IsDir() {
394
-			c.Fatalf("%q must be a directory", execDir)
395
-		}
396
-		fi, err = os.Stat(stateFile)
397
-		c.Assert(err, checker.IsNil)
398
-	}
399
-
400
-	dockerCmd(c, "stop", id)
401
-	{
402
-		_, err := os.Stat(execDir)
403
-		c.Assert(err, checker.NotNil)
404
-		c.Assert(err, checker.NotNil, check.Commentf("Exec directory %q exists for removed container!", execDir))
405
-		if !os.IsNotExist(err) {
406
-			c.Fatalf("Error should be about non-existing, got %s", err)
407
-		}
408
-	}
409
-	dockerCmd(c, "start", id)
410
-	{
411
-		fi, err := os.Stat(execDir)
412
-		c.Assert(err, checker.IsNil)
413
-		if !fi.IsDir() {
414
-			c.Fatalf("%q must be a directory", execDir)
415
-		}
416
-		fi, err = os.Stat(stateFile)
417
-		c.Assert(err, checker.IsNil)
418
-	}
419
-	dockerCmd(c, "rm", "-f", id)
420
-	{
421
-		_, err := os.Stat(execDir)
422
-		c.Assert(err, checker.NotNil, check.Commentf("Exec directory %q exists for removed container!", execDir))
423
-		if !os.IsNotExist(err) {
424
-			c.Fatalf("Error should be about non-existing, got %s", err)
425
-		}
426
-	}
427
-}
428
-
429 378
 func (s *DockerSuite) TestRunMutableNetworkFiles(c *check.C) {
430 379
 	// Not applicable on Windows to Windows CI.
431 380
 	testRequires(c, SameHostDaemon, DaemonIsLinux)
... ...
@@ -22,7 +22,6 @@ func (s *DockerSuite) TestInfoEnsureSucceeds(c *check.C) {
22 22
 		" Paused:",
23 23
 		" Stopped:",
24 24
 		"Images:",
25
-		"Execution Driver:",
26 25
 		"OSType:",
27 26
 		"Architecture:",
28 27
 		"Logging Driver:",
... ...
@@ -1109,7 +1109,7 @@ func (s *DockerSuite) TestRunProcNotWritableInNonPrivilegedContainers(c *check.C
1109 1109
 func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
1110 1110
 	// Not applicable for Windows as there is no concept of --privileged
1111 1111
 	testRequires(c, DaemonIsLinux, NotUserNamespace)
1112
-	if _, code := dockerCmd(c, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger"); code != 0 {
1112
+	if _, code := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "umount /proc/sysrq-trigger && touch /proc/sysrq-trigger"); code != 0 {
1113 1113
 		c.Fatalf("proc should be writable in privileged container")
1114 1114
 	}
1115 1115
 }
... ...
@@ -3021,7 +3021,8 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
3021 3021
 		out, _, err := dockerCmdWithError("run", "--name", name, "--security-opt", "seccomp:unconfined", "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
3022 3022
 		if err == nil ||
3023 3023
 			!(strings.Contains(strings.ToLower(out), "mount: cannot mount none") ||
3024
-				strings.Contains(strings.ToLower(out), "permission denied")) {
3024
+				strings.Contains(strings.ToLower(out), "permission denied") ||
3025
+				strings.Contains(strings.ToLower(out), "operation not permitted")) {
3025 3026
 			errChan <- fmt.Errorf("unshare and mount of /proc should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err)
3026 3027
 		} else {
3027 3028
 			errChan <- nil
... ...
@@ -3034,7 +3035,8 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
3034 3034
 		out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "seccomp:unconfined", "--security-opt", "apparmor:docker-default", "--name", name, "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
3035 3035
 		if err == nil ||
3036 3036
 			!(strings.Contains(strings.ToLower(out), "mount: cannot mount none") ||
3037
-				strings.Contains(strings.ToLower(out), "permission denied")) {
3037
+				strings.Contains(strings.ToLower(out), "permission denied") ||
3038
+				strings.Contains(strings.ToLower(out), "operation not permitted")) {
3038 3039
 			errChan <- fmt.Errorf("privileged unshare with apparmor should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err)
3039 3040
 		} else {
3040 3041
 			errChan <- nil
... ...
@@ -4232,7 +4234,10 @@ func (s *DockerSuite) TestRunAttachFailedNoLeak(c *check.C) {
4232 4232
 	out, _, err := dockerCmdWithError("run", "-p", "8000:8000", "busybox", "true")
4233 4233
 	c.Assert(err, checker.NotNil)
4234 4234
 	// check for windows error as well
4235
-	c.Assert(strings.Contains(string(out), "port is already allocated") || strings.Contains(string(out), "were not connected because a duplicate name exists"), checker.Equals, true, check.Commentf("Output: %s", out))
4235
+	// TODO Windows Post TP5. Fix the error message string
4236
+	c.Assert(strings.Contains(string(out), "port is already allocated") ||
4237
+		strings.Contains(string(out), "were not connected because a duplicate name exists") ||
4238
+		strings.Contains(string(out), "HNS failed with error : Failed to create endpoint"), checker.Equals, true, check.Commentf("Output: %s", out))
4236 4239
 	dockerCmd(c, "rm", "-f", "test")
4237 4240
 
4238 4241
 	// NGoroutines is not updated right away, so we need to wait before failing
... ...
@@ -169,6 +169,7 @@ type Store interface {
169 169
 
170 170
 	CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error)
171 171
 	GetRWLayer(id string) (RWLayer, error)
172
+	GetMountID(id string) (string, error)
172 173
 	ReleaseRWLayer(RWLayer) ([]Metadata, error)
173 174
 
174 175
 	Cleanup() error
... ...
@@ -480,6 +480,18 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) {
480 480
 	return mount.getReference(), nil
481 481
 }
482 482
 
483
+func (ls *layerStore) GetMountID(id string) (string, error) {
484
+	ls.mountL.Lock()
485
+	defer ls.mountL.Unlock()
486
+	mount, ok := ls.mounts[id]
487
+	if !ok {
488
+		return "", ErrMountDoesNotExist
489
+	}
490
+	logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID)
491
+
492
+	return mount.mountID, nil
493
+}
494
+
483 495
 func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
484 496
 	ls.mountL.Lock()
485 497
 	defer ls.mountL.Unlock()
486 498
new file mode 100644
... ...
@@ -0,0 +1,58 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"fmt"
4
+	"sync"
5
+
6
+	"github.com/Sirupsen/logrus"
7
+)
8
+
9
+// clientCommon contains the platform agnostic fields used in the client structure
10
+type clientCommon struct {
11
+	backend          Backend
12
+	containers       map[string]*container
13
+	containerMutexes map[string]*sync.Mutex // lock by container ID
14
+	mapMutex         sync.RWMutex           // protects read/write operations on the containers map
15
+	sync.Mutex                              // lock for containerMutexes map access
16
+}
17
+
18
+func (clnt *client) lock(containerID string) {
19
+	clnt.Lock()
20
+	if _, ok := clnt.containerMutexes[containerID]; !ok {
21
+		clnt.containerMutexes[containerID] = &sync.Mutex{}
22
+	}
23
+	clnt.Unlock()
24
+	clnt.containerMutexes[containerID].Lock()
25
+}
26
+
27
+func (clnt *client) unlock(containerID string) {
28
+	clnt.Lock()
29
+	if l, ok := clnt.containerMutexes[containerID]; ok {
30
+		l.Unlock()
31
+	} else {
32
+		logrus.Warnf("unlock of non-existing mutex: %s", containerID)
33
+	}
34
+	clnt.Unlock()
35
+}
36
+
37
+// must hold a lock for cont.containerID
38
+func (clnt *client) appendContainer(cont *container) {
39
+	clnt.mapMutex.Lock()
40
+	clnt.containers[cont.containerID] = cont
41
+	clnt.mapMutex.Unlock()
42
+}
43
+func (clnt *client) deleteContainer(friendlyName string) {
44
+	clnt.mapMutex.Lock()
45
+	delete(clnt.containers, friendlyName)
46
+	clnt.mapMutex.Unlock()
47
+}
48
+
49
+func (clnt *client) getContainer(containerID string) (*container, error) {
50
+	clnt.mapMutex.RLock()
51
+	defer clnt.mapMutex.RUnlock()
52
+	container, ok := clnt.containers[containerID]
53
+	if !ok {
54
+		return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error
55
+	}
56
+	return container, nil
57
+}
0 58
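
clientCommon layers two locks: mapMutex guards the containers map itself, while containerMutexes serializes all work on a single container ID so that, say, Create and Signal for the same container cannot interleave. Note the per-ID mutexes are never deleted, so the map grows with container churn. Every per-container entry point in the platform files below follows the same bracket pattern:

    // Usage pattern (sketch): hold the container's own lock for the
    // duration of the operation.
    func (clnt *client) doSomething(containerID string) error {
        clnt.lock(containerID)
        defer clnt.unlock(containerID)
        // ... operate on clnt.containers[containerID] safely ...
        return nil
    }
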
new file mode 100644
... ...
@@ -0,0 +1,394 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"encoding/json"
4
+	"fmt"
5
+	"os"
6
+	"path/filepath"
7
+	"strings"
8
+	"sync"
9
+	"syscall"
10
+
11
+	"github.com/Sirupsen/logrus"
12
+	containerd "github.com/docker/containerd/api/grpc/types"
13
+	"github.com/docker/docker/pkg/idtools"
14
+	"github.com/docker/docker/pkg/mount"
15
+	"github.com/opencontainers/specs/specs-go"
16
+	"golang.org/x/net/context"
17
+)
18
+
19
+type client struct {
20
+	clientCommon
21
+
22
+	// Platform specific properties below here.
23
+	remote        *remote
24
+	q             queue
25
+	exitNotifiers map[string]*exitNotifier
26
+}
27
+
28
+func (clnt *client) AddProcess(containerID, processFriendlyName string, specp Process) error {
29
+	clnt.lock(containerID)
30
+	defer clnt.unlock(containerID)
31
+	container, err := clnt.getContainer(containerID)
32
+	if err != nil {
33
+		return err
34
+	}
35
+
36
+	spec, err := container.spec()
37
+	if err != nil {
38
+		return err
39
+	}
40
+	sp := spec.Process
41
+	sp.Args = specp.Args
42
+	sp.Terminal = specp.Terminal
43
+	if specp.Env != nil {
44
+		sp.Env = specp.Env
45
+	}
46
+	if specp.Cwd != nil {
47
+		sp.Cwd = *specp.Cwd
48
+	}
49
+	if specp.User != nil {
50
+		sp.User = specs.User{
51
+			UID:            specp.User.UID,
52
+			GID:            specp.User.GID,
53
+			AdditionalGids: specp.User.AdditionalGids,
54
+		}
55
+	}
56
+	if specp.Capabilities != nil {
57
+		sp.Capabilities = specp.Capabilities
58
+	}
59
+
60
+	p := container.newProcess(processFriendlyName)
61
+
62
+	r := &containerd.AddProcessRequest{
63
+		Args:     sp.Args,
64
+		Cwd:      sp.Cwd,
65
+		Terminal: sp.Terminal,
66
+		Id:       containerID,
67
+		Env:      sp.Env,
68
+		User: &containerd.User{
69
+			Uid:            sp.User.UID,
70
+			Gid:            sp.User.GID,
71
+			AdditionalGids: sp.User.AdditionalGids,
72
+		},
73
+		Pid:             processFriendlyName,
74
+		Stdin:           p.fifo(syscall.Stdin),
75
+		Stdout:          p.fifo(syscall.Stdout),
76
+		Stderr:          p.fifo(syscall.Stderr),
77
+		Capabilities:    sp.Capabilities,
78
+		ApparmorProfile: sp.ApparmorProfile,
79
+		SelinuxLabel:    sp.SelinuxLabel,
80
+		NoNewPrivileges: sp.NoNewPrivileges,
81
+	}
82
+
83
+	iopipe, err := p.openFifos(sp.Terminal)
84
+	if err != nil {
85
+		return err
86
+	}
87
+
88
+	if _, err := clnt.remote.apiClient.AddProcess(context.Background(), r); err != nil {
89
+		p.closeFifos(iopipe)
90
+		return err
91
+	}
92
+
93
+	container.processes[processFriendlyName] = p
94
+
95
+	clnt.unlock(containerID)
96
+
97
+	if err := clnt.backend.AttachStreams(processFriendlyName, *iopipe); err != nil {
98
+		return err
99
+	}
100
+	clnt.lock(containerID)
101
+
102
+	return nil
103
+}
104
+
105
+func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
106
+	root, err := filepath.Abs(clnt.remote.stateDir)
107
+	if err != nil {
108
+		return "", err
109
+	}
110
+	if uid == 0 && gid == 0 {
111
+		return root, nil
112
+	}
113
+	p := string(filepath.Separator)
114
+	for _, d := range strings.Split(root, string(filepath.Separator))[1:] {
115
+		p = filepath.Join(p, d)
116
+		fi, err := os.Stat(p)
117
+		if err != nil && !os.IsNotExist(err) {
118
+			return "", err
119
+		}
120
+		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
121
+			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
122
+			if err := idtools.MkdirAs(p, 0700, uid, gid); err != nil && !os.IsExist(err) {
123
+				return "", err
124
+			}
125
+		}
126
+	}
127
+	return p, nil
128
+}
129
+
130
+func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) {
131
+	clnt.lock(containerID)
132
+	defer clnt.unlock(containerID)
133
+
134
+	if ctr, err := clnt.getContainer(containerID); err == nil {
135
+		if ctr.restarting { // docker doesn't actually call start if restart is on atm, but probably should in the future
136
+			ctr.restartManager.Cancel()
137
+			ctr.clean()
138
+		} else {
139
+			return fmt.Errorf("Container %s is aleady active", containerID)
140
+		}
141
+	}
142
+
143
+	uid, gid, err := getRootIDs(specs.Spec(spec))
144
+	if err != nil {
145
+		return err
146
+	}
147
+	dir, err := clnt.prepareBundleDir(uid, gid)
148
+	if err != nil {
149
+		return err
150
+	}
151
+
152
+	container := clnt.newContainer(filepath.Join(dir, containerID), options...)
153
+	if err := container.clean(); err != nil {
154
+		return err
155
+	}
156
+
157
+	defer func() {
158
+		if err != nil {
159
+			container.clean()
160
+			clnt.deleteContainer(containerID)
161
+		}
162
+	}()
163
+
164
+	// create the bundle rootfs with the container's root uid/gid and bind-mount the real rootfs onto it
165
+	rootfsDir := filepath.Join(container.dir, "rootfs")
166
+	if err := idtools.MkdirAllAs(rootfsDir, 0700, uid, gid); err != nil && !os.IsExist(err) {
167
+		return err
168
+	}
169
+	if err := syscall.Mount(spec.Root.Path, rootfsDir, "bind", syscall.MS_REC|syscall.MS_BIND, ""); err != nil {
170
+		return err
171
+	}
172
+	spec.Root.Path = "rootfs"
173
+
174
+	f, err := os.Create(filepath.Join(container.dir, configFilename))
175
+	if err != nil {
176
+		return err
177
+	}
178
+	defer f.Close()
179
+	if err := json.NewEncoder(f).Encode(spec); err != nil {
180
+		return err
181
+	}
182
+
183
+	return container.start()
184
+}
185
+
186
+func (clnt *client) Signal(containerID string, sig int) error {
187
+	clnt.lock(containerID)
188
+	defer clnt.unlock(containerID)
189
+	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
190
+		Id:     containerID,
191
+		Pid:    InitFriendlyName,
192
+		Signal: uint32(sig),
193
+	})
194
+	return err
195
+}
196
+
197
+func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
198
+	clnt.lock(containerID)
199
+	defer clnt.unlock(containerID)
200
+	if _, err := clnt.getContainer(containerID); err != nil {
201
+		return err
202
+	}
203
+	_, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
204
+		Id:     containerID,
205
+		Pid:    processFriendlyName,
206
+		Width:  uint32(width),
207
+		Height: uint32(height),
208
+	})
209
+	return err
210
+}
211
+
212
+func (clnt *client) Pause(containerID string) error {
213
+	return clnt.setState(containerID, StatePause)
214
+}
215
+
216
+func (clnt *client) setState(containerID, state string) error {
217
+	clnt.lock(containerID)
218
+	container, err := clnt.getContainer(containerID)
219
+	if err != nil {
220
+		clnt.unlock(containerID)
221
+		return err
222
+	}
223
+	if container.systemPid == 0 {
224
+		clnt.unlock(containerID)
225
+		return fmt.Errorf("No active process for container %s", containerID)
226
+	}
227
+	st := "running"
228
+	if state == StatePause {
229
+		st = "paused"
230
+	}
231
+	chstate := make(chan struct{})
232
+	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
233
+		Id:     containerID,
234
+		Pid:    InitFriendlyName,
235
+		Status: st,
236
+	})
237
+	if err != nil {
238
+		clnt.unlock(containerID)
239
+		return err
240
+	}
241
+	container.pauseMonitor.append(state, chstate)
242
+	clnt.unlock(containerID)
243
+	<-chstate
244
+	return nil
245
+}
246
+
247
+func (clnt *client) Resume(containerID string) error {
248
+	return clnt.setState(containerID, StateResume)
249
+}
250
+
251
+func (clnt *client) Stats(containerID string) (*Stats, error) {
252
+	resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID})
253
+	if err != nil {
254
+		return nil, err
255
+	}
256
+	return (*Stats)(resp), nil
257
+}
258
+
259
+func (clnt *client) setExited(containerID string) error {
260
+	clnt.lock(containerID)
261
+	defer clnt.unlock(containerID)
262
+
263
+	var exitCode uint32
264
+	if event, ok := clnt.remote.pastEvents[containerID]; ok {
265
+		exitCode = event.Status
266
+		delete(clnt.remote.pastEvents, containerID)
267
+	}
268
+
269
+	err := clnt.backend.StateChanged(containerID, StateInfo{
270
+		State:    StateExit,
271
+		ExitCode: exitCode,
272
+	})
273
+
274
+	// Unmount and delete the bundle folder
275
+	if mts, err := mount.GetMounts(); err == nil {
276
+		for _, mts := range mts {
277
+			if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") {
278
+				if err := syscall.Unmount(mts.Mountpoint, syscall.MNT_DETACH); err == nil {
279
+					os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs"))
280
+				}
281
+				break
282
+			}
283
+		}
284
+	}
285
+
286
+	return err
287
+}
288
+
289
+func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
290
+	cont, err := clnt.getContainerdContainer(containerID)
291
+	if err != nil {
292
+		return nil, err
293
+	}
294
+	pids := make([]int, len(cont.Pids))
295
+	for i, p := range cont.Pids {
296
+		pids[i] = int(p)
297
+	}
298
+	return pids, nil
299
+}
300
+
301
+func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
302
+	resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
303
+	if err != nil {
304
+		return nil, err
305
+	}
306
+	for _, cont := range resp.Containers {
307
+		if cont.Id == containerID {
308
+			return cont, nil
309
+		}
310
+	}
311
+	return nil, fmt.Errorf("invalid state response")
312
+}
313
+
314
+func (clnt *client) newContainer(dir string, options ...CreateOption) *container {
315
+	container := &container{
316
+		containerCommon: containerCommon{
317
+			process: process{
318
+				dir: dir,
319
+				processCommon: processCommon{
320
+					containerID:  filepath.Base(dir),
321
+					client:       clnt,
322
+					friendlyName: InitFriendlyName,
323
+				},
324
+			},
325
+			processes: make(map[string]*process),
326
+		},
327
+	}
328
+	for _, option := range options {
329
+		if err := option.Apply(container); err != nil {
330
+			logrus.Error(err)
331
+		}
332
+	}
333
+	return container
334
+}
335
+
336
+func (clnt *client) UpdateResources(containerID string, resources Resources) error {
337
+	clnt.lock(containerID)
338
+	defer clnt.unlock(containerID)
339
+	container, err := clnt.getContainer(containerID)
340
+	if err != nil {
341
+		return err
342
+	}
343
+	if container.systemPid == 0 {
344
+		return fmt.Errorf("No active process for container %s", containerID)
345
+	}
346
+	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
347
+		Id:        containerID,
348
+		Pid:       InitFriendlyName,
349
+		Resources: (*containerd.UpdateResource)(&resources),
350
+	})
351
+	if err != nil {
352
+		return err
353
+	}
354
+	return nil
355
+}
356
+
357
+func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
358
+	clnt.mapMutex.RLock()
359
+	defer clnt.mapMutex.RUnlock()
360
+	return clnt.exitNotifiers[containerID]
361
+}
362
+
363
+func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
364
+	clnt.mapMutex.Lock()
365
+	w, ok := clnt.exitNotifiers[containerID]
366
+	defer clnt.mapMutex.Unlock()
367
+	if !ok {
368
+		w = &exitNotifier{c: make(chan struct{}), client: clnt}
369
+		clnt.exitNotifiers[containerID] = w
370
+	}
371
+	return w
372
+}
373
+
374
+type exitNotifier struct {
375
+	id     string
376
+	client *client
377
+	c      chan struct{}
378
+	once   sync.Once
379
+}
380
+
381
+func (en *exitNotifier) close() {
382
+	en.once.Do(func() {
383
+		close(en.c)
384
+		en.client.mapMutex.Lock()
385
+		if en == en.client.exitNotifiers[en.id] {
386
+			delete(en.client.exitNotifiers, en.id)
387
+		}
388
+		en.client.mapMutex.Unlock()
389
+	})
390
+}
391
+func (en *exitNotifier) wait() <-chan struct{} {
392
+	return en.c
393
+}
0 394
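
The exitNotifier above is a one-shot broadcast: sync.Once makes close idempotent, and closing the channel releases every goroutine blocked in wait. A minimal standalone sketch of the same pattern (names are illustrative, not part of this change):

package main

import (
	"fmt"
	"sync"
	"time"
)

type notifier struct {
	c    chan struct{}
	once sync.Once
}

func newNotifier() *notifier { return &notifier{c: make(chan struct{})} }

// close releases all current and future waiters exactly once.
func (n *notifier) close()                { n.once.Do(func() { close(n.c) }) }
func (n *notifier) wait() <-chan struct{} { return n.c }

func main() {
	n := newNotifier()
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			<-n.wait() // blocks until close()
			fmt.Println("waiter", i, "released")
		}(i)
	}
	time.Sleep(10 * time.Millisecond)
	n.close()
	n.close() // safe: sync.Once makes the second call a no-op
	wg.Wait()
}
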
new file mode 100644
... ...
@@ -0,0 +1,83 @@
0
+// +build experimental
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"fmt"
6
+
7
+	"github.com/Sirupsen/logrus"
8
+	containerd "github.com/docker/containerd/api/grpc/types"
9
+)
10
+
11
+func (clnt *client) restore(cont *containerd.Container, options ...CreateOption) (err error) {
12
+	clnt.lock(cont.Id)
13
+	defer clnt.unlock(cont.Id)
14
+
15
+	logrus.Debugf("restore container %s state %s", cont.Id, cont.Status)
16
+
17
+	containerID := cont.Id
18
+	if _, err := clnt.getContainer(containerID); err == nil {
19
+		return fmt.Errorf("container %s is aleady active", containerID)
20
+	}
21
+
22
+	defer func() {
23
+		if err != nil {
24
+			clnt.deleteContainer(cont.Id)
25
+		}
26
+	}()
27
+
28
+	container := clnt.newContainer(cont.BundlePath, options...)
29
+	container.systemPid = systemPid(cont)
30
+
31
+	var terminal bool
32
+	for _, p := range cont.Processes {
33
+		if p.Pid == InitFriendlyName {
34
+			terminal = p.Terminal
35
+		}
36
+	}
37
+
38
+	iopipe, err := container.openFifos(terminal)
39
+	if err != nil {
40
+		return err
41
+	}
42
+
43
+	if err := clnt.backend.AttachStreams(containerID, *iopipe); err != nil {
44
+		return err
45
+	}
46
+
47
+	clnt.appendContainer(container)
48
+
49
+	err = clnt.backend.StateChanged(containerID, StateInfo{
50
+		State: StateRestore,
51
+		Pid:   container.systemPid,
52
+	})
53
+
54
+	if err != nil {
55
+		return err
56
+	}
57
+
58
+	if event, ok := clnt.remote.pastEvents[containerID]; ok {
59
+		// This should only be a pause or resume event
60
+		if event.Type == StatePause || event.Type == StateResume {
61
+			return clnt.backend.StateChanged(containerID, StateInfo{
62
+				State: event.Type,
63
+				Pid:   container.systemPid,
64
+			})
65
+		}
66
+
67
+		logrus.Warnf("unexpected backlog event: %#v", event)
68
+	}
69
+
70
+	return nil
71
+}
72
+
73
+func (clnt *client) Restore(containerID string, options ...CreateOption) error {
74
+	cont, err := clnt.getContainerdContainer(containerID)
75
+	if err == nil && cont.Status != "stopped" {
76
+		if err := clnt.restore(cont, options...); err != nil {
77
+			logrus.Errorf("error restoring %s: %v", containerID, err)
78
+		}
79
+		return nil
80
+	}
81
+	return clnt.setExited(containerID)
82
+}
0 83
new file mode 100644
... ...
@@ -0,0 +1,39 @@
0
+// +build !experimental
1
+
2
+package libcontainerd
3
+
4
+import (
5
+	"syscall"
6
+	"time"
7
+
8
+	"github.com/Sirupsen/logrus"
9
+)
10
+
11
+func (clnt *client) Restore(containerID string, options ...CreateOption) error {
12
+	w := clnt.getOrCreateExitNotifier(containerID)
13
+	defer w.close()
14
+	cont, err := clnt.getContainerdContainer(containerID)
15
+	if err == nil && cont.Status != "stopped" {
16
+		clnt.lock(cont.Id)
17
+		container := clnt.newContainer(cont.BundlePath)
18
+		container.systemPid = systemPid(cont)
19
+		clnt.appendContainer(container)
20
+		clnt.unlock(cont.Id)
21
+
22
+		if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
23
+			logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
24
+		}
25
+		select {
26
+		case <-time.After(10 * time.Second):
27
+			if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
28
+				logrus.Errorf("error sending sigkill to %v: %v", containerID, err)
29
+			}
30
+			select {
31
+			case <-time.After(2 * time.Second):
32
+			case <-w.wait():
33
+			}
34
+		case <-w.wait():
35
+		}
36
+	}
37
+	return clnt.setExited(containerID)
38
+}
0 39
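
Restore here implements the usual shutdown escalation: SIGTERM, wait up to 10 seconds for the exit notifier, then SIGKILL with a 2-second grace period. A generic, self-contained sketch of that select/time.After pattern (term, kill and done are stand-ins for Signal and the notifier channel):

package main

import (
	"fmt"
	"time"
)

// stopWithEscalation asks a worker to stop politely, then forcefully.
// term and kill stand in for Signal(SIGTERM) and Signal(SIGKILL);
// done is closed when the worker has actually exited.
func stopWithEscalation(term, kill func(), done <-chan struct{}) {
	term()
	select {
	case <-time.After(10 * time.Second):
		kill()
		select {
		case <-time.After(2 * time.Second):
			fmt.Println("gave up waiting after SIGKILL")
		case <-done:
		}
	case <-done:
	}
}

func main() {
	done := make(chan struct{})
	go func() { time.Sleep(time.Second); close(done) }() // worker exits on its own
	stopWithEscalation(func() { fmt.Println("TERM") }, func() { fmt.Println("KILL") }, done)
	fmt.Println("stopped")
}
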
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"fmt"
4
+
5
+	"github.com/docker/docker/restartmanager"
6
+)
7
+
8
+const (
9
+	// InitFriendlyName is the name given in the lookup map of processes
10
+	// for the first process started in a container.
11
+	InitFriendlyName = "init"
12
+	configFilename   = "config.json"
13
+)
14
+
15
+type containerCommon struct {
16
+	process
17
+	restartManager restartmanager.RestartManager
18
+	restarting     bool
19
+	processes      map[string]*process
20
+}
21
+
22
+// WithRestartManager sets the restartmanager to be used with the container.
23
+func WithRestartManager(rm restartmanager.RestartManager) CreateOption {
24
+	return restartManager{rm}
25
+}
26
+
27
+type restartManager struct {
28
+	rm restartmanager.RestartManager
29
+}
30
+
31
+func (rm restartManager) Apply(p interface{}) error {
32
+	if pr, ok := p.(*container); ok {
33
+		pr.restartManager = rm.rm
34
+		return nil
35
+	}
36
+	return fmt.Errorf("WithRestartManager option not supported for this client")
37
+}
0 38
new file mode 100644
... ...
@@ -0,0 +1,166 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"encoding/json"
4
+	"io/ioutil"
5
+	"os"
6
+	"path/filepath"
7
+	"syscall"
8
+
9
+	"github.com/Sirupsen/logrus"
10
+	containerd "github.com/docker/containerd/api/grpc/types"
11
+	"github.com/opencontainers/specs/specs-go"
12
+	"golang.org/x/net/context"
13
+)
14
+
15
+type container struct {
16
+	containerCommon
17
+
18
+	// Platform specific fields are below here.
19
+	pauseMonitor
20
+	oom bool
21
+}
22
+
23
+func (ctr *container) clean() error {
24
+	if _, err := os.Lstat(ctr.dir); err != nil {
25
+		if os.IsNotExist(err) {
26
+			return nil
27
+		}
28
+		return err
29
+	}
30
+
31
+	syscall.Unmount(filepath.Join(ctr.dir, "rootfs"), syscall.MNT_DETACH) // ignore error
32
+	if err := os.RemoveAll(ctr.dir); err != nil {
33
+		return err
34
+	}
35
+	return nil
36
+}
37
+
38
+func (ctr *container) spec() (*specs.Spec, error) {
39
+	var spec specs.Spec
40
+	dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename))
41
+	if err != nil {
42
+		return nil, err
43
+	}
44
+	if err := json.Unmarshal(dt, &spec); err != nil {
45
+		return nil, err
46
+	}
47
+	return &spec, nil
48
+}
49
+
50
+func (ctr *container) start() error {
51
+	spec, err := ctr.spec()
52
+	if err != nil {
53
+		return err
54
+	}
55
+	iopipe, err := ctr.openFifos(spec.Process.Terminal)
56
+	if err != nil {
57
+		return err
58
+	}
59
+
60
+	r := &containerd.CreateContainerRequest{
61
+		Id:         ctr.containerID,
62
+		BundlePath: ctr.dir,
63
+		Stdin:      ctr.fifo(syscall.Stdin),
64
+		Stdout:     ctr.fifo(syscall.Stdout),
65
+		Stderr:     ctr.fifo(syscall.Stderr),
66
+	}
67
+	ctr.client.appendContainer(ctr)
68
+
69
+	resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r)
70
+	if err != nil {
71
+		ctr.closeFifos(iopipe)
72
+		return err
73
+	}
74
+
75
+	if err := ctr.client.backend.AttachStreams(ctr.containerID, *iopipe); err != nil {
76
+		return err
77
+	}
78
+	ctr.systemPid = systemPid(resp.Container)
79
+
80
+	return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{
81
+		State: StateStart,
82
+		Pid:   ctr.systemPid,
83
+	})
84
+}
85
+
86
+func (ctr *container) newProcess(friendlyName string) *process {
87
+	return &process{
88
+		dir: ctr.dir,
89
+		processCommon: processCommon{
90
+			containerID:  ctr.containerID,
91
+			friendlyName: friendlyName,
92
+			client:       ctr.client,
93
+		},
94
+	}
95
+}
96
+
97
+func (ctr *container) handleEvent(e *containerd.Event) error {
98
+	ctr.client.lock(ctr.containerID)
99
+	defer ctr.client.unlock(ctr.containerID)
100
+	switch e.Type {
101
+	case StateExit, StatePause, StateResume, StateOOM:
102
+		st := StateInfo{
103
+			State:     e.Type,
104
+			ExitCode:  e.Status,
105
+			OOMKilled: e.Type == StateExit && ctr.oom,
106
+		}
107
+		if e.Type == StateOOM {
108
+			ctr.oom = true
109
+		}
110
+		if e.Type == StateExit && e.Pid != InitFriendlyName {
111
+			st.ProcessID = e.Pid
112
+			st.State = StateExitProcess
113
+		}
114
+		if st.State == StateExit && ctr.restartManager != nil {
115
+			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
116
+			if err != nil {
117
+				logrus.Error(err)
118
+			} else if restart {
119
+				st.State = StateRestart
120
+				ctr.restarting = true
121
+				go func() {
122
+					err := <-wait
123
+					ctr.restarting = false
124
+					if err != nil {
125
+						st.State = StateExit
126
+						ctr.client.q.append(e.Id, func() {
127
+							if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
128
+								logrus.Error(err)
129
+							}
130
+						})
131
+						logrus.Error(err)
132
+					} else {
133
+						ctr.start()
134
+					}
135
+				}()
136
+			}
137
+		}
138
+
139
+		// Remove process from list if we have exited
140
+		// We need to do so here in case the Message Handler decides to restart it.
141
+		if st.State == StateExit {
142
+			if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
143
+				ctr.clean()
144
+			}
145
+			ctr.client.deleteContainer(e.Id)
146
+		}
147
+		ctr.client.q.append(e.Id, func() {
148
+			if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
149
+				logrus.Error(err)
150
+			}
151
+			if e.Type == StatePause || e.Type == StateResume {
152
+				ctr.pauseMonitor.handle(e.Type)
153
+			}
154
+			if e.Type == StateExit {
155
+				if en := ctr.client.getExitNotifier(e.Id); en != nil {
156
+					en.close()
157
+				}
158
+			}
159
+		})
160
+
161
+	default:
162
+		logrus.Debugf("event unhandled: %+v", e)
163
+	}
164
+	return nil
165
+}
0 166
new file mode 100644
... ...
@@ -0,0 +1,31 @@
0
+package libcontainerd
1
+
2
+// pauseMonitor is a helper to get notifications from pause state changes.
3
+type pauseMonitor struct {
4
+	waiters map[string][]chan struct{}
5
+}
6
+
7
+func (m *pauseMonitor) handle(t string) {
8
+	if m.waiters == nil {
9
+		return
10
+	}
11
+	q, ok := m.waiters[t]
12
+	if !ok {
13
+		return
14
+	}
15
+	if len(q) > 0 {
16
+		close(q[0])
17
+		m.waiters[t] = q[1:]
18
+	}
19
+}
20
+
21
+func (m *pauseMonitor) append(t string, waiter chan struct{}) {
22
+	if m.waiters == nil {
23
+		m.waiters = make(map[string][]chan struct{})
24
+	}
25
+	_, ok := m.waiters[t]
26
+	if !ok {
27
+		m.waiters[t] = make([]chan struct{}, 0)
28
+	}
29
+	m.waiters[t] = append(m.waiters[t], waiter)
30
+}
0 31
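
pauseMonitor pairs with setState earlier in this change: the caller registers a waiter channel and blocks on it, and handleEvent later calls handle, which wakes exactly one waiter per event in FIFO order. A standalone demonstration of that behavior (same code, illustrative driver):

package main

import "fmt"

type pauseMonitor struct {
	waiters map[string][]chan struct{}
}

func (m *pauseMonitor) append(t string, w chan struct{}) {
	if m.waiters == nil {
		m.waiters = make(map[string][]chan struct{})
	}
	m.waiters[t] = append(m.waiters[t], w)
}

// handle wakes the oldest waiter registered for state t, if any.
func (m *pauseMonitor) handle(t string) {
	q := m.waiters[t]
	if len(q) > 0 {
		close(q[0])
		m.waiters[t] = q[1:]
	}
}

func main() {
	var m pauseMonitor
	first, second := make(chan struct{}), make(chan struct{})
	m.append("pause", first)
	m.append("pause", second)
	m.handle("pause") // wakes only the first waiter
	select {
	case <-first:
		fmt.Println("first waiter woken")
	default:
	}
	select {
	case <-second:
	default:
		fmt.Println("second waiter still blocked")
	}
}
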
new file mode 100644
... ...
@@ -0,0 +1,18 @@
0
+package libcontainerd
1
+
2
+// processCommon contains the platform-common fields of the process structure,
3
+// which keeps the state for the main container process, as well as any exec
4
+// processes.
5
+type processCommon struct {
6
+	client *client
7
+
8
+	// containerID is the Container ID
9
+	containerID string
10
+
11
+	// friendlyName is an identifier for the process (or `InitFriendlyName`
12
+	// for the first process)
13
+	friendlyName string
14
+
15
+	// systemPid is the PID of the main container process
16
+	systemPid uint32
17
+}
0 18
new file mode 100644
... ...
@@ -0,0 +1,107 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"fmt"
4
+	"io"
5
+	"os"
6
+	"path/filepath"
7
+	"syscall"
8
+
9
+	containerd "github.com/docker/containerd/api/grpc/types"
10
+	"github.com/docker/docker/pkg/ioutils"
11
+	"golang.org/x/net/context"
12
+)
13
+
14
+var fdNames = map[int]string{
15
+	syscall.Stdin:  "stdin",
16
+	syscall.Stdout: "stdout",
17
+	syscall.Stderr: "stderr",
18
+}
19
+
20
+// process keeps the state for both the main container process and any exec processes.
21
+type process struct {
22
+	processCommon
23
+
24
+	// Platform specific fields are below here.
25
+	dir string
26
+}
27
+
28
+func (p *process) openFifos(terminal bool) (*IOPipe, error) {
29
+	bundleDir := p.dir
30
+	if err := os.MkdirAll(bundleDir, 0700); err != nil {
31
+		return nil, err
32
+	}
33
+
34
+	for i := 0; i < 3; i++ {
35
+		f := p.fifo(i)
36
+		if err := syscall.Mkfifo(f, 0700); err != nil && !os.IsExist(err) {
37
+			return nil, fmt.Errorf("mkfifo: %s %v", f, err)
38
+		}
39
+	}
40
+
41
+	io := &IOPipe{}
42
+	stdinf, err := os.OpenFile(p.fifo(syscall.Stdin), syscall.O_RDWR, 0)
43
+	if err != nil {
44
+		return nil, err
45
+	}
46
+
47
+	io.Stdout = openReaderFromFifo(p.fifo(syscall.Stdout))
48
+	if !terminal {
49
+		io.Stderr = openReaderFromFifo(p.fifo(syscall.Stderr))
50
+	} else {
51
+		io.Stderr = emptyReader{}
52
+	}
53
+
54
+	io.Stdin = ioutils.NewWriteCloserWrapper(stdinf, func() error {
55
+		stdinf.Close()
56
+		_, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
57
+			Id:         p.containerID,
58
+			Pid:        p.friendlyName,
59
+			CloseStdin: true,
60
+		})
61
+		return err
62
+	})
63
+
64
+	return io, nil
65
+}
66
+
67
+func (p *process) closeFifos(io *IOPipe) {
68
+	io.Stdin.Close()
69
+	closeReaderFifo(p.fifo(syscall.Stdout))
70
+	closeReaderFifo(p.fifo(syscall.Stderr))
71
+}
72
+
73
+type emptyReader struct{}
74
+
75
+func (r emptyReader) Read(b []byte) (int, error) {
76
+	return 0, io.EOF
77
+}
78
+
79
+func openReaderFromFifo(fn string) io.Reader {
80
+	r, w := io.Pipe()
81
+	go func() {
82
+		stdoutf, err := os.OpenFile(fn, syscall.O_RDONLY, 0)
83
+		if err != nil {
84
+			r.CloseWithError(err)
+			return
85
+		}
86
+		if _, err := io.Copy(w, stdoutf); err != nil {
87
+			r.CloseWithError(err)
88
+		}
89
+		w.Close()
90
+		stdoutf.Close()
91
+	}()
92
+	return r
93
+}
94
+
95
+// closeReaderFifo closes a fifo that may be blocked on open by opening the write side.
96
+func closeReaderFifo(fn string) {
97
+	f, err := os.OpenFile(fn, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
98
+	if err != nil {
99
+		return
100
+	}
101
+	f.Close()
102
+}
103
+
104
+func (p *process) fifo(index int) string {
105
+	return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index])
106
+}
0 107
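
fifo derives three named-pipe paths per process from the bundle directory and the friendly name, so the init process of a container uses init-stdin, init-stdout and init-stderr. A small illustration of the naming rule (the bundle directory shown is hypothetical):

package main

import (
	"fmt"
	"path/filepath"
	"syscall"
)

var fdNames = map[int]string{
	syscall.Stdin:  "stdin",
	syscall.Stdout: "stdout",
	syscall.Stderr: "stderr",
}

// fifoPath mirrors process.fifo above: <bundle dir>/<friendly name>-<stream>.
func fifoPath(dir, friendlyName string, index int) string {
	return filepath.Join(dir, friendlyName+"-"+fdNames[index])
}

func main() {
	dir := "/run/docker/libcontainerd/abc123" // hypothetical bundle dir
	for _, fd := range []int{syscall.Stdin, syscall.Stdout, syscall.Stderr} {
		fmt.Println(fifoPath(dir, "init", fd))
	}
	// Output:
	// /run/docker/libcontainerd/abc123/init-stdin
	// /run/docker/libcontainerd/abc123/init-stdout
	// /run/docker/libcontainerd/abc123/init-stderr
}
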
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+package libcontainerd
1
+
2
+import "sync"
3
+
4
+type queue struct {
5
+	sync.Mutex
6
+	fns map[string]chan struct{}
7
+}
8
+
9
+func (q *queue) append(id string, f func()) {
10
+	q.Lock()
11
+	defer q.Unlock()
12
+
13
+	if q.fns == nil {
14
+		q.fns = make(map[string]chan struct{})
15
+	}
16
+
17
+	done := make(chan struct{})
18
+
19
+	fn, ok := q.fns[id]
20
+	q.fns[id] = done
21
+	go func() {
22
+		if ok {
23
+			<-fn
24
+		}
25
+		f()
26
+		close(done)
27
+	}()
28
+}
0 29
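
queue.append serializes callbacks per id without blocking the caller: each new callback's goroutine first waits on the channel closed by the previous callback for the same id, so events for one container are handled in order while different containers proceed independently. A runnable demonstration:

package main

import (
	"fmt"
	"sync"
	"time"
)

type queue struct {
	sync.Mutex
	fns map[string]chan struct{}
}

func (q *queue) append(id string, f func()) {
	q.Lock()
	defer q.Unlock()
	if q.fns == nil {
		q.fns = make(map[string]chan struct{})
	}
	done := make(chan struct{})
	prev, ok := q.fns[id]
	q.fns[id] = done
	go func() {
		if ok {
			<-prev // wait for the previous callback for this id
		}
		f()
		close(done)
	}()
}

func main() {
	var q queue
	var wg sync.WaitGroup
	wg.Add(3)
	for i := 1; i <= 3; i++ {
		i := i
		q.append("ctr1", func() {
			defer wg.Done()
			time.Sleep(10 * time.Millisecond)
			fmt.Println("ctr1 event", i) // always prints 1, 2, 3 in order
		})
	}
	wg.Wait()
}
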
new file mode 100644
... ...
@@ -0,0 +1,18 @@
0
+package libcontainerd
1
+
2
+// Remote on Linux defines the access point to the containerd grpc API.
3
+// Remote on Windows is largely an unimplemented interface as there is
4
+// no remote containerd.
5
+type Remote interface {
6
+	// Client returns a new Client instance connected with given Backend.
7
+	Client(Backend) (Client, error)
8
+	// Cleanup stops containerd if it was started by libcontainerd.
9
+	// Note this is not used on Windows as there is no remote containerd.
10
+	Cleanup()
11
+}
12
+
13
+// RemoteOption allows configuring parameters of remotes.
14
+// This is unused on Windows.
15
+type RemoteOption interface {
16
+	Apply(Remote) error
17
+}
0 18
new file mode 100644
... ...
@@ -0,0 +1,401 @@
0
+package libcontainerd
1
+
2
+import (
3
+	"fmt"
4
+	"io"
5
+	"net"
6
+	"os"
7
+	"os/exec"
8
+	"path/filepath"
9
+	"strconv"
10
+	"sync"
11
+	"syscall"
12
+	"time"
13
+
14
+	"github.com/Sirupsen/logrus"
15
+	containerd "github.com/docker/containerd/api/grpc/types"
16
+	sysinfo "github.com/docker/docker/pkg/system"
17
+	"github.com/docker/docker/utils"
18
+	"golang.org/x/net/context"
19
+	"google.golang.org/grpc"
20
+)
21
+
22
+const (
23
+	maxConnectionRetryCount   = 3
24
+	connectionRetryDelay      = 3 * time.Second
25
+	containerdShutdownTimeout = 15 * time.Second
26
+	containerdBinary          = "containerd"
27
+	containerdPidFilename     = "containerd.pid"
28
+	containerdSockFilename    = "containerd.sock"
29
+	eventTimestampFilename    = "event.ts"
30
+)
31
+
32
+type remote struct {
33
+	sync.RWMutex
34
+	apiClient   containerd.APIClient
35
+	daemonPid   int
36
+	stateDir    string
37
+	rpcAddr     string
38
+	startDaemon bool
39
+	debugLog    bool
40
+	rpcConn     *grpc.ClientConn
41
+	clients     []*client
42
+	eventTsPath string
43
+	pastEvents  map[string]*containerd.Event
44
+}
45
+
46
+// New creates a fresh instance of libcontainerd remote.
47
+func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
48
+	defer func() {
49
+		if err != nil {
50
+			err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specificed the correct address. Got error: %v", err)
51
+		}
52
+	}()
53
+	r := &remote{
54
+		stateDir:    stateDir,
55
+		daemonPid:   -1,
56
+		eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
57
+		pastEvents:  make(map[string]*containerd.Event),
58
+	}
59
+	for _, option := range options {
60
+		if err := option.Apply(r); err != nil {
61
+			return nil, err
62
+		}
63
+	}
64
+
65
+	if err := sysinfo.MkdirAll(stateDir, 0700); err != nil {
66
+		return nil, err
67
+	}
68
+
69
+	if r.rpcAddr == "" {
70
+		r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
71
+	}
72
+
73
+	if r.startDaemon {
74
+		if err := r.runContainerdDaemon(); err != nil {
75
+			return nil, err
76
+		}
77
+	}
78
+
79
+	dialOpts := append([]grpc.DialOption{grpc.WithInsecure()},
80
+		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
81
+			return net.DialTimeout("unix", addr, timeout)
82
+		}),
83
+	)
84
+	conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
85
+	if err != nil {
86
+		return nil, fmt.Errorf("error connecting to containerd: %v", err)
87
+	}
88
+
89
+	r.rpcConn = conn
90
+	r.apiClient = containerd.NewAPIClient(conn)
91
+
92
+	go r.handleConnectionChange()
93
+
94
+	if err := r.startEventsMonitor(); err != nil {
95
+		return nil, err
96
+	}
97
+
98
+	return r, nil
99
+}
100
+
101
+func (r *remote) handleConnectionChange() {
102
+	var transientFailureCount = 0
103
+	state := grpc.Idle
104
+	for {
105
+		s, err := r.rpcConn.WaitForStateChange(context.Background(), state)
106
+		if err != nil {
107
+			break
108
+		}
109
+		state = s
110
+		logrus.Debugf("containerd connection state change: %v", s)
111
+
112
+		if r.daemonPid != -1 {
113
+			switch state {
114
+			case grpc.TransientFailure:
115
+				// Count consecutive transient failures; after too many, restart containerd
116
+				transientFailureCount++
117
+				if transientFailureCount >= maxConnectionRetryCount {
118
+					transientFailureCount = 0
119
+					if utils.IsProcessAlive(r.daemonPid) {
120
+						utils.KillProcess(r.daemonPid)
121
+					}
122
+					if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
123
+						logrus.Errorf("error restarting containerd: %v", err)
124
+					}
125
+				} else {
126
+					state = grpc.Idle
127
+					time.Sleep(connectionRetryDelay)
128
+				}
129
+			case grpc.Shutdown:
130
+				// Well, we asked for it to stop, just return
131
+				return
132
+			}
133
+		}
134
+	}
135
+}
136
+
137
+func (r *remote) Cleanup() {
138
+	if r.daemonPid == -1 {
139
+		return
140
+	}
141
+	r.rpcConn.Close()
142
+	// Ask the daemon to quit
143
+	syscall.Kill(r.daemonPid, syscall.SIGTERM)
144
+
145
+	// Wait up to 15secs for it to stop
146
+	for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
147
+		if !utils.IsProcessAlive(r.daemonPid) {
148
+			break
149
+		}
150
+		time.Sleep(time.Second)
151
+	}
152
+
153
+	if utils.IsProcessAlive(r.daemonPid) {
154
+		logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
155
+		syscall.Kill(r.daemonPid, syscall.SIGKILL)
156
+	}
157
+
158
+	// cleanup some files
159
+	os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
160
+	os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
161
+}
162
+
163
+func (r *remote) Client(b Backend) (Client, error) {
164
+	c := &client{
165
+		clientCommon: clientCommon{
166
+			backend:          b,
167
+			containerMutexes: make(map[string]*sync.Mutex),
168
+			containers:       make(map[string]*container),
169
+		},
170
+		remote:        r,
171
+		exitNotifiers: make(map[string]*exitNotifier),
172
+	}
173
+
174
+	r.Lock()
175
+	r.clients = append(r.clients, c)
176
+	r.Unlock()
177
+	return c, nil
178
+}
179
+
180
+func (r *remote) updateEventTimestamp(t time.Time) {
181
+	f, err := os.OpenFile(r.eventTsPath, syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC, 0600)
182
+	if err != nil {
184
+		logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
185
+		return
186
+	}
187
+	defer f.Close()
187
+
188
+	b, err := t.MarshalText()
189
+	if err != nil {
190
+		logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
191
+		return
192
+	}
193
+
194
+	n, err := f.Write(b)
195
+	if err != nil || n != len(b) {
196
+		logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
197
+		f.Truncate(0)
198
+		return
199
+	}
200
+
201
+}
202
+
203
+func (r *remote) getLastEventTimestamp() int64 {
204
+	t := time.Now()
205
+
206
+	fi, err := os.Stat(r.eventTsPath)
207
+	if err != nil {
208
+		return t.Unix()
209
+	}
210
+
211
+	f, err := os.Open(r.eventTsPath)
212
+	if err != nil {
213
+		logrus.Warnf("libcontainerd: unable to access last event ts: %v", err)
214
+		return t.Unix()
215
+	}
216
+	defer f.Close()
217
+
218
+	b := make([]byte, fi.Size())
219
+	n, err := f.Read(b)
220
+	if err != nil || n != len(b) {
221
+		logrus.Warn("libcontainerd: Unable to read last event ts: %v", err)
222
+		return t.Unix()
223
+	}
224
+
225
+	t.UnmarshalText(b)
226
+
227
+	return t.Unix()
228
+}
229
+
230
+func (r *remote) startEventsMonitor() error {
231
+	// First, get past events
232
+	er := &containerd.EventsRequest{
233
+		Timestamp: uint64(r.getLastEventTimestamp()),
234
+	}
235
+	events, err := r.apiClient.Events(context.Background(), er)
236
+	if err != nil {
237
+		return err
238
+	}
239
+	go r.handleEventStream(events)
240
+	return nil
241
+}
242
+
243
+func (r *remote) handleEventStream(events containerd.API_EventsClient) {
244
+	live := false
245
+	for {
246
+		e, err := events.Recv()
247
+		if err != nil {
248
+			logrus.Errorf("failed to receive event from containerd: %v", err)
249
+			go r.startEventsMonitor()
250
+			return
251
+		}
252
+
253
+		if !live {
254
+			logrus.Debugf("received past containerd event: %#v", e)
255
+
256
+			// Pause/Resume events should never happen after an exit event
257
+			switch e.Type {
258
+			case StateExit:
259
+				r.pastEvents[e.Id] = e
260
+			case StatePause:
261
+				r.pastEvents[e.Id] = e
262
+			case StateResume:
263
+				r.pastEvents[e.Id] = e
264
+			case stateLive:
265
+				live = true
266
+				r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0))
267
+			}
268
+		} else {
269
+			logrus.Debugf("received containerd event: %#v", e)
270
+
271
+			var container *container
272
+			var c *client
273
+			r.RLock()
274
+			for _, c = range r.clients {
275
+				container, err = c.getContainer(e.Id)
276
+				if err == nil {
277
+					break
278
+				}
279
+			}
280
+			r.RUnlock()
281
+			if container == nil {
282
+				logrus.Errorf("no state for container: %q", err)
283
+				continue
284
+			}
285
+
286
+			if err := container.handleEvent(e); err != nil {
287
+				logrus.Errorf("error processing state change for %s: %v", e.Id, err)
288
+			}
289
+
290
+			r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0))
291
+		}
292
+	}
293
+}
294
+
295
+func (r *remote) runContainerdDaemon() error {
296
+	pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
297
+	f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
298
+	if err != nil {
299
+		return err
300
+	}
301
+	defer f.Close()
302
+
303
+	// If the file already holds a pid, check whether that daemon is still alive
304
+	b := make([]byte, 8)
305
+	n, err := f.Read(b)
306
+	if err != nil && err != io.EOF {
307
+		return err
308
+	}
309
+
310
+	if n > 0 {
311
+		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
312
+		if err != nil {
313
+			return err
314
+		}
315
+		if utils.IsProcessAlive(int(pid)) {
316
+			logrus.Infof("previous instance of containerd still alive (%d)", pid)
317
+			r.daemonPid = int(pid)
318
+			return nil
319
+		}
320
+	}
321
+
322
+	// rewind the file
323
+	_, err = f.Seek(0, os.SEEK_SET)
324
+	if err != nil {
325
+		return err
326
+	}
327
+
328
+	// Truncate it
329
+	err = f.Truncate(0)
330
+	if err != nil {
331
+		return err
332
+	}
333
+
334
+	// Start a new instance
335
+	args := []string{"-l", r.rpcAddr}
336
+	if r.debugLog {
337
+		args = append(args, "--debug", "true")
338
+	}
339
+	cmd := exec.Command(containerdBinary, args...)
340
+	// TODO: store logs?
341
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
342
+	if err := cmd.Start(); err != nil {
343
+		return err
344
+	}
345
+	logrus.Infof("New containerd process, pid: %d\n", cmd.Process.Pid)
346
+
347
+	if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
348
+		utils.KillProcess(cmd.Process.Pid)
349
+		return err
350
+	}
351
+
352
+	go cmd.Wait() // Reap our child when needed
353
+	r.daemonPid = cmd.Process.Pid
354
+	return nil
355
+}
356
+
357
+// WithRemoteAddr sets the external containerd socket to connect to.
358
+func WithRemoteAddr(addr string) RemoteOption {
359
+	return rpcAddr(addr)
360
+}
361
+
362
+type rpcAddr string
363
+
364
+func (a rpcAddr) Apply(r Remote) error {
365
+	if remote, ok := r.(*remote); ok {
366
+		remote.rpcAddr = string(a)
367
+		return nil
368
+	}
369
+	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
370
+}
371
+
372
+// WithStartDaemon defines if libcontainerd should also run containerd daemon.
373
+func WithStartDaemon(start bool) RemoteOption {
374
+	return startDaemon(start)
375
+}
376
+
377
+type startDaemon bool
378
+
379
+func (s startDaemon) Apply(r Remote) error {
380
+	if remote, ok := r.(*remote); ok {
381
+		remote.startDaemon = bool(s)
382
+		return nil
383
+	}
384
+	return fmt.Errorf("WithStartDaemon option not supported for this remote")
385
+}
386
+
387
+// WithDebugLog defines if containerd debug logs will be enabled for daemon.
388
+func WithDebugLog(debug bool) RemoteOption {
389
+	return debugLog(debug)
390
+}
391
+
392
+type debugLog bool
393
+
394
+func (d debugLog) Apply(r Remote) error {
395
+	if remote, ok := r.(*remote); ok {
396
+		remote.debugLog = bool(d)
397
+		return nil
398
+	}
399
+	return fmt.Errorf("WithDebugLog option not supported for this remote")
400
+}
0 401
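
The types at the bottom of this file are functional options: each is a thin named type whose Apply mutates the concrete *remote. A hedged usage sketch, assuming the package lives at github.com/docker/docker/libcontainerd and using a stub Backend (the real daemon supplies its own):

package main

import (
	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/libcontainerd"
)

// stubBackend is a placeholder Backend; the real daemon wires these
// callbacks into container state handling and stream attachment.
type stubBackend struct{}

func (stubBackend) StateChanged(containerID string, state libcontainerd.StateInfo) error { return nil }
func (stubBackend) AttachStreams(name string, pipes libcontainerd.IOPipe) error          { return nil }

func main() {
	remote, err := libcontainerd.New("/run/docker/libcontainerd",
		libcontainerd.WithStartDaemon(true), // spawn containerd if none is running
		libcontainerd.WithDebugLog(false),
	)
	if err != nil {
		logrus.Fatal(err)
	}
	defer remote.Cleanup() // stops containerd only if libcontainerd started it

	if _, err := remote.Client(stubBackend{}); err != nil {
		logrus.Fatal(err)
	}
	// The returned Client exposes Create, Signal, Pause, Resume, Restore, ...
}
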
new file mode 100644
... ...
@@ -0,0 +1,59 @@
0
+package libcontainerd
1
+
2
+import "io"
3
+
4
+// State constants used in state change reporting.
5
+const (
6
+	StateStart        = "start-container"
7
+	StatePause        = "pause"
8
+	StateResume       = "resume"
9
+	StateExit         = "exit"
10
+	StateRestart      = "restart"
11
+	StateRestore      = "restore"
12
+	StateStartProcess = "start-process"
13
+	StateExitProcess  = "exit-process"
14
+	StateOOM          = "oom" // fake state
15
+	stateLive         = "live"
16
+)
17
+
18
+// StateInfo contains a description of the new state the container has entered.
19
+type StateInfo struct { // FIXME: event?
20
+	State     string
21
+	Pid       uint32
22
+	ExitCode  uint32
23
+	ProcessID string
24
+	OOMKilled bool // TODO Windows containerd factor out
25
+}
26
+
27
+// Backend defines callbacks that the client of the library needs to implement.
28
+type Backend interface {
29
+	StateChanged(containerID string, state StateInfo) error
30
+	AttachStreams(processFriendlyName string, io IOPipe) error
31
+}
32
+
33
+// Client provides access to containerd features.
34
+type Client interface {
35
+	Create(containerID string, spec Spec, options ...CreateOption) error
36
+	Signal(containerID string, sig int) error
37
+	AddProcess(containerID, processFriendlyName string, process Process) error
38
+	Resize(containerID, processFriendlyName string, width, height int) error
39
+	Pause(containerID string) error
40
+	Resume(containerID string) error
41
+	Restore(containerID string, options ...CreateOption) error
42
+	Stats(containerID string) (*Stats, error)
43
+	GetPidsForContainer(containerID string) ([]int, error)
44
+	UpdateResources(containerID string, resources Resources) error
45
+}
46
+
47
+// CreateOption allows configuring parameters of container creation.
48
+type CreateOption interface {
49
+	Apply(interface{}) error
50
+}
51
+
52
+// IOPipe contains the stdio streams.
53
+type IOPipe struct {
54
+	Stdin    io.WriteCloser
55
+	Stdout   io.Reader
56
+	Stderr   io.Reader
57
+	Terminal bool // Whether stderr is connected on Windows
58
+}
0 59
new file mode 100644
... ...
@@ -0,0 +1,44 @@
0
+package libcontainerd
1
+
2
+import (
3
+	containerd "github.com/docker/containerd/api/grpc/types"
4
+	"github.com/opencontainers/specs/specs-go"
5
+)
6
+
7
+// Spec is the base configuration for the container.  It specifies platform
8
+// independent configuration. This information must be included when the
9
+// bundle is packaged for distribution.
10
+type Spec specs.Spec
11
+
12
+// Process contains information to start a specific application inside the container.
13
+type Process struct {
14
+	// Terminal creates an interactive terminal for the container.
15
+	Terminal bool `json:"terminal"`
16
+	// User specifies user information for the process.
17
+	User *User `json:"user"`
18
+	// Args specifies the binary and arguments for the application to execute.
19
+	Args []string `json:"args"`
20
+	// Env populates the process environment for the process.
21
+	Env []string `json:"env,omitempty"`
22
+	// Cwd is the current working directory for the process and must be
23
+	// relative to the container's root.
24
+	Cwd *string `json:"cwd"`
25
+	// Capabilities are linux capabilities that are kept for the container.
26
+	Capabilities []string `json:"capabilities,omitempty"`
27
+	// Rlimits specifies rlimit options to apply to the process.
28
+	Rlimits []specs.Rlimit `json:"rlimits,omitempty"`
29
+	// ApparmorProfile specified the apparmor profile for the container.
30
+	ApparmorProfile *string `json:"apparmorProfile,omitempty"`
31
+	// SelinuxProcessLabel specifies the selinux context that the container process is run as.
32
+	SelinuxLabel *string `json:"selinuxLabel,omitempty"`
33
+}
34
+
35
+// Stats contains the stats properties returned by containerd.
36
+type Stats containerd.StatsResponse
37
+
38
+// User specifies linux specific user and group information for the container's
39
+// main process.
40
+type User specs.User
41
+
42
+// Resources defines updatable container resource values.
43
+type Resources containerd.UpdateResource
0 44
new file mode 100644
... ...
@@ -0,0 +1,41 @@
0
+package libcontainerd
1
+
2
+import (
3
+	containerd "github.com/docker/containerd/api/grpc/types"
4
+	"github.com/opencontainers/specs/specs-go"
5
+)
6
+
7
+func getRootIDs(s specs.Spec) (int, int, error) {
8
+	var hasUserns bool
9
+	for _, ns := range s.Linux.Namespaces {
10
+		if ns.Type == specs.UserNamespace {
11
+			hasUserns = true
12
+			break
13
+		}
14
+	}
15
+	if !hasUserns {
16
+		return 0, 0, nil
17
+	}
18
+	uid := hostIDFromMap(0, s.Linux.UIDMappings)
19
+	gid := hostIDFromMap(0, s.Linux.GIDMappings)
20
+	return uid, gid, nil
21
+}
22
+
23
+func hostIDFromMap(id uint32, mp []specs.IDMapping) int {
24
+	for _, m := range mp {
25
+		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
26
+			return int(m.HostID + id - m.ContainerID)
27
+		}
28
+	}
29
+	return 0
30
+}
31
+
32
+func systemPid(ctr *containerd.Container) uint32 {
33
+	var pid uint32
34
+	for _, p := range ctr.Processes {
35
+		if p.Pid == InitFriendlyName {
36
+			pid = p.SystemPid
37
+		}
38
+	}
39
+	return pid
40
+}
0 41
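
hostIDFromMap translates a container-side id to the host side by offsetting into the first matching range; ids outside every mapping fall back to 0. A self-contained check of that arithmetic with an illustrative mapping (idMapping here is a local stand-in for specs.IDMapping):

package main

import "fmt"

type idMapping struct {
	ContainerID, HostID, Size uint32
}

func hostIDFromMap(id uint32, mp []idMapping) int {
	for _, m := range mp {
		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
			return int(m.HostID + id - m.ContainerID)
		}
	}
	return 0
}

func main() {
	maps := []idMapping{{ContainerID: 0, HostID: 100000, Size: 65536}}
	fmt.Println(hostIDFromMap(0, maps))     // 100000: container root maps to the range start
	fmt.Println(hostIDFromMap(1000, maps))  // 101000: the offset inside the range is preserved
	fmt.Println(hostIDFromMap(70000, maps)) // 0: outside every mapping
}
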
new file mode 100644
... ...
@@ -0,0 +1,214 @@
0
+package oci
1
+
2
+import (
3
+	"os"
4
+	"runtime"
5
+
6
+	"github.com/opencontainers/specs/specs-go"
7
+)
8
+
9
+func sPtr(s string) *string      { return &s }
10
+func rPtr(r rune) *rune          { return &r }
11
+func iPtr(i int64) *int64        { return &i }
12
+func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
13
+func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
14
+
15
+// DefaultSpec returns the default OCI spec used by docker.
16
+func DefaultSpec() specs.Spec {
17
+	s := specs.Spec{
18
+		Version: specs.Version,
19
+		Platform: specs.Platform{
20
+			OS:   runtime.GOOS,
21
+			Arch: runtime.GOARCH,
22
+		},
23
+	}
24
+	s.Mounts = []specs.Mount{
25
+		{
26
+			Destination: "/proc",
27
+			Type:        "proc",
28
+			Source:      "proc",
29
+			Options:     []string{"nosuid", "noexec", "nodev"},
30
+		},
31
+		{
32
+			Destination: "/dev",
33
+			Type:        "tmpfs",
34
+			Source:      "tmpfs",
35
+			Options:     []string{"nosuid", "strictatime", "mode=755"},
36
+		},
37
+		{
38
+			Destination: "/dev/pts",
39
+			Type:        "devpts",
40
+			Source:      "devpts",
41
+			Options:     []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
42
+		},
43
+		{
44
+			Destination: "/sys",
45
+			Type:        "sysfs",
46
+			Source:      "sysfs",
47
+			Options:     []string{"nosuid", "noexec", "nodev", "ro"},
48
+		},
49
+		{
50
+			Destination: "/sys/fs/cgroup",
51
+			Type:        "cgroup",
52
+			Source:      "cgroup",
53
+			Options:     []string{"ro", "nosuid", "noexec", "nodev"},
54
+		},
55
+		{
56
+			Destination: "/dev/mqueue",
57
+			Type:        "mqueue",
58
+			Source:      "mqueue",
59
+			Options:     []string{"nosuid", "noexec", "nodev"},
60
+		},
61
+	}
62
+
63
+	s.Process.Capabilities = []string{
64
+		"CAP_CHOWN",
65
+		"CAP_DAC_OVERRIDE",
66
+		"CAP_FSETID",
67
+		"CAP_FOWNER",
68
+		"CAP_MKNOD",
69
+		"CAP_NET_RAW",
70
+		"CAP_SETGID",
71
+		"CAP_SETUID",
72
+		"CAP_SETFCAP",
73
+		"CAP_SETPCAP",
74
+		"CAP_NET_BIND_SERVICE",
75
+		"CAP_SYS_CHROOT",
76
+		"CAP_KILL",
77
+		"CAP_AUDIT_WRITE",
78
+	}
79
+
80
+	s.Linux = specs.Linux{
81
+		Namespaces: []specs.Namespace{
82
+			{Type: "mount"},
83
+			{Type: "network"},
84
+			{Type: "uts"},
85
+			{Type: "pid"},
86
+			{Type: "ipc"},
87
+		},
88
+		Devices: []specs.Device{
89
+			{
90
+				Type:     "c",
91
+				Path:     "/dev/zero",
92
+				Major:    1,
93
+				Minor:    5,
94
+				FileMode: fmPtr(0666),
95
+				UID:      u32Ptr(0),
96
+				GID:      u32Ptr(0),
97
+			},
98
+			{
99
+				Type:     "c",
100
+				Path:     "/dev/null",
101
+				Major:    1,
102
+				Minor:    3,
103
+				FileMode: fmPtr(0666),
104
+				UID:      u32Ptr(0),
105
+				GID:      u32Ptr(0),
106
+			},
107
+			{
108
+				Type:     "c",
109
+				Path:     "/dev/urandom",
110
+				Major:    1,
111
+				Minor:    9,
112
+				FileMode: fmPtr(0666),
113
+				UID:      u32Ptr(0),
114
+				GID:      u32Ptr(0),
115
+			},
116
+			{
117
+				Type:     "c",
118
+				Path:     "/dev/random",
119
+				Major:    1,
120
+				Minor:    8,
121
+				FileMode: fmPtr(0666),
122
+				UID:      u32Ptr(0),
123
+				GID:      u32Ptr(0),
124
+			},
125
+			// {
126
+			// 	Type:     "c",
127
+			// 	Path:     "/dev/tty",
128
+			// 	Major:    5,
129
+			// 	Minor:    0,
130
+			// 	FileMode: fmPtr(0666),
131
+			// 	UID:      u32Ptr(0),
132
+			// 	GID:      u32Ptr(0),
133
+			// },
134
+			// {
135
+			// 	Type:     "c",
136
+			// 	Path:     "/dev/console",
137
+			// 	Major:    5,
138
+			// 	Minor:    1,
139
+			// 	FileMode: fmPtr(0666),
140
+			// 	UID:      u32Ptr(0),
141
+			// 	GID:      u32Ptr(0),
142
+			// },
143
+			{
144
+				Type:     "c",
145
+				Path:     "/dev/fuse",
146
+				Major:    10,
147
+				Minor:    229,
148
+				FileMode: fmPtr(0666),
149
+				UID:      u32Ptr(0),
150
+				GID:      u32Ptr(0),
151
+			},
152
+		},
153
+		Resources: &specs.Resources{
154
+			Devices: []specs.DeviceCgroup{
155
+				{
156
+					Allow:  false,
157
+					Access: sPtr("rwm"),
158
+				},
159
+				{
160
+					Allow:  true,
161
+					Type:   sPtr("c"),
162
+					Major:  iPtr(1),
163
+					Minor:  iPtr(5),
164
+					Access: sPtr("rwm"),
165
+				},
166
+				{
167
+					Allow:  true,
168
+					Type:   sPtr("c"),
169
+					Major:  iPtr(1),
170
+					Minor:  iPtr(3),
171
+					Access: sPtr("rwm"),
172
+				},
173
+				{
174
+					Allow:  true,
175
+					Type:   sPtr("c"),
176
+					Major:  iPtr(1),
177
+					Minor:  iPtr(9),
178
+					Access: sPtr("rwm"),
179
+				},
180
+				{
181
+					Allow:  true,
182
+					Type:   sPtr("c"),
183
+					Major:  iPtr(1),
184
+					Minor:  iPtr(8),
185
+					Access: sPtr("rwm"),
186
+				},
187
+				{
188
+					Allow:  true,
189
+					Type:   sPtr("c"),
190
+					Major:  iPtr(5),
191
+					Minor:  iPtr(0),
192
+					Access: sPtr("rwm"),
193
+				},
194
+				{
195
+					Allow:  true,
196
+					Type:   sPtr("c"),
197
+					Major:  iPtr(5),
198
+					Minor:  iPtr(1),
199
+					Access: sPtr("rwm"),
200
+				},
201
+				{
202
+					Allow:  false,
203
+					Type:   sPtr("c"),
204
+					Major:  iPtr(10),
205
+					Minor:  iPtr(229),
206
+					Access: sPtr("rwm"),
207
+				},
208
+			},
209
+		},
210
+	}
211
+
212
+	return s
213
+}
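
container.spec() elsewhere in this change reads the bundle's config.json back; the producer side serializes the result of DefaultSpec, after daemon-specific adjustments, into that file. A hedged sketch of that round trip, assuming the package import path github.com/docker/docker/oci and a hypothetical bundle directory:

package main

import (
	"encoding/json"
	"io/ioutil"
	"log"
	"path/filepath"

	"github.com/docker/docker/oci"
)

func main() {
	spec := oci.DefaultSpec()
	spec.Hostname = "example" // daemon-specific adjustments happen here

	dt, err := json.MarshalIndent(spec, "", "\t")
	if err != nil {
		log.Fatal(err)
	}
	bundleDir := "/tmp/bundle" // hypothetical; the daemon uses the container's bundle path
	if err := ioutil.WriteFile(filepath.Join(bundleDir, "config.json"), dt, 0600); err != nil {
		log.Fatal(err)
	}
}
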
... ...
@@ -9,3 +9,9 @@ import "syscall"
9 9
 func Unmount(dest string) error {
10 10
 	return syscall.Unmount(dest, 0)
11 11
 }
12
+
13
+// CommandLineToArgv should not be used on Unix.
14
+// It simply returns commandLine as the only element of the returned array.
15
+func CommandLineToArgv(commandLine string) ([]string, error) {
16
+	return []string{commandLine}, nil
17
+}
... ...
@@ -3,6 +3,7 @@ package system
3 3
 import (
4 4
 	"fmt"
5 5
 	"syscall"
6
+	"unsafe"
6 7
 )
7 8
 
8 9
 // OSVersion is a wrapper for Windows version information
... ...
@@ -34,3 +35,26 @@ func GetOSVersion() (OSVersion, error) {
34 34
 func Unmount(dest string) error {
35 35
 	return nil
36 36
 }
37
+
38
+// CommandLineToArgv wraps the Windows syscall to turn a command line into an argument array.
39
+func CommandLineToArgv(commandLine string) ([]string, error) {
40
+	var argc int32
41
+
42
+	argsPtr, err := syscall.UTF16PtrFromString(commandLine)
43
+	if err != nil {
44
+		return nil, err
45
+	}
46
+
47
+	argv, err := syscall.CommandLineToArgv(argsPtr, &argc)
48
+	if err != nil {
49
+		return nil, err
50
+	}
51
+	defer syscall.LocalFree(syscall.Handle(uintptr(unsafe.Pointer(argv))))
52
+
53
+	newArgs := make([]string, argc)
54
+	for i, v := range (*argv)[:argc] {
55
+		newArgs[i] = syscall.UTF16ToString((*v)[:])
56
+	}
57
+
58
+	return newArgs, nil
59
+}
37 60
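
On Windows this defers quoting rules to the native CommandLineToArgvW call, while the Unix counterpart above returns the whole line as one element. A usage sketch with a hypothetical command line, assuming the package import path github.com/docker/docker/pkg/system:

package main

import (
	"fmt"
	"log"

	"github.com/docker/docker/pkg/system"
)

func main() {
	args, err := system.CommandLineToArgv(`"C:\Program Files\app.exe" --log debug`)
	if err != nil {
		log.Fatal(err)
	}
	// On Windows the quoted path and each flag become separate elements;
	// on Unix a single element holds the whole string.
	fmt.Printf("%d args: %q\n", len(args), args)
}
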
new file mode 100644
... ...
@@ -0,0 +1,118 @@
0
+package restartmanager
1
+
2
+import (
3
+	"fmt"
4
+	"sync"
5
+	"time"
6
+
7
+	"github.com/docker/engine-api/types/container"
8
+)
9
+
10
+const (
11
+	backoffMultiplier = 2
12
+	defaultTimeout    = 100 * time.Millisecond
13
+)
14
+
15
+// RestartManager defines object that controls container restarting rules.
16
+type RestartManager interface {
17
+	Cancel() error
18
+	ShouldRestart(exitCode uint32) (bool, chan error, error)
19
+}
20
+
21
+type restartManager struct {
22
+	sync.Mutex
23
+	sync.Once
24
+	policy       container.RestartPolicy
25
+	failureCount int
26
+	timeout      time.Duration
27
+	active       bool
28
+	cancel       chan struct{}
29
+	canceled     bool
30
+}
31
+
32
+// New returns a new restartmanager based on a policy.
33
+func New(policy container.RestartPolicy) RestartManager {
34
+	return &restartManager{policy: policy, cancel: make(chan struct{})}
35
+}
36
+
37
+func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
38
+	rm.Lock()
39
+	rm.policy = policy
40
+	rm.Unlock()
41
+}
42
+
43
+func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
44
+	rm.Lock()
45
+	unlockOnExit := true
46
+	defer func() {
47
+		if unlockOnExit {
48
+			rm.Unlock()
49
+		}
50
+	}()
51
+
52
+	if rm.canceled {
53
+		return false, nil, nil
54
+	}
55
+
56
+	if rm.active {
57
+		return false, nil, fmt.Errorf("invalid call on active restartmanager")
58
+	}
59
+
60
+	if exitCode != 0 {
61
+		rm.failureCount++
62
+	} else {
63
+		rm.failureCount = 0
64
+	}
65
+
66
+	if rm.timeout == 0 {
67
+		rm.timeout = defaultTimeout
68
+	} else {
69
+		rm.timeout *= backoffMultiplier
70
+	}
71
+
72
+	var restart bool
73
+	switch {
74
+	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
75
+		restart = true
76
+	case rm.policy.IsOnFailure():
77
+		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
78
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
79
+			restart = exitCode != 0
80
+		}
81
+	}
82
+
83
+	if !restart {
84
+		rm.active = false
85
+		return false, nil, nil
86
+	}
87
+
88
+	unlockOnExit = false
89
+	rm.active = true
90
+	rm.Unlock()
91
+
92
+	ch := make(chan error)
93
+	go func() {
94
+		select {
95
+		case <-rm.cancel:
96
+			ch <- fmt.Errorf("restartmanager canceled")
97
+			close(ch)
98
+		case <-time.After(rm.timeout):
99
+			rm.Lock()
100
+			close(ch)
101
+			rm.active = false
102
+			rm.Unlock()
103
+		}
104
+	}()
105
+
106
+	return true, ch, nil
107
+}
108
+
109
+func (rm *restartManager) Cancel() error {
110
+	rm.Do(func() {
111
+		rm.Lock()
112
+		rm.canceled = true
113
+		close(rm.cancel)
114
+		rm.Unlock()
115
+	})
116
+	return nil
117
+}
0 118
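
ShouldRestart both decides and schedules: on a restartable exit it arms a backoff timer (100ms, doubling per attempt) and hands back a channel; receiving a non-nil error from it means the restart was canceled, while a plain close means the backoff elapsed. A caller-side sketch mirroring handleEvent above, assuming the import paths shown in this diff:

package main

import (
	"fmt"

	"github.com/docker/docker/restartmanager"
	"github.com/docker/engine-api/types/container"
)

func main() {
	rm := restartmanager.New(container.RestartPolicy{
		Name:              "on-failure",
		MaximumRetryCount: 3,
	})

	restart, wait, err := rm.ShouldRestart(1) // simulate a non-zero exit code
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	if restart {
		if err := <-wait; err != nil {
			fmt.Println("restart canceled:", err) // Cancel() fired during the backoff
			return
		}
		fmt.Println("backoff elapsed, restarting") // after ~100ms on the first failure
	}
}
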
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+package restartmanager
1
+
2
+// FIXME
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"io"
6 6
 	"io/ioutil"
7 7
 	"strings"
8
+	"sync"
8 9
 
9 10
 	"github.com/docker/docker/pkg/broadcaster"
10 11
 	"github.com/docker/docker/pkg/ioutils"
... ...
@@ -20,6 +21,7 @@ import (
20 20
 // copied and delivered to all StdoutPipe and StderrPipe consumers, using
21 21
 // a kind of "broadcaster".
22 22
 type StreamConfig struct {
23
+	sync.WaitGroup
23 24
 	stdout    *broadcaster.Unbuffered
24 25
 	stderr    *broadcaster.Unbuffered
25 26
 	stdin     io.ReadCloser
26 27
new file mode 100644
... ...
@@ -0,0 +1,22 @@
0
+// +build linux freebsd
1
+
2
+package utils
3
+
4
+import (
5
+	"syscall"
6
+)
7
+
8
+// IsProcessAlive returns true if process with a given pid is running.
9
+func IsProcessAlive(pid int) bool {
10
+	err := syscall.Kill(pid, syscall.Signal(0))
11
+	if err == nil || err == syscall.EPERM {
12
+		return true
13
+	}
14
+
15
+	return false
16
+}
17
+
18
+// KillProcess force-stops a process.
19
+func KillProcess(pid int) {
20
+	syscall.Kill(pid, syscall.SIGKILL)
21
+}
0 22
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+package utils
1
+
2
+// IsProcessAlive returns true if process with a given pid is running.
3
+func IsProcessAlive(pid int) bool {
4
+	// TODO Windows containerd. Not sure this is needed
5
+	//	p, err := os.FindProcess(pid)
6
+	//	if err == nil {
7
+	//		return true
8
+	//	}
9
+	return false
10
+}
11
+
12
+// KillProcess force-stops a process.
13
+func KillProcess(pid int) {
14
+	// TODO Windows containerd. Not sure this is needed
15
+	//	p, err := os.FindProcess(pid)
16
+	//	if err == nil {
17
+	//		p.Kill()
18
+	//	}
19
+}