Browse code

Refactor libcontainerd to minimize c8d RPCs

The containerd client is very chatty at the best of times. Because the
libcontainerd API is stateless and references containers and processes by
string ID for every method call, the implementation is essentially
forced to use the containerd client in a way which amplifies the number
of redundant RPCs invoked to perform any operation. The libcontainerd
remote implementation has to reload the containerd container, task
and/or process metadata for nearly every operation. This in turn
amplifies the number of context switches between dockerd and containerd
to perform any container operation or handle a containerd event,
increasing the load on the system which could otherwise be allocated to
workloads.

Overhaul the libcontainerd interface to reduce the impedance mismatch
with the containerd client so that the containerd client can be used
more efficiently. Split the API out into container, task and process
interfaces which the consumer is expected to retain so that
libcontainerd can retain state---especially the analogous containerd
client objects---without having to manage any state-store inside the
libcontainerd client.

Signed-off-by: Cory Snider <csnider@mirantis.com>

Cory Snider authored on 2022/05/11 04:59:00
Showing 37 changed files
... ...
@@ -19,7 +19,6 @@ import (
19 19
 	mounttypes "github.com/docker/docker/api/types/mount"
20 20
 	swarmtypes "github.com/docker/docker/api/types/swarm"
21 21
 	"github.com/docker/docker/container/stream"
22
-	"github.com/docker/docker/daemon/exec"
23 22
 	"github.com/docker/docker/daemon/logger"
24 23
 	"github.com/docker/docker/daemon/logger/jsonfilelog"
25 24
 	"github.com/docker/docker/daemon/logger/local"
... ...
@@ -28,6 +27,7 @@ import (
28 28
 	"github.com/docker/docker/errdefs"
29 29
 	"github.com/docker/docker/image"
30 30
 	"github.com/docker/docker/layer"
31
+	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
31 32
 	"github.com/docker/docker/pkg/containerfs"
32 33
 	"github.com/docker/docker/pkg/idtools"
33 34
 	"github.com/docker/docker/pkg/ioutils"
... ...
@@ -86,7 +86,7 @@ type Container struct {
86 86
 	HasBeenManuallyRestarted bool `json:"-"` // used to distinguish restart caused by restart policy from the manual one
87 87
 	MountPoints              map[string]*volumemounts.MountPoint
88 88
 	HostConfig               *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable
89
-	ExecCommands             *exec.Store                `json:"-"`
89
+	ExecCommands             *ExecStore                 `json:"-"`
90 90
 	DependencyStore          agentexec.DependencyGetter `json:"-"`
91 91
 	SecretReferences         []*swarmtypes.SecretReference
92 92
 	ConfigReferences         []*swarmtypes.ConfigReference
... ...
@@ -121,7 +121,7 @@ func NewBaseContainer(id, root string) *Container {
121 121
 	return &Container{
122 122
 		ID:            id,
123 123
 		State:         NewState(),
124
-		ExecCommands:  exec.NewStore(),
124
+		ExecCommands:  NewExecStore(),
125 125
 		Root:          root,
126 126
 		MountPoints:   make(map[string]*volumemounts.MountPoint),
127 127
 		StreamConfig:  stream.NewConfig(),
... ...
@@ -752,6 +752,47 @@ func (container *Container) CreateDaemonEnvironment(tty bool, linkedEnv []string
752 752
 	return env
753 753
 }
754 754
 
755
+// RestoreTask restores the containerd container and task handles and reattaches
756
+// the IO for the running task. Container state is not synced with containerd's
757
+// state.
758
+//
759
+// An errdefs.NotFound error is returned if the container does not exist in
760
+// containerd. However, a nil error is returned if the task does not exist in
761
+// containerd.
762
+func (container *Container) RestoreTask(ctx context.Context, client libcontainerdtypes.Client) error {
763
+	container.Lock()
764
+	defer container.Unlock()
765
+	var err error
766
+	container.ctr, err = client.LoadContainer(ctx, container.ID)
767
+	if err != nil {
768
+		return err
769
+	}
770
+	container.task, err = container.ctr.AttachTask(ctx, container.InitializeStdio)
771
+	if err != nil && !errdefs.IsNotFound(err) {
772
+		return err
773
+	}
774
+	return nil
775
+}
776
+
777
+// GetRunningTask asserts that the container is running and returns the Task for
778
+// the container. An errdefs.Conflict error is returned if the container is not
779
+// in the Running state.
780
+//
781
+// A system error is returned if container is in a bad state: Running is true
782
+// but has a nil Task.
783
+//
784
+// The container lock must be held when calling this method.
785
+func (container *Container) GetRunningTask() (libcontainerdtypes.Task, error) {
786
+	if !container.Running {
787
+		return nil, errdefs.Conflict(fmt.Errorf("container %s is not running", container.ID))
788
+	}
789
+	tsk, ok := container.Task()
790
+	if !ok {
791
+		return nil, errdefs.System(errors.WithStack(fmt.Errorf("container %s is in Running state but has no containerd Task set", container.ID)))
792
+	}
793
+	return tsk, nil
794
+}
795
+
755 796
 type rio struct {
756 797
 	cio.IO
757 798
 
758 799
new file mode 100644
... ...
@@ -0,0 +1,131 @@
0
+package container // import "github.com/docker/docker/container"
1
+
2
+import (
3
+	"runtime"
4
+	"sync"
5
+
6
+	"github.com/containerd/containerd/cio"
7
+	"github.com/docker/docker/container/stream"
8
+	"github.com/docker/docker/libcontainerd/types"
9
+	"github.com/docker/docker/pkg/stringid"
10
+	"github.com/sirupsen/logrus"
11
+)
12
+
13
+// ExecConfig holds the configurations for execs. The Daemon keeps
14
+// track of both running and finished execs so that they can be
15
+// examined both during and after completion.
16
+type ExecConfig struct {
17
+	sync.Mutex
18
+	Started      chan struct{}
19
+	StreamConfig *stream.Config
20
+	ID           string
21
+	Running      bool
22
+	ExitCode     *int
23
+	OpenStdin    bool
24
+	OpenStderr   bool
25
+	OpenStdout   bool
26
+	CanRemove    bool
27
+	Container    *Container
28
+	DetachKeys   []byte
29
+	Entrypoint   string
30
+	Args         []string
31
+	Tty          bool
32
+	Privileged   bool
33
+	User         string
34
+	WorkingDir   string
35
+	Env          []string
36
+	Process      types.Process
37
+	ConsoleSize  *[2]uint
38
+}
39
+
40
+// NewExecConfig initializes a new exec configuration
41
+func NewExecConfig(c *Container) *ExecConfig {
42
+	return &ExecConfig{
43
+		ID:           stringid.GenerateRandomID(),
44
+		Container:    c,
45
+		StreamConfig: stream.NewConfig(),
46
+		Started:      make(chan struct{}),
47
+	}
48
+}
49
+
50
+// InitializeStdio is called by libcontainerd to connect the stdio.
51
+func (c *ExecConfig) InitializeStdio(iop *cio.DirectIO) (cio.IO, error) {
52
+	c.StreamConfig.CopyToPipe(iop)
53
+
54
+	if c.StreamConfig.Stdin() == nil && !c.Tty && runtime.GOOS == "windows" {
55
+		if iop.Stdin != nil {
56
+			if err := iop.Stdin.Close(); err != nil {
57
+				logrus.Errorf("error closing exec stdin: %+v", err)
58
+			}
59
+		}
60
+	}
61
+
62
+	return &rio{IO: iop, sc: c.StreamConfig}, nil
63
+}
64
+
65
+// CloseStreams closes the stdio streams for the exec
66
+func (c *ExecConfig) CloseStreams() error {
67
+	return c.StreamConfig.CloseStreams()
68
+}
69
+
70
+// SetExitCode sets the exec config's exit code
71
+func (c *ExecConfig) SetExitCode(code int) {
72
+	c.ExitCode = &code
73
+}
74
+
75
+// ExecStore keeps track of the exec configurations.
76
+type ExecStore struct {
77
+	byID map[string]*ExecConfig
78
+	mu   sync.RWMutex
79
+}
80
+
81
+// NewExecStore initializes a new exec store.
82
+func NewExecStore() *ExecStore {
83
+	return &ExecStore{
84
+		byID: make(map[string]*ExecConfig),
85
+	}
86
+}
87
+
88
+// Commands returns the exec configurations in the store.
89
+func (e *ExecStore) Commands() map[string]*ExecConfig {
90
+	e.mu.RLock()
91
+	byID := make(map[string]*ExecConfig, len(e.byID))
92
+	for id, config := range e.byID {
93
+		byID[id] = config
94
+	}
95
+	e.mu.RUnlock()
96
+	return byID
97
+}
98
+
99
+// Add adds a new exec configuration to the store.
100
+func (e *ExecStore) Add(id string, Config *ExecConfig) {
101
+	e.mu.Lock()
102
+	e.byID[id] = Config
103
+	e.mu.Unlock()
104
+}
105
+
106
+// Get returns an exec configuration by its id.
107
+func (e *ExecStore) Get(id string) *ExecConfig {
108
+	e.mu.RLock()
109
+	res := e.byID[id]
110
+	e.mu.RUnlock()
111
+	return res
112
+}
113
+
114
+// Delete removes an exec configuration from the store.
115
+func (e *ExecStore) Delete(id string) {
116
+	e.mu.Lock()
117
+	delete(e.byID, id)
118
+	e.mu.Unlock()
119
+}
120
+
121
+// List returns the list of exec ids in the store.
122
+func (e *ExecStore) List() []string {
123
+	var IDs []string
124
+	e.mu.RLock()
125
+	for id := range e.byID {
126
+		IDs = append(IDs, id)
127
+	}
128
+	e.mu.RUnlock()
129
+	return IDs
130
+}
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"time"
9 9
 
10 10
 	"github.com/docker/docker/api/types"
11
+	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
11 12
 	units "github.com/docker/go-units"
12 13
 )
13 14
 
... ...
@@ -36,6 +37,14 @@ type State struct {
36 36
 
37 37
 	stopWaiters       []chan<- StateStatus
38 38
 	removeOnlyWaiters []chan<- StateStatus
39
+
40
+	// The libcontainerd reference fields are unexported to force consumers
41
+	// to access them through the getter methods with multi-valued returns
42
+	// so that they can't forget to nil-check: the code won't compile unless
43
+	// the nil-check result is explicitly consumed or discarded.
44
+
45
+	ctr  libcontainerdtypes.Container
46
+	task libcontainerdtypes.Task
39 47
 }
40 48
 
41 49
 // StateStatus is used to return container wait results.
... ...
@@ -260,7 +269,7 @@ func (s *State) SetExitCode(ec int) {
260 260
 }
261 261
 
262 262
 // SetRunning sets the state of the container to "running".
263
-func (s *State) SetRunning(pid int, initial bool) {
263
+func (s *State) SetRunning(ctr libcontainerdtypes.Container, tsk libcontainerdtypes.Task, initial bool) {
264 264
 	s.ErrorMsg = ""
265 265
 	s.Paused = false
266 266
 	s.Running = true
... ...
@@ -269,7 +278,13 @@ func (s *State) SetRunning(pid int, initial bool) {
269 269
 		s.Paused = false
270 270
 	}
271 271
 	s.ExitCodeValue = 0
272
-	s.Pid = pid
272
+	s.ctr = ctr
273
+	s.task = tsk
274
+	if tsk != nil {
275
+		s.Pid = int(tsk.Pid())
276
+	} else {
277
+		s.Pid = 0
278
+	}
273 279
 	s.OOMKilled = false
274 280
 	if initial {
275 281
 		s.StartedAt = time.Now().UTC()
... ...
@@ -404,3 +419,21 @@ func (s *State) notifyAndClear(waiters *[]chan<- StateStatus) {
404 404
 	}
405 405
 	*waiters = nil
406 406
 }
407
+
408
+// C8dContainer returns a reference to the libcontainerd Container object for
409
+// the container and whether the reference is valid.
410
+//
411
+// The container lock must be held when calling this method.
412
+func (s *State) C8dContainer() (_ libcontainerdtypes.Container, ok bool) {
413
+	return s.ctr, s.ctr != nil
414
+}
415
+
416
+// Task returns a reference to the libcontainerd Task object for the container
417
+// and whether the reference is valid.
418
+//
419
+// The container lock must be held when calling this method.
420
+//
421
+// See also: (*Container).GetRunningTask().
422
+func (s *State) Task() (_ libcontainerdtypes.Task, ok bool) {
423
+	return s.task, s.task != nil
424
+}
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"time"
7 7
 
8 8
 	"github.com/docker/docker/api/types"
9
+	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
9 10
 )
10 11
 
11 12
 func TestIsValidHealthString(t *testing.T) {
... ...
@@ -28,6 +29,13 @@ func TestIsValidHealthString(t *testing.T) {
28 28
 	}
29 29
 }
30 30
 
31
+type mockTask struct {
32
+	libcontainerdtypes.Task
33
+	pid uint32
34
+}
35
+
36
+func (t *mockTask) Pid() uint32 { return t.pid }
37
+
31 38
 func TestStateRunStop(t *testing.T) {
32 39
 	s := NewState()
33 40
 
... ...
@@ -60,7 +68,7 @@ func TestStateRunStop(t *testing.T) {
60 60
 
61 61
 		// Set the state to "Running".
62 62
 		s.Lock()
63
-		s.SetRunning(i, true)
63
+		s.SetRunning(nil, &mockTask{pid: uint32(i)}, true)
64 64
 		s.Unlock()
65 65
 
66 66
 		// Assert desired state.
... ...
@@ -125,7 +133,7 @@ func TestStateTimeoutWait(t *testing.T) {
125 125
 	s := NewState()
126 126
 
127 127
 	s.Lock()
128
-	s.SetRunning(0, true)
128
+	s.SetRunning(nil, nil, true)
129 129
 	s.Unlock()
130 130
 
131 131
 	// Start a wait with a timeout.
... ...
@@ -174,7 +182,7 @@ func TestCorrectStateWaitResultAfterRestart(t *testing.T) {
174 174
 	s := NewState()
175 175
 
176 176
 	s.Lock()
177
-	s.SetRunning(0, true)
177
+	s.SetRunning(nil, nil, true)
178 178
 	s.Unlock()
179 179
 
180 180
 	waitC := s.Wait(context.Background(), WaitConditionNotRunning)
... ...
@@ -185,7 +193,7 @@ func TestCorrectStateWaitResultAfterRestart(t *testing.T) {
185 185
 	s.Unlock()
186 186
 
187 187
 	s.Lock()
188
-	s.SetRunning(0, true)
188
+	s.SetRunning(nil, nil, true)
189 189
 	s.Unlock()
190 190
 
191 191
 	got := <-waitC
... ...
@@ -57,8 +57,11 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
57 57
 		return err
58 58
 	}
59 59
 
60
-	if !container.IsRunning() {
61
-		return fmt.Errorf("Container %s not running", name)
60
+	container.Lock()
61
+	tsk, err := container.GetRunningTask()
62
+	container.Unlock()
63
+	if err != nil {
64
+		return err
62 65
 	}
63 66
 
64 67
 	if !validCheckpointNamePattern.MatchString(config.CheckpointID) {
... ...
@@ -70,7 +73,7 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
70 70
 		return fmt.Errorf("cannot checkpoint container %s: %s", name, err)
71 71
 	}
72 72
 
73
-	err = daemon.containerd.CreateCheckpoint(context.Background(), container.ID, checkpointDir, config.Exit)
73
+	err = tsk.CreateCheckpoint(context.Background(), checkpointDir, config.Exit)
74 74
 	if err != nil {
75 75
 		os.RemoveAll(checkpointDir)
76 76
 		return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
... ...
@@ -30,7 +30,6 @@ import (
30 30
 	"github.com/docker/docker/daemon/config"
31 31
 	ctrd "github.com/docker/docker/daemon/containerd"
32 32
 	"github.com/docker/docker/daemon/events"
33
-	"github.com/docker/docker/daemon/exec"
34 33
 	_ "github.com/docker/docker/daemon/graphdriver/register" // register graph drivers
35 34
 	"github.com/docker/docker/daemon/images"
36 35
 	"github.com/docker/docker/daemon/logger"
... ...
@@ -75,7 +74,7 @@ type Daemon struct {
75 75
 	repository            string
76 76
 	containers            container.Store
77 77
 	containersReplica     container.ViewDB
78
-	execCommands          *exec.Store
78
+	execCommands          *container.ExecStore
79 79
 	imageService          ImageService
80 80
 	configStore           *config.Config
81 81
 	statsCollector        *stats.Collector
... ...
@@ -317,40 +316,43 @@ func (daemon *Daemon) restore() error {
317 317
 
318 318
 			logger(c).Debug("restoring container")
319 319
 
320
-			var (
321
-				err      error
322
-				alive    bool
323
-				ec       uint32
324
-				exitedAt time.Time
325
-				process  libcontainerdtypes.Process
326
-			)
320
+			var es *containerd.ExitStatus
327 321
 
328
-			alive, _, process, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
329
-			if err != nil && !errdefs.IsNotFound(err) {
322
+			if err := c.RestoreTask(context.Background(), daemon.containerd); err != nil && !errdefs.IsNotFound(err) {
330 323
 				logger(c).WithError(err).Error("failed to restore container with containerd")
331 324
 				return
332 325
 			}
333
-			logger(c).Debugf("alive: %v", alive)
334
-			if !alive {
335
-				// If process is not nil, cleanup dead container from containerd.
336
-				// If process is nil then the above `containerd.Restore` returned an errdefs.NotFoundError,
337
-				// and docker's view of the container state will be updated accorrdingly via SetStopped further down.
338
-				if process != nil {
339
-					logger(c).Debug("cleaning up dead container process")
340
-					ec, exitedAt, err = process.Delete(context.Background())
341
-					if err != nil && !errdefs.IsNotFound(err) {
342
-						logger(c).WithError(err).Error("failed to delete container from containerd")
343
-						return
326
+
327
+			alive := false
328
+			status := containerd.Unknown
329
+			if tsk, ok := c.Task(); ok {
330
+				s, err := tsk.Status(context.Background())
331
+				if err != nil {
332
+					logger(c).WithError(err).Error("failed to get task status")
333
+				} else {
334
+					status = s.Status
335
+					alive = status != containerd.Stopped
336
+					if !alive {
337
+						logger(c).Debug("cleaning up dead container process")
338
+						es, err = tsk.Delete(context.Background())
339
+						if err != nil && !errdefs.IsNotFound(err) {
340
+							logger(c).WithError(err).Error("failed to delete task from containerd")
341
+							return
342
+						}
343
+					} else if !daemon.configStore.LiveRestoreEnabled {
344
+						logger(c).Debug("shutting down container considered alive by containerd")
345
+						if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
346
+							log.WithError(err).Error("error shutting down container")
347
+							return
348
+						}
349
+						status = containerd.Stopped
350
+						alive = false
351
+						c.ResetRestartManager(false)
344 352
 					}
345 353
 				}
346
-			} else if !daemon.configStore.LiveRestoreEnabled {
347
-				logger(c).Debug("shutting down container considered alive by containerd")
348
-				if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
349
-					log.WithError(err).Error("error shutting down container")
350
-					return
351
-				}
352
-				c.ResetRestartManager(false)
353 354
 			}
355
+			// If the containerd task for the container was not found, docker's view of the
356
+			// container state will be updated accordingly via SetStopped further down.
354 357
 
355 358
 			if c.IsRunning() || c.IsPaused() {
356 359
 				logger(c).Debug("syncing container on disk state with real state")
... ...
@@ -359,29 +361,22 @@ func (daemon *Daemon) restore() error {
359 359
 
360 360
 				switch {
361 361
 				case c.IsPaused() && alive:
362
-					s, err := daemon.containerd.Status(context.Background(), c.ID)
363
-					if err != nil {
364
-						logger(c).WithError(err).Error("failed to get container status")
365
-					} else {
366
-						logger(c).WithField("state", s).Info("restored container paused")
367
-						switch s {
368
-						case containerd.Paused, containerd.Pausing:
369
-							// nothing to do
370
-						case containerd.Stopped:
371
-							alive = false
372
-						case containerd.Unknown:
373
-							log.Error("unknown status for paused container during restore")
374
-						default:
375
-							// running
376
-							c.Lock()
377
-							c.Paused = false
378
-							daemon.setStateCounter(c)
379
-							daemon.updateHealthMonitor(c)
380
-							if err := c.CheckpointTo(daemon.containersReplica); err != nil {
381
-								log.WithError(err).Error("failed to update paused container state")
382
-							}
383
-							c.Unlock()
362
+					logger(c).WithField("state", status).Info("restored container paused")
363
+					switch status {
364
+					case containerd.Paused, containerd.Pausing:
365
+						// nothing to do
366
+					case containerd.Unknown, containerd.Stopped, "":
367
+						log.WithField("status", status).Error("unexpected status for paused container during restore")
368
+					default:
369
+						// running
370
+						c.Lock()
371
+						c.Paused = false
372
+						daemon.setStateCounter(c)
373
+						daemon.updateHealthMonitor(c)
374
+						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
375
+							log.WithError(err).Error("failed to update paused container state")
384 376
 						}
377
+						c.Unlock()
385 378
 					}
386 379
 				case !c.IsPaused() && alive:
387 380
 					logger(c).Debug("restoring healthcheck")
... ...
@@ -393,7 +388,12 @@ func (daemon *Daemon) restore() error {
393 393
 				if !alive {
394 394
 					logger(c).Debug("setting stopped state")
395 395
 					c.Lock()
396
-					c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
396
+					var ces container.ExitStatus
397
+					if es != nil {
398
+						ces.ExitCode = int(es.ExitCode())
399
+						ces.ExitedAt = es.ExitTime()
400
+					}
401
+					c.SetStopped(&ces)
397 402
 					daemon.Cleanup(c)
398 403
 					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
399 404
 						log.WithError(err).Error("failed to update stopped container state")
... ...
@@ -956,7 +956,7 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
956 956
 	if d.containersReplica, err = container.NewViewDB(); err != nil {
957 957
 		return nil, err
958 958
 	}
959
-	d.execCommands = exec.NewStore()
959
+	d.execCommands = container.NewExecStore()
960 960
 	d.statsCollector = d.newStatsCollector(1 * time.Second)
961 961
 
962 962
 	d.EventsService = events.New()
... ...
@@ -1387,10 +1387,13 @@ func copyBlkioEntry(entries []*statsV1.BlkIOEntry) []types.BlkioStatEntry {
1387 1387
 }
1388 1388
 
1389 1389
 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
1390
-	if !c.IsRunning() {
1391
-		return nil, errNotRunning(c.ID)
1390
+	c.Lock()
1391
+	task, err := c.GetRunningTask()
1392
+	c.Unlock()
1393
+	if err != nil {
1394
+		return nil, err
1392 1395
 	}
1393
-	cs, err := daemon.containerd.Stats(context.Background(), c.ID)
1396
+	cs, err := task.Stats(context.Background())
1394 1397
 	if err != nil {
1395 1398
 		if strings.Contains(err.Error(), "container not found") {
1396 1399
 			return nil, containerNotFound(c.ID)
... ...
@@ -14,6 +14,7 @@ import (
14 14
 	containertypes "github.com/docker/docker/api/types/container"
15 15
 	"github.com/docker/docker/container"
16 16
 	"github.com/docker/docker/daemon/config"
17
+	"github.com/docker/docker/errdefs"
17 18
 	"github.com/docker/docker/libcontainerd/local"
18 19
 	"github.com/docker/docker/libcontainerd/remote"
19 20
 	"github.com/docker/docker/libnetwork"
... ...
@@ -515,14 +516,17 @@ func driverOptions(_ *config.Config) nwconfig.Option {
515 515
 }
516 516
 
517 517
 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
518
-	if !c.IsRunning() {
519
-		return nil, errNotRunning(c.ID)
518
+	c.Lock()
519
+	task, err := c.GetRunningTask()
520
+	c.Unlock()
521
+	if err != nil {
522
+		return nil, err
520 523
 	}
521 524
 
522 525
 	// Obtain the stats from HCS via libcontainerd
523
-	stats, err := daemon.containerd.Stats(context.Background(), c.ID)
526
+	stats, err := task.Stats(context.Background())
524 527
 	if err != nil {
525
-		if strings.Contains(err.Error(), "container not found") {
528
+		if errdefs.IsNotFound(err) {
526 529
 			return nil, containerNotFound(c.ID)
527 530
 		}
528 531
 		return nil, err
... ...
@@ -52,7 +52,7 @@ func TestContainerDelete(t *testing.T) {
52 52
 			fixMsg: "Stop the container before attempting removal or force remove",
53 53
 			initContainer: func() *container.Container {
54 54
 				c := newContainerWithState(container.NewState())
55
-				c.SetRunning(0, true)
55
+				c.SetRunning(nil, nil, true)
56 56
 				c.SetRestarting(&container.ExitStatus{})
57 57
 				return c
58 58
 			}},
... ...
@@ -2,18 +2,19 @@ package daemon // import "github.com/docker/docker/daemon"
2 2
 
3 3
 import (
4 4
 	"context"
5
+	"encoding/json"
5 6
 	"fmt"
6 7
 	"io"
7 8
 	"runtime"
8 9
 	"strings"
9 10
 	"time"
10 11
 
12
+	"github.com/containerd/containerd"
11 13
 	"github.com/docker/docker/api/types"
12 14
 	containertypes "github.com/docker/docker/api/types/container"
13 15
 	"github.com/docker/docker/api/types/strslice"
14 16
 	"github.com/docker/docker/container"
15 17
 	"github.com/docker/docker/container/stream"
16
-	"github.com/docker/docker/daemon/exec"
17 18
 	"github.com/docker/docker/errdefs"
18 19
 	"github.com/docker/docker/pkg/pools"
19 20
 	"github.com/moby/sys/signal"
... ...
@@ -23,7 +24,7 @@ import (
23 23
 	"github.com/sirupsen/logrus"
24 24
 )
25 25
 
26
-func (daemon *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
26
+func (daemon *Daemon) registerExecCommand(container *container.Container, config *container.ExecConfig) {
27 27
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
28 28
 	container.ExecCommands.Add(config.ID, config)
29 29
 	// Storing execs in daemon for easy access via Engine API.
... ...
@@ -41,7 +42,7 @@ func (daemon *Daemon) ExecExists(name string) (bool, error) {
41 41
 
42 42
 // getExecConfig looks up the exec instance by name. If the container associated
43 43
 // with the exec instance is stopped or paused, it will return an error.
44
-func (daemon *Daemon) getExecConfig(name string) (*exec.Config, error) {
44
+func (daemon *Daemon) getExecConfig(name string) (*container.ExecConfig, error) {
45 45
 	ec := daemon.execCommands.Get(name)
46 46
 	if ec == nil {
47 47
 		return nil, errExecNotFound(name)
... ...
@@ -52,7 +53,7 @@ func (daemon *Daemon) getExecConfig(name string) (*exec.Config, error) {
52 52
 	// saying the container isn't running, we should return a 404 so that
53 53
 	// the user sees the same error now that they will after the
54 54
 	// 5 minute clean-up loop is run which erases old/dead execs.
55
-	ctr := daemon.containers.Get(ec.ContainerID)
55
+	ctr := daemon.containers.Get(ec.Container.ID)
56 56
 	if ctr == nil {
57 57
 		return nil, containerNotFound(name)
58 58
 	}
... ...
@@ -68,9 +69,9 @@ func (daemon *Daemon) getExecConfig(name string) (*exec.Config, error) {
68 68
 	return ec, nil
69 69
 }
70 70
 
71
-func (daemon *Daemon) unregisterExecCommand(container *container.Container, execConfig *exec.Config) {
72
-	container.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
73
-	daemon.execCommands.Delete(execConfig.ID, execConfig.Pid)
71
+func (daemon *Daemon) unregisterExecCommand(container *container.Container, execConfig *container.ExecConfig) {
72
+	container.ExecCommands.Delete(execConfig.ID)
73
+	daemon.execCommands.Delete(execConfig.ID)
74 74
 }
75 75
 
76 76
 func (daemon *Daemon) getActiveContainer(name string) (*container.Container, error) {
... ...
@@ -110,11 +111,10 @@ func (daemon *Daemon) ContainerExecCreate(name string, config *types.ExecConfig)
110 110
 		}
111 111
 	}
112 112
 
113
-	execConfig := exec.NewConfig()
113
+	execConfig := container.NewExecConfig(cntr)
114 114
 	execConfig.OpenStdin = config.AttachStdin
115 115
 	execConfig.OpenStdout = config.AttachStdout
116 116
 	execConfig.OpenStderr = config.AttachStderr
117
-	execConfig.ContainerID = cntr.ID
118 117
 	execConfig.DetachKeys = keys
119 118
 	execConfig.Entrypoint = entrypoint
120 119
 	execConfig.Args = args
... ...
@@ -174,15 +174,11 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
174 174
 	ec.Running = true
175 175
 	ec.Unlock()
176 176
 
177
-	c := daemon.containers.Get(ec.ContainerID)
178
-	if c == nil {
179
-		return containerNotFound(ec.ContainerID)
180
-	}
181
-	logrus.Debugf("starting exec command %s in container %s", ec.ID, c.ID)
177
+	logrus.Debugf("starting exec command %s in container %s", ec.ID, ec.Container.ID)
182 178
 	attributes := map[string]string{
183 179
 		"execID": ec.ID,
184 180
 	}
185
-	daemon.LogContainerEventWithAttributes(c, "exec_start: "+ec.Entrypoint+" "+strings.Join(ec.Args, " "), attributes)
181
+	daemon.LogContainerEventWithAttributes(ec.Container, "exec_start: "+ec.Entrypoint+" "+strings.Join(ec.Args, " "), attributes)
186 182
 
187 183
 	defer func() {
188 184
 		if err != nil {
... ...
@@ -191,10 +187,10 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
191 191
 			exitCode := 126
192 192
 			ec.ExitCode = &exitCode
193 193
 			if err := ec.CloseStreams(); err != nil {
194
-				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
194
+				logrus.Errorf("failed to cleanup exec %s streams: %s", ec.Container.ID, err)
195 195
 			}
196 196
 			ec.Unlock()
197
-			c.ExecCommands.Delete(ec.ID, ec.Pid)
197
+			ec.Container.ExecCommands.Delete(ec.ID)
198 198
 		}
199 199
 	}()
200 200
 
... ...
@@ -222,15 +218,18 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
222 222
 
223 223
 	p := &specs.Process{}
224 224
 	if runtime.GOOS != "windows" {
225
-		ctr, err := daemon.containerdCli.LoadContainer(ctx, ec.ContainerID)
225
+		ctr, err := daemon.containerdCli.LoadContainer(ctx, ec.Container.ID)
226 226
 		if err != nil {
227 227
 			return err
228 228
 		}
229
-		spec, err := ctr.Spec(ctx)
229
+		md, err := ctr.Info(ctx, containerd.WithoutRefreshedMetadata)
230 230
 		if err != nil {
231 231
 			return err
232 232
 		}
233
-		p = spec.Process
233
+		spec := specs.Spec{Process: p}
234
+		if err := json.Unmarshal(md.Spec.GetValue(), &spec); err != nil {
235
+			return err
236
+		}
234 237
 	}
235 238
 	p.Args = append([]string{ec.Entrypoint}, ec.Args...)
236 239
 	p.Env = ec.Env
... ...
@@ -253,7 +252,7 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
253 253
 		p.Cwd = "/"
254 254
 	}
255 255
 
256
-	if err := daemon.execSetPlatformOpt(c, ec, p); err != nil {
256
+	if err := daemon.execSetPlatformOpt(ctx, ec, p); err != nil {
257 257
 		return err
258 258
 	}
259 259
 
... ...
@@ -274,9 +273,16 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
274 274
 	defer cancel()
275 275
 	attachErr := ec.StreamConfig.CopyStreams(copyCtx, &attachConfig)
276 276
 
277
+	ec.Container.Lock()
278
+	tsk, err := ec.Container.GetRunningTask()
279
+	ec.Container.Unlock()
280
+	if err != nil {
281
+		return err
282
+	}
283
+
277 284
 	// Synchronize with libcontainerd event loop
278 285
 	ec.Lock()
279
-	systemPid, err := daemon.containerd.Exec(ctx, c.ID, ec.ID, p, cStdin != nil, ec.InitializeStdio)
286
+	ec.Process, err = tsk.Exec(ctx, ec.ID, p, cStdin != nil, ec.InitializeStdio)
280 287
 	// the exec context should be ready, or error happened.
281 288
 	// close the chan to notify readiness
282 289
 	close(ec.Started)
... ...
@@ -284,18 +290,17 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
284 284
 		ec.Unlock()
285 285
 		return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err)
286 286
 	}
287
-	ec.Pid = systemPid
288 287
 	ec.Unlock()
289 288
 
290 289
 	select {
291 290
 	case <-ctx.Done():
292 291
 		log := logrus.
293
-			WithField("container", c.ID).
294
-			WithField("exec", name)
292
+			WithField("container", ec.Container.ID).
293
+			WithField("exec", ec.ID)
295 294
 		log.Debug("Sending KILL signal to container process")
296 295
 		sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
297 296
 		defer cancelFunc()
298
-		err := daemon.containerd.SignalProcess(sigCtx, c.ID, name, signal.SignalMap["KILL"])
297
+		err := ec.Process.Kill(sigCtx, signal.SignalMap["KILL"])
299 298
 		if err != nil {
300 299
 			log.WithError(err).Error("Could not send KILL signal to container process")
301 300
 		}
... ...
@@ -308,7 +313,7 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, optio
308 308
 			attributes := map[string]string{
309 309
 				"execID": ec.ID,
310 310
 			}
311
-			daemon.LogContainerEventWithAttributes(c, "exec_detach", attributes)
311
+			daemon.LogContainerEventWithAttributes(ec.Container, "exec_detach", attributes)
312 312
 		}
313 313
 	}
314 314
 	return nil
... ...
@@ -325,7 +330,7 @@ func (daemon *Daemon) execCommandGC() {
325 325
 		for id, config := range daemon.execCommands.Commands() {
326 326
 			if config.CanRemove {
327 327
 				cleaned++
328
-				daemon.execCommands.Delete(id, config.Pid)
328
+				daemon.execCommands.Delete(id)
329 329
 			} else {
330 330
 				if _, exists := liveExecCommands[id]; !exists {
331 331
 					config.CanRemove = true
332 332
deleted file mode 100644
... ...
@@ -1,148 +0,0 @@
1
-package exec // import "github.com/docker/docker/daemon/exec"
2
-
3
-import (
4
-	"context"
5
-	"runtime"
6
-	"sync"
7
-
8
-	"github.com/containerd/containerd/cio"
9
-	"github.com/docker/docker/container/stream"
10
-	"github.com/docker/docker/pkg/stringid"
11
-	"github.com/sirupsen/logrus"
12
-)
13
-
14
-// Config holds the configurations for execs. The Daemon keeps
15
-// track of both running and finished execs so that they can be
16
-// examined both during and after completion.
17
-type Config struct {
18
-	sync.Mutex
19
-	Started      chan struct{}
20
-	StreamConfig *stream.Config
21
-	ID           string
22
-	Running      bool
23
-	ExitCode     *int
24
-	OpenStdin    bool
25
-	OpenStderr   bool
26
-	OpenStdout   bool
27
-	CanRemove    bool
28
-	ContainerID  string
29
-	DetachKeys   []byte
30
-	Entrypoint   string
31
-	Args         []string
32
-	Tty          bool
33
-	Privileged   bool
34
-	User         string
35
-	WorkingDir   string
36
-	Env          []string
37
-	Pid          int
38
-	ConsoleSize  *[2]uint
39
-}
40
-
41
-// NewConfig initializes the a new exec configuration
42
-func NewConfig() *Config {
43
-	return &Config{
44
-		ID:           stringid.GenerateRandomID(),
45
-		StreamConfig: stream.NewConfig(),
46
-		Started:      make(chan struct{}),
47
-	}
48
-}
49
-
50
-type rio struct {
51
-	cio.IO
52
-
53
-	sc *stream.Config
54
-}
55
-
56
-func (i *rio) Close() error {
57
-	i.IO.Close()
58
-
59
-	return i.sc.CloseStreams()
60
-}
61
-
62
-func (i *rio) Wait() {
63
-	i.sc.Wait(context.Background())
64
-
65
-	i.IO.Wait()
66
-}
67
-
68
-// InitializeStdio is called by libcontainerd to connect the stdio.
69
-func (c *Config) InitializeStdio(iop *cio.DirectIO) (cio.IO, error) {
70
-	c.StreamConfig.CopyToPipe(iop)
71
-
72
-	if c.StreamConfig.Stdin() == nil && !c.Tty && runtime.GOOS == "windows" {
73
-		if iop.Stdin != nil {
74
-			if err := iop.Stdin.Close(); err != nil {
75
-				logrus.Errorf("error closing exec stdin: %+v", err)
76
-			}
77
-		}
78
-	}
79
-
80
-	return &rio{IO: iop, sc: c.StreamConfig}, nil
81
-}
82
-
83
-// CloseStreams closes the stdio streams for the exec
84
-func (c *Config) CloseStreams() error {
85
-	return c.StreamConfig.CloseStreams()
86
-}
87
-
88
-// SetExitCode sets the exec config's exit code
89
-func (c *Config) SetExitCode(code int) {
90
-	c.ExitCode = &code
91
-}
92
-
93
-// Store keeps track of the exec configurations.
94
-type Store struct {
95
-	byID map[string]*Config
96
-	mu   sync.RWMutex
97
-}
98
-
99
-// NewStore initializes a new exec store.
100
-func NewStore() *Store {
101
-	return &Store{
102
-		byID: make(map[string]*Config),
103
-	}
104
-}
105
-
106
-// Commands returns the exec configurations in the store.
107
-func (e *Store) Commands() map[string]*Config {
108
-	e.mu.RLock()
109
-	byID := make(map[string]*Config, len(e.byID))
110
-	for id, config := range e.byID {
111
-		byID[id] = config
112
-	}
113
-	e.mu.RUnlock()
114
-	return byID
115
-}
116
-
117
-// Add adds a new exec configuration to the store.
118
-func (e *Store) Add(id string, Config *Config) {
119
-	e.mu.Lock()
120
-	e.byID[id] = Config
121
-	e.mu.Unlock()
122
-}
123
-
124
-// Get returns an exec configuration by its id.
125
-func (e *Store) Get(id string) *Config {
126
-	e.mu.RLock()
127
-	res := e.byID[id]
128
-	e.mu.RUnlock()
129
-	return res
130
-}
131
-
132
-// Delete removes an exec configuration from the store.
133
-func (e *Store) Delete(id string, pid int) {
134
-	e.mu.Lock()
135
-	delete(e.byID, id)
136
-	e.mu.Unlock()
137
-}
138
-
139
-// List returns the list of exec ids in the store.
140
-func (e *Store) List() []string {
141
-	var IDs []string
142
-	e.mu.RLock()
143
-	for id := range e.byID {
144
-		IDs = append(IDs, id)
145
-	}
146
-	e.mu.RUnlock()
147
-	return IDs
148
-}
... ...
@@ -5,15 +5,14 @@ import (
5 5
 
6 6
 	"github.com/containerd/containerd/pkg/apparmor"
7 7
 	"github.com/docker/docker/container"
8
-	"github.com/docker/docker/daemon/exec"
9 8
 	"github.com/docker/docker/oci/caps"
10 9
 	specs "github.com/opencontainers/runtime-spec/specs-go"
11 10
 )
12 11
 
13
-func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
12
+func (daemon *Daemon) execSetPlatformOpt(ctx context.Context, ec *container.ExecConfig, p *specs.Process) error {
14 13
 	if len(ec.User) > 0 {
15 14
 		var err error
16
-		p.User, err = getUser(c, ec.User)
15
+		p.User, err = getUser(ec.Container, ec.User)
17 16
 		if err != nil {
18 17
 			return err
19 18
 		}
... ...
@@ -27,9 +26,9 @@ func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config
27 27
 	}
28 28
 	if apparmor.HostSupports() {
29 29
 		var appArmorProfile string
30
-		if c.AppArmorProfile != "" {
31
-			appArmorProfile = c.AppArmorProfile
32
-		} else if c.HostConfig.Privileged {
30
+		if ec.Container.AppArmorProfile != "" {
31
+			appArmorProfile = ec.Container.AppArmorProfile
32
+		} else if ec.Container.HostConfig.Privileged {
33 33
 			// `docker exec --privileged` does not currently disable AppArmor
34 34
 			// profiles. Privileged configuration of the container is inherited
35 35
 			appArmorProfile = unconfinedAppArmorProfile
... ...
@@ -51,5 +50,5 @@ func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config
51 51
 		p.ApparmorProfile = appArmorProfile
52 52
 	}
53 53
 	s := &specs.Spec{Process: p}
54
-	return WithRlimits(daemon, c)(context.Background(), nil, nil, s)
54
+	return WithRlimits(daemon, ec.Container)(ctx, nil, nil, s)
55 55
 }
... ...
@@ -4,13 +4,13 @@
4 4
 package daemon
5 5
 
6 6
 import (
7
+	"context"
7 8
 	"testing"
8 9
 
9 10
 	"github.com/containerd/containerd/pkg/apparmor"
10 11
 	containertypes "github.com/docker/docker/api/types/container"
11 12
 	"github.com/docker/docker/container"
12 13
 	"github.com/docker/docker/daemon/config"
13
-	"github.com/docker/docker/daemon/exec"
14 14
 	specs "github.com/opencontainers/runtime-spec/specs-go"
15 15
 	"gotest.tools/v3/assert"
16 16
 )
... ...
@@ -79,10 +79,10 @@ func TestExecSetPlatformOptAppArmor(t *testing.T) {
79 79
 						Privileged: tc.privileged,
80 80
 					},
81 81
 				}
82
-				ec := &exec.Config{Privileged: execPrivileged}
82
+				ec := &container.ExecConfig{Container: c, Privileged: execPrivileged}
83 83
 				p := &specs.Process{}
84 84
 
85
-				err := d.execSetPlatformOpt(c, ec, p)
85
+				err := d.execSetPlatformOpt(context.Background(), ec, p)
86 86
 				assert.NilError(t, err)
87 87
 				assert.Equal(t, p.ApparmorProfile, tc.expectedProfile)
88 88
 			})
... ...
@@ -1,13 +1,14 @@
1 1
 package daemon // import "github.com/docker/docker/daemon"
2 2
 
3 3
 import (
4
+	"context"
5
+
4 6
 	"github.com/docker/docker/container"
5
-	"github.com/docker/docker/daemon/exec"
6 7
 	specs "github.com/opencontainers/runtime-spec/specs-go"
7 8
 )
8 9
 
9
-func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
10
-	if c.OS == "windows" {
10
+func (daemon *Daemon) execSetPlatformOpt(ctx context.Context, ec *container.ExecConfig, p *specs.Process) error {
11
+	if ec.Container.OS == "windows" {
11 12
 		p.User.Username = ec.User
12 13
 	}
13 14
 	return nil
... ...
@@ -13,7 +13,6 @@ import (
13 13
 	containertypes "github.com/docker/docker/api/types/container"
14 14
 	"github.com/docker/docker/api/types/strslice"
15 15
 	"github.com/docker/docker/container"
16
-	"github.com/docker/docker/daemon/exec"
17 16
 	"github.com/sirupsen/logrus"
18 17
 )
19 18
 
... ...
@@ -69,11 +68,10 @@ func (p *cmdProbe) run(ctx context.Context, d *Daemon, cntr *container.Container
69 69
 		cmdSlice = append(getShell(cntr), cmdSlice...)
70 70
 	}
71 71
 	entrypoint, args := d.getEntrypointAndArgs(strslice.StrSlice{}, cmdSlice)
72
-	execConfig := exec.NewConfig()
72
+	execConfig := container.NewExecConfig(cntr)
73 73
 	execConfig.OpenStdin = false
74 74
 	execConfig.OpenStdout = true
75 75
 	execConfig.OpenStderr = true
76
-	execConfig.ContainerID = cntr.ID
77 76
 	execConfig.DetachKeys = []byte{}
78 77
 	execConfig.Entrypoint = entrypoint
79 78
 	execConfig.Args = args
... ...
@@ -214,11 +214,15 @@ func (daemon *Daemon) ContainerExecInspect(id string) (*backend.ExecInspect, err
214 214
 		return nil, errExecNotFound(id)
215 215
 	}
216 216
 
217
-	if ctr := daemon.containers.Get(e.ContainerID); ctr == nil {
217
+	if ctr := daemon.containers.Get(e.Container.ID); ctr == nil {
218 218
 		return nil, errExecNotFound(id)
219 219
 	}
220 220
 
221 221
 	pc := inspectExecProcessConfig(e)
222
+	var pid int
223
+	if e.Process != nil {
224
+		pid = int(e.Process.Pid())
225
+	}
222 226
 
223 227
 	return &backend.ExecInspect{
224 228
 		ID:            e.ID,
... ...
@@ -229,9 +233,9 @@ func (daemon *Daemon) ContainerExecInspect(id string) (*backend.ExecInspect, err
229 229
 		OpenStdout:    e.OpenStdout,
230 230
 		OpenStderr:    e.OpenStderr,
231 231
 		CanRemove:     e.CanRemove,
232
-		ContainerID:   e.ContainerID,
232
+		ContainerID:   e.Container.ID,
233 233
 		DetachKeys:    e.DetachKeys,
234
-		Pid:           e.Pid,
234
+		Pid:           pid,
235 235
 	}, nil
236 236
 }
237 237
 
... ...
@@ -5,7 +5,6 @@ import (
5 5
 	"github.com/docker/docker/api/types/backend"
6 6
 	"github.com/docker/docker/api/types/versions/v1p19"
7 7
 	"github.com/docker/docker/container"
8
-	"github.com/docker/docker/daemon/exec"
9 8
 )
10 9
 
11 10
 // This sets platform-specific fields
... ...
@@ -62,7 +61,7 @@ func (daemon *Daemon) containerInspectPre120(name string) (*v1p19.ContainerJSON,
62 62
 	}, nil
63 63
 }
64 64
 
65
-func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
65
+func inspectExecProcessConfig(e *container.ExecConfig) *backend.ExecProcessConfig {
66 66
 	return &backend.ExecProcessConfig{
67 67
 		Tty:        e.Tty,
68 68
 		Entrypoint: e.Entrypoint,
... ...
@@ -6,7 +6,6 @@ import (
6 6
 	containertypes "github.com/docker/docker/api/types/container"
7 7
 	"github.com/docker/docker/container"
8 8
 	"github.com/docker/docker/daemon/config"
9
-	"github.com/docker/docker/daemon/exec"
10 9
 	"gotest.tools/v3/assert"
11 10
 	is "gotest.tools/v3/assert/cmp"
12 11
 )
... ...
@@ -16,7 +15,7 @@ func TestGetInspectData(t *testing.T) {
16 16
 		ID:           "inspect-me",
17 17
 		HostConfig:   &containertypes.HostConfig{},
18 18
 		State:        container.NewState(),
19
-		ExecCommands: exec.NewStore(),
19
+		ExecCommands: container.NewExecStore(),
20 20
 	}
21 21
 
22 22
 	d := &Daemon{
... ...
@@ -4,7 +4,6 @@ import (
4 4
 	"github.com/docker/docker/api/types"
5 5
 	"github.com/docker/docker/api/types/backend"
6 6
 	"github.com/docker/docker/container"
7
-	"github.com/docker/docker/daemon/exec"
8 7
 )
9 8
 
10 9
 // This sets platform-specific fields
... ...
@@ -17,7 +16,7 @@ func (daemon *Daemon) containerInspectPre120(name string) (*types.ContainerJSON,
17 17
 	return daemon.ContainerInspectCurrent(name, false)
18 18
 }
19 19
 
20
-func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
20
+func inspectExecProcessConfig(e *container.ExecConfig) *backend.ExecProcessConfig {
21 21
 	return &backend.ExecProcessConfig{
22 22
 		Tty:        e.Tty,
23 23
 		Entrypoint: e.Entrypoint,
... ...
@@ -9,7 +9,6 @@ import (
9 9
 
10 10
 	containerpkg "github.com/docker/docker/container"
11 11
 	"github.com/docker/docker/errdefs"
12
-	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
13 12
 	"github.com/moby/sys/signal"
14 13
 	"github.com/pkg/errors"
15 14
 	"github.com/sirupsen/logrus"
... ...
@@ -65,8 +64,9 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, stopSign
65 65
 	container.Lock()
66 66
 	defer container.Unlock()
67 67
 
68
-	if !container.Running {
69
-		return errNotRunning(container.ID)
68
+	task, err := container.GetRunningTask()
69
+	if err != nil {
70
+		return err
70 71
 	}
71 72
 
72 73
 	var unpause bool
... ...
@@ -96,8 +96,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, stopSign
96 96
 		return nil
97 97
 	}
98 98
 
99
-	err := daemon.containerd.SignalProcess(context.Background(), container.ID, libcontainerdtypes.InitProcessName, stopSignal)
100
-	if err != nil {
99
+	if err := task.Kill(context.Background(), stopSignal); err != nil {
101 100
 		if errdefs.IsNotFound(err) {
102 101
 			unpause = false
103 102
 			logrus.WithError(err).WithField("container", container.ID).WithField("action", "kill").Debug("container kill failed because of 'container not found' or 'no such process'")
... ...
@@ -121,7 +120,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, stopSign
121 121
 
122 122
 	if unpause {
123 123
 		// above kill signal will be sent once resume is finished
124
-		if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
124
+		if err := task.Resume(context.Background()); err != nil {
125 125
 			logrus.Warnf("Cannot unpause container %s: %s", container.ID, err)
126 126
 		}
127 127
 	}
... ...
@@ -7,6 +7,7 @@ import (
7 7
 
8 8
 	"github.com/docker/docker/api/types"
9 9
 	"github.com/docker/docker/container"
10
+	"github.com/docker/docker/errdefs"
10 11
 	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
11 12
 	"github.com/docker/docker/restartmanager"
12 13
 	"github.com/pkg/errors"
... ...
@@ -25,24 +26,29 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
25 25
 }
26 26
 
27 27
 func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
28
+	var exitStatus container.ExitStatus
28 29
 	c.Lock()
29
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
30
-	ec, et, err := daemon.containerd.DeleteTask(ctx, c.ID)
31
-	cancel()
32
-	if err != nil {
33
-		logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd")
30
+	tsk, ok := c.Task()
31
+	if ok {
32
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
33
+		es, err := tsk.Delete(ctx)
34
+		cancel()
35
+		if err != nil {
36
+			logrus.WithError(err).WithField("container", c.ID).Warnf("failed to delete container from containerd")
37
+		} else {
38
+			exitStatus = container.ExitStatus{
39
+				ExitCode: int(es.ExitCode()),
40
+				ExitedAt: es.ExitTime(),
41
+			}
42
+		}
34 43
 	}
35 44
 
36
-	ctx, cancel = context.WithTimeout(context.Background(), 2*time.Second)
45
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
37 46
 	c.StreamConfig.Wait(ctx)
38 47
 	cancel()
39 48
 
40 49
 	c.Reset(false)
41 50
 
42
-	exitStatus := container.ExitStatus{
43
-		ExitCode: int(ec),
44
-		ExitedAt: et,
45
-	}
46 51
 	if e != nil {
47 52
 		exitStatus.ExitCode = int(e.ExitCode)
48 53
 		exitStatus.ExitedAt = e.ExitedAt
... ...
@@ -53,7 +59,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
53 53
 
54 54
 	daemonShutdown := daemon.IsShuttingDown()
55 55
 	execDuration := time.Since(c.StartedAt)
56
-	restart, wait, err := c.RestartManager().ShouldRestart(ec, daemonShutdown || c.HasBeenManuallyStopped, execDuration)
56
+	restart, wait, err := c.RestartManager().ShouldRestart(uint32(exitStatus.ExitCode), daemonShutdown || c.HasBeenManuallyStopped, execDuration)
57 57
 	if err != nil {
58 58
 		logrus.WithError(err).
59 59
 			WithField("container", c.ID).
... ...
@@ -70,7 +76,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
70 70
 	// restarted if/when the container is started again
71 71
 	daemon.stopHealthchecks(c)
72 72
 	attributes := map[string]string{
73
-		"exitCode": strconv.Itoa(int(ec)),
73
+		"exitCode": strconv.Itoa(exitStatus.ExitCode),
74 74
 	}
75 75
 	daemon.Cleanup(c)
76 76
 
... ...
@@ -170,9 +176,18 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
170 170
 
171 171
 			// remove the exec command from the container's store only and not the
172 172
 			// daemon's store so that the exec command can be inspected.
173
-			c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
173
+			c.ExecCommands.Delete(execConfig.ID)
174 174
 
175 175
 			exitCode = ec
176
+
177
+			go func() {
178
+				if _, err := execConfig.Process.Delete(context.Background()); err != nil {
179
+					logrus.WithError(err).WithFields(logrus.Fields{
180
+						"container": ei.ContainerID,
181
+						"process":   ei.ProcessID,
182
+					}).Warn("failed to delete process")
183
+				}
184
+			}()
176 185
 		}
177 186
 		attributes := map[string]string{
178 187
 			"execID":   ei.ProcessID,
... ...
@@ -185,7 +200,27 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
185 185
 
186 186
 		// This is here to handle start not generated by docker
187 187
 		if !c.Running {
188
-			c.SetRunning(int(ei.Pid), false)
188
+			ctr, err := daemon.containerd.LoadContainer(context.Background(), c.ID)
189
+			if err != nil {
190
+				if errdefs.IsNotFound(err) {
191
+					// The container was started by not-docker and so could have been deleted by
192
+					// not-docker before we got around to loading it from containerd.
193
+					logrus.WithField("container", c.ID).WithError(err).
194
+						Debug("could not load containerd container for start event")
195
+					return nil
196
+				}
197
+				return err
198
+			}
199
+			tsk, err := ctr.Task(context.Background())
200
+			if err != nil {
201
+				if errdefs.IsNotFound(err) {
202
+					logrus.WithField("container", c.ID).WithError(err).
203
+						Debug("failed to load task for externally-started container")
204
+					return nil
205
+				}
206
+				return err
207
+			}
208
+			c.SetRunning(ctr, tsk, false)
189 209
 			c.HasBeenManuallyStopped = false
190 210
 			c.HasBeenStartedBefore = true
191 211
 			daemon.setStateCounter(c)
... ...
@@ -24,8 +24,9 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
24 24
 	defer container.Unlock()
25 25
 
26 26
 	// We cannot Pause the container which is not running
27
-	if !container.Running {
28
-		return errNotRunning(container.ID)
27
+	tsk, err := container.GetRunningTask()
28
+	if err != nil {
29
+		return err
29 30
 	}
30 31
 
31 32
 	// We cannot Pause the container which is already paused
... ...
@@ -38,8 +39,8 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
38 38
 		return errContainerIsRestarting(container.ID)
39 39
 	}
40 40
 
41
-	if err := daemon.containerd.Pause(context.Background(), container.ID); err != nil {
42
-		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
41
+	if err := tsk.Pause(context.Background()); err != nil {
42
+		return fmt.Errorf("cannot pause container %s: %s", container.ID, err)
43 43
 	}
44 44
 
45 45
 	container.Paused = true
... ...
@@ -4,8 +4,6 @@ import (
4 4
 	"context"
5 5
 	"fmt"
6 6
 	"time"
7
-
8
-	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
9 7
 )
10 8
 
11 9
 // ContainerResize changes the size of the TTY of the process running
... ...
@@ -16,11 +14,14 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
16 16
 		return err
17 17
 	}
18 18
 
19
-	if !container.IsRunning() {
20
-		return errNotRunning(container.ID)
19
+	container.Lock()
20
+	tsk, err := container.GetRunningTask()
21
+	container.Unlock()
22
+	if err != nil {
23
+		return err
21 24
 	}
22 25
 
23
-	if err = daemon.containerd.ResizeTerminal(context.Background(), container.ID, libcontainerdtypes.InitProcessName, width, height); err == nil {
26
+	if err = tsk.Resize(context.Background(), uint32(width), uint32(height)); err == nil {
24 27
 		attributes := map[string]string{
25 28
 			"height": fmt.Sprintf("%d", height),
26 29
 			"width":  fmt.Sprintf("%d", width),
... ...
@@ -46,7 +47,7 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error
46 46
 
47 47
 	select {
48 48
 	case <-ec.Started:
49
-		return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height)
49
+		return ec.Process.Resize(context.Background(), uint32(width), uint32(height))
50 50
 	case <-timeout.C:
51 51
 		return fmt.Errorf("timeout waiting for exec session ready")
52 52
 	}
... ...
@@ -8,7 +8,7 @@ import (
8 8
 	"testing"
9 9
 
10 10
 	"github.com/docker/docker/container"
11
-	"github.com/docker/docker/daemon/exec"
11
+	"github.com/docker/docker/libcontainerd/types"
12 12
 	"gotest.tools/v3/assert"
13 13
 )
14 14
 
... ...
@@ -16,32 +16,28 @@ import (
16 16
 func TestExecResizeNoSuchExec(t *testing.T) {
17 17
 	n := "TestExecResize"
18 18
 	d := &Daemon{
19
-		execCommands: exec.NewStore(),
19
+		execCommands: container.NewExecStore(),
20 20
 	}
21 21
 	c := &container.Container{
22
-		ExecCommands: exec.NewStore(),
22
+		ExecCommands: container.NewExecStore(),
23 23
 	}
24
-	ec := &exec.Config{
25
-		ID: n,
24
+	ec := &container.ExecConfig{
25
+		ID:        n,
26
+		Container: c,
26 27
 	}
27 28
 	d.registerExecCommand(c, ec)
28 29
 	err := d.ContainerExecResize("nil", 24, 8)
29 30
 	assert.ErrorContains(t, err, "No such exec instance")
30 31
 }
31 32
 
32
-type execResizeMockContainerdClient struct {
33
-	MockContainerdClient
34
-	ProcessID   string
35
-	ContainerID string
36
-	Width       int
37
-	Height      int
33
+type execResizeMockProcess struct {
34
+	types.Process
35
+	Width, Height int
38 36
 }
39 37
 
40
-func (c *execResizeMockContainerdClient) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
41
-	c.ProcessID = processID
42
-	c.ContainerID = containerID
43
-	c.Width = width
44
-	c.Height = height
38
+func (p *execResizeMockProcess) Resize(ctx context.Context, width, height uint32) error {
39
+	p.Width = int(width)
40
+	p.Height = int(height)
45 41
 	return nil
46 42
 }
47 43
 
... ...
@@ -50,30 +46,29 @@ func TestExecResize(t *testing.T) {
50 50
 	n := "TestExecResize"
51 51
 	width := 24
52 52
 	height := 8
53
-	ec := &exec.Config{
54
-		ID:          n,
55
-		ContainerID: n,
56
-		Started:     make(chan struct{}),
57
-	}
58
-	close(ec.Started)
59
-	mc := &execResizeMockContainerdClient{}
53
+	mp := &execResizeMockProcess{}
60 54
 	d := &Daemon{
61
-		execCommands: exec.NewStore(),
62
-		containerd:   mc,
55
+		execCommands: container.NewExecStore(),
63 56
 		containers:   container.NewMemoryStore(),
64 57
 	}
65 58
 	c := &container.Container{
66
-		ExecCommands: exec.NewStore(),
59
+		ID:           n,
60
+		ExecCommands: container.NewExecStore(),
67 61
 		State:        &container.State{Running: true},
68 62
 	}
63
+	ec := &container.ExecConfig{
64
+		ID:        n,
65
+		Container: c,
66
+		Process:   mp,
67
+		Started:   make(chan struct{}),
68
+	}
69
+	close(ec.Started)
69 70
 	d.containers.Add(n, c)
70 71
 	d.registerExecCommand(c, ec)
71 72
 	err := d.ContainerExecResize(n, height, width)
72 73
 	assert.NilError(t, err)
73
-	assert.Equal(t, mc.Width, width)
74
-	assert.Equal(t, mc.Height, height)
75
-	assert.Equal(t, mc.ProcessID, n)
76
-	assert.Equal(t, mc.ContainerID, n)
74
+	assert.Equal(t, mp.Width, width)
75
+	assert.Equal(t, mp.Height, height)
77 76
 }
78 77
 
79 78
 // This test is to make sure that when exec context is not ready, a timeout error should happen.
... ...
@@ -82,21 +77,22 @@ func TestExecResizeTimeout(t *testing.T) {
82 82
 	n := "TestExecResize"
83 83
 	width := 24
84 84
 	height := 8
85
-	ec := &exec.Config{
86
-		ID:          n,
87
-		ContainerID: n,
88
-		Started:     make(chan struct{}),
89
-	}
90
-	mc := &execResizeMockContainerdClient{}
85
+	mp := &execResizeMockProcess{}
91 86
 	d := &Daemon{
92
-		execCommands: exec.NewStore(),
93
-		containerd:   mc,
87
+		execCommands: container.NewExecStore(),
94 88
 		containers:   container.NewMemoryStore(),
95 89
 	}
96 90
 	c := &container.Container{
97
-		ExecCommands: exec.NewStore(),
91
+		ID:           n,
92
+		ExecCommands: container.NewExecStore(),
98 93
 		State:        &container.State{Running: true},
99 94
 	}
95
+	ec := &container.ExecConfig{
96
+		ID:        n,
97
+		Container: c,
98
+		Process:   mp,
99
+		Started:   make(chan struct{}),
100
+	}
100 101
 	d.containers.Add(n, c)
101 102
 	d.registerExecCommand(c, ec)
102 103
 	err := d.ContainerExecResize(n, height, width)
... ...
@@ -178,16 +178,12 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
178 178
 
179 179
 	ctx := context.TODO()
180 180
 
181
-	err = daemon.containerd.Create(ctx, container.ID, spec, shim, createOptions)
181
+	ctr, err := daemon.containerd.NewContainer(ctx, container.ID, spec, shim, createOptions)
182 182
 	if err != nil {
183 183
 		if errdefs.IsConflict(err) {
184 184
 			logrus.WithError(err).WithField("container", container.ID).Error("Container not cleaned up from containerd from previous run")
185
-			// best effort to clean up old container object
186
-			daemon.containerd.DeleteTask(ctx, container.ID)
187
-			if err := daemon.containerd.Delete(ctx, container.ID); err != nil && !errdefs.IsNotFound(err) {
188
-				logrus.WithError(err).WithField("container", container.ID).Error("Error cleaning up stale containerd container object")
189
-			}
190
-			err = daemon.containerd.Create(ctx, container.ID, spec, shim, createOptions)
185
+			daemon.cleanupStaleContainer(ctx, container.ID)
186
+			ctr, err = daemon.containerd.NewContainer(ctx, container.ID, spec, shim, createOptions)
191 187
 		}
192 188
 		if err != nil {
193 189
 			return translateContainerdStartErr(container.Path, container.SetExitCode, err)
... ...
@@ -195,11 +191,11 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
195 195
 	}
196 196
 
197 197
 	// TODO(mlaventure): we need to specify checkpoint options here
198
-	pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
198
+	tsk, err := ctr.Start(ctx, checkpointDir,
199 199
 		container.StreamConfig.Stdin() != nil || container.Config.Tty,
200 200
 		container.InitializeStdio)
201 201
 	if err != nil {
202
-		if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
202
+		if err := ctr.Delete(context.Background()); err != nil {
203 203
 			logrus.WithError(err).WithField("container", container.ID).
204 204
 				Error("failed to delete failed start container")
205 205
 		}
... ...
@@ -207,7 +203,7 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
207 207
 	}
208 208
 
209 209
 	container.HasBeenManuallyRestarted = false
210
-	container.SetRunning(pid, true)
210
+	container.SetRunning(ctr, tsk, true)
211 211
 	container.HasBeenStartedBefore = true
212 212
 	daemon.setStateCounter(container)
213 213
 
... ...
@@ -224,9 +220,42 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
224 224
 	return nil
225 225
 }
226 226
 
227
+func (daemon *Daemon) cleanupStaleContainer(ctx context.Context, id string) {
228
+	// best effort to clean up old container object
229
+	log := logrus.WithContext(ctx).WithField("container", id)
230
+	ctr, err := daemon.containerd.LoadContainer(ctx, id)
231
+	if err != nil {
232
+		// Log an error no matter the kind. A container existed with the
233
+		// ID, so a NotFound error would be an exceptional situation
234
+		// worth logging.
235
+		log.WithError(err).Error("Error loading stale containerd container object")
236
+		return
237
+	}
238
+	if tsk, err := ctr.Task(ctx); err != nil {
239
+		if !errdefs.IsNotFound(err) {
240
+			log.WithError(err).Error("Error loading stale containerd task object")
241
+		}
242
+	} else {
243
+		if err := tsk.ForceDelete(ctx); err != nil {
244
+			log.WithError(err).Error("Error cleaning up stale containerd task object")
245
+		}
246
+	}
247
+	if err := ctr.Delete(ctx); err != nil && !errdefs.IsNotFound(err) {
248
+		log.WithError(err).Error("Error cleaning up stale containerd container object")
249
+	}
250
+}
251
+
227 252
 // Cleanup releases any network resources allocated to the container along with any rules
228 253
 // around how containers are linked together.  It also unmounts the container's root filesystem.
229 254
 func (daemon *Daemon) Cleanup(container *container.Container) {
255
+	// Microsoft HCS containers get in a bad state if host resources are
256
+	// released while the container still exists.
257
+	if ctr, ok := container.C8dContainer(); ok {
258
+		if err := ctr.Delete(context.Background()); err != nil {
259
+			logrus.Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err)
260
+		}
261
+	}
262
+
230 263
 	daemon.releaseNetwork(container)
231 264
 
232 265
 	if err := container.UnmountIpcMount(); err != nil {
... ...
@@ -260,8 +289,4 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
260 260
 	}
261 261
 
262 262
 	container.CancelAttachContext()
263
-
264
-	if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
265
-		logrus.Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err)
266
-	}
267 263
 }
... ...
@@ -14,6 +14,7 @@ import (
14 14
 
15 15
 	"github.com/docker/docker/api/types/container"
16 16
 	"github.com/docker/docker/errdefs"
17
+	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
17 18
 	"github.com/pkg/errors"
18 19
 )
19 20
 
... ...
@@ -150,18 +151,31 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
150 150
 		return nil, err
151 151
 	}
152 152
 
153
-	if !ctr.IsRunning() {
154
-		return nil, errNotRunning(ctr.ID)
155
-	}
153
+	tsk, err := func() (libcontainerdtypes.Task, error) {
154
+		ctr.Lock()
155
+		defer ctr.Unlock()
156 156
 
157
-	if ctr.IsRestarting() {
158
-		return nil, errContainerIsRestarting(ctr.ID)
157
+		tsk, err := ctr.GetRunningTask()
158
+		if err != nil {
159
+			return nil, err
160
+		}
161
+		if ctr.Restarting {
162
+			return nil, errContainerIsRestarting(ctr.ID)
163
+		}
164
+		return tsk, nil
165
+	}()
166
+	if err != nil {
167
+		return nil, err
159 168
 	}
160 169
 
161
-	procs, err := daemon.containerd.ListPids(context.Background(), ctr.ID)
170
+	infos, err := tsk.Pids(context.Background())
162 171
 	if err != nil {
163 172
 		return nil, err
164 173
 	}
174
+	procs := make([]uint32, len(infos))
175
+	for i, p := range infos {
176
+		procs[i] = p.Pid
177
+	}
165 178
 
166 179
 	args := strings.Split(psArgs, " ")
167 180
 	pids := psPidsArg(procs)
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"time"
8 8
 
9 9
 	containertypes "github.com/docker/docker/api/types/container"
10
+	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
10 11
 	units "github.com/docker/go-units"
11 12
 )
12 13
 
... ...
@@ -36,15 +37,21 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
36 36
 		return nil, err
37 37
 	}
38 38
 
39
-	if !container.IsRunning() {
40
-		return nil, errNotRunning(container.ID)
41
-	}
39
+	task, err := func() (libcontainerdtypes.Task, error) {
40
+		container.Lock()
41
+		defer container.Unlock()
42 42
 
43
-	if container.IsRestarting() {
44
-		return nil, errContainerIsRestarting(container.ID)
45
-	}
43
+		task, err := container.GetRunningTask()
44
+		if err != nil {
45
+			return nil, err
46
+		}
47
+		if container.Restarting {
48
+			return nil, errContainerIsRestarting(container.ID)
49
+		}
50
+		return task, nil
51
+	}()
46 52
 
47
-	s, err := daemon.containerd.Summary(context.Background(), container.ID)
53
+	s, err := task.Summary(context.Background())
48 54
 	if err != nil {
49 55
 		return nil, err
50 56
 	}
... ...
@@ -26,8 +26,12 @@ func (daemon *Daemon) containerUnpause(ctr *container.Container) error {
26 26
 	if !ctr.Paused {
27 27
 		return fmt.Errorf("Container %s is not paused", ctr.ID)
28 28
 	}
29
+	tsk, err := ctr.GetRunningTask()
30
+	if err != nil {
31
+		return err
32
+	}
29 33
 
30
-	if err := daemon.containerd.Resume(context.Background(), ctr.ID); err != nil {
34
+	if err := tsk.Resume(context.Background()); err != nil {
31 35
 		return fmt.Errorf("Cannot unpause container %s: %s", ctr.ID, err)
32 36
 	}
33 37
 
... ...
@@ -74,19 +74,28 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
74 74
 		ctr.UpdateMonitor(hostConfig.RestartPolicy)
75 75
 	}
76 76
 
77
+	defer daemon.LogContainerEvent(ctr, "update")
78
+
77 79
 	// If the container is not running, updating the hostConfig struct is
78 80
 	// enough; resources will be updated when the container is started again.
79 81
 	// If the container is running (including paused), the new resource
80 82
 	// configuration must also be applied to the live container.
81
-	if ctr.IsRunning() && !ctr.IsRestarting() {
82
-		if err := daemon.containerd.UpdateResources(context.Background(), ctr.ID, toContainerdResources(hostConfig.Resources)); err != nil {
83
-			restoreConfig = true
84
-			// TODO: it would be nice if containerd responded with better errors here so we can classify this better.
85
-			return errCannotUpdate(ctr.ID, errdefs.System(err))
86
-		}
83
+	ctr.Lock()
84
+	isRestarting := ctr.Restarting
85
+	tsk, err := ctr.GetRunningTask()
86
+	ctr.Unlock()
87
+	if errdefs.IsConflict(err) || isRestarting {
88
+		return nil
89
+	}
90
+	if err != nil {
91
+		return err
87 92
 	}
88 93
 
89
-	daemon.LogContainerEvent(ctr, "update")
94
+	if err := tsk.UpdateResources(context.TODO(), toContainerdResources(hostConfig.Resources)); err != nil {
95
+		restoreConfig = true
96
+		// TODO: it would be nice if containerd responded with better errors here so we can classify this better.
97
+		return errCannotUpdate(ctr.ID, errdefs.System(err))
98
+	}
90 99
 
91 100
 	return nil
92 101
 }
93 102
deleted file mode 100644
... ...
@@ -1,74 +0,0 @@
1
-//go:build linux
2
-// +build linux
3
-
4
-package daemon
5
-
6
-import (
7
-	"context"
8
-	"syscall"
9
-	"time"
10
-
11
-	"github.com/containerd/containerd"
12
-	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
13
-	specs "github.com/opencontainers/runtime-spec/specs-go"
14
-)
15
-
16
-type mockProcess struct {
17
-}
18
-
19
-func (m *mockProcess) Delete(_ context.Context) (uint32, time.Time, error) {
20
-	return 0, time.Time{}, nil
21
-}
22
-
23
-// Mock containerd client implementation, for unit tests.
24
-type MockContainerdClient struct {
25
-}
26
-
27
-func (c *MockContainerdClient) Version(ctx context.Context) (containerd.Version, error) {
28
-	return containerd.Version{}, nil
29
-}
30
-func (c *MockContainerdClient) Restore(ctx context.Context, containerID string, attachStdio libcontainerdtypes.StdioCallback) (alive bool, pid int, p libcontainerdtypes.Process, err error) {
31
-	return false, 0, &mockProcess{}, nil
32
-}
33
-func (c *MockContainerdClient) Create(ctx context.Context, containerID string, spec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) error {
34
-	return nil
35
-}
36
-func (c *MockContainerdClient) Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (pid int, err error) {
37
-	return 0, nil
38
-}
39
-func (c *MockContainerdClient) SignalProcess(ctx context.Context, containerID, processID string, signal syscall.Signal) error {
40
-	return nil
41
-}
42
-func (c *MockContainerdClient) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
43
-	return 0, nil
44
-}
45
-func (c *MockContainerdClient) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
46
-	return nil
47
-}
48
-func (c *MockContainerdClient) CloseStdin(ctx context.Context, containerID, processID string) error {
49
-	return nil
50
-}
51
-func (c *MockContainerdClient) Pause(ctx context.Context, containerID string) error  { return nil }
52
-func (c *MockContainerdClient) Resume(ctx context.Context, containerID string) error { return nil }
53
-func (c *MockContainerdClient) Stats(ctx context.Context, containerID string) (*libcontainerdtypes.Stats, error) {
54
-	return nil, nil
55
-}
56
-func (c *MockContainerdClient) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
57
-	return nil, nil
58
-}
59
-func (c *MockContainerdClient) Summary(ctx context.Context, containerID string) ([]libcontainerdtypes.Summary, error) {
60
-	return nil, nil
61
-}
62
-func (c *MockContainerdClient) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
63
-	return 0, time.Time{}, nil
64
-}
65
-func (c *MockContainerdClient) Delete(ctx context.Context, containerID string) error { return nil }
66
-func (c *MockContainerdClient) Status(ctx context.Context, containerID string) (containerd.ProcessStatus, error) {
67
-	return "null", nil
68
-}
69
-func (c *MockContainerdClient) UpdateResources(ctx context.Context, containerID string, resources *libcontainerdtypes.Resources) error {
70
-	return nil
71
-}
72
-func (c *MockContainerdClient) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
73
-	return nil
74
-}
... ...
@@ -32,29 +32,44 @@ import (
32 32
 )
33 33
 
34 34
 type process struct {
35
-	id         string
36
-	pid        int
37
-	hcsProcess hcsshim.Process
35
+	// mu guards the mutable fields of this struct.
36
+	//
37
+	// Always lock mu before ctr's mutex to prevent deadlocks.
38
+	mu         sync.Mutex
39
+	id         string                 // Invariants: immutable
40
+	ctr        *container             // Invariants: immutable, ctr != nil
41
+	hcsProcess hcsshim.Process        // Is set to nil on process exit
42
+	exited     *containerd.ExitStatus // Valid iff waitCh is closed
43
+	waitCh     chan struct{}
44
+}
45
+
46
+type task struct {
47
+	process
38 48
 }
39 49
 
40 50
 type container struct {
41
-	sync.Mutex
51
+	mu sync.Mutex
42 52
 
43 53
 	// The ociSpec is required, as client.Create() needs a spec, but can
44 54
 	// be called from the RestartManager context which does not otherwise
45 55
 	// have access to the Spec
56
+	//
57
+	// A container value with ociSpec == nil represents a container which
58
+	// has been loaded with (*client).LoadContainer, and is ineligible to
59
+	// be Start()ed.
46 60
 	ociSpec *specs.Spec
47 61
 
48
-	hcsContainer hcsshim.Container
62
+	hcsContainer hcsshim.Container // Is set to nil on container delete
63
+	isPaused     bool
49 64
 
65
+	client           *client
50 66
 	id               string
51
-	status           containerd.ProcessStatus
52
-	exitedAt         time.Time
53
-	exitCode         uint32
54
-	waitCh           chan struct{}
55
-	init             *process
56
-	execs            map[string]*process
57 67
 	terminateInvoked bool
68
+
69
+	// task is a reference to the current task for the container. As a
70
+	// corollary, when task == nil the container has no current task: the
71
+	// container was never Start()ed or the task was Delete()d.
72
+	task *task
58 73
 }
59 74
 
60 75
 // defaultOwner is a tag passed to HCS to allow it to differentiate between
... ...
@@ -63,22 +78,18 @@ type container struct {
63 63
 const defaultOwner = "docker"
64 64
 
65 65
 type client struct {
66
-	sync.Mutex
67
-
68
-	stateDir   string
69
-	backend    libcontainerdtypes.Backend
70
-	logger     *logrus.Entry
71
-	eventQ     queue.Queue
72
-	containers map[string]*container
66
+	stateDir string
67
+	backend  libcontainerdtypes.Backend
68
+	logger   *logrus.Entry
69
+	eventQ   queue.Queue
73 70
 }
74 71
 
75 72
 // NewClient creates a new local executor for windows
76 73
 func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
77 74
 	c := &client{
78
-		stateDir:   stateDir,
79
-		backend:    b,
80
-		logger:     logrus.WithField("module", "libcontainerd").WithField("module", "libcontainerd").WithField("namespace", ns),
81
-		containers: make(map[string]*container),
75
+		stateDir: stateDir,
76
+		backend:  b,
77
+		logger:   logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
82 78
 	}
83 79
 
84 80
 	return c, nil
... ...
@@ -88,7 +99,7 @@ func (c *client) Version(ctx context.Context) (containerd.Version, error) {
88 88
 	return containerd.Version{}, errors.New("not implemented on Windows")
89 89
 }
90 90
 
91
-// Create is the entrypoint to create a container from a spec.
91
+// NewContainer is the entrypoint to create a container from a spec.
92 92
 // The table below shows the fields required for the HCS JSON calling
93 93
 // parameters; any field that is not populated is omitted.
94 94
 // +-----------------+--------------------------------------------+---------------------------------------------------+
... ...
@@ -139,16 +150,12 @@ func (c *client) Version(ctx context.Context) (containerd.Version, error) {
139 139
 //			"ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
140 140
 //		},
141 141
 //	}
142
-func (c *client) Create(_ context.Context, id string, spec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) error {
143
-	if ctr := c.getContainer(id); ctr != nil {
144
-		return errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
145
-	}
146
-
142
+func (c *client) NewContainer(_ context.Context, id string, spec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) (libcontainerdtypes.Container, error) {
147 143
 	var err error
148 144
 	if spec.Linux != nil {
149
-		return errors.New("linux containers are not supported on this platform")
145
+		return nil, errors.New("linux containers are not supported on this platform")
150 146
 	}
151
-	err = c.createWindows(id, spec, runtimeOptions)
147
+	ctr, err := c.createWindows(id, spec, runtimeOptions)
152 148
 
153 149
 	if err == nil {
154 150
 		c.eventQ.Append(id, func() {
... ...
@@ -168,10 +175,10 @@ func (c *client) Create(_ context.Context, id string, spec *specs.Spec, shim str
168 168
 			}
169 169
 		})
170 170
 	}
171
-	return err
171
+	return ctr, err
172 172
 }
173 173
 
174
-func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
174
+func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) (*container, error) {
175 175
 	logger := c.logger.WithField("container", id)
176 176
 	configuration := &hcsshim.ContainerConfig{
177 177
 		SystemType:              "Container",
... ...
@@ -215,7 +222,7 @@ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions inter
215 215
 	// We must have at least two layers in the spec, the bottom one being a
216 216
 	// base image, the top one being the RW layer.
217 217
 	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 {
218
-		return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
218
+		return nil, fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
219 219
 	}
220 220
 
221 221
 	// Strip off the top-most layer as that's passed in separately to HCS
... ...
@@ -226,7 +233,7 @@ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions inter
226 226
 		// We don't currently support setting the utility VM image explicitly.
227 227
 		// TODO circa RS5, this may be re-locatable.
228 228
 		if spec.Windows.HyperV.UtilityVMPath != "" {
229
-			return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
229
+			return nil, errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
230 230
 		}
231 231
 
232 232
 		// Find the upper-most utility VM image.
... ...
@@ -239,35 +246,35 @@ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions inter
239 239
 				break
240 240
 			}
241 241
 			if !os.IsNotExist(err) {
242
-				return err
242
+				return nil, err
243 243
 			}
244 244
 		}
245 245
 		if uvmImagePath == "" {
246
-			return errors.New("utility VM image could not be found")
246
+			return nil, errors.New("utility VM image could not be found")
247 247
 		}
248 248
 		configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
249 249
 
250 250
 		if spec.Root.Path != "" {
251
-			return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
251
+			return nil, errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
252 252
 		}
253 253
 	} else {
254 254
 		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
255 255
 		if _, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil {
256
-			return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
256
+			return nil, fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
257 257
 		}
258 258
 		// HCS API requires the trailing backslash to be removed
259 259
 		configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
260 260
 	}
261 261
 
262 262
 	if spec.Root.Readonly {
263
-		return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
263
+		return nil, errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
264 264
 	}
265 265
 
266 266
 	for _, layerPath := range layerFolders {
267 267
 		_, filename := filepath.Split(layerPath)
268 268
 		g, err := hcsshim.NameToGuid(filename)
269 269
 		if err != nil {
270
-			return err
270
+			return nil, err
271 271
 		}
272 272
 		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
273 273
 			ID:   g.ToString(),
... ...
@@ -281,7 +288,7 @@ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions inter
281 281
 	for _, mount := range spec.Mounts {
282 282
 		const pipePrefix = `\\.\pipe\`
283 283
 		if mount.Type != "" {
284
-			return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
284
+			return nil, fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
285 285
 		}
286 286
 		if strings.HasPrefix(mount.Destination, pipePrefix) {
287 287
 			mp := hcsshim.MappedPipe{
... ...
@@ -309,13 +316,13 @@ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions inter
309 309
 	if len(spec.Windows.Devices) > 0 {
310 310
 		// Add any device assignments
311 311
 		if configuration.HvPartition {
312
-			return errors.New("device assignment is not supported for HyperV containers")
312
+			return nil, errors.New("device assignment is not supported for HyperV containers")
313 313
 		}
314 314
 		for _, d := range spec.Windows.Devices {
315 315
 			// Per https://github.com/microsoft/hcsshim/blob/v0.9.2/internal/uvm/virtual_device.go#L17-L18,
316 316
 			// these represent an Interface Class GUID.
317 317
 			if d.IDType != "class" && d.IDType != "vpci-class-guid" {
318
-				return errors.Errorf("device assignment of type '%s' is not supported", d.IDType)
318
+				return nil, errors.Errorf("device assignment of type '%s' is not supported", d.IDType)
319 319
 			}
320 320
 			configuration.AssignedDevices = append(configuration.AssignedDevices, hcsshim.AssignedDevice{InterfaceClassGUID: d.ID})
321 321
 		}
... ...
@@ -323,38 +330,32 @@ func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions inter
323 323
 
324 324
 	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
325 325
 	if err != nil {
326
-		return err
326
+		return nil, err
327 327
 	}
328 328
 
329 329
 	// Construct a container object for calling start on it.
330 330
 	ctr := &container{
331
+		client:       c,
331 332
 		id:           id,
332
-		execs:        make(map[string]*process),
333 333
 		ociSpec:      spec,
334 334
 		hcsContainer: hcsContainer,
335
-		status:       containerd.Created,
336
-		waitCh:       make(chan struct{}),
337 335
 	}
338 336
 
339 337
 	logger.Debug("starting container")
340
-	if err = hcsContainer.Start(); err != nil {
341
-		c.logger.WithError(err).Error("failed to start container")
342
-		ctr.Lock()
343
-		if err := c.terminateContainer(ctr); err != nil {
344
-			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
338
+	if err := ctr.hcsContainer.Start(); err != nil {
339
+		logger.WithError(err).Error("failed to start container")
340
+		ctr.mu.Lock()
341
+		if err := ctr.terminateContainer(); err != nil {
342
+			logger.WithError(err).Error("failed to cleanup after a failed Start")
345 343
 		} else {
346
-			c.logger.Debug("cleaned up after failed Start by calling Terminate")
344
+			logger.Debug("cleaned up after failed Start by calling Terminate")
347 345
 		}
348
-		ctr.Unlock()
349
-		return err
346
+		ctr.mu.Unlock()
347
+		return nil, err
350 348
 	}
351 349
 
352
-	c.Lock()
353
-	c.containers[id] = ctr
354
-	c.Unlock()
355
-
356 350
 	logger.Debug("createWindows() completed successfully")
357
-	return nil
351
+	return ctr, nil
358 352
 
359 353
 }
360 354
 
... ...
@@ -388,16 +389,18 @@ func (c *client) extractResourcesFromSpec(spec *specs.Spec, configuration *hcssh
388 388
 	}
389 389
 }
390 390
 
391
-func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
392
-	ctr := c.getContainer(id)
391
+func (ctr *container) Start(_ context.Context, _ string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (libcontainerdtypes.Task, error) {
392
+	ctr.mu.Lock()
393
+	defer ctr.mu.Unlock()
394
+
393 395
 	switch {
394
-	case ctr == nil:
395
-		return -1, errors.WithStack(errdefs.NotFound(errors.New("no such container")))
396
-	case ctr.init != nil:
397
-		return -1, errors.WithStack(errdefs.NotModified(errors.New("container already started")))
396
+	case ctr.ociSpec == nil:
397
+		return nil, errors.WithStack(errdefs.NotImplemented(errors.New("a restored container cannot be started")))
398
+	case ctr.task != nil:
399
+		return nil, errors.WithStack(errdefs.NotModified(containerderrdefs.ErrAlreadyExists))
398 400
 	}
399 401
 
400
-	logger := c.logger.WithField("container", id)
402
+	logger := ctr.client.logger.WithField("container", ctr.id)
401 403
 
402 404
 	// Note we always tell HCS to create stdout as it's required
403 405
 	// regardless of '-i' or '-t' options, so that docker can always grab
... ...
@@ -435,32 +438,13 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
435 435
 
436 436
 	createProcessParms.User = ctr.ociSpec.Process.User.Username
437 437
 
438
-	ctr.Lock()
439
-
440 438
 	// Start the command running in the container.
441 439
 	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
442 440
 	if err != nil {
443 441
 		logger.WithError(err).Error("CreateProcess() failed")
444
-		// Fix for https://github.com/moby/moby/issues/38719.
445
-		// If the init process failed to launch, we still need to reap the
446
-		// container to avoid leaking it.
447
-		//
448
-		// Note we use the explicit exit code of 127 which is the
449
-		// Linux shell equivalent of "command not found". Windows cannot
450
-		// know ahead of time whether or not the command exists, especially
451
-		// in the case of Hyper-V containers.
452
-		ctr.Unlock()
453
-		exitedAt := time.Now()
454
-		p := &process{
455
-			id:  libcontainerdtypes.InitProcessName,
456
-			pid: 0,
457
-		}
458
-		c.reapContainer(ctr, p, 127, exitedAt, nil, logger)
459
-		return -1, err
442
+		return nil, err
460 443
 	}
461 444
 
462
-	defer ctr.Unlock()
463
-
464 445
 	defer func() {
465 446
 		if err != nil {
466 447
 			if err := newProcess.Kill(); err != nil {
... ...
@@ -476,55 +460,69 @@ func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachSt
476 476
 			}()
477 477
 		}
478 478
 	}()
479
-	p := &process{
480
-		hcsProcess: newProcess,
479
+	t := &task{process: process{
481 480
 		id:         libcontainerdtypes.InitProcessName,
482
-		pid:        newProcess.Pid(),
483
-	}
484
-	logger.WithField("pid", p.pid).Debug("init process started")
485
-
486
-	ctr.status = containerd.Running
487
-	ctr.init = p
481
+		ctr:        ctr,
482
+		hcsProcess: newProcess,
483
+		waitCh:     make(chan struct{}),
484
+	}}
485
+	pid := t.Pid()
486
+	logger.WithField("pid", pid).Debug("init process started")
488 487
 
489
-	// Spin up a go routine waiting for exit to handle cleanup
490
-	go c.reapProcess(ctr, p)
488
+	// Spin up a goroutine to notify the backend and clean up resources when
489
+	// the task exits. Defer until after the start event is sent so that the
490
+	// exit event is not sent out-of-order.
491
+	defer func() { go t.reap() }()
491 492
 
492 493
 	// Don't shadow err here due to our deferred clean-up.
493 494
 	var dio *cio.DirectIO
494 495
 	dio, err = newIOFromProcess(newProcess, ctr.ociSpec.Process.Terminal)
495 496
 	if err != nil {
496 497
 		logger.WithError(err).Error("failed to get stdio pipes")
497
-		return -1, err
498
+		return nil, err
498 499
 	}
499 500
 	_, err = attachStdio(dio)
500 501
 	if err != nil {
501 502
 		logger.WithError(err).Error("failed to attach stdio")
502
-		return -1, err
503
+		return nil, err
503 504
 	}
504 505
 
506
+	// All fallible operations have succeeded so it is now safe to set the
507
+	// container's current task.
508
+	ctr.task = t
509
+
505 510
 	// Generate the associated event
506
-	c.eventQ.Append(id, func() {
511
+	ctr.client.eventQ.Append(ctr.id, func() {
507 512
 		ei := libcontainerdtypes.EventInfo{
508
-			ContainerID: id,
513
+			ContainerID: ctr.id,
509 514
 			ProcessID:   libcontainerdtypes.InitProcessName,
510
-			Pid:         uint32(p.pid),
515
+			Pid:         pid,
511 516
 		}
512
-		c.logger.WithFields(logrus.Fields{
517
+		ctr.client.logger.WithFields(logrus.Fields{
513 518
 			"container":  ctr.id,
514 519
 			"event":      libcontainerdtypes.EventStart,
515 520
 			"event-info": ei,
516 521
 		}).Info("sending event")
517
-		err := c.backend.ProcessEvent(ei.ContainerID, libcontainerdtypes.EventStart, ei)
522
+		err := ctr.client.backend.ProcessEvent(ei.ContainerID, libcontainerdtypes.EventStart, ei)
518 523
 		if err != nil {
519
-			c.logger.WithError(err).WithFields(logrus.Fields{
520
-				"container":  id,
524
+			ctr.client.logger.WithError(err).WithFields(logrus.Fields{
525
+				"container":  ei.ContainerID,
521 526
 				"event":      libcontainerdtypes.EventStart,
522 527
 				"event-info": ei,
523 528
 			}).Error("failed to process event")
524 529
 		}
525 530
 	})
526 531
 	logger.Debug("start() completed")
527
-	return p.pid, nil
532
+	return t, nil
533
+}
534
+
535
+func (ctr *container) Task(context.Context) (libcontainerdtypes.Task, error) {
536
+	ctr.mu.Lock()
537
+	defer ctr.mu.Unlock()
538
+	if ctr.task == nil {
539
+		return nil, errdefs.NotFound(containerderrdefs.ErrNotFound)
540
+	}
541
+	return ctr.task, nil
528 542
 }
529 543
 
530 544
 // setCommandLineAndArgs configures the HCS ProcessConfig based on an OCI process spec
... ...
@@ -554,19 +552,18 @@ func newIOFromProcess(newProcess hcsshim.Process, terminal bool) (*cio.DirectIO,
554 554
 	return dio, nil
555 555
 }
556 556
 
557
-// Exec adds a process in an running container
558
-func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
559
-	ctr := c.getContainer(containerID)
560
-	switch {
561
-	case ctr == nil:
562
-		return -1, errors.WithStack(errdefs.NotFound(errors.New("no such container")))
563
-	case ctr.hcsContainer == nil:
564
-		return -1, errors.WithStack(errdefs.InvalidParameter(errors.New("container is not running")))
565
-	case ctr.execs != nil && ctr.execs[processID] != nil:
566
-		return -1, errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
567
-	}
568
-	logger := c.logger.WithFields(logrus.Fields{
569
-		"container": containerID,
557
+// Exec launches a process in a running container.
558
+//
559
+// The processID argument is entirely informational. As there is no mechanism
560
+// (exposed through the libcontainerd interfaces) to enumerate or reference an
561
+// exec'd process by ID, uniqueness is not currently enforced.
562
+func (t *task) Exec(ctx context.Context, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (libcontainerdtypes.Process, error) {
563
+	hcsContainer, err := t.getHCSContainer()
564
+	if err != nil {
565
+		return nil, err
566
+	}
567
+	logger := t.ctr.client.logger.WithFields(logrus.Fields{
568
+		"container": t.ctr.id,
570 569
 		"exec":      processID,
571 570
 	})
572 571
 
... ...
@@ -593,7 +590,7 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
593 593
 	if spec.Cwd != "" {
594 594
 		createProcessParms.WorkingDirectory = spec.Cwd
595 595
 	} else {
596
-		createProcessParms.WorkingDirectory = ctr.ociSpec.Process.Cwd
596
+		createProcessParms.WorkingDirectory = t.ctr.ociSpec.Process.Cwd
597 597
 	}
598 598
 
599 599
 	// Configure the environment for the process
... ...
@@ -606,10 +603,10 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
606 606
 	createProcessParms.User = spec.User.Username
607 607
 
608 608
 	// Start the command running in the container.
609
-	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
609
+	newProcess, err := hcsContainer.CreateProcess(createProcessParms)
610 610
 	if err != nil {
611 611
 		logger.WithError(err).Errorf("exec's CreateProcess() failed")
612
-		return -1, err
612
+		return nil, err
613 613
 	}
614 614
 	pid := newProcess.Pid()
615 615
 	defer func() {
... ...
@@ -631,163 +628,180 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
631 631
 	dio, err := newIOFromProcess(newProcess, spec.Terminal)
632 632
 	if err != nil {
633 633
 		logger.WithError(err).Error("failed to get stdio pipes")
634
-		return -1, err
634
+		return nil, err
635 635
 	}
636 636
 	// Tell the engine to attach streams back to the client
637 637
 	_, err = attachStdio(dio)
638 638
 	if err != nil {
639
-		return -1, err
639
+		return nil, err
640 640
 	}
641 641
 
642 642
 	p := &process{
643 643
 		id:         processID,
644
-		pid:        pid,
644
+		ctr:        t.ctr,
645 645
 		hcsProcess: newProcess,
646
+		waitCh:     make(chan struct{}),
646 647
 	}
647 648
 
648
-	// Add the process to the container's list of processes
649
-	ctr.Lock()
650
-	ctr.execs[processID] = p
651
-	ctr.Unlock()
652
-
653
-	// Spin up a go routine waiting for exit to handle cleanup
654
-	go c.reapProcess(ctr, p)
649
+	// Spin up a goroutine to notify the backend and clean up resources when
650
+	// the process exits. Defer until after the start event is sent so that
651
+	// the exit event is not sent out-of-order.
652
+	defer func() { go p.reap() }()
655 653
 
656
-	c.eventQ.Append(ctr.id, func() {
654
+	t.ctr.client.eventQ.Append(t.ctr.id, func() {
657 655
 		ei := libcontainerdtypes.EventInfo{
658
-			ContainerID: ctr.id,
656
+			ContainerID: t.ctr.id,
659 657
 			ProcessID:   p.id,
660
-			Pid:         uint32(p.pid),
658
+			Pid:         uint32(pid),
661 659
 		}
662
-		c.logger.WithFields(logrus.Fields{
663
-			"container":  ctr.id,
660
+		t.ctr.client.logger.WithFields(logrus.Fields{
661
+			"container":  t.ctr.id,
664 662
 			"event":      libcontainerdtypes.EventExecAdded,
665 663
 			"event-info": ei,
666 664
 		}).Info("sending event")
667
-		err := c.backend.ProcessEvent(ctr.id, libcontainerdtypes.EventExecAdded, ei)
665
+		err := t.ctr.client.backend.ProcessEvent(t.ctr.id, libcontainerdtypes.EventExecAdded, ei)
668 666
 		if err != nil {
669
-			c.logger.WithError(err).WithFields(logrus.Fields{
670
-				"container":  ctr.id,
667
+			t.ctr.client.logger.WithError(err).WithFields(logrus.Fields{
668
+				"container":  t.ctr.id,
671 669
 				"event":      libcontainerdtypes.EventExecAdded,
672 670
 				"event-info": ei,
673 671
 			}).Error("failed to process event")
674 672
 		}
675
-		err = c.backend.ProcessEvent(ctr.id, libcontainerdtypes.EventExecStarted, ei)
673
+		err = t.ctr.client.backend.ProcessEvent(t.ctr.id, libcontainerdtypes.EventExecStarted, ei)
676 674
 		if err != nil {
677
-			c.logger.WithError(err).WithFields(logrus.Fields{
678
-				"container":  ctr.id,
675
+			t.ctr.client.logger.WithError(err).WithFields(logrus.Fields{
676
+				"container":  t.ctr.id,
679 677
 				"event":      libcontainerdtypes.EventExecStarted,
680 678
 				"event-info": ei,
681 679
 			}).Error("failed to process event")
682 680
 		}
683 681
 	})
684 682
 
685
-	return pid, nil
683
+	return p, nil
684
+}
685
+
686
+func (p *process) Pid() uint32 {
687
+	p.mu.Lock()
688
+	hcsProcess := p.hcsProcess
689
+	p.mu.Unlock()
690
+	if hcsProcess == nil {
691
+		return 0
692
+	}
693
+	return uint32(hcsProcess.Pid())
694
+}
695
+
696
+func (p *process) Kill(_ context.Context, signal syscall.Signal) error {
697
+	p.mu.Lock()
698
+	hcsProcess := p.hcsProcess
699
+	p.mu.Unlock()
700
+	if hcsProcess == nil {
701
+		return errors.WithStack(errdefs.NotFound(errors.New("process not found")))
702
+	}
703
+	return hcsProcess.Kill()
686 704
 }
687 705
 
688
-// SignalProcess handles `docker stop` on Windows. While Linux has support for
706
+// Kill handles `docker stop` on Windows. While Linux has support for
689 707
 // the full range of signals, signals aren't really implemented on Windows.
690 708
 // We fake supporting regular stop and -9 to force kill.
691
-func (c *client) SignalProcess(_ context.Context, containerID, processID string, signal syscall.Signal) error {
692
-	ctr, p, err := c.getProcess(containerID, processID)
709
+func (t *task) Kill(_ context.Context, signal syscall.Signal) error {
710
+	hcsContainer, err := t.getHCSContainer()
693 711
 	if err != nil {
694 712
 		return err
695 713
 	}
696 714
 
697
-	logger := c.logger.WithFields(logrus.Fields{
698
-		"container": containerID,
699
-		"process":   processID,
700
-		"pid":       p.pid,
715
+	logger := t.ctr.client.logger.WithFields(logrus.Fields{
716
+		"container": t.ctr.id,
717
+		"process":   t.id,
718
+		"pid":       t.Pid(),
701 719
 		"signal":    signal,
702 720
 	})
703 721
 	logger.Debug("Signal()")
704 722
 
705
-	if processID == libcontainerdtypes.InitProcessName {
706
-		if syscall.Signal(signal) == syscall.SIGKILL {
707
-			// Terminate the compute system
708
-			ctr.Lock()
709
-			ctr.terminateInvoked = true
710
-			if err := ctr.hcsContainer.Terminate(); err != nil {
711
-				if !hcsshim.IsPending(err) {
712
-					logger.WithError(err).Error("failed to terminate hccshim container")
713
-				}
714
-			}
715
-			ctr.Unlock()
716
-		} else {
717
-			// Shut down the container
718
-			if err := ctr.hcsContainer.Shutdown(); err != nil {
719
-				if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
720
-					// ignore errors
721
-					logger.WithError(err).Error("failed to shutdown hccshim container")
722
-				}
723
-			}
724
-		}
723
+	var op string
724
+	if signal == syscall.SIGKILL {
725
+		// Terminate the compute system
726
+		t.ctr.mu.Lock()
727
+		t.ctr.terminateInvoked = true
728
+		t.ctr.mu.Unlock()
729
+		op, err = "terminate", hcsContainer.Terminate()
725 730
 	} else {
726
-		return p.hcsProcess.Kill()
731
+		// Shut down the container
732
+		op, err = "shutdown", hcsContainer.Shutdown()
733
+	}
734
+	if err != nil {
735
+		if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
736
+			// ignore errors
737
+			logger.WithError(err).Errorf("failed to %s hccshim container", op)
738
+		}
727 739
 	}
728 740
 
729 741
 	return nil
730 742
 }
731 743
 
732
-// ResizeTerminal handles a CLI event to resize an interactive docker run or docker
744
+// Resize handles a CLI event to resize an interactive docker run or docker
733 745
 // exec window.
734
-func (c *client) ResizeTerminal(_ context.Context, containerID, processID string, width, height int) error {
735
-	_, p, err := c.getProcess(containerID, processID)
736
-	if err != nil {
737
-		return err
746
+func (p *process) Resize(_ context.Context, width, height uint32) error {
747
+	p.mu.Lock()
748
+	hcsProcess := p.hcsProcess
749
+	p.mu.Unlock()
750
+	if hcsProcess == nil {
751
+		return errors.WithStack(errdefs.NotFound(errors.New("process not found")))
738 752
 	}
739 753
 
740
-	c.logger.WithFields(logrus.Fields{
741
-		"container": containerID,
742
-		"process":   processID,
754
+	p.ctr.client.logger.WithFields(logrus.Fields{
755
+		"container": p.ctr.id,
756
+		"process":   p.id,
743 757
 		"height":    height,
744 758
 		"width":     width,
745
-		"pid":       p.pid,
759
+		"pid":       hcsProcess.Pid(),
746 760
 	}).Debug("resizing")
747
-	return p.hcsProcess.ResizeConsole(uint16(width), uint16(height))
761
+	return hcsProcess.ResizeConsole(uint16(width), uint16(height))
748 762
 }
749 763
 
750
-func (c *client) CloseStdin(_ context.Context, containerID, processID string) error {
751
-	_, p, err := c.getProcess(containerID, processID)
752
-	if err != nil {
753
-		return err
764
+func (p *process) CloseStdin(context.Context) error {
765
+	p.mu.Lock()
766
+	hcsProcess := p.hcsProcess
767
+	p.mu.Unlock()
768
+	if hcsProcess == nil {
769
+		return errors.WithStack(errdefs.NotFound(errors.New("process not found")))
754 770
 	}
755 771
 
756
-	return p.hcsProcess.CloseStdin()
772
+	return hcsProcess.CloseStdin()
757 773
 }
758 774
 
759 775
 // Pause handles pause requests for containers
760
-func (c *client) Pause(_ context.Context, containerID string) error {
761
-	ctr, _, err := c.getProcess(containerID, libcontainerdtypes.InitProcessName)
762
-	if err != nil {
763
-		return err
764
-	}
765
-
766
-	if ctr.ociSpec.Windows.HyperV == nil {
776
+func (t *task) Pause(_ context.Context) error {
777
+	if t.ctr.ociSpec.Windows.HyperV == nil {
767 778
 		return containerderrdefs.ErrNotImplemented
768 779
 	}
769 780
 
770
-	ctr.Lock()
771
-	defer ctr.Unlock()
781
+	t.ctr.mu.Lock()
782
+	defer t.ctr.mu.Unlock()
772 783
 
773
-	if err = ctr.hcsContainer.Pause(); err != nil {
784
+	if err := t.assertIsCurrentTask(); err != nil {
785
+		return err
786
+	}
787
+	if t.ctr.hcsContainer == nil {
788
+		return errdefs.NotFound(errors.WithStack(fmt.Errorf("container %q not found", t.ctr.id)))
789
+	}
790
+	if err := t.ctr.hcsContainer.Pause(); err != nil {
774 791
 		return err
775 792
 	}
776 793
 
777
-	ctr.status = containerd.Paused
794
+	t.ctr.isPaused = true
778 795
 
779
-	c.eventQ.Append(containerID, func() {
780
-		err := c.backend.ProcessEvent(containerID, libcontainerdtypes.EventPaused, libcontainerdtypes.EventInfo{
781
-			ContainerID: containerID,
796
+	t.ctr.client.eventQ.Append(t.ctr.id, func() {
797
+		err := t.ctr.client.backend.ProcessEvent(t.ctr.id, libcontainerdtypes.EventPaused, libcontainerdtypes.EventInfo{
798
+			ContainerID: t.ctr.id,
782 799
 			ProcessID:   libcontainerdtypes.InitProcessName,
783 800
 		})
784
-		c.logger.WithFields(logrus.Fields{
785
-			"container": ctr.id,
801
+		t.ctr.client.logger.WithFields(logrus.Fields{
802
+			"container": t.ctr.id,
786 803
 			"event":     libcontainerdtypes.EventPaused,
787 804
 		}).Info("sending event")
788 805
 		if err != nil {
789
-			c.logger.WithError(err).WithFields(logrus.Fields{
790
-				"container": containerID,
806
+			t.ctr.client.logger.WithError(err).WithFields(logrus.Fields{
807
+				"container": t.ctr.id,
791 808
 				"event":     libcontainerdtypes.EventPaused,
792 809
 			}).Error("failed to process event")
793 810
 		}
... ...
@@ -797,37 +811,38 @@ func (c *client) Pause(_ context.Context, containerID string) error {
797 797
 }
798 798
 
799 799
 // Resume handles resume requests for containers
800
-func (c *client) Resume(_ context.Context, containerID string) error {
801
-	ctr, _, err := c.getProcess(containerID, libcontainerdtypes.InitProcessName)
802
-	if err != nil {
803
-		return err
804
-	}
805
-
806
-	if ctr.ociSpec.Windows.HyperV == nil {
800
+func (t *task) Resume(ctx context.Context) error {
801
+	if t.ctr.ociSpec.Windows.HyperV == nil {
807 802
 		return errors.New("cannot resume Windows Server Containers")
808 803
 	}
809 804
 
810
-	ctr.Lock()
811
-	defer ctr.Unlock()
805
+	t.ctr.mu.Lock()
806
+	defer t.ctr.mu.Unlock()
812 807
 
813
-	if err = ctr.hcsContainer.Resume(); err != nil {
808
+	if err := t.assertIsCurrentTask(); err != nil {
809
+		return err
810
+	}
811
+	if t.ctr.hcsContainer == nil {
812
+		return errdefs.NotFound(errors.WithStack(fmt.Errorf("container %q not found", t.ctr.id)))
813
+	}
814
+	if err := t.ctr.hcsContainer.Resume(); err != nil {
814 815
 		return err
815 816
 	}
816 817
 
817
-	ctr.status = containerd.Running
818
+	t.ctr.isPaused = false
818 819
 
819
-	c.eventQ.Append(containerID, func() {
820
-		err := c.backend.ProcessEvent(containerID, libcontainerdtypes.EventResumed, libcontainerdtypes.EventInfo{
821
-			ContainerID: containerID,
820
+	t.ctr.client.eventQ.Append(t.ctr.id, func() {
821
+		err := t.ctr.client.backend.ProcessEvent(t.ctr.id, libcontainerdtypes.EventResumed, libcontainerdtypes.EventInfo{
822
+			ContainerID: t.ctr.id,
822 823
 			ProcessID:   libcontainerdtypes.InitProcessName,
823 824
 		})
824
-		c.logger.WithFields(logrus.Fields{
825
-			"container": ctr.id,
825
+		t.ctr.client.logger.WithFields(logrus.Fields{
826
+			"container": t.ctr.id,
826 827
 			"event":     libcontainerdtypes.EventResumed,
827 828
 		}).Info("sending event")
828 829
 		if err != nil {
829
-			c.logger.WithError(err).WithFields(logrus.Fields{
830
-				"container": containerID,
830
+			t.ctr.client.logger.WithError(err).WithFields(logrus.Fields{
831
+				"container": t.ctr.id,
831 832
 				"event":     libcontainerdtypes.EventResumed,
832 833
 			}).Error("failed to process event")
833 834
 		}
... ...
@@ -837,14 +852,14 @@ func (c *client) Resume(_ context.Context, containerID string) error {
837 837
 }
838 838
 
839 839
 // Stats handles stats requests for containers
840
-func (c *client) Stats(_ context.Context, containerID string) (*libcontainerdtypes.Stats, error) {
841
-	ctr, _, err := c.getProcess(containerID, libcontainerdtypes.InitProcessName)
840
+func (t *task) Stats(_ context.Context) (*libcontainerdtypes.Stats, error) {
841
+	hc, err := t.getHCSContainer()
842 842
 	if err != nil {
843 843
 		return nil, err
844 844
 	}
845 845
 
846 846
 	readAt := time.Now()
847
-	s, err := ctr.hcsContainer.Statistics()
847
+	s, err := hc.Statistics()
848 848
 	if err != nil {
849 849
 		return nil, err
850 850
 	}
... ...
@@ -854,9 +869,9 @@ func (c *client) Stats(_ context.Context, containerID string) (*libcontainerdtyp
854 854
 	}, nil
855 855
 }
856 856
 
857
-// Restore is the handler for restoring a container
858
-func (c *client) Restore(ctx context.Context, id string, attachStdio libcontainerdtypes.StdioCallback) (bool, int, libcontainerdtypes.Process, error) {
859
-	c.logger.WithField("container", id).Debug("restore()")
857
+// LoadContainer is the handler for restoring a container
858
+func (c *client) LoadContainer(ctx context.Context, id string) (libcontainerdtypes.Container, error) {
859
+	c.logger.WithField("container", id).Debug("LoadContainer()")
860 860
 
861 861
 	// TODO Windows: On RS1, a re-attach isn't possible.
862 862
 	// However, there is a scenario in which there is an issue.
... ...
@@ -865,30 +880,40 @@ func (c *client) Restore(ctx context.Context, id string, attachStdio libcontaine
865 865
 	// For consistency, we call in to shoot it regardless of whether HCS knows about it
866 866
 	// We explicitly just log a warning if the terminate fails.
867 867
 	// Then we tell the backend the container exited.
868
-	if hc, err := hcsshim.OpenContainer(id); err == nil {
869
-		const terminateTimeout = time.Minute * 2
870
-		err := hc.Terminate()
871
-
872
-		if hcsshim.IsPending(err) {
873
-			err = hc.WaitTimeout(terminateTimeout)
874
-		} else if hcsshim.IsAlreadyStopped(err) {
875
-			err = nil
876
-		}
868
+	hc, err := hcsshim.OpenContainer(id)
869
+	if err != nil {
870
+		return nil, errdefs.NotFound(errors.New("container not found"))
871
+	}
872
+	const terminateTimeout = time.Minute * 2
873
+	err = hc.Terminate()
877 874
 
878
-		if err != nil {
879
-			c.logger.WithField("container", id).WithError(err).Debug("terminate failed on restore")
880
-			return false, -1, nil, err
881
-		}
875
+	if hcsshim.IsPending(err) {
876
+		err = hc.WaitTimeout(terminateTimeout)
877
+	} else if hcsshim.IsAlreadyStopped(err) {
878
+		err = nil
879
+	}
880
+
881
+	if err != nil {
882
+		c.logger.WithField("container", id).WithError(err).Debug("terminate failed on restore")
883
+		return nil, err
882 884
 	}
883
-	return false, -1, &restoredProcess{
884
-		c:  c,
885
-		id: id,
885
+	return &container{
886
+		client:       c,
887
+		hcsContainer: hc,
888
+		id:           id,
886 889
 	}, nil
887 890
 }
888 891
 
889
-// ListPids returns a list of process IDs running in a container. It is not
892
+// AttachTask is only called by the daemon when restoring containers. As
893
+// re-attach isn't possible (see LoadContainer), a NotFound error is
894
+// unconditionally returned to allow restore to make progress.
895
+func (*container) AttachTask(context.Context, libcontainerdtypes.StdioCallback) (libcontainerdtypes.Task, error) {
896
+	return nil, errdefs.NotFound(containerderrdefs.ErrNotImplemented)
897
+}
898
+
899
+// Pids returns a list of process IDs running in a container. It is not
890 900
 // implemented on Windows.
891
-func (c *client) ListPids(_ context.Context, _ string) ([]uint32, error) {
901
+func (t *task) Pids(context.Context) ([]containerd.ProcessInfo, error) {
892 902
 	return nil, errors.New("not implemented on Windows")
893 903
 }
894 904
 
... ...
@@ -898,13 +923,13 @@ func (c *client) ListPids(_ context.Context, _ string) ([]uint32, error) {
898 898
 // the containers could be Hyper-V containers, they would not be
899 899
 // visible on the container host. However, libcontainerd does have
900 900
 // that information.
901
-func (c *client) Summary(_ context.Context, containerID string) ([]libcontainerdtypes.Summary, error) {
902
-	ctr, _, err := c.getProcess(containerID, libcontainerdtypes.InitProcessName)
901
+func (t *task) Summary(_ context.Context) ([]libcontainerdtypes.Summary, error) {
902
+	hc, err := t.getHCSContainer()
903 903
 	if err != nil {
904 904
 		return nil, err
905 905
 	}
906 906
 
907
-	p, err := ctr.hcsContainer.ProcessList()
907
+	p, err := hc.ProcessList()
908 908
 	if err != nil {
909 909
 		return nil, err
910 910
 	}
... ...
@@ -926,118 +951,114 @@ func (c *client) Summary(_ context.Context, containerID string) ([]libcontainerd
926 926
 	return pl, nil
927 927
 }
928 928
 
929
-type restoredProcess struct {
930
-	id string
931
-	c  *client
932
-}
933
-
934
-func (p *restoredProcess) Delete(ctx context.Context) (uint32, time.Time, error) {
935
-	return p.c.DeleteTask(ctx, p.id)
936
-}
937
-
938
-func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
939
-	ec := -1
940
-	ctr := c.getContainer(containerID)
941
-	if ctr == nil {
942
-		return uint32(ec), time.Now(), errors.WithStack(errdefs.NotFound(errors.New("no such container")))
929
+func (p *process) Delete(ctx context.Context) (*containerd.ExitStatus, error) {
930
+	select {
931
+	case <-ctx.Done():
932
+		return nil, errors.WithStack(ctx.Err())
933
+	case <-p.waitCh:
934
+	default:
935
+		return nil, errdefs.Conflict(errors.New("process is running"))
943 936
 	}
937
+	return p.exited, nil
938
+}
944 939
 
940
+func (t *task) Delete(ctx context.Context) (*containerd.ExitStatus, error) {
945 941
 	select {
946 942
 	case <-ctx.Done():
947
-		return uint32(ec), time.Now(), errors.WithStack(ctx.Err())
948
-	case <-ctr.waitCh:
943
+		return nil, errors.WithStack(ctx.Err())
944
+	case <-t.waitCh:
949 945
 	default:
950
-		return uint32(ec), time.Now(), errors.New("container is not stopped")
946
+		return nil, errdefs.Conflict(errors.New("container is not stopped"))
951 947
 	}
952 948
 
953
-	ctr.Lock()
954
-	defer ctr.Unlock()
955
-	return ctr.exitCode, ctr.exitedAt, nil
949
+	t.ctr.mu.Lock()
950
+	defer t.ctr.mu.Unlock()
951
+	if err := t.assertIsCurrentTask(); err != nil {
952
+		return nil, err
953
+	}
954
+	t.ctr.task = nil
955
+	return t.exited, nil
956 956
 }
957 957
 
958
-func (c *client) Delete(_ context.Context, containerID string) error {
959
-	c.Lock()
960
-	defer c.Unlock()
961
-	ctr := c.containers[containerID]
962
-	if ctr == nil {
963
-		return errors.WithStack(errdefs.NotFound(errors.New("no such container")))
958
+func (t *task) ForceDelete(ctx context.Context) error {
959
+	select {
960
+	case <-t.waitCh: // Task is already stopped.
961
+		_, err := t.Delete(ctx)
962
+		return err
963
+	default:
964 964
 	}
965 965
 
966
-	ctr.Lock()
967
-	defer ctr.Unlock()
968
-
969
-	switch ctr.status {
970
-	case containerd.Created:
971
-		if err := c.shutdownContainer(ctr); err != nil {
972
-			return err
973
-		}
974
-		fallthrough
975
-	case containerd.Stopped:
976
-		delete(c.containers, containerID)
977
-		return nil
966
+	if err := t.Kill(ctx, syscall.SIGKILL); err != nil {
967
+		return errors.Wrap(err, "could not force-kill task")
978 968
 	}
979 969
 
980
-	return errors.WithStack(errdefs.InvalidParameter(errors.New("container is not stopped")))
970
+	select {
971
+	case <-ctx.Done():
972
+		return ctx.Err()
973
+	case <-t.waitCh:
974
+		_, err := t.Delete(ctx)
975
+		return err
976
+	}
981 977
 }
982 978
 
983
-func (c *client) Status(ctx context.Context, containerID string) (containerd.ProcessStatus, error) {
984
-	c.Lock()
985
-	defer c.Unlock()
986
-	ctr := c.containers[containerID]
987
-	if ctr == nil {
988
-		return containerd.Unknown, errors.WithStack(errdefs.NotFound(errors.New("no such container")))
979
+func (t *task) Status(ctx context.Context) (containerd.Status, error) {
980
+	select {
981
+	case <-t.waitCh:
982
+		return containerd.Status{
983
+			Status:     containerd.Stopped,
984
+			ExitStatus: t.exited.ExitCode(),
985
+			ExitTime:   t.exited.ExitTime(),
986
+		}, nil
987
+	default:
989 988
 	}
990 989
 
991
-	ctr.Lock()
992
-	defer ctr.Unlock()
993
-	return ctr.status, nil
990
+	t.ctr.mu.Lock()
991
+	defer t.ctr.mu.Unlock()
992
+	s := containerd.Running
993
+	if t.ctr.isPaused {
994
+		s = containerd.Paused
995
+	}
996
+	return containerd.Status{Status: s}, nil
994 997
 }
995 998
 
996
-func (c *client) UpdateResources(ctx context.Context, containerID string, resources *libcontainerdtypes.Resources) error {
999
+func (*task) UpdateResources(ctx context.Context, resources *libcontainerdtypes.Resources) error {
997 1000
 	// Updating resources isn't supported on Windows,
998 1001
 	// but we should return nil so that updating the container still succeeds.
999 1002
 	return nil
1000 1003
 }
1001 1004
 
1002
-func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
1005
+func (*task) CreateCheckpoint(ctx context.Context, checkpointDir string, exit bool) error {
1003 1006
 	return errors.New("Windows: Containers do not support checkpoints")
1004 1007
 }
1005 1008
 
1006
-func (c *client) getContainer(id string) *container {
1007
-	c.Lock()
1008
-	ctr := c.containers[id]
1009
-	c.Unlock()
1010
-
1011
-	return ctr
1009
+// assertIsCurrentTask returns a non-nil error if the task has been deleted.
1010
+func (t *task) assertIsCurrentTask() error {
1011
+	if t.ctr.task != t {
1012
+		return errors.WithStack(errdefs.NotFound(fmt.Errorf("task %q not found", t.id)))
1013
+	}
1014
+	return nil
1012 1015
 }
1013 1016
 
1014
-func (c *client) getProcess(containerID, processID string) (*container, *process, error) {
1015
-	ctr := c.getContainer(containerID)
1016
-	switch {
1017
-	case ctr == nil:
1018
-		return nil, nil, errors.WithStack(errdefs.NotFound(errors.New("no such container")))
1019
-	case ctr.init == nil:
1020
-		return nil, nil, errors.WithStack(errdefs.NotFound(errors.New("container is not running")))
1021
-	case processID == libcontainerdtypes.InitProcessName:
1022
-		return ctr, ctr.init, nil
1023
-	default:
1024
-		ctr.Lock()
1025
-		defer ctr.Unlock()
1026
-		if ctr.execs == nil {
1027
-			return nil, nil, errors.WithStack(errdefs.NotFound(errors.New("no execs")))
1028
-		}
1017
+// getHCSContainer returns a reference to the hcsshim Container for the task's
1018
+// container if neither the task nor container have been deleted.
1019
+//
1020
+// t.ctr.mu must not be locked by the calling goroutine when calling this
1021
+// function.
1022
+func (t *task) getHCSContainer() (hcsshim.Container, error) {
1023
+	t.ctr.mu.Lock()
1024
+	defer t.ctr.mu.Unlock()
1025
+	if err := t.assertIsCurrentTask(); err != nil {
1026
+		return nil, err
1029 1027
 	}
1030
-
1031
-	p := ctr.execs[processID]
1032
-	if p == nil {
1033
-		return nil, nil, errors.WithStack(errdefs.NotFound(errors.New("no such exec")))
1028
+	hc := t.ctr.hcsContainer
1029
+	if hc == nil {
1030
+		return nil, errors.WithStack(errdefs.NotFound(fmt.Errorf("container %q not found", t.ctr.id)))
1034 1031
 	}
1035
-
1036
-	return ctr, p, nil
1032
+	return hc, nil
1037 1033
 }
1038 1034
 
1039 1035
 // ctr mutex must be held when calling this function.
1040
-func (c *client) shutdownContainer(ctr *container) error {
1036
+func (ctr *container) shutdownContainer() error {
1041 1037
 	var err error
1042 1038
 	const waitTimeout = time.Minute * 5
1043 1039
 
... ...
@@ -1052,11 +1073,11 @@ func (c *client) shutdownContainer(ctr *container) error {
1052 1052
 	}
1053 1053
 
1054 1054
 	if err != nil {
1055
-		c.logger.WithError(err).WithField("container", ctr.id).
1055
+		ctr.client.logger.WithError(err).WithField("container", ctr.id).
1056 1056
 			Debug("failed to shutdown container, terminating it")
1057
-		terminateErr := c.terminateContainer(ctr)
1057
+		terminateErr := ctr.terminateContainer()
1058 1058
 		if terminateErr != nil {
1059
-			c.logger.WithError(terminateErr).WithField("container", ctr.id).
1059
+			ctr.client.logger.WithError(terminateErr).WithField("container", ctr.id).
1060 1060
 				Error("failed to shutdown container, and subsequent terminate also failed")
1061 1061
 			return fmt.Errorf("%s: subsequent terminate failed %s", err, terminateErr)
1062 1062
 		}
... ...
@@ -1067,7 +1088,7 @@ func (c *client) shutdownContainer(ctr *container) error {
1067 1067
 }
1068 1068
 
1069 1069
 // ctr mutex must be held when calling this function.
1070
-func (c *client) terminateContainer(ctr *container) error {
1070
+func (ctr *container) terminateContainer() error {
1071 1071
 	const terminateTimeout = time.Minute * 5
1072 1072
 	ctr.terminateInvoked = true
1073 1073
 	err := ctr.hcsContainer.Terminate()
... ...
@@ -1079,7 +1100,7 @@ func (c *client) terminateContainer(ctr *container) error {
1079 1079
 	}
1080 1080
 
1081 1081
 	if err != nil {
1082
-		c.logger.WithError(err).WithField("container", ctr.id).
1082
+		ctr.client.logger.WithError(err).WithField("container", ctr.id).
1083 1083
 			Debug("failed to terminate container")
1084 1084
 		return err
1085 1085
 	}
... ...
@@ -1087,9 +1108,9 @@ func (c *client) terminateContainer(ctr *container) error {
1087 1087
 	return nil
1088 1088
 }
1089 1089
 
1090
-func (c *client) reapProcess(ctr *container, p *process) int {
1091
-	logger := c.logger.WithFields(logrus.Fields{
1092
-		"container": ctr.id,
1090
+func (p *process) reap() {
1091
+	logger := p.ctr.client.logger.WithFields(logrus.Fields{
1092
+		"container": p.ctr.id,
1093 1093
 		"process":   p.id,
1094 1094
 	})
1095 1095
 
... ...
@@ -1100,10 +1121,9 @@ func (c *client) reapProcess(ctr *container, p *process) int {
1100 1100
 		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
1101 1101
 			logger.WithError(err).Warnf("Wait() failed (container may have been killed)")
1102 1102
 		}
1103
-		// Fall through here, do not return. This ensures we attempt to
1104
-		// continue the shutdown in HCS and tell the docker engine that the
1105
-		// process/container has exited to avoid a container being dropped on
1106
-		// the floor.
1103
+		// Fall through here, do not return. This ensures we tell the
1104
+		// docker engine that the process/container has exited to avoid
1105
+		// a container being dropped on the floor.
1107 1106
 	}
1108 1107
 	exitedAt := time.Now()
1109 1108
 
... ...
@@ -1116,87 +1136,88 @@ func (c *client) reapProcess(ctr *container, p *process) int {
1116 1116
 		// code we return doesn't incorrectly indicate success.
1117 1117
 		exitCode = -1
1118 1118
 
1119
-		// Fall through here, do not return. This ensures we attempt to
1120
-		// continue the shutdown in HCS and tell the docker engine that the
1121
-		// process/container has exited to avoid a container being dropped on
1122
-		// the floor.
1119
+		// Fall through here, do not return. This ensures we tell the
1120
+		// docker engine that the process/container has exited to avoid
1121
+		// a container being dropped on the floor.
1123 1122
 	}
1124 1123
 
1125
-	if err := p.hcsProcess.Close(); err != nil {
1124
+	p.mu.Lock()
1125
+	hcsProcess := p.hcsProcess
1126
+	p.hcsProcess = nil
1127
+	p.mu.Unlock()
1128
+
1129
+	if err := hcsProcess.Close(); err != nil {
1126 1130
 		logger.WithError(err).Warnf("failed to cleanup hcs process resources")
1127 1131
 		exitCode = -1
1128 1132
 		eventErr = fmt.Errorf("hcsProcess.Close() failed %s", err)
1129 1133
 	}
1130 1134
 
1131
-	if p.id == libcontainerdtypes.InitProcessName {
1132
-		exitCode, eventErr = c.reapContainer(ctr, p, exitCode, exitedAt, eventErr, logger)
1133
-	}
1135
+	// Explicit locking is not required as reads from exited are
1136
+	// synchronized using waitCh.
1137
+	p.exited = containerd.NewExitStatus(uint32(exitCode), exitedAt, nil)
1138
+	close(p.waitCh)
1134 1139
 
1135
-	c.eventQ.Append(ctr.id, func() {
1140
+	p.ctr.client.eventQ.Append(p.ctr.id, func() {
1136 1141
 		ei := libcontainerdtypes.EventInfo{
1137
-			ContainerID: ctr.id,
1142
+			ContainerID: p.ctr.id,
1138 1143
 			ProcessID:   p.id,
1139
-			Pid:         uint32(p.pid),
1144
+			Pid:         uint32(hcsProcess.Pid()),
1140 1145
 			ExitCode:    uint32(exitCode),
1141 1146
 			ExitedAt:    exitedAt,
1142 1147
 			Error:       eventErr,
1143 1148
 		}
1144
-		c.logger.WithFields(logrus.Fields{
1145
-			"container":  ctr.id,
1149
+		p.ctr.client.logger.WithFields(logrus.Fields{
1150
+			"container":  p.ctr.id,
1146 1151
 			"event":      libcontainerdtypes.EventExit,
1147 1152
 			"event-info": ei,
1148 1153
 		}).Info("sending event")
1149
-		err := c.backend.ProcessEvent(ctr.id, libcontainerdtypes.EventExit, ei)
1154
+		err := p.ctr.client.backend.ProcessEvent(p.ctr.id, libcontainerdtypes.EventExit, ei)
1150 1155
 		if err != nil {
1151
-			c.logger.WithError(err).WithFields(logrus.Fields{
1152
-				"container":  ctr.id,
1156
+			p.ctr.client.logger.WithError(err).WithFields(logrus.Fields{
1157
+				"container":  p.ctr.id,
1153 1158
 				"event":      libcontainerdtypes.EventExit,
1154 1159
 				"event-info": ei,
1155 1160
 			}).Error("failed to process event")
1156 1161
 		}
1157
-		if p.id != libcontainerdtypes.InitProcessName {
1158
-			ctr.Lock()
1159
-			delete(ctr.execs, p.id)
1160
-			ctr.Unlock()
1161
-		}
1162 1162
 	})
1163
-
1164
-	return exitCode
1165 1163
 }
1166 1164
 
1167
-// reapContainer shuts down the container and releases associated resources. It returns
1168
-// the error to be logged in the eventInfo sent back to the monitor.
1169
-func (c *client) reapContainer(ctr *container, p *process, exitCode int, exitedAt time.Time, eventErr error, logger *logrus.Entry) (int, error) {
1170
-	// Update container status
1171
-	ctr.Lock()
1172
-	ctr.status = containerd.Stopped
1173
-	ctr.exitedAt = exitedAt
1174
-	ctr.exitCode = uint32(exitCode)
1175
-	close(ctr.waitCh)
1176
-
1177
-	if err := c.shutdownContainer(ctr); err != nil {
1178
-		exitCode = -1
1179
-		logger.WithError(err).Warn("failed to shutdown container")
1180
-		thisErr := errors.Wrap(err, "failed to shutdown container")
1181
-		if eventErr != nil {
1182
-			eventErr = errors.Wrap(eventErr, thisErr.Error())
1183
-		} else {
1184
-			eventErr = thisErr
1165
+func (ctr *container) Delete(context.Context) error {
1166
+	ctr.mu.Lock()
1167
+	defer ctr.mu.Unlock()
1168
+
1169
+	if ctr.hcsContainer == nil {
1170
+		return errors.WithStack(errdefs.NotFound(fmt.Errorf("container %q not found", ctr.id)))
1171
+	}
1172
+
1173
+	// Check that there is no task currently running.
1174
+	if ctr.task != nil {
1175
+		select {
1176
+		case <-ctr.task.waitCh:
1177
+		default:
1178
+			return errors.WithStack(errdefs.Conflict(errors.New("container is not stopped")))
1185 1179
 		}
1180
+	}
1181
+
1182
+	var (
1183
+		logger = ctr.client.logger.WithFields(logrus.Fields{
1184
+			"container": ctr.id,
1185
+		})
1186
+		thisErr error
1187
+	)
1188
+
1189
+	if err := ctr.shutdownContainer(); err != nil {
1190
+		logger.WithError(err).Warn("failed to shutdown container")
1191
+		thisErr = errors.Wrap(err, "failed to shutdown container")
1186 1192
 	} else {
1187 1193
 		logger.Debug("completed container shutdown")
1188 1194
 	}
1189
-	ctr.Unlock()
1190 1195
 
1191 1196
 	if err := ctr.hcsContainer.Close(); err != nil {
1192
-		exitCode = -1
1193 1197
 		logger.WithError(err).Error("failed to clean hcs container resources")
1194
-		thisErr := errors.Wrap(err, "failed to terminate container")
1195
-		if eventErr != nil {
1196
-			eventErr = errors.Wrap(eventErr, thisErr.Error())
1197
-		} else {
1198
-			eventErr = thisErr
1199
-		}
1198
+		thisErr = errors.Wrap(err, "failed to terminate container")
1200 1199
 	}
1201
-	return exitCode, eventErr
1200
+
1201
+	ctr.hcsContainer = nil
1202
+	return thisErr
1202 1203
 }
... ...
@@ -38,7 +38,3 @@ func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) io.WriteClo
38 38
 		return nil
39 39
 	})
40 40
 }
41
-
42
-func (p *process) Cleanup() error {
43
-	return nil
44
-}
... ...
@@ -45,22 +45,34 @@ type client struct {
45 45
 	logger   *logrus.Entry
46 46
 	ns       string
47 47
 
48
-	backend         libcontainerdtypes.Backend
49
-	eventQ          queue.Queue
50
-	v2runcoptionsMu sync.Mutex
51
-	// v2runcoptions is used for copying options specified on Create() to Start()
52
-	v2runcoptions map[string]v2runcoptions.Options
48
+	backend libcontainerdtypes.Backend
49
+	eventQ  queue.Queue
50
+}
51
+
52
+type container struct {
53
+	client *client
54
+	c8dCtr containerd.Container
55
+
56
+	v2runcoptions *v2runcoptions.Options
57
+}
58
+
59
+type task struct {
60
+	containerd.Task
61
+	ctr *container
62
+}
63
+
64
+type process struct {
65
+	containerd.Process
53 66
 }
54 67
 
55 68
 // NewClient creates a new libcontainerd client from a containerd client
56 69
 func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b libcontainerdtypes.Backend) (libcontainerdtypes.Client, error) {
57 70
 	c := &client{
58
-		client:        cli,
59
-		stateDir:      stateDir,
60
-		logger:        logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
61
-		ns:            ns,
62
-		backend:       b,
63
-		v2runcoptions: make(map[string]v2runcoptions.Options),
71
+		client:   cli,
72
+		stateDir: stateDir,
73
+		logger:   logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
74
+		ns:       ns,
75
+		backend:  b,
64 76
 	}
65 77
 
66 78
 	go c.processEventStream(ctx, ns)
... ...
@@ -72,58 +84,36 @@ func (c *client) Version(ctx context.Context) (containerd.Version, error) {
72 72
 	return c.client.Version(ctx)
73 73
 }
74 74
 
75
-// Restore loads the containerd container.
76
-// It should not be called concurrently with any other operation for the given ID.
77
-func (c *client) Restore(ctx context.Context, id string, attachStdio libcontainerdtypes.StdioCallback) (alive bool, pid int, p libcontainerdtypes.Process, err error) {
75
+func (c *container) newTask(t containerd.Task) *task {
76
+	return &task{Task: t, ctr: c}
77
+}
78
+
79
+func (c *container) AttachTask(ctx context.Context, attachStdio libcontainerdtypes.StdioCallback) (_ libcontainerdtypes.Task, err error) {
78 80
 	var dio *cio.DirectIO
79 81
 	defer func() {
80 82
 		if err != nil && dio != nil {
81 83
 			dio.Cancel()
82 84
 			dio.Close()
83 85
 		}
84
-		err = wrapError(err)
85 86
 	}()
86 87
 
87
-	ctr, err := c.client.LoadContainer(ctx, id)
88
-	if err != nil {
89
-		return false, -1, nil, errors.WithStack(wrapError(err))
90
-	}
91
-
92 88
 	attachIO := func(fifos *cio.FIFOSet) (cio.IO, error) {
93 89
 		// dio must be assigned to the previously defined dio for the defer above
94 90
 		// to handle cleanup
95
-		dio, err = c.newDirectIO(ctx, fifos)
91
+		dio, err = c.client.newDirectIO(ctx, fifos)
96 92
 		if err != nil {
97 93
 			return nil, err
98 94
 		}
99 95
 		return attachStdio(dio)
100 96
 	}
101
-	t, err := ctr.Task(ctx, attachIO)
102
-	if err != nil && !containerderrors.IsNotFound(err) {
103
-		return false, -1, nil, errors.Wrap(wrapError(err), "error getting containerd task for container")
104
-	}
105
-
106
-	if t != nil {
107
-		s, err := t.Status(ctx)
108
-		if err != nil {
109
-			return false, -1, nil, errors.Wrap(wrapError(err), "error getting task status")
110
-		}
111
-		alive = s.Status != containerd.Stopped
112
-		pid = int(t.Pid())
97
+	t, err := c.c8dCtr.Task(ctx, attachIO)
98
+	if err != nil {
99
+		return nil, errors.Wrap(wrapError(err), "error getting containerd task for container")
113 100
 	}
114
-
115
-	c.logger.WithFields(logrus.Fields{
116
-		"container": id,
117
-		"alive":     alive,
118
-		"pid":       pid,
119
-	}).Debug("restored container")
120
-
121
-	return alive, pid, &restoredProcess{
122
-		p: t,
123
-	}, nil
101
+	return c.newTask(t), nil
124 102
 }
125 103
 
126
-func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) error {
104
+func (c *client) NewContainer(ctx context.Context, id string, ociSpec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) (libcontainerdtypes.Container, error) {
127 105
 	bdir := c.bundleDir(id)
128 106
 	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
129 107
 
... ...
@@ -134,44 +124,43 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, shi
134 134
 	}
135 135
 	opts = append(opts, newOpts...)
136 136
 
137
-	_, err := c.client.NewContainer(ctx, id, opts...)
137
+	ctr, err := c.client.NewContainer(ctx, id, opts...)
138 138
 	if err != nil {
139 139
 		if containerderrors.IsAlreadyExists(err) {
140
-			return errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
140
+			return nil, errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
141 141
 		}
142
-		return wrapError(err)
142
+		return nil, wrapError(err)
143
+	}
144
+
145
+	created := container{
146
+		client: c,
147
+		c8dCtr: ctr,
143 148
 	}
144 149
 	if x, ok := runtimeOptions.(*v2runcoptions.Options); ok {
145
-		c.v2runcoptionsMu.Lock()
146
-		c.v2runcoptions[id] = *x
147
-		c.v2runcoptionsMu.Unlock()
150
+		created.v2runcoptions = x
148 151
 	}
149
-	return nil
152
+	return &created, nil
150 153
 }
151 154
 
152 155
 // Start creates and starts a task for the specified containerd id
153
-func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
154
-	ctr, err := c.getContainer(ctx, id)
155
-	if err != nil {
156
-		return -1, err
157
-	}
156
+func (c *container) Start(ctx context.Context, checkpointDir string, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (libcontainerdtypes.Task, error) {
158 157
 	var (
159 158
 		cp             *types.Descriptor
160 159
 		t              containerd.Task
161 160
 		rio            cio.IO
162
-		stdinCloseSync = make(chan struct{})
161
+		stdinCloseSync = make(chan containerd.Process, 1)
163 162
 	)
164 163
 
165 164
 	if checkpointDir != "" {
166 165
 		// write checkpoint to the content store
167 166
 		tar := archive.Diff(ctx, "", checkpointDir)
168
-		cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
167
+		cp, err := c.client.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
169 168
 		// remove the checkpoint when we're done
170 169
 		defer func() {
171 170
 			if cp != nil {
172
-				err := c.client.ContentStore().Delete(context.Background(), cp.Digest)
171
+				err := c.client.client.ContentStore().Delete(ctx, cp.Digest)
173 172
 				if err != nil {
174
-					c.logger.WithError(err).WithFields(logrus.Fields{
173
+					c.client.logger.WithError(err).WithFields(logrus.Fields{
175 174
 						"ref":    checkpointDir,
176 175
 						"digest": cp.Digest,
177 176
 					}).Warnf("failed to delete temporary checkpoint entry")
... ...
@@ -179,23 +168,27 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
179 179
 			}
180 180
 		}()
181 181
 		if err := tar.Close(); err != nil {
182
-			return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
182
+			return nil, errors.Wrap(err, "failed to close checkpoint tar stream")
183 183
 		}
184 184
 		if err != nil {
185
-			return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
185
+			return nil, errors.Wrapf(err, "failed to upload checkpoint to containerd")
186 186
 		}
187 187
 	}
188 188
 
189
-	spec, err := ctr.Spec(ctx)
189
+	// Optimization: assume the relevant metadata has not changed in the
190
+	// moment since the container was created. Elide redundant RPC requests
191
+	// to refresh the metadata separately for spec and labels.
192
+	md, err := c.c8dCtr.Info(ctx, containerd.WithoutRefreshedMetadata)
190 193
 	if err != nil {
191
-		return -1, errors.Wrap(err, "failed to retrieve spec")
194
+		return nil, errors.Wrap(err, "failed to retrieve metadata")
192 195
 	}
193
-	labels, err := ctr.Labels(ctx)
194
-	if err != nil {
195
-		return -1, errors.Wrap(err, "failed to retrieve labels")
196
+	bundle := md.Labels[DockerContainerBundlePath]
197
+
198
+	var spec specs.Spec
199
+	if err := json.Unmarshal(md.Spec.GetValue(), &spec); err != nil {
200
+		return nil, errors.Wrap(err, "failed to retrieve spec")
196 201
 	}
197
-	bundle := labels[DockerContainerBundlePath]
198
-	uid, gid := getSpecUser(spec)
202
+	uid, gid := getSpecUser(&spec)
199 203
 
200 204
 	taskOpts := []containerd.NewTaskOpts{
201 205
 		func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
... ...
@@ -206,10 +199,8 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
206 206
 
207 207
 	if runtime.GOOS != "windows" {
208 208
 		taskOpts = append(taskOpts, func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
209
-			c.v2runcoptionsMu.Lock()
210
-			opts, ok := c.v2runcoptions[id]
211
-			c.v2runcoptionsMu.Unlock()
212
-			if ok {
209
+			if c.v2runcoptions != nil {
210
+				opts := *c.v2runcoptions
213 211
 				opts.IoUid = uint32(uid)
214 212
 				opts.IoGid = uint32(gid)
215 213
 				info.Options = &opts
... ...
@@ -217,14 +208,14 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
217 217
 			return nil
218 218
 		})
219 219
 	} else {
220
-		taskOpts = append(taskOpts, withLogLevel(c.logger.Level))
220
+		taskOpts = append(taskOpts, withLogLevel(c.client.logger.Level))
221 221
 	}
222 222
 
223
-	t, err = ctr.NewTask(ctx,
223
+	t, err = c.c8dCtr.NewTask(ctx,
224 224
 		func(id string) (cio.IO, error) {
225 225
 			fifos := newFIFOSet(bundle, libcontainerdtypes.InitProcessName, withStdin, spec.Process.Terminal)
226 226
 
227
-			rio, err = c.createIO(fifos, id, libcontainerdtypes.InitProcessName, stdinCloseSync, attachStdio)
227
+			rio, err = c.createIO(fifos, libcontainerdtypes.InitProcessName, stdinCloseSync, attachStdio)
228 228
 			return rio, err
229 229
 		},
230 230
 		taskOpts...,
... ...
@@ -235,21 +226,21 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
235 235
 			rio.Cancel()
236 236
 			rio.Close()
237 237
 		}
238
-		return -1, wrapError(err)
238
+		return nil, errors.Wrap(wrapError(err), "failed to create task for container")
239 239
 	}
240 240
 
241 241
 	// Signal c.createIO that it can call CloseIO
242
-	close(stdinCloseSync)
242
+	stdinCloseSync <- t
243 243
 
244 244
 	if err := t.Start(ctx); err != nil {
245 245
 		if _, err := t.Delete(ctx); err != nil {
246
-			c.logger.WithError(err).WithField("container", id).
246
+			c.client.logger.WithError(err).WithField("container", c.c8dCtr.ID()).
247 247
 				Error("failed to delete task after fail start")
248 248
 		}
249
-		return -1, wrapError(err)
249
+		return nil, wrapError(err)
250 250
 	}
251 251
 
252
-	return int(t.Pid()), nil
252
+	return c.newTask(t), nil
253 253
 }
254 254
 
255 255
 // Exec creates exec process.
... ...
@@ -259,31 +250,21 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
259 259
 // for the container main process, the stdin fifo will be created in Create not
260 260
 // the Start call. stdinCloseSync channel should be closed after Start exec
261 261
 // process.
262
-func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (int, error) {
263
-	ctr, err := c.getContainer(ctx, containerID)
264
-	if err != nil {
265
-		return -1, err
266
-	}
267
-	t, err := ctr.Task(ctx, nil)
268
-	if err != nil {
269
-		if containerderrors.IsNotFound(err) {
270
-			return -1, errors.WithStack(errdefs.InvalidParameter(errors.New("container is not running")))
271
-		}
272
-		return -1, wrapError(err)
273
-	}
274
-
262
+func (t *task) Exec(ctx context.Context, processID string, spec *specs.Process, withStdin bool, attachStdio libcontainerdtypes.StdioCallback) (libcontainerdtypes.Process, error) {
275 263
 	var (
276 264
 		p              containerd.Process
277 265
 		rio            cio.IO
278
-		stdinCloseSync = make(chan struct{})
266
+		stdinCloseSync = make(chan containerd.Process, 1)
279 267
 	)
280 268
 
281
-	labels, err := ctr.Labels(ctx)
269
+	// Optimization: assume the DockerContainerBundlePath label has not been
270
+	// updated since the container metadata was last loaded/refreshed.
271
+	md, err := t.ctr.c8dCtr.Info(ctx, containerd.WithoutRefreshedMetadata)
282 272
 	if err != nil {
283
-		return -1, wrapError(err)
273
+		return nil, wrapError(err)
284 274
 	}
285 275
 
286
-	fifos := newFIFOSet(labels[DockerContainerBundlePath], processID, withStdin, spec.Terminal)
276
+	fifos := newFIFOSet(md.Labels[DockerContainerBundlePath], processID, withStdin, spec.Terminal)
287 277
 
288 278
 	defer func() {
289 279
 		if err != nil {
... ...
@@ -294,22 +275,22 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
294 294
 		}
295 295
 	}()
296 296
 
297
-	p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
298
-		rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio)
297
+	p, err = t.Task.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
298
+		rio, err = t.ctr.createIO(fifos, processID, stdinCloseSync, attachStdio)
299 299
 		return rio, err
300 300
 	})
301 301
 	if err != nil {
302 302
 		close(stdinCloseSync)
303 303
 		if containerderrors.IsAlreadyExists(err) {
304
-			return -1, errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
304
+			return nil, errors.WithStack(errdefs.Conflict(errors.New("id already in use")))
305 305
 		}
306
-		return -1, wrapError(err)
306
+		return nil, wrapError(err)
307 307
 	}
308 308
 
309 309
 	// Signal c.createIO that it can call CloseIO
310 310
 	//
311 311
 	// the stdin of exec process will be created after p.Start in containerd
312
-	defer close(stdinCloseSync)
312
+	defer func() { stdinCloseSync <- p }()
313 313
 
314 314
 	if err = p.Start(ctx); err != nil {
315 315
 		// use new context for cleanup because old one may be cancelled by user, but leave a timeout to make sure
... ...
@@ -318,62 +299,29 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
318 318
 		ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
319 319
 		defer cancel()
320 320
 		p.Delete(ctx)
321
-		return -1, wrapError(err)
322
-	}
323
-	return int(p.Pid()), nil
324
-}
325
-
326
-func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal syscall.Signal) error {
327
-	p, err := c.getProcess(ctx, containerID, processID)
328
-	if err != nil {
329
-		return err
321
+		return nil, wrapError(err)
330 322
 	}
331
-	return wrapError(p.Kill(ctx, signal))
323
+	return process{p}, nil
332 324
 }
333 325
 
334
-func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
335
-	p, err := c.getProcess(ctx, containerID, processID)
336
-	if err != nil {
337
-		return err
338
-	}
339
-
340
-	return p.Resize(ctx, uint32(width), uint32(height))
326
+func (t *task) Kill(ctx context.Context, signal syscall.Signal) error {
327
+	return wrapError(t.Task.Kill(ctx, signal))
341 328
 }
342 329
 
343
-func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
344
-	p, err := c.getProcess(ctx, containerID, processID)
345
-	if err != nil {
346
-		return err
347
-	}
348
-
349
-	return p.CloseIO(ctx, containerd.WithStdinCloser)
330
+func (p process) Kill(ctx context.Context, signal syscall.Signal) error {
331
+	return wrapError(p.Process.Kill(ctx, signal))
350 332
 }
351 333
 
352
-func (c *client) Pause(ctx context.Context, containerID string) error {
353
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
354
-	if err != nil {
355
-		return err
356
-	}
357
-
358
-	return wrapError(p.(containerd.Task).Pause(ctx))
334
+func (t *task) Pause(ctx context.Context) error {
335
+	return wrapError(t.Task.Pause(ctx))
359 336
 }
360 337
 
361
-func (c *client) Resume(ctx context.Context, containerID string) error {
362
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
363
-	if err != nil {
364
-		return err
365
-	}
366
-
367
-	return p.(containerd.Task).Resume(ctx)
338
+func (t *task) Resume(ctx context.Context) error {
339
+	return wrapError(t.Task.Resume(ctx))
368 340
 }
369 341
 
370
-func (c *client) Stats(ctx context.Context, containerID string) (*libcontainerdtypes.Stats, error) {
371
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
372
-	if err != nil {
373
-		return nil, err
374
-	}
375
-
376
-	m, err := p.(containerd.Task).Metrics(ctx)
342
+func (t *task) Stats(ctx context.Context) (*libcontainerdtypes.Stats, error) {
343
+	m, err := t.Metrics(ctx)
377 344
 	if err != nil {
378 345
 		return nil, err
379 346
 	}
... ...
@@ -385,32 +333,8 @@ func (c *client) Stats(ctx context.Context, containerID string) (*libcontainerdt
385 385
 	return libcontainerdtypes.InterfaceToStats(m.Timestamp, v), nil
386 386
 }
387 387
 
388
-func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
389
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
390
-	if err != nil {
391
-		return nil, err
392
-	}
393
-
394
-	pis, err := p.(containerd.Task).Pids(ctx)
395
-	if err != nil {
396
-		return nil, err
397
-	}
398
-
399
-	var pids []uint32
400
-	for _, i := range pis {
401
-		pids = append(pids, i.Pid)
402
-	}
403
-
404
-	return pids, nil
405
-}
406
-
407
-func (c *client) Summary(ctx context.Context, containerID string) ([]libcontainerdtypes.Summary, error) {
408
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
409
-	if err != nil {
410
-		return nil, err
411
-	}
412
-
413
-	pis, err := p.(containerd.Task).Pids(ctx)
388
+func (t *task) Summary(ctx context.Context) ([]libcontainerdtypes.Summary, error) {
389
+	pis, err := t.Pids(ctx)
414 390
 	if err != nil {
415 391
 		return nil, err
416 392
 	}
... ...
@@ -431,54 +355,31 @@ func (c *client) Summary(ctx context.Context, containerID string) ([]libcontaine
431 431
 	return infos, nil
432 432
 }
433 433
 
434
-type restoredProcess struct {
435
-	p containerd.Process
436
-}
437
-
438
-func (p *restoredProcess) Delete(ctx context.Context) (uint32, time.Time, error) {
439
-	if p.p == nil {
440
-		return 255, time.Now(), nil
441
-	}
442
-	status, err := p.p.Delete(ctx)
443
-	if err != nil {
444
-		return 255, time.Now(), nil
445
-	}
446
-	return status.ExitCode(), status.ExitTime(), nil
434
+func (t *task) Delete(ctx context.Context) (*containerd.ExitStatus, error) {
435
+	s, err := t.Task.Delete(ctx)
436
+	return s, wrapError(err)
447 437
 }
448 438
 
449
-func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
450
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
451
-	if err != nil {
452
-		return 255, time.Now(), nil
453
-	}
454
-
455
-	status, err := p.Delete(ctx)
456
-	if err != nil {
457
-		return 255, time.Now(), nil
458
-	}
459
-	return status.ExitCode(), status.ExitTime(), nil
439
+func (p process) Delete(ctx context.Context) (*containerd.ExitStatus, error) {
440
+	s, err := p.Process.Delete(ctx)
441
+	return s, wrapError(err)
460 442
 }
461 443
 
462
-func (c *client) Delete(ctx context.Context, containerID string) error {
463
-	ctr, err := c.getContainer(ctx, containerID)
444
+func (c *container) Delete(ctx context.Context) error {
445
+	// Optimization: assume the DockerContainerBundlePath label has not been
446
+	// updated since the container metadata was last loaded/refreshed.
447
+	md, err := c.c8dCtr.Info(ctx, containerd.WithoutRefreshedMetadata)
464 448
 	if err != nil {
465 449
 		return err
466 450
 	}
467
-	labels, err := ctr.Labels(ctx)
468
-	if err != nil {
469
-		return err
470
-	}
471
-	bundle := labels[DockerContainerBundlePath]
472
-	if err := ctr.Delete(ctx); err != nil {
451
+	bundle := md.Labels[DockerContainerBundlePath]
452
+	if err := c.c8dCtr.Delete(ctx); err != nil {
473 453
 		return wrapError(err)
474 454
 	}
475
-	c.v2runcoptionsMu.Lock()
476
-	delete(c.v2runcoptions, containerID)
477
-	c.v2runcoptionsMu.Unlock()
478 455
 	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
479 456
 		if err := os.RemoveAll(bundle); err != nil {
480
-			c.logger.WithError(err).WithFields(logrus.Fields{
481
-				"container": containerID,
457
+			c.client.logger.WithContext(ctx).WithError(err).WithFields(logrus.Fields{
458
+				"container": c.c8dCtr.ID(),
482 459
 				"bundle":    bundle,
483 460
 			}).Error("failed to remove state dir")
484 461
 		}
... ...
@@ -486,28 +387,25 @@ func (c *client) Delete(ctx context.Context, containerID string) error {
486 486
 	return nil
487 487
 }
488 488
 
489
-func (c *client) Status(ctx context.Context, containerID string) (containerd.ProcessStatus, error) {
490
-	t, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
491
-	if err != nil {
492
-		return containerd.Unknown, err
493
-	}
494
-	s, err := t.Status(ctx)
495
-	if err != nil {
496
-		return containerd.Unknown, wrapError(err)
497
-	}
498
-	return s.Status, nil
489
+func (t *task) ForceDelete(ctx context.Context) error {
490
+	_, err := t.Task.Delete(ctx, containerd.WithProcessKill)
491
+	return wrapError(err)
492
+}
493
+
494
+func (t *task) Status(ctx context.Context) (containerd.Status, error) {
495
+	s, err := t.Task.Status(ctx)
496
+	return s, wrapError(err)
499 497
 }
500 498
 
501
-func (c *client) getCheckpointOptions(id string, exit bool) containerd.CheckpointTaskOpts {
499
+func (p process) Status(ctx context.Context) (containerd.Status, error) {
500
+	s, err := p.Process.Status(ctx)
501
+	return s, wrapError(err)
502
+}
503
+
504
+func (c *container) getCheckpointOptions(exit bool) containerd.CheckpointTaskOpts {
502 505
 	return func(r *containerd.CheckpointTaskInfo) error {
503
-		if r.Options == nil {
504
-			c.v2runcoptionsMu.Lock()
505
-			_, ok := c.v2runcoptions[id]
506
-			c.v2runcoptionsMu.Unlock()
507
-			if ok {
508
-				r.Options = &v2runcoptions.CheckpointOptions{Exit: exit}
509
-			}
510
-			return nil
506
+		if r.Options == nil && c.v2runcoptions != nil {
507
+			r.Options = &v2runcoptions.CheckpointOptions{}
511 508
 		}
512 509
 
513 510
 		switch opts := r.Options.(type) {
... ...
@@ -519,27 +417,21 @@ func (c *client) getCheckpointOptions(id string, exit bool) containerd.Checkpoin
519 519
 	}
520 520
 }
521 521
 
522
-func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
523
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
524
-	if err != nil {
525
-		return err
526
-	}
527
-
528
-	opts := []containerd.CheckpointTaskOpts{c.getCheckpointOptions(containerID, exit)}
529
-	img, err := p.(containerd.Task).Checkpoint(ctx, opts...)
522
+func (t *task) CreateCheckpoint(ctx context.Context, checkpointDir string, exit bool) error {
523
+	img, err := t.Task.Checkpoint(ctx, t.ctr.getCheckpointOptions(exit))
530 524
 	if err != nil {
531 525
 		return wrapError(err)
532 526
 	}
533 527
 	// Whatever happens, delete the checkpoint from containerd
534 528
 	defer func() {
535
-		err := c.client.ImageService().Delete(context.Background(), img.Name())
529
+		err := t.ctr.client.client.ImageService().Delete(ctx, img.Name())
536 530
 		if err != nil {
537
-			c.logger.WithError(err).WithField("digest", img.Target().Digest).
531
+			t.ctr.client.logger.WithError(err).WithField("digest", img.Target().Digest).
538 532
 				Warnf("failed to delete checkpoint image")
539 533
 		}
540 534
 	}()
541 535
 
542
-	b, err := content.ReadBlob(ctx, c.client.ContentStore(), img.Target())
536
+	b, err := content.ReadBlob(ctx, t.ctr.client.client.ContentStore(), img.Target())
543 537
 	if err != nil {
544 538
 		return errdefs.System(errors.Wrapf(err, "failed to retrieve checkpoint data"))
545 539
 	}
... ...
@@ -560,7 +452,7 @@ func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDi
560 560
 		return errdefs.System(errors.Wrapf(err, "invalid checkpoint"))
561 561
 	}
562 562
 
563
-	rat, err := c.client.ContentStore().ReaderAt(ctx, *cpDesc)
563
+	rat, err := t.ctr.client.client.ContentStore().ReaderAt(ctx, *cpDesc)
564 564
 	if err != nil {
565 565
 		return errdefs.System(errors.Wrapf(err, "failed to get checkpoint reader"))
566 566
 	}
... ...
@@ -573,7 +465,8 @@ func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDi
573 573
 	return err
574 574
 }
575 575
 
576
-func (c *client) getContainer(ctx context.Context, id string) (containerd.Container, error) {
576
+// LoadContainer loads the containerd container.
577
+func (c *client) LoadContainer(ctx context.Context, id string) (libcontainerdtypes.Container, error) {
577 578
 	ctr, err := c.client.LoadContainer(ctx, id)
578 579
 	if err != nil {
579 580
 		if containerderrors.IsNotFound(err) {
... ...
@@ -581,42 +474,25 @@ func (c *client) getContainer(ctx context.Context, id string) (containerd.Contai
581 581
 		}
582 582
 		return nil, wrapError(err)
583 583
 	}
584
-	return ctr, nil
584
+	return &container{client: c, c8dCtr: ctr}, nil
585 585
 }
586 586
 
587
-func (c *client) getProcess(ctx context.Context, containerID, processID string) (containerd.Process, error) {
588
-	ctr, err := c.getContainer(ctx, containerID)
587
+func (c *container) Task(ctx context.Context) (libcontainerdtypes.Task, error) {
588
+	t, err := c.c8dCtr.Task(ctx, nil)
589 589
 	if err != nil {
590
-		return nil, err
591
-	}
592
-	t, err := ctr.Task(ctx, nil)
593
-	if err != nil {
594
-		if containerderrors.IsNotFound(err) {
595
-			return nil, errors.WithStack(errdefs.NotFound(errors.New("container is not running")))
596
-		}
597 590
 		return nil, wrapError(err)
598 591
 	}
599
-	if processID == libcontainerdtypes.InitProcessName {
600
-		return t, nil
601
-	}
602
-	p, err := t.LoadProcess(ctx, processID, nil)
603
-	if err != nil {
604
-		if containerderrors.IsNotFound(err) {
605
-			return nil, errors.WithStack(errdefs.NotFound(errors.New("no such exec")))
606
-		}
607
-		return nil, wrapError(err)
608
-	}
609
-	return p, nil
592
+	return c.newTask(t), nil
610 593
 }
611 594
 
612 595
 // createIO creates the io to be used by a process
613 596
 // This needs to get a pointer to interface as upon closure the process may not have yet been registered
614
-func (c *client) createIO(fifos *cio.FIFOSet, containerID, processID string, stdinCloseSync chan struct{}, attachStdio libcontainerdtypes.StdioCallback) (cio.IO, error) {
597
+func (c *container) createIO(fifos *cio.FIFOSet, processID string, stdinCloseSync chan containerd.Process, attachStdio libcontainerdtypes.StdioCallback) (cio.IO, error) {
615 598
 	var (
616 599
 		io  *cio.DirectIO
617 600
 		err error
618 601
 	)
619
-	io, err = c.newDirectIO(context.Background(), fifos)
602
+	io, err = c.client.newDirectIO(context.Background(), fifos)
620 603
 	if err != nil {
621 604
 		return nil, err
622 605
 	}
... ...
@@ -633,13 +509,13 @@ func (c *client) createIO(fifos *cio.FIFOSet, containerID, processID string, std
633 633
 				// Do the rest in a new routine to avoid a deadlock if the
634 634
 				// Exec/Start call failed.
635 635
 				go func() {
636
-					<-stdinCloseSync
637
-					p, err := c.getProcess(context.Background(), containerID, processID)
638
-					if err == nil {
639
-						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
640
-						if err != nil && strings.Contains(err.Error(), "transport is closing") {
641
-							err = nil
642
-						}
636
+					p, ok := <-stdinCloseSync
637
+					if !ok {
638
+						return
639
+					}
640
+					err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
641
+					if err != nil && strings.Contains(err.Error(), "transport is closing") {
642
+						err = nil
643 643
 					}
644 644
 				}()
645 645
 			})
... ...
@@ -659,51 +535,12 @@ func (c *client) processEvent(ctx context.Context, et libcontainerdtypes.EventTy
659 659
 	c.eventQ.Append(ei.ContainerID, func() {
660 660
 		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
661 661
 		if err != nil {
662
-			c.logger.WithError(err).WithFields(logrus.Fields{
662
+			c.logger.WithContext(ctx).WithError(err).WithFields(logrus.Fields{
663 663
 				"container":  ei.ContainerID,
664 664
 				"event":      et,
665 665
 				"event-info": ei,
666 666
 			}).Error("failed to process event")
667 667
 		}
668
-
669
-		if et == libcontainerdtypes.EventExit && ei.ProcessID != ei.ContainerID {
670
-			p, err := c.getProcess(ctx, ei.ContainerID, ei.ProcessID)
671
-			if err != nil {
672
-
673
-				c.logger.WithError(errors.New("no such process")).
674
-					WithFields(logrus.Fields{
675
-						"error":     err,
676
-						"container": ei.ContainerID,
677
-						"process":   ei.ProcessID,
678
-					}).Error("exit event")
679
-				return
680
-			}
681
-
682
-			ctr, err := c.getContainer(ctx, ei.ContainerID)
683
-			if err != nil {
684
-				c.logger.WithFields(logrus.Fields{
685
-					"container": ei.ContainerID,
686
-					"error":     err,
687
-				}).Error("failed to find container")
688
-			} else {
689
-				labels, err := ctr.Labels(ctx)
690
-				if err != nil {
691
-					c.logger.WithFields(logrus.Fields{
692
-						"container": ei.ContainerID,
693
-						"error":     err,
694
-					}).Error("failed to get container labels")
695
-					return
696
-				}
697
-				newFIFOSet(labels[DockerContainerBundlePath], ei.ProcessID, true, false).Close()
698
-			}
699
-			_, err = p.Delete(context.Background())
700
-			if err != nil {
701
-				c.logger.WithError(err).WithFields(logrus.Fields{
702
-					"container": ei.ContainerID,
703
-					"process":   ei.ProcessID,
704
-				}).Warn("failed to delete process")
705
-			}
706
-		}
707 668
 	})
708 669
 }
709 670
 
... ...
@@ -20,15 +20,10 @@ func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) {
20 20
 	return &libcontainerdtypes.Summary{}, nil
21 21
 }
22 22
 
23
-func (c *client) UpdateResources(ctx context.Context, containerID string, resources *libcontainerdtypes.Resources) error {
24
-	p, err := c.getProcess(ctx, containerID, libcontainerdtypes.InitProcessName)
25
-	if err != nil {
26
-		return err
27
-	}
28
-
23
+func (t *task) UpdateResources(ctx context.Context, resources *libcontainerdtypes.Resources) error {
29 24
 	// go doesn't like the alias in 1.8, this means this needs to be
30 25
 	// platform specific
31
-	return p.(containerd.Task).Update(ctx, containerd.WithResources((*specs.LinuxResources)(resources)))
26
+	return t.Update(ctx, containerd.WithResources((*specs.LinuxResources)(resources)))
32 27
 }
33 28
 
34 29
 func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
... ...
@@ -87,7 +87,7 @@ func (c *client) newDirectIO(ctx context.Context, fifos *cio.FIFOSet) (*cio.Dire
87 87
 	return cio.NewDirectIOFromFIFOSet(ctx, pipes.stdin, pipes.stdout, pipes.stderr, fifos), nil
88 88
 }
89 89
 
90
-func (c *client) UpdateResources(ctx context.Context, containerID string, resources *libcontainerdtypes.Resources) error {
90
+func (t *task) UpdateResources(ctx context.Context, resources *libcontainerdtypes.Resources) error {
91 91
 	// TODO: (containerd): Not implemented, but don't error.
92 92
 	return nil
93 93
 }
... ...
@@ -43,32 +43,58 @@ type Backend interface {
43 43
 
44 44
 // Process of a container
45 45
 type Process interface {
46
-	Delete(context.Context) (uint32, time.Time, error)
46
+	// Pid is the system specific process id
47
+	Pid() uint32
48
+	// Kill sends the provided signal to the process
49
+	Kill(ctx context.Context, signal syscall.Signal) error
50
+	// Resize changes the width and height of the process's terminal
51
+	Resize(ctx context.Context, width, height uint32) error
52
+	// Delete removes the process and any resources allocated returning the exit status
53
+	Delete(context.Context) (*containerd.ExitStatus, error)
47 54
 }
48 55
 
49 56
 // Client provides access to containerd features.
50 57
 type Client interface {
51 58
 	Version(ctx context.Context) (containerd.Version, error)
59
+	// LoadContainer loads the metadata for a container from containerd.
60
+	LoadContainer(ctx context.Context, containerID string) (Container, error)
61
+	// NewContainer creates a new containerd container.
62
+	NewContainer(ctx context.Context, containerID string, spec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) (Container, error)
63
+}
52 64
 
53
-	Restore(ctx context.Context, containerID string, attachStdio StdioCallback) (alive bool, pid int, p Process, err error)
54
-
55
-	Create(ctx context.Context, containerID string, spec *specs.Spec, shim string, runtimeOptions interface{}, opts ...containerd.NewContainerOpts) error
56
-	Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio StdioCallback) (pid int, err error)
57
-	SignalProcess(ctx context.Context, containerID, processID string, signal syscall.Signal) error
58
-	Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error)
59
-	ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error
60
-	CloseStdin(ctx context.Context, containerID, processID string) error
61
-	Pause(ctx context.Context, containerID string) error
62
-	Resume(ctx context.Context, containerID string) error
63
-	Stats(ctx context.Context, containerID string) (*Stats, error)
64
-	ListPids(ctx context.Context, containerID string) ([]uint32, error)
65
-	Summary(ctx context.Context, containerID string) ([]Summary, error)
66
-	DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error)
67
-	Delete(ctx context.Context, containerID string) error
68
-	Status(ctx context.Context, containerID string) (containerd.ProcessStatus, error)
65
+// Container provides access to a containerd container.
66
+type Container interface {
67
+	Start(ctx context.Context, checkpointDir string, withStdin bool, attachStdio StdioCallback) (Task, error)
68
+	Task(ctx context.Context) (Task, error)
69
+	// AttachTask returns the current task for the container and reattaches
70
+	// to the IO for the running task. If no task exists for the container
71
+	// a NotFound error is returned.
72
+	//
73
+	// Clients must make sure that only one reader is attached to the task.
74
+	AttachTask(ctx context.Context, attachStdio StdioCallback) (Task, error)
75
+	// Delete removes the container and associated resources
76
+	Delete(context.Context) error
77
+}
69 78
 
70
-	UpdateResources(ctx context.Context, containerID string, resources *Resources) error
71
-	CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error
79
+// Task provides access to a running containerd container.
80
+type Task interface {
81
+	Process
82
+	// Pause suspends the execution of the task
83
+	Pause(context.Context) error
84
+	// Resume the execution of the task
85
+	Resume(context.Context) error
86
+	Stats(ctx context.Context) (*Stats, error)
87
+	// Pids returns a list of system specific process ids inside the task
88
+	Pids(context.Context) ([]containerd.ProcessInfo, error)
89
+	Summary(ctx context.Context) ([]Summary, error)
90
+	// ForceDelete forcefully kills the task's processes and deletes the task
91
+	ForceDelete(context.Context) error
92
+	// Status returns the executing status of the task
93
+	Status(ctx context.Context) (containerd.Status, error)
94
+	// Exec creates and starts a new process inside the task
95
+	Exec(ctx context.Context, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (Process, error)
96
+	UpdateResources(ctx context.Context, resources *Resources) error
97
+	CreateCheckpoint(ctx context.Context, checkpointDir string, exit bool) error
72 98
 }
73 99
 
74 100
 // StdioCallback is called to connect a container or process stdio.
... ...
@@ -2,6 +2,7 @@ package containerd // import "github.com/docker/docker/plugin/executor/container
2 2
 
3 3
 import (
4 4
 	"context"
5
+	"fmt"
5 6
 	"io"
6 7
 	"sync"
7 8
 	"syscall"
... ...
@@ -28,6 +29,7 @@ func New(ctx context.Context, rootDir string, cli *containerd.Client, ns string,
28 28
 		rootDir:     rootDir,
29 29
 		exitHandler: exitHandler,
30 30
 		runtime:     runtime,
31
+		plugins:     make(map[string]*c8dPlugin),
31 32
 	}
32 33
 
33 34
 	client, err := libcontainerd.NewClient(ctx, cli, rootDir, ns, e)
... ...
@@ -44,41 +46,62 @@ type Executor struct {
44 44
 	client      libcontainerdtypes.Client
45 45
 	exitHandler ExitHandler
46 46
 	runtime     types.Runtime
47
+
48
+	mu      sync.Mutex // Guards plugins map
49
+	plugins map[string]*c8dPlugin
50
+}
51
+
52
+type c8dPlugin struct {
53
+	log *logrus.Entry
54
+	ctr libcontainerdtypes.Container
55
+	tsk libcontainerdtypes.Task
47 56
 }
48 57
 
49 58
 // deleteTaskAndContainer deletes plugin task and then plugin container from containerd
50
-func deleteTaskAndContainer(ctx context.Context, cli libcontainerdtypes.Client, id string, p libcontainerdtypes.Process) {
51
-	if p != nil {
52
-		if _, _, err := p.Delete(ctx); err != nil && !errdefs.IsNotFound(err) {
53
-			logrus.WithError(err).WithField("id", id).Error("failed to delete plugin task from containerd")
54
-		}
55
-	} else {
56
-		if _, _, err := cli.DeleteTask(ctx, id); err != nil && !errdefs.IsNotFound(err) {
57
-			logrus.WithError(err).WithField("id", id).Error("failed to delete plugin task from containerd")
59
+func (p c8dPlugin) deleteTaskAndContainer(ctx context.Context) {
60
+	if p.tsk != nil {
61
+		if _, err := p.tsk.Delete(ctx); err != nil && !errdefs.IsNotFound(err) {
62
+			p.log.WithError(err).Error("failed to delete plugin task from containerd")
58 63
 		}
59 64
 	}
60
-
61
-	if err := cli.Delete(ctx, id); err != nil && !errdefs.IsNotFound(err) {
62
-		logrus.WithError(err).WithField("id", id).Error("failed to delete plugin container from containerd")
65
+	if p.ctr != nil {
66
+		if err := p.ctr.Delete(ctx); err != nil && !errdefs.IsNotFound(err) {
67
+			p.log.WithError(err).Error("failed to delete plugin container from containerd")
68
+		}
63 69
 	}
64 70
 }
65 71
 
66 72
 // Create creates a new container
67 73
 func (e *Executor) Create(id string, spec specs.Spec, stdout, stderr io.WriteCloser) error {
68 74
 	ctx := context.Background()
69
-	err := e.client.Create(ctx, id, &spec, e.runtime.Shim.Binary, e.runtime.Shim.Opts)
75
+	log := logrus.WithField("plugin", id)
76
+	ctr, err := e.client.NewContainer(ctx, id, &spec, e.runtime.Shim.Binary, e.runtime.Shim.Opts)
70 77
 	if err != nil {
71
-		status, err2 := e.client.Status(ctx, id)
78
+		ctr2, err2 := e.client.LoadContainer(ctx, id)
72 79
 		if err2 != nil {
73 80
 			if !errdefs.IsNotFound(err2) {
74
-				logrus.WithError(err2).WithField("id", id).Warn("Received an error while attempting to read plugin status")
81
+				log.WithError(err2).Warn("Received an error while attempting to load containerd container for plugin")
75 82
 			}
76 83
 		} else {
84
+			status := containerd.Unknown
85
+			t, err2 := ctr2.Task(ctx)
86
+			if err2 != nil {
87
+				if !errdefs.IsNotFound(err2) {
88
+					log.WithError(err2).Warn("Received an error while attempting to load containerd task for plugin")
89
+				}
90
+			} else {
91
+				s, err2 := t.Status(ctx)
92
+				if err2 != nil {
93
+					log.WithError(err2).Warn("Received an error while attempting to read plugin status")
94
+				} else {
95
+					status = s.Status
96
+				}
97
+			}
77 98
 			if status != containerd.Running && status != containerd.Unknown {
78
-				if err2 := e.client.Delete(ctx, id); err2 != nil && !errdefs.IsNotFound(err2) {
79
-					logrus.WithError(err2).WithField("plugin", id).Error("Error cleaning up containerd container")
99
+				if err2 := ctr2.Delete(ctx); err2 != nil && !errdefs.IsNotFound(err2) {
100
+					log.WithError(err2).Error("Error cleaning up containerd container")
80 101
 				}
81
-				err = e.client.Create(ctx, id, &spec, e.runtime.Shim.Binary, e.runtime.Shim.Opts)
102
+				ctr, err = e.client.NewContainer(ctx, id, &spec, e.runtime.Shim.Binary, e.runtime.Shim.Opts)
82 103
 			}
83 104
 		}
84 105
 
... ...
@@ -87,34 +110,78 @@ func (e *Executor) Create(id string, spec specs.Spec, stdout, stderr io.WriteClo
87 87
 		}
88 88
 	}
89 89
 
90
-	_, err = e.client.Start(ctx, id, "", false, attachStreamsFunc(stdout, stderr))
90
+	p := c8dPlugin{log: log, ctr: ctr}
91
+	p.tsk, err = ctr.Start(ctx, "", false, attachStreamsFunc(stdout, stderr))
91 92
 	if err != nil {
92
-		deleteTaskAndContainer(ctx, e.client, id, nil)
93
+		p.deleteTaskAndContainer(ctx)
94
+		return err
93 95
 	}
94
-	return err
96
+	e.mu.Lock()
97
+	defer e.mu.Unlock()
98
+	e.plugins[id] = &p
99
+	return nil
95 100
 }
96 101
 
97 102
 // Restore restores a container
 //
 // In the updated ("+") code it loads the containerd container for id and
 // attaches to its task, passing attachStreamsFunc(stdout, stderr) to
 // AttachTask. It returns (true, nil) only when the task can be attached, its
 // status can be read, and that status is not containerd.Stopped; in that case
 // the plugin is recorded in e.plugins while holding e.mu.
 //
 // It returns (false, nil) when the container or task is reported as not
 // found, or when the task is stopped. Apart from the container-not-found
 // case, p.deleteTaskAndContainer is called before returning. Any other error
 // is returned as (false, err).
 //
 // NOTE(review): only p.log and p.tsk are set on p in this function; unlike
 // Create, the loaded ctr is never stored in p. Whether deleteTaskAndContainer
 // can still remove the container here depends on its implementation, which is
 // not shown in this view -- confirm.
98 103
 func (e *Executor) Restore(id string, stdout, stderr io.WriteCloser) (bool, error) {
99
-	alive, _, p, err := e.client.Restore(context.Background(), id, attachStreamsFunc(stdout, stderr))
100
-	if err != nil && !errdefs.IsNotFound(err) {
104
+	ctx := context.Background()
105
+	p := c8dPlugin{log: logrus.WithField("plugin", id)}
106
+	ctr, err := e.client.LoadContainer(ctx, id)
107
+	if err != nil {
108
+		if errdefs.IsNotFound(err) {
109
+			return false, nil
110
+		}
101 111
 		return false, err
102 112
 	}
103
-	if !alive {
104
-		deleteTaskAndContainer(context.Background(), e.client, id, p)
113
+	p.tsk, err = ctr.AttachTask(ctx, attachStreamsFunc(stdout, stderr))
114
+	if err != nil {
115
+		if errdefs.IsNotFound(err) {
116
+			p.deleteTaskAndContainer(ctx)
117
+			return false, nil
118
+		}
119
+		return false, err
120
+	}
121
+	s, err := p.tsk.Status(ctx)
122
+	if err != nil {
123
+		if errdefs.IsNotFound(err) {
124
+			// Task vanished after attaching?
125
+			p.tsk = nil
126
+			p.deleteTaskAndContainer(ctx)
127
+			return false, nil
128
+		}
129
+		return false, err
130
+	}
131
+	if s.Status == containerd.Stopped {
132
+		p.deleteTaskAndContainer(ctx)
133
+		return false, nil
105 134
 	}
106
-	return alive, nil
135
+	e.mu.Lock()
136
+	defer e.mu.Unlock()
137
+	e.plugins[id] = &p
138
+	return true, nil
107 139
 }
108 140
 
109 141
 // IsRunning reports whether the container with the given id is running.
 //
 // In the updated ("+") code the plugin is looked up in e.plugins while
 // holding e.mu; the lock is released before the task is queried. An id with
 // no entry yields false and an errdefs.NotFound error. Otherwise the result
 // is whether the task's reported status equals containerd.Running, returned
 // together with any error from the Status call.
109 141
 func (e *Executor) IsRunning(id string) (bool, error) {
111
-	status, err := e.client.Status(context.Background(), id)
112
-	return status == containerd.Running, err
143
+	e.mu.Lock()
144
+	p := e.plugins[id]
145
+	e.mu.Unlock()
146
+	if p == nil {
147
+		return false, errdefs.NotFound(fmt.Errorf("unknown plugin %q", id))
148
+	}
149
+	status, err := p.tsk.Status(context.Background())
150
+	return status.Status == containerd.Running, err
113 151
 }
114 152
 
115 153
 // Signal sends the specified signal to the container
 //
 // In the updated ("+") code the plugin is looked up in e.plugins while
 // holding e.mu; the lock is released before the signal is sent. An id with no
 // entry yields an errdefs.NotFound error. Otherwise the signal is passed to
 // the plugin task's Kill method and its error, if any, is returned.
115 153
 func (e *Executor) Signal(id string, signal syscall.Signal) error {
117
-	return e.client.SignalProcess(context.Background(), id, libcontainerdtypes.InitProcessName, signal)
155
+	e.mu.Lock()
156
+	p := e.plugins[id]
157
+	e.mu.Unlock()
158
+	if p == nil {
159
+		return errdefs.NotFound(fmt.Errorf("unknown plugin %q", id))
160
+	}
161
+	return p.tsk.Kill(context.Background(), signal)
118 162
 }
119 163
 
120 164
 // ProcessEvent handles events from containerd
... ...
@@ -122,7 +189,14 @@ func (e *Executor) Signal(id string, signal syscall.Signal) error {
122 122
 func (e *Executor) ProcessEvent(id string, et libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
123 123
 	switch et {
124 124
 	case libcontainerdtypes.EventExit:
125
-		deleteTaskAndContainer(context.Background(), e.client, id, nil)
125
+		e.mu.Lock()
126
+		p := e.plugins[id]
127
+		e.mu.Unlock()
128
+		if p == nil {
129
+			logrus.WithField("id", id).Warn("Received exit event for an unknown plugin")
130
+		} else {
131
+			p.deleteTaskAndContainer(context.Background())
132
+		}
126 133
 		return e.exitHandler.HandleExitEvent(ei.ContainerID)
127 134
 	}
128 135
 	return nil