Browse code

Add ability to pause/unpause containers via cgroups freeze

This patch adds pause/unpause to the command line, the API, and the exec
drivers for use on containers. It is implemented using the cgroups freezer
in libcontainer and the lxc-freeze/lxc-unfreeze commands in the LXC driver.

Co-Authored-By: Eric Windisch <ewindisch@docker.com>
Co-Authored-By: Chris Alfonso <calfonso@redhat.com>
Docker-DCO-1.1-Signed-off-by: Ian Main <imain@redhat.com> (github: imain)

Ian Main authored on 2014/05/22 06:06:18
Showing 10 changed files
... ...
@@ -65,6 +65,7 @@ func (cli *DockerCli) CmdHelp(args ...string) error {
65 65
 		{"login", "Register or Login to the docker registry server"},
66 66
 		{"logs", "Fetch the logs of a container"},
67 67
 		{"port", "Lookup the public-facing port which is NAT-ed to PRIVATE_PORT"},
68
+		{"pause", "Pause all processes within a container"},
68 69
 		{"ps", "List containers"},
69 70
 		{"pull", "Pull an image or a repository from the docker registry server"},
70 71
 		{"push", "Push an image or a repository to the docker registry server"},
... ...
@@ -78,6 +79,7 @@ func (cli *DockerCli) CmdHelp(args ...string) error {
78 78
 		{"stop", "Stop a running container"},
79 79
 		{"tag", "Tag an image into a repository"},
80 80
 		{"top", "Lookup the running processes of a container"},
81
+		{"unpause", "Unpause a paused container"},
81 82
 		{"version", "Show the docker version information"},
82 83
 		{"wait", "Block until a container stops, then print its exit code"},
83 84
 	} {
... ...
@@ -648,6 +650,52 @@ func (cli *DockerCli) CmdStart(args ...string) error {
648 648
 	return nil
649 649
 }
650 650
 
651
+func (cli *DockerCli) CmdUnpause(args ...string) error {
652
+	cmd := cli.Subcmd("unpause", "CONTAINER", "Unpause all processes within a container")
653
+	if err := cmd.Parse(args); err != nil {
654
+		return nil
655
+	}
656
+
657
+	if cmd.NArg() != 1 {
658
+		cmd.Usage()
659
+		return nil
660
+	}
661
+
662
+	var encounteredError error
663
+	for _, name := range cmd.Args() {
664
+		if _, _, err := readBody(cli.call("POST", fmt.Sprintf("/containers/%s/unpause", name), nil, false)); err != nil {
665
+			fmt.Fprintf(cli.err, "%s\n", err)
666
+			encounteredError = fmt.Errorf("Error: failed to unpause container named %s", name)
667
+		} else {
668
+			fmt.Fprintf(cli.out, "%s\n", name)
669
+		}
670
+	}
671
+	return encounteredError
672
+}
673
+
674
+func (cli *DockerCli) CmdPause(args ...string) error {
675
+	cmd := cli.Subcmd("pause", "CONTAINER", "Pause all processes within a container")
676
+	if err := cmd.Parse(args); err != nil {
677
+		return nil
678
+	}
679
+
680
+	if cmd.NArg() != 1 {
681
+		cmd.Usage()
682
+		return nil
683
+	}
684
+
685
+	var encounteredError error
686
+	for _, name := range cmd.Args() {
687
+		if _, _, err := readBody(cli.call("POST", fmt.Sprintf("/containers/%s/pause", name), nil, false)); err != nil {
688
+			fmt.Fprintf(cli.err, "%s\n", err)
689
+			encounteredError = fmt.Errorf("Error: failed to pause container named %s", name)
690
+		} else {
691
+			fmt.Fprintf(cli.out, "%s\n", name)
692
+		}
693
+	}
694
+	return encounteredError
695
+}
696
+
651 697
 func (cli *DockerCli) CmdInspect(args ...string) error {
652 698
 	cmd := cli.Subcmd("inspect", "CONTAINER|IMAGE [CONTAINER|IMAGE...]", "Return low-level information on a container/image")
653 699
 	tmplStr := cmd.String([]string{"f", "#format", "-format"}, "", "Format the output using the given go template.")
... ...
@@ -165,6 +165,36 @@ func postContainersKill(eng *engine.Engine, version version.Version, w http.Resp
165 165
 	return nil
166 166
 }
167 167
 
168
+func postContainersPause(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
169
+	if vars == nil {
170
+		return fmt.Errorf("Missing parameter")
171
+	}
172
+	if err := parseForm(r); err != nil {
173
+		return err
174
+	}
175
+	job := eng.Job("pause", vars["name"])
176
+	if err := job.Run(); err != nil {
177
+		return err
178
+	}
179
+	w.WriteHeader(http.StatusNoContent)
180
+	return nil
181
+}
182
+
183
+func postContainersUnpause(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
184
+	if vars == nil {
185
+		return fmt.Errorf("Missing parameter")
186
+	}
187
+	if err := parseForm(r); err != nil {
188
+		return err
189
+	}
190
+	job := eng.Job("unpause", vars["name"])
191
+	if err := job.Run(); err != nil {
192
+		return err
193
+	}
194
+	w.WriteHeader(http.StatusNoContent)
195
+	return nil
196
+}
197
+
168 198
 func getContainersExport(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
169 199
 	if vars == nil {
170 200
 		return fmt.Errorf("Missing parameter")
... ...
@@ -1087,6 +1117,8 @@ func createRouter(eng *engine.Engine, logging, enableCors bool, dockerVersion st
1087 1087
 			"/images/{name:.*}/tag":         postImagesTag,
1088 1088
 			"/containers/create":            postContainersCreate,
1089 1089
 			"/containers/{name:.*}/kill":    postContainersKill,
1090
+			"/containers/{name:.*}/pause":   postContainersPause,
1091
+			"/containers/{name:.*}/unpause": postContainersUnpause,
1090 1092
 			"/containers/{name:.*}/restart": postContainersRestart,
1091 1093
 			"/containers/{name:.*}/start":   postContainersStart,
1092 1094
 			"/containers/{name:.*}/stop":    postContainersStop,
... ...
@@ -544,6 +544,26 @@ func (container *Container) KillSig(sig int) error {
544 544
 	return container.daemon.Kill(container, sig)
545 545
 }
546 546
 
547
+func (container *Container) Pause() error {
548
+	if container.State.IsPaused() {
549
+		return fmt.Errorf("Container %s is already paused", container.ID)
550
+	}
551
+	if !container.State.IsRunning() {
552
+		return fmt.Errorf("Container %s is not running", container.ID)
553
+	}
554
+	return container.daemon.Pause(container)
555
+}
556
+
557
+func (container *Container) Unpause() error {
558
+	if !container.State.IsPaused() {
559
+		return fmt.Errorf("Container %s is not paused", container.ID)
560
+	}
561
+	if !container.State.IsRunning() {
562
+		return fmt.Errorf("Container %s is not running", container.ID)
563
+	}
564
+	return container.daemon.Unpause(container)
565
+}
566
+
547 567
 func (container *Container) Kill() error {
548 568
 	if !container.State.IsRunning() {
549 569
 		return nil
... ...
@@ -574,6 +594,11 @@ func (container *Container) Stop(seconds int) error {
574 574
 		return nil
575 575
 	}
576 576
 
577
+	// We could unpause the container for them rather than returning this error
578
+	if container.State.IsPaused() {
579
+		return fmt.Errorf("Container %s is paused. Unpause the container before stopping", container.ID)
580
+	}
581
+
577 582
 	// 1. Send a SIGTERM
578 583
 	if err := container.KillSig(15); err != nil {
579 584
 		log.Print("Failed to send SIGTERM to the process, force killing")
... ...
@@ -1014,6 +1014,24 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e
1014 1014
 	return daemon.execDriver.Run(c.command, pipes, startCallback)
1015 1015
 }
1016 1016
 
1017
+func (daemon *Daemon) Pause(c *Container) error {
1018
+	err := daemon.execDriver.Pause(c.command)
1019
+	if err != nil {
1020
+		return err
1021
+	}
1022
+	c.State.SetPaused()
1023
+	return nil
1024
+}
1025
+
1026
+func (daemon *Daemon) Unpause(c *Container) error {
1027
+	err := daemon.execDriver.Unpause(c.command)
1028
+	if err != nil {
1029
+		return err
1030
+	}
1031
+	c.State.SetUnpaused()
1032
+	return nil
1033
+}
1034
+
1017 1035
 func (daemon *Daemon) Kill(c *Container, sig int) error {
1018 1036
 	return daemon.execDriver.Kill(c.command, sig)
1019 1037
 }
... ...
@@ -83,6 +83,8 @@ type TtyTerminal interface {
83 83
 type Driver interface {
84 84
 	Run(c *Command, pipes *Pipes, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code
85 85
 	Kill(c *Command, sig int) error
86
+	Pause(c *Command) error
87
+	Unpause(c *Command) error
86 88
 	Name() string                                 // Driver name
87 89
 	Info(id string) Info                          // "temporary" hack (until we move state from core to plugins)
88 90
 	GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
... ...
@@ -218,6 +218,30 @@ func (d *driver) Kill(c *execdriver.Command, sig int) error {
218 218
 	return KillLxc(c.ID, sig)
219 219
 }
220 220
 
221
+func (d *driver) Pause(c *execdriver.Command) error {
222
+	_, err := exec.LookPath("lxc-freeze")
223
+	if err == nil {
224
+		output, errExec := exec.Command("lxc-freeze", "-n", c.ID).CombinedOutput()
225
+		if errExec != nil {
226
+			return fmt.Errorf("Err: %s Output: %s", errExec, output)
227
+		}
228
+	}
229
+
230
+	return err
231
+}
232
+
233
+func (d *driver) Unpause(c *execdriver.Command) error {
234
+	_, err := exec.LookPath("lxc-unfreeze")
235
+	if err == nil {
236
+		output, errExec := exec.Command("lxc-unfreeze", "-n", c.ID).CombinedOutput()
237
+		if errExec != nil {
238
+			return fmt.Errorf("Err: %s Output: %s", errExec, output)
239
+		}
240
+	}
241
+
242
+	return err
243
+}
244
+
221 245
 func (d *driver) Terminate(c *execdriver.Command) error {
222 246
 	return KillLxc(c.ID, 9)
223 247
 }
... ...
@@ -27,6 +27,7 @@ var actions = map[string]Action{
27 27
 	"cgroups.memory_reservation": memoryReservation, // set the memory reservation
28 28
 	"cgroups.memory_swap":        memorySwap,        // set the memory swap limit
29 29
 	"cgroups.cpuset.cpus":        cpusetCpus,        // set the cpus used
30
+	"cgroups.freezer":            freezer,           // set the frozen/thaw state
30 31
 
31 32
 	"systemd.slice": systemdSlice, // set parent Slice used for systemd unit
32 33
 
... ...
@@ -35,6 +36,16 @@ var actions = map[string]Action{
35 35
 	"fs.readonly": readonlyFs, // make the rootfs of the container read only
36 36
 }
37 37
 
38
+func freezer(container *libcontainer.Container, context interface{}, value string) error {
39
+	if container.Cgroups == nil {
40
+		return fmt.Errorf("cannot set cgroups when they are disabled")
41
+	}
42
+
43
+	container.Cgroups.Freezer = value
44
+
45
+	return nil
46
+}
47
+
38 48
 func cpusetCpus(container *libcontainer.Container, context interface{}, value string) error {
39 49
 	if container.Cgroups == nil {
40 50
 		return fmt.Errorf("cannot set cgroups when they are disabled")
... ...
@@ -145,6 +145,32 @@ func (d *driver) Kill(p *execdriver.Command, sig int) error {
145 145
 	return syscall.Kill(p.Process.Pid, syscall.Signal(sig))
146 146
 }
147 147
 
148
+func (d *driver) Pause(c *execdriver.Command) error {
149
+	active := d.activeContainers[c.ID]
150
+	active.container.Cgroups.Freezer = "FROZEN"
151
+	pid := c.Process.Pid
152
+
153
+	if systemd.UseSystemd() {
154
+		_, err := systemd.Apply(active.container.Cgroups, pid)
155
+		return err
156
+	}
157
+	_, err := fs.Apply(active.container.Cgroups, pid)
158
+	return err
159
+}
160
+
161
+func (d *driver) Unpause(c *execdriver.Command) error {
162
+	active := d.activeContainers[c.ID]
163
+	active.container.Cgroups.Freezer = "THAWED"
164
+	pid := c.Process.Pid
165
+
166
+	if systemd.UseSystemd() {
167
+		_, err := systemd.Apply(active.container.Cgroups, pid)
168
+		return err
169
+	}
170
+	_, err := fs.Apply(active.container.Cgroups, pid)
171
+	return err
172
+}
173
+
148 174
 func (d *driver) Terminate(p *execdriver.Command) error {
149 175
 	// lets check the start time for the process
150 176
 	started, err := d.readStartTime(p)
... ...
@@ -11,6 +11,7 @@ import (
11 11
 type State struct {
12 12
 	sync.RWMutex
13 13
 	Running    bool
14
+	Paused     bool
14 15
 	Pid        int
15 16
 	ExitCode   int
16 17
 	StartedAt  time.Time
... ...
@@ -23,6 +24,9 @@ func (s *State) String() string {
23 23
 	defer s.RUnlock()
24 24
 
25 25
 	if s.Running {
26
+		if s.Paused {
27
+			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
28
+		}
26 29
 		return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
27 30
 	}
28 31
 	if s.FinishedAt.IsZero() {
... ...
@@ -50,6 +54,7 @@ func (s *State) SetRunning(pid int) {
50 50
 	defer s.Unlock()
51 51
 
52 52
 	s.Running = true
53
+	s.Paused = false
53 54
 	s.ExitCode = 0
54 55
 	s.Pid = pid
55 56
 	s.StartedAt = time.Now().UTC()
... ...
@@ -64,3 +69,22 @@ func (s *State) SetStopped(exitCode int) {
64 64
 	s.FinishedAt = time.Now().UTC()
65 65
 	s.ExitCode = exitCode
66 66
 }
67
+
68
+func (s *State) SetPaused() {
69
+	s.Lock()
70
+	defer s.Unlock()
71
+	s.Paused = true
72
+}
73
+
74
+func (s *State) SetUnpaused() {
75
+	s.Lock()
76
+	defer s.Unlock()
77
+	s.Paused = false
78
+}
79
+
80
+func (s *State) IsPaused() bool {
81
+	s.RLock()
82
+	defer s.RUnlock()
83
+
84
+	return s.Paused
85
+}
... ...
@@ -125,6 +125,8 @@ func InitServer(job *engine.Job) engine.Status {
125 125
 		"restart":          srv.ContainerRestart,
126 126
 		"start":            srv.ContainerStart,
127 127
 		"kill":             srv.ContainerKill,
128
+		"pause":            srv.ContainerPause,
129
+		"unpause":          srv.ContainerUnpause,
128 130
 		"wait":             srv.ContainerWait,
129 131
 		"tag":              srv.ImageTag, // FIXME merge with "image_tag"
130 132
 		"resize":           srv.ContainerResize,
... ...
@@ -168,6 +170,42 @@ func InitServer(job *engine.Job) engine.Status {
168 168
 	return engine.StatusOK
169 169
 }
170 170
 
171
+func (srv *Server) ContainerPause(job *engine.Job) engine.Status {
172
+	if n := len(job.Args); n < 1 || n > 2 {
173
+		return job.Errorf("Usage: %s CONTAINER", job.Name)
174
+	}
175
+	var (
176
+		name = job.Args[0]
177
+	)
178
+
179
+	if container := srv.daemon.Get(name); container != nil {
180
+		if err := container.Pause(); err != nil {
181
+			return job.Errorf("Cannot pause container %s: %s", name, err)
182
+		}
183
+	} else {
184
+		return job.Errorf("No such container: %s", name)
185
+	}
186
+	return engine.StatusOK
187
+}
188
+
189
+func (srv *Server) ContainerUnpause(job *engine.Job) engine.Status {
190
+	if n := len(job.Args); n < 1 || n > 2 {
191
+		return job.Errorf("Usage: %s CONTAINER", job.Name)
192
+	}
193
+	var (
194
+		name = job.Args[0]
195
+	)
196
+
197
+	if container := srv.daemon.Get(name); container != nil {
198
+		if err := container.Unpause(); err != nil {
199
+			return job.Errorf("Cannot unpause container %s: %s", name, err)
200
+		}
201
+	} else {
202
+		return job.Errorf("No such container: %s", name)
203
+	}
204
+	return engine.StatusOK
205
+}
206
+
171 207
 // ContainerKill send signal to the container
172 208
 // If no signal is given (sig 0), then Kill with SIGKILL and wait
173 209
 // for the container to exit.