Browse code

Ensure a reliable way to kill ghost containers on reboot Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)

Michael Crosby authored on 2014/03/26 15:48:16
Showing 7 changed files
... ...
@@ -50,8 +50,13 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [
50 50
 	if err := command.Start(); err != nil {
51 51
 		return -1, err
52 52
 	}
53
+
54
+	started, err := system.GetProcessStartTime(command.Process.Pid)
55
+	if err != nil {
56
+		return -1, err
57
+	}
53 58
 	ns.logger.Printf("writting pid %d to file\n", command.Process.Pid)
54
-	if err := ns.stateWriter.WritePid(command.Process.Pid); err != nil {
59
+	if err := ns.stateWriter.WritePid(command.Process.Pid, started); err != nil {
55 60
 		command.Process.Kill()
56 61
 		return -1, err
57 62
 	}
... ...
@@ -10,7 +10,7 @@ import (
10 10
 // StateWriter handles writing and deleting the pid file
11 11
 // on disk
12 12
 type StateWriter interface {
13
-	WritePid(pid int) error
13
+	WritePid(pid int, startTime string) error
14 14
 	DeletePid() error
15 15
 }
16 16
 
... ...
@@ -19,10 +19,18 @@ type DefaultStateWriter struct {
19 19
 }
20 20
 
21 21
 // WritePid writes the namespaced process's pid to the "pid" file and its start time to the "start" file under d.Root
22
-func (d *DefaultStateWriter) WritePid(pid int) error {
23
-	return ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655)
22
+func (d *DefaultStateWriter) WritePid(pid int, startTime string) error {
23
+	err := ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655)
24
+	if err != nil {
25
+		return err
26
+	}
27
+	return ioutil.WriteFile(filepath.Join(d.Root, "start"), []byte(startTime), 0655)
24 28
 }
25 29
 
26 30
 func (d *DefaultStateWriter) DeletePid() error {
27
-	return os.Remove(filepath.Join(d.Root, "pid"))
31
+	err := os.Remove(filepath.Join(d.Root, "pid"))
32
+	if serr := os.Remove(filepath.Join(d.Root, "start")); err == nil {
33
+		err = serr
34
+	}
35
+	return err
28 36
 }
29 37
new file mode 100644
... ...
@@ -0,0 +1,26 @@
0
+package system
1
+
2
+import (
3
+	"io/ioutil"
4
+	"path/filepath"
5
+	"strconv"
6
+	"strings"
7
+)
8
+
9
// statFormatError is returned when the contents of a /proc/<pid>/stat file
// cannot be parsed. Its value is the offending data.
type statFormatError string

// Error implements the error interface.
func (e statFormatError) Error() string {
	return "malformed process stat data: " + strconv.Quote(string(e))
}

// GetProcessStartTime looks in /proc to find the start time of the process
// identified by pid, so that callers can verify that a pid still refers to
// the process they expect.
//
// The value is field 22 (starttime) of /proc/<pid>/stat and is returned as
// the raw string found in that file. An error is returned when the stat file
// cannot be read or when its contents are malformed.
func GetProcessStartTime(pid int) (string, error) {
	data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return "", err
	}
	return parseStartTime(string(data))
}

// parseStartTime extracts the starttime field from the contents of a
// /proc/<pid>/stat file.
func parseStartTime(stat string) (string, error) {
	// From the man page:
	//
	// starttime %llu (was %lu before Linux 2.6)
	// (22)  The time the process started after system boot.
	const (
		startTimeField  = 22
		firstAfterComm  = 3 // field number of the first field following comm
		startTimeOffset = startTimeField - firstAfterComm
	)

	// Field 2 (comm) is wrapped in parentheses and may itself contain spaces
	// and parentheses, so splitting the whole line on spaces can shift every
	// later field. Anchor on the last ')' and count from field 3 instead.
	end := strings.LastIndex(stat, ")")
	if end < 0 {
		return "", statFormatError(stat)
	}

	fields := strings.Fields(stat[end+1:])
	if len(fields) <= startTimeOffset {
		// Truncated data: report it rather than panic on the index below.
		return "", statFormatError(stat)
	}
	return fields[startTimeOffset], nil
}
... ...
@@ -84,6 +84,7 @@ type Driver interface {
84 84
 	Name() string                                 // Driver name
85 85
 	Info(id string) Info                          // "temporary" hack (until we move state from core to plugins)
86 86
 	GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
87
+	Terminate(c *Command) error                   // kill it with fire
87 88
 }
88 89
 
89 90
 // Network settings of the container
... ...
@@ -204,6 +204,10 @@ func (d *driver) Kill(c *execdriver.Command, sig int) error {
204 204
 	return KillLxc(c.ID, sig)
205 205
 }
206 206
 
207
+func (d *driver) Terminate(c *execdriver.Command) error {
208
+	return KillLxc(c.ID, 9)
209
+}
210
+
207 211
 func (d *driver) version() string {
208 212
 	var (
209 213
 		version string
... ...
@@ -117,9 +117,39 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
117 117
 }
118 118
 
119 119
 func (d *driver) Kill(p *execdriver.Command, sig int) error {
120
-	err := syscall.Kill(p.Process.Pid, syscall.Signal(sig))
120
+	return syscall.Kill(p.Process.Pid, syscall.Signal(sig))
121
+}
122
+
123
+func (d *driver) Terminate(p *execdriver.Command) error {
124
+	// lets check the start time for the process
125
+	started, err := d.readStartTime(p)
126
+	if err != nil {
127
+		// if we don't have the data on disk then we can assume the process is gone
128
+		// because this is only removed after we know the process has stopped
129
+		if os.IsNotExist(err) {
130
+			return nil
131
+		}
132
+		return err
133
+	}
134
+
135
+	currentStartTime, err := system.GetProcessStartTime(p.Process.Pid)
136
+	if err != nil {
137
+		return err
138
+	}
139
+	if started == currentStartTime {
140
+		err = syscall.Kill(p.Process.Pid, 9)
141
+	}
121 142
 	d.removeContainerRoot(p.ID)
122 143
 	return err
144
+
145
+}
146
+
147
+func (d *driver) readStartTime(p *execdriver.Command) (string, error) {
148
+	data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
149
+	if err != nil {
150
+		return "", err
151
+	}
152
+	return string(data), nil
123 153
 }
124 154
 
125 155
 func (d *driver) Info(id string) execdriver.Info {
... ...
@@ -235,9 +265,9 @@ type dockerStateWriter struct {
235 235
 	callback execdriver.StartCallback
236 236
 }
237 237
 
238
-func (d *dockerStateWriter) WritePid(pid int) error {
238
+func (d *dockerStateWriter) WritePid(pid int, started string) error {
239 239
 	d.c.ContainerPid = pid
240
-	err := d.dsw.WritePid(pid)
240
+	err := d.dsw.WritePid(pid, started)
241 241
 	if d.callback != nil {
242 242
 		d.callback(d.c)
243 243
 	}
... ...
@@ -192,7 +192,7 @@ func (runtime *Runtime) Register(container *Container) error {
192 192
 				if err != nil {
193 193
 					utils.Debugf("cannot find existing process for %d", existingPid)
194 194
 				}
195
-				runtime.execDriver.Kill(cmd, 9)
195
+				runtime.execDriver.Terminate(cmd)
196 196
 			}
197 197
 			if err := container.Unmount(); err != nil {
198 198
 				utils.Debugf("ghost unmount error %s", err)