Browse code

Initial commit of libcontainer running docker Docker-DCO-1.1-Signed-off-by: Michael Crosby <michael@crosbymichael.com> (github: crosbymichael)

Michael Crosby authored on 2014/02/22 10:11:57
Showing 7 changed files
... ...
@@ -530,6 +530,7 @@ func (container *Container) Start() (err error) {
530 530
 	}
531 531
 
532 532
 	populateCommand(container)
533
+	container.command.Env = env
533 534
 
534 535
 	// Setup logging of stdout and stderr to disk
535 536
 	if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil {
536 537
new file mode 100644
... ...
@@ -0,0 +1,41 @@
0
+package namespaces
1
+
2
+import (
3
+	"github.com/dotcloud/docker/pkg/cgroups"
4
+	"github.com/dotcloud/docker/pkg/libcontainer"
5
+)
6
+
7
+// getDefaultTemplate returns the docker default for
8
+// the libcontainer configuration file
9
+func getDefaultTemplate() *libcontainer.Container {
10
+	return &libcontainer.Container{
11
+		Capabilities: libcontainer.Capabilities{
12
+			libcontainer.CAP_SETPCAP,
13
+			libcontainer.CAP_SYS_MODULE,
14
+			libcontainer.CAP_SYS_RAWIO,
15
+			libcontainer.CAP_SYS_PACCT,
16
+			libcontainer.CAP_SYS_ADMIN,
17
+			libcontainer.CAP_SYS_NICE,
18
+			libcontainer.CAP_SYS_RESOURCE,
19
+			libcontainer.CAP_SYS_TIME,
20
+			libcontainer.CAP_SYS_TTY_CONFIG,
21
+			libcontainer.CAP_MKNOD,
22
+			libcontainer.CAP_AUDIT_WRITE,
23
+			libcontainer.CAP_AUDIT_CONTROL,
24
+			libcontainer.CAP_MAC_ADMIN,
25
+			libcontainer.CAP_MAC_OVERRIDE,
26
+			libcontainer.CAP_NET_ADMIN,
27
+		},
28
+		Namespaces: libcontainer.Namespaces{
29
+			libcontainer.CLONE_NEWIPC,
30
+			libcontainer.CLONE_NEWNET,
31
+			libcontainer.CLONE_NEWNS,
32
+			libcontainer.CLONE_NEWPID,
33
+			libcontainer.CLONE_NEWUTS,
34
+		},
35
+		Cgroups: &cgroups.Cgroup{
36
+			Name:         "docker",
37
+			DeviceAccess: false,
38
+		},
39
+	}
40
+}
0 41
new file mode 100644
... ...
@@ -0,0 +1,349 @@
0
+package namespaces
1
+
2
+import (
3
+	"encoding/json"
4
+	"errors"
5
+	"fmt"
6
+	"github.com/dotcloud/docker/execdriver"
7
+	"github.com/dotcloud/docker/pkg/libcontainer"
8
+	"github.com/dotcloud/docker/pkg/libcontainer/network"
9
+	"github.com/dotcloud/docker/pkg/libcontainer/nsinit"
10
+	"github.com/dotcloud/docker/pkg/libcontainer/utils"
11
+	"github.com/dotcloud/docker/pkg/system"
12
+	"github.com/dotcloud/docker/pkg/term"
13
+	"io"
14
+	"io/ioutil"
15
+	"log"
16
+	"os"
17
+	"os/exec"
18
+	"path/filepath"
19
+	"strings"
20
+	"syscall"
21
+)
22
+
23
+const (
24
+	DriverName = "namespaces"
25
+	Version    = "0.1"
26
+)
27
+
28
+var (
29
+	ErrNotSupported = errors.New("not supported")
30
+)
31
+
32
+func init() {
33
+	execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error {
34
+		return nil
35
+	})
36
+}
37
+
38
+type driver struct {
39
+}
40
+
41
+func NewDriver() (*driver, error) {
42
+	return &driver{}, nil
43
+}
44
+
45
+func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
46
+	container := createContainer(c)
47
+	if err := writeContainerFile(container, c.Rootfs); err != nil {
48
+		return -1, err
49
+	}
50
+
51
+	var (
52
+		console string
53
+		master  *os.File
54
+		err     error
55
+
56
+		inPipe           io.WriteCloser
57
+		outPipe, errPipe io.ReadCloser
58
+	)
59
+
60
+	if container.Tty {
61
+		log.Printf("setting up master and console")
62
+		master, console, err = createMasterAndConsole()
63
+		if err != nil {
64
+			return -1, err
65
+		}
66
+	}
67
+	c.Terminal = NewTerm(pipes, master)
68
+
69
+	// create a pipe so that we can synchronize with the namespaced process and
70
+	// pass the veth name to the child
71
+	r, w, err := os.Pipe()
72
+	if err != nil {
73
+		return -1, err
74
+	}
75
+	system.UsetCloseOnExec(r.Fd())
76
+
77
+	args := append([]string{c.Entrypoint}, c.Arguments...)
78
+	createCommand(c, container, console, "/nsinit.logs", r.Fd(), args)
79
+	command := c
80
+
81
+	if !container.Tty {
82
+		log.Printf("opening pipes on command")
83
+		if inPipe, err = command.StdinPipe(); err != nil {
84
+			return -1, err
85
+		}
86
+		if outPipe, err = command.StdoutPipe(); err != nil {
87
+			return -1, err
88
+		}
89
+		if errPipe, err = command.StderrPipe(); err != nil {
90
+			return -1, err
91
+		}
92
+	}
93
+
94
+	log.Printf("staring init")
95
+	if err := command.Start(); err != nil {
96
+		return -1, err
97
+	}
98
+	log.Printf("writting state file")
99
+	if err := writePidFile(c.Rootfs, command.Process.Pid); err != nil {
100
+		command.Process.Kill()
101
+		return -1, err
102
+	}
103
+	defer deletePidFile(c.Rootfs)
104
+
105
+	// Do this before syncing with child so that no children
106
+	// can escape the cgroup
107
+	if container.Cgroups != nil {
108
+		log.Printf("setting up cgroups")
109
+		if err := container.Cgroups.Apply(command.Process.Pid); err != nil {
110
+			command.Process.Kill()
111
+			return -1, err
112
+		}
113
+	}
114
+
115
+	if container.Network != nil {
116
+		log.Printf("creating veth pair")
117
+		vethPair, err := initializeContainerVeth(container.Network.Bridge, container.Network.Mtu, command.Process.Pid)
118
+		if err != nil {
119
+			return -1, err
120
+		}
121
+		log.Printf("sending %s as veth pair name", vethPair)
122
+		sendVethName(w, vethPair)
123
+	}
124
+
125
+	// Sync with child
126
+	log.Printf("closing sync pipes")
127
+	w.Close()
128
+	r.Close()
129
+
130
+	if container.Tty {
131
+		log.Printf("starting copy for tty")
132
+		go io.Copy(pipes.Stdout, master)
133
+		if pipes.Stdin != nil {
134
+			go io.Copy(master, pipes.Stdin)
135
+		}
136
+
137
+		/*
138
+			state, err := setupWindow(master)
139
+			if err != nil {
140
+				command.Process.Kill()
141
+				return -1, err
142
+			}
143
+			defer term.RestoreTerminal(uintptr(syscall.Stdin), state)
144
+		*/
145
+	} else {
146
+		log.Printf("starting copy for std pipes")
147
+		if pipes.Stdin != nil {
148
+			go func() {
149
+				defer inPipe.Close()
150
+				io.Copy(inPipe, pipes.Stdin)
151
+			}()
152
+		}
153
+		go io.Copy(pipes.Stdout, outPipe)
154
+		go io.Copy(pipes.Stderr, errPipe)
155
+	}
156
+
157
+	if startCallback != nil {
158
+		startCallback(c)
159
+	}
160
+
161
+	log.Printf("waiting on process")
162
+	if err := command.Wait(); err != nil {
163
+		if _, ok := err.(*exec.ExitError); !ok {
164
+			return -1, err
165
+		}
166
+	}
167
+	log.Printf("process ended")
168
+	return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
169
+}
170
+
171
+func (d *driver) Kill(p *execdriver.Command, sig int) error {
172
+	return p.Process.Kill()
173
+}
174
+
175
+func (d *driver) Restore(c *execdriver.Command) error {
176
+	return ErrNotSupported
177
+}
178
+
179
+func (d *driver) Info(id string) execdriver.Info {
180
+	return nil
181
+}
182
+
183
+func (d *driver) Name() string {
184
+	return fmt.Sprintf("%s-%s", DriverName, Version)
185
+}
186
+
187
+func (d *driver) GetPidsForContainer(id string) ([]int, error) {
188
+	return nil, ErrNotSupported
189
+}
190
+
191
+func writeContainerFile(container *libcontainer.Container, rootfs string) error {
192
+	data, err := json.Marshal(container)
193
+	if err != nil {
194
+		return err
195
+	}
196
+	return ioutil.WriteFile(filepath.Join(rootfs, "container.json"), data, 0755)
197
+}
198
+
199
+func getEnv(key string, env []string) string {
200
+	for _, pair := range env {
201
+		parts := strings.Split(pair, "=")
202
+		if parts[0] == key {
203
+			return parts[1]
204
+		}
205
+	}
206
+	return ""
207
+}
208
+
209
+// sendVethName writes the veth pair name to the sync pipe shared with the child;
210
+// the caller closes the pipe afterwards so that the child stops waiting for more data
211
+func sendVethName(pipe io.Writer, name string) {
212
+	fmt.Fprint(pipe, name)
213
+}
214
+
215
+// initializeContainerVeth will create a veth pair and setup the host's
216
+// side of the pair by setting the specified bridge as the master and bringing
217
+// up the interface.
218
+//
219
+// Then it will set the other side of the veth pair into the container's namespace
220
+// using the pid and returns the veth's interface name to provide to the container to
221
+// finish setting up the interface inside the namespace
222
+func initializeContainerVeth(bridge string, mtu, nspid int) (string, error) {
223
+	name1, name2, err := createVethPair()
224
+	if err != nil {
225
+		return "", err
226
+	}
227
+	log.Printf("veth pair created %s <> %s", name1, name2)
228
+	if err := network.SetInterfaceMaster(name1, bridge); err != nil {
229
+		return "", err
230
+	}
231
+	if err := network.SetMtu(name1, mtu); err != nil {
232
+		return "", err
233
+	}
234
+	if err := network.InterfaceUp(name1); err != nil {
235
+		return "", err
236
+	}
237
+	log.Printf("setting %s inside %d namespace", name2, nspid)
238
+	if err := network.SetInterfaceInNamespacePid(name2, nspid); err != nil {
239
+		return "", err
240
+	}
241
+	return name2, nil
242
+}
243
+
244
+func setupWindow(master *os.File) (*term.State, error) {
245
+	ws, err := term.GetWinsize(os.Stdin.Fd())
246
+	if err != nil {
247
+		return nil, err
248
+	}
249
+	if err := term.SetWinsize(master.Fd(), ws); err != nil {
250
+		return nil, err
251
+	}
252
+	return term.SetRawTerminal(os.Stdin.Fd())
253
+}
254
+
255
+// createMasterAndConsole will open /dev/ptmx on the host and retrieve the
256
+// pts name for use as the pty slave inside the container
257
+func createMasterAndConsole() (*os.File, string, error) {
258
+	master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
259
+	if err != nil {
260
+		return nil, "", err
261
+	}
262
+	console, err := system.Ptsname(master)
263
+	if err != nil {
264
+		return nil, "", err
265
+	}
266
+	if err := system.Unlockpt(master); err != nil {
267
+		return nil, "", err
268
+	}
269
+	return master, console, nil
270
+}
271
+
272
+// createVethPair will automatically generate two random names for
273
+// the veth pair and ensure that they have been created
274
+func createVethPair() (name1 string, name2 string, err error) {
275
+	name1, err = utils.GenerateRandomName("dock", 4)
276
+	if err != nil {
277
+		return
278
+	}
279
+	name2, err = utils.GenerateRandomName("dock", 4)
280
+	if err != nil {
281
+		return
282
+	}
283
+	if err = network.CreateVethPair(name1, name2); err != nil {
284
+		return
285
+	}
286
+	return
287
+}
288
+
289
+// writePidFile writes the namespaced process's pid to .nspid in the rootfs for the container
290
+func writePidFile(rootfs string, pid int) error {
291
+	return ioutil.WriteFile(filepath.Join(rootfs, ".nspid"), []byte(fmt.Sprint(pid)), 0655)
292
+}
293
+
294
+func deletePidFile(rootfs string) error {
295
+	return os.Remove(filepath.Join(rootfs, ".nspid"))
296
+}
297
+
298
+// createCommand will return an exec.Cmd with the Cloneflags set to the proper namespaces
299
+// defined on the container's configuration and use the current binary as the init with the
300
+// args provided
301
+func createCommand(c *execdriver.Command, container *libcontainer.Container,
302
+	console, logFile string, pipe uintptr, args []string) {
303
+
304
+	aname, _ := exec.LookPath("nsinit")
305
+	c.Path = aname
306
+	c.Args = append([]string{
307
+		aname,
308
+		"-console", console,
309
+		"-pipe", fmt.Sprint(pipe),
310
+		"-log", logFile,
311
+		"init",
312
+	}, args...)
313
+	c.SysProcAttr = &syscall.SysProcAttr{
314
+		Cloneflags: uintptr(nsinit.GetNamespaceFlags(container.Namespaces)),
315
+	}
316
+	c.Env = container.Env
317
+	c.Dir = c.Rootfs
318
+}
319
+
320
+func createContainer(c *execdriver.Command) *libcontainer.Container {
321
+	container := getDefaultTemplate()
322
+
323
+	container.Hostname = getEnv("HOSTNAME", c.Env)
324
+	container.Tty = c.Tty
325
+	container.User = c.User
326
+	container.WorkingDir = c.WorkingDir
327
+	container.Env = c.Env
328
+
329
+	container.Env = append(container.Env, "container=docker")
330
+
331
+	if c.Network != nil {
332
+		container.Network = &libcontainer.Network{
333
+			Mtu:     c.Network.Mtu,
334
+			Address: fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen),
335
+			Gateway: c.Network.Gateway,
336
+			Bridge:  c.Network.Bridge,
337
+		}
338
+	}
339
+	if c.Privileged {
340
+		container.Capabilities = nil
341
+	}
342
+	if c.Resources != nil {
343
+		container.Cgroups.CpuShares = c.Resources.CpuShares
344
+		container.Cgroups.Memory = c.Resources.Memory
345
+		container.Cgroups.MemorySwap = c.Resources.MemorySwap
346
+	}
347
+	return container
348
+}
0 349
new file mode 100644
... ...
@@ -0,0 +1,26 @@
0
+package namespaces
1
+
2
+import (
3
+	"github.com/dotcloud/docker/execdriver"
4
+	"github.com/dotcloud/docker/pkg/term"
5
+	"os"
6
+)
7
+
8
+type NsinitTerm struct {
9
+	master *os.File
10
+}
11
+
12
+func NewTerm(pipes *execdriver.Pipes, master *os.File) *NsinitTerm {
13
+	return &NsinitTerm{master}
14
+}
15
+
16
+func (t *NsinitTerm) Close() error {
17
+	return t.master.Close()
18
+}
19
+
20
+func (t *NsinitTerm) Resize(h, w int) error {
21
+	if t.master != nil {
22
+		return term.SetWinsize(t.master.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
23
+	}
24
+	return nil
25
+}
... ...
@@ -227,7 +227,7 @@ func createCommand(container *libcontainer.Container, console, logFile string, p
227 227
 		"init"}, args...)...)
228 228
 
229 229
 	command.SysProcAttr = &syscall.SysProcAttr{
230
-		Cloneflags: uintptr(getNamespaceFlags(container.Namespaces)),
230
+		Cloneflags: uintptr(GetNamespaceFlags(container.Namespaces)),
231 231
 	}
232 232
 	command.Env = container.Env
233 233
 	return command
... ...
@@ -28,7 +28,7 @@ var namespaceFileMap = map[libcontainer.Namespace]string{
28 28
 
29 29
 // getNamespaceFlags parses the container's Namespaces options to set the correct
30 30
 // flags on clone, unshare, and setns
31
-func getNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
31
+func GetNamespaceFlags(namespaces libcontainer.Namespaces) (flag int) {
32 32
 	for _, ns := range namespaces {
33 33
 		flag |= namespaceMap[ns]
34 34
 	}
... ...
@@ -7,7 +7,8 @@ import (
7 7
 	"github.com/dotcloud/docker/dockerversion"
8 8
 	"github.com/dotcloud/docker/engine"
9 9
 	"github.com/dotcloud/docker/execdriver"
10
-	"github.com/dotcloud/docker/execdriver/lxc"
10
+	_ "github.com/dotcloud/docker/execdriver/lxc"
11
+	"github.com/dotcloud/docker/execdriver/namespaces"
11 12
 	"github.com/dotcloud/docker/graphdriver"
12 13
 	"github.com/dotcloud/docker/graphdriver/aufs"
13 14
 	_ "github.com/dotcloud/docker/graphdriver/btrfs"
... ...
@@ -703,7 +704,7 @@ func NewRuntimeFromDirectory(config *DaemonConfig, eng *engine.Engine) (*Runtime
703 703
 
704 704
 	sysInfo := sysinfo.New(false)
705 705
 
706
-	ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor)
706
+	ed, err := namespaces.NewDriver()
707 707
 	if err != nil {
708 708
 		return nil, err
709 709
 	}