Browse code

keep a consistent view of containers rendered

Replicate relevant mutations to the in-memory ACID store. Readers will
then be able to query container state without locking.

Signed-off-by: Fabio Kung <fabio.kung@gmail.com>

Fabio Kung authored on 2017/02/23 07:02:20
Showing 13 changed files
... ...
@@ -262,11 +262,8 @@ func (container *Container) ConfigMounts() []Mount {
262 262
 	return mounts
263 263
 }
264 264
 
265
-// UpdateContainer updates configuration of a container.
265
+// UpdateContainer updates configuration of a container. Callers must hold a Lock on the Container.
266 266
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
267
-	container.Lock()
268
-	defer container.Unlock()
269
-
270 267
 	// update resources of container
271 268
 	resources := hostConfig.Resources
272 269
 	cResources := &container.HostConfig.Resources
... ...
@@ -126,11 +126,8 @@ func (container *Container) TmpfsMounts() ([]Mount, error) {
126 126
 	return mounts, nil
127 127
 }
128 128
 
129
-// UpdateContainer updates configuration of a container
129
+// UpdateContainer updates configuration of a container. Callers must hold a Lock on the Container.
130 130
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
131
-	container.Lock()
132
-	defer container.Unlock()
133
-
134 131
 	resources := hostConfig.Resources
135 132
 	if resources.CPUShares != 0 ||
136 133
 		resources.Memory != 0 ||
... ...
@@ -99,7 +99,7 @@ func (daemon *Daemon) load(id string) (*container.Container, error) {
99 99
 }
100 100
 
101 101
 // Register makes a container object usable by the daemon as <container.ID>
102
-func (daemon *Daemon) Register(c *container.Container) {
102
+func (daemon *Daemon) Register(c *container.Container) error {
103 103
 	// Attach to stdout and stderr
104 104
 	if c.Config.OpenStdin {
105 105
 		c.StreamConfig.NewInputPipes()
... ...
@@ -107,8 +107,14 @@ func (daemon *Daemon) Register(c *container.Container) {
107 107
 		c.StreamConfig.NewNopInputPipe()
108 108
 	}
109 109
 
110
+	// once in the memory store it is visible to other goroutines
111
+	// grab a Lock until it has been replicated to avoid races
112
+	c.Lock()
113
+	defer c.Unlock()
114
+
110 115
 	daemon.containers.Add(c.ID, c)
111 116
 	daemon.idIndex.Add(c.ID)
117
+	return daemon.containersReplica.Save(c.Snapshot())
112 118
 }
113 119
 
114 120
 func (daemon *Daemon) newContainer(name string, platform string, config *containertypes.Config, hostConfig *containertypes.HostConfig, imgID image.ID, managed bool) (*container.Container, error) {
... ...
@@ -212,6 +218,9 @@ func (daemon *Daemon) setHostConfig(container *container.Container, hostConfig *
212 212
 
213 213
 	runconfig.SetDefaultNetModeIfBlank(hostConfig)
214 214
 	container.HostConfig = hostConfig
215
+	if err := daemon.containersReplica.Save(container.Snapshot()); err != nil {
216
+		return err
217
+	}
215 218
 	return container.ToDisk()
216 219
 }
217 220
 
... ...
@@ -44,6 +44,19 @@ func (daemon *Daemon) getDNSSearchSettings(container *container.Container) []str
44 44
 
45 45
 	return nil
46 46
 }
47
+
48
+func (daemon *Daemon) saveAndReplicate(container *container.Container) error {
49
+	container.Lock()
50
+	defer container.Unlock()
51
+	if err := daemon.containersReplica.Save(container.Snapshot()); err != nil {
52
+		return fmt.Errorf("Error replicating container state: %v", err)
53
+	}
54
+	if err := container.ToDisk(); err != nil {
55
+		return fmt.Errorf("Error saving container to disk: %v", err)
56
+	}
57
+	return nil
58
+}
59
+
47 60
 func (daemon *Daemon) buildSandboxOptions(container *container.Container) ([]libnetwork.SandboxOption, error) {
48 61
 	var (
49 62
 		sboxOptions []libnetwork.SandboxOption
... ...
@@ -1005,7 +1018,7 @@ func (daemon *Daemon) ConnectToNetwork(container *container.Container, idOrName
1005 1005
 			return err
1006 1006
 		}
1007 1007
 	}
1008
-	if err := container.ToDisk(); err != nil {
1008
+	if err := daemon.saveAndReplicate(container); err != nil {
1009 1009
 		return fmt.Errorf("Error saving container to disk: %v", err)
1010 1010
 	}
1011 1011
 	return nil
... ...
@@ -1044,16 +1057,16 @@ func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, netw
1044 1044
 		return err
1045 1045
 	}
1046 1046
 
1047
-	if err := container.ToDisk(); err != nil {
1047
+	if err := daemon.saveAndReplicate(container); err != nil {
1048 1048
 		return fmt.Errorf("Error saving container to disk: %v", err)
1049 1049
 	}
1050 1050
 
1051 1051
 	if n != nil {
1052
-		attributes := map[string]string{
1052
+		daemon.LogNetworkEventWithAttributes(n, "disconnect", map[string]string{
1053 1053
 			"container": container.ID,
1054
-		}
1055
-		daemon.LogNetworkEventWithAttributes(n, "disconnect", attributes)
1054
+		})
1056 1055
 	}
1056
+
1057 1057
 	return nil
1058 1058
 }
1059 1059
 
... ...
@@ -172,7 +172,9 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (
172 172
 		logrus.Errorf("Error saving new container to disk: %v", err)
173 173
 		return nil, err
174 174
 	}
175
-	daemon.Register(container)
175
+	if err := daemon.Register(container); err != nil {
176
+		return nil, err
177
+	}
176 178
 	stateCtr.set(container.ID, "stopped")
177 179
 	daemon.LogContainerEvent(container, "create")
178 180
 	return container, nil
... ...
@@ -83,6 +83,7 @@ type Daemon struct {
83 83
 	ID                    string
84 84
 	repository            string
85 85
 	containers            container.Store
86
+	containersReplica     *container.MemDB
86 87
 	execCommands          *exec.Store
87 88
 	downloadManager       *xfer.LayerDownloadManager
88 89
 	uploadManager         *xfer.LayerUploadManager
... ...
@@ -182,11 +183,15 @@ func (daemon *Daemon) restore() error {
182 182
 	activeSandboxes := make(map[string]interface{})
183 183
 	for id, c := range containers {
184 184
 		if err := daemon.registerName(c); err != nil {
185
+			logrus.Errorf("Failed to register container name %s: %s", c.ID, err)
186
+			delete(containers, id)
187
+			continue
188
+		}
189
+		if err := daemon.Register(c); err != nil {
185 190
 			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
186 191
 			delete(containers, id)
187 192
 			continue
188 193
 		}
189
-		daemon.Register(c)
190 194
 
191 195
 		// verify that all volumes valid and have been migrated from the pre-1.7 layout
192 196
 		if err := daemon.verifyVolumesInfo(c); err != nil {
... ...
@@ -757,6 +762,9 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
757 757
 	d.ID = trustKey.PublicKey().KeyID()
758 758
 	d.repository = daemonRepo
759 759
 	d.containers = container.NewMemoryStore()
760
+	if d.containersReplica, err = container.NewMemDB(); err != nil {
761
+		return nil, err
762
+	}
760 763
 	d.execCommands = exec.NewStore()
761 764
 	d.trustKey = trustKey
762 765
 	d.idIndex = truncindex.NewTruncIndex([]string{})
... ...
@@ -103,14 +103,20 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
103 103
 	}
104 104
 
105 105
 	// Mark container dead. We don't want anybody to be restarting it.
106
-	container.SetDead()
106
+	container.Lock()
107
+	container.Dead = true
108
+	if err = daemon.containersReplica.Save(container.Snapshot()); err != nil {
109
+		container.Unlock()
110
+		return err
111
+	}
107 112
 
108 113
 	// Save container state to disk. So that if error happens before
109 114
 	// container meta file got removed from disk, then a restart of
110 115
 	// docker should not make a dead container alive.
111
-	if err := container.ToDiskLocking(); err != nil && !os.IsNotExist(err) {
116
+	if err := container.ToDisk(); err != nil && !os.IsNotExist(err) {
112 117
 		logrus.Errorf("Error saving dying container to disk: %v", err)
113 118
 	}
119
+	container.Unlock()
114 120
 
115 121
 	// When container creation fails and `RWLayer` has not been created yet, we
116 122
 	// do not call `ReleaseRWLayer`
... ...
@@ -131,6 +137,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
131 131
 	selinuxFreeLxcContexts(container.ProcessLabel)
132 132
 	daemon.idIndex.Delete(container.ID)
133 133
 	daemon.containers.Delete(container.ID)
134
+	daemon.containersReplica.Delete(container.ID)
134 135
 	if e := daemon.removeMountPoints(container, removeVolume); e != nil {
135 136
 		logrus.Error(e)
136 137
 	}
... ...
@@ -167,6 +167,13 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
167 167
 		// Else we're starting or healthy. Stay in that state.
168 168
 	}
169 169
 
170
+	// replicate Health status changes
171
+	if err := d.containersReplica.Save(c.Snapshot()); err != nil {
172
+		// queries will be inconsistent until the next probe runs or other state mutations
173
+		// trigger a replication
174
+		logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
175
+	}
176
+
170 177
 	if oldStatus != h.Status {
171 178
 		d.LogContainerEvent(c, "health_status: "+h.Status)
172 179
 	}
... ...
@@ -29,7 +29,13 @@ func TestNoneHealthcheck(t *testing.T) {
29 29
 		},
30 30
 		State: &container.State{},
31 31
 	}
32
-	daemon := &Daemon{}
32
+	store, err := container.NewMemDB()
33
+	if err != nil {
34
+		t.Fatal(err)
35
+	}
36
+	daemon := &Daemon{
37
+		containersReplica: store,
38
+	}
33 39
 
34 40
 	daemon.initHealthMonitor(c)
35 41
 	if c.State.Health != nil {
... ...
@@ -62,8 +68,15 @@ func TestHealthStates(t *testing.T) {
62 62
 			Image: "image_name",
63 63
 		},
64 64
 	}
65
+
66
+	store, err := container.NewMemDB()
67
+	if err != nil {
68
+		t.Fatal(err)
69
+	}
70
+
65 71
 	daemon := &Daemon{
66
-		EventsService: e,
72
+		EventsService:     e,
73
+		containersReplica: store,
67 74
 	}
68 75
 
69 76
 	c.Config.Healthcheck = &containertypes.HealthConfig{
... ...
@@ -90,6 +90,9 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
90 90
 		daemon.setStateCounter(c)
91 91
 
92 92
 		defer c.Unlock()
93
+		if err := daemon.containersReplica.Save(c.Snapshot()); err != nil {
94
+			return err
95
+		}
93 96
 		if err := c.ToDisk(); err != nil {
94 97
 			return err
95 98
 		}
... ...
@@ -119,6 +122,10 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
119 119
 		c.HasBeenStartedBefore = true
120 120
 		daemon.setStateCounter(c)
121 121
 
122
+		if err := daemon.containersReplica.Save(c.Snapshot()); err != nil {
123
+			c.Reset(false)
124
+			return err
125
+		}
122 126
 		if err := c.ToDisk(); err != nil {
123 127
 			c.Reset(false)
124 128
 			return err
... ...
@@ -130,6 +137,9 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
130 130
 		// Container is already locked in this case
131 131
 		c.Paused = true
132 132
 		daemon.setStateCounter(c)
133
+		if err := daemon.containersReplica.Save(c.Snapshot()); err != nil {
134
+			return err
135
+		}
133 136
 		if err := c.ToDisk(); err != nil {
134 137
 			return err
135 138
 		}
... ...
@@ -139,6 +149,9 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
139 139
 		// Container is already locked in this case
140 140
 		c.Paused = false
141 141
 		daemon.setStateCounter(c)
142
+		if err := daemon.containersReplica.Save(c.Snapshot()); err != nil {
143
+			return err
144
+		}
142 145
 		if err := c.ToDisk(); err != nil {
143 146
 			return err
144 147
 		}
... ...
@@ -82,6 +82,9 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) error {
82 82
 		daemon.nameIndex.Release(oldName + k)
83 83
 	}
84 84
 	daemon.releaseName(oldName)
85
+	if err = daemon.containersReplica.Save(container.Snapshot()); err != nil {
86
+		return err
87
+	}
85 88
 	if err = container.ToDisk(); err != nil {
86 89
 		return err
87 90
 	}
... ...
@@ -99,6 +102,9 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) error {
99 99
 		if err != nil {
100 100
 			container.Name = oldName
101 101
 			container.NetworkSettings.IsAnonymousEndpoint = oldIsAnonymousEndpoint
102
 +			if e := daemon.containersReplica.Save(container.Snapshot()); e != nil {
103
+				logrus.Errorf("%s: Failed in replicating state on rename failure: %v", container.ID, e)
104
+			}
102 105
 			if e := container.ToDisk(); e != nil {
103 106
 				logrus.Errorf("%s: Failed in writing to Disk on rename failure: %v", container.ID, e)
104 107
 			}
... ...
@@ -117,8 +117,12 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
117 117
 			if container.ExitCode() == 0 {
118 118
 				container.SetExitCode(128)
119 119
 			}
120
-			container.ToDisk()
121
-
120
+			if err := daemon.containersReplica.Save(container.Snapshot()); err != nil {
121
+				logrus.Errorf("%s: failed replicating state on start failure: %v", container.ID, err)
122
+			}
123
+			if err := container.ToDisk(); err != nil {
124
+				logrus.Errorf("%s: failed writing to disk on start failure: %v", container.ID, err)
125
+			}
122 126
 			container.Reset(false)
123 127
 
124 128
 			daemon.Cleanup(container)
... ...
@@ -38,6 +38,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
38 38
 		if restoreConfig {
39 39
 			container.Lock()
40 40
 			container.HostConfig = &backupHostConfig
41
+			daemon.containersReplica.Save(container.Snapshot())
41 42
 			container.ToDisk()
42 43
 			container.Unlock()
43 44
 		}
... ...
@@ -47,10 +48,18 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
47 47
 		return errCannotUpdate(container.ID, fmt.Errorf("Container is marked for removal and cannot be \"update\"."))
48 48
 	}
49 49
 
50
+	container.Lock()
50 51
 	if err := container.UpdateContainer(hostConfig); err != nil {
51 52
 		restoreConfig = true
53
+		container.Unlock()
52 54
 		return errCannotUpdate(container.ID, err)
53 55
 	}
56
+	if err := daemon.containersReplica.Save(container.Snapshot()); err != nil {
57
+		restoreConfig = true
58
+		container.Unlock()
59
+		return errCannotUpdate(container.ID, err)
60
+	}
61
+	container.Unlock()
54 62
 
55 63
 	// if Restart Policy changed, we need to update container monitor
56 64
 	if hostConfig.RestartPolicy.Name != "" {