Browse code

Fix possible overlapping IPs

A node is no longer using its load balancer IP address when it no longer
has tasks that use the network that requires that load balancer. When
this occurs, the swarmkit manager will free that IP in IPAM, and may
reaassign it.

When a task shuts down cleanly, it attempts removal of the networks it
uses, and if it is the last task using those networks, this removal
succeeds, and the load balancer IP is freed.

However, this behavior is absent if the container fails. Removal of the
networks is never attempted.

To address this issue, I amend the executor. Whenever a node load
balancer IP is removed or changed, that information is passedd to the
executor by way of the Configure method. By keeping track of the set of
node NetworkAttachments from the previous call to Configure, we can
determine which, if any, have been removed or changed.

At first, this seems to create a race, by which a task can be attempting
to start and the network is removed right out from under it. However,
this is already addressed in the controller. The controller will attempt
to recreate missing networks before starting a task.

Signed-off-by: Drew Erny <derny@mirantis.com>
(cherry picked from commit 0d9b0ed678e2b106e769e974f13aaf100e46351c)
Signed-off-by: Ameya Gawde <agawde@mirantis.com>

Drew Erny authored on 2021/05/11 05:11:13
Showing 1 changed files
... ...
@@ -15,12 +15,15 @@ import (
15 15
 	"github.com/docker/docker/daemon/cluster/convert"
16 16
 	executorpkg "github.com/docker/docker/daemon/cluster/executor"
17 17
 	clustertypes "github.com/docker/docker/daemon/cluster/provider"
18
+	"github.com/docker/libnetwork"
18 19
 	networktypes "github.com/docker/libnetwork/types"
19 20
 	"github.com/docker/swarmkit/agent"
20 21
 	"github.com/docker/swarmkit/agent/exec"
21 22
 	"github.com/docker/swarmkit/api"
22 23
 	"github.com/docker/swarmkit/api/naming"
24
+	"github.com/docker/swarmkit/log"
23 25
 	"github.com/docker/swarmkit/template"
26
+	"github.com/pkg/errors"
24 27
 	"github.com/sirupsen/logrus"
25 28
 )
26 29
 
... ...
@@ -32,6 +35,14 @@ type executor struct {
32 32
 	dependencies  exec.DependencyManager
33 33
 	mutex         sync.Mutex // This mutex protects the following node field
34 34
 	node          *api.NodeDescription
35
+
36
+	// nodeObj holds a copy of the swarmkit Node object from the time of the
37
+	// last call to executor.Configure. This allows us to discover which
38
+	// network attachments the node previously had, which further allows us to
39
+	// determine which, if any, need to be removed. nodeObj is not protected by
40
+	// a mutex, because it is only written to in the method (Configure) that it
41
+	// is read from. If that changes, it may need to be guarded.
42
+	nodeObj *api.Node
35 43
 }
36 44
 
37 45
 // NewExecutor returns an executor from the docker client.
... ...
@@ -157,6 +168,40 @@ func (e *executor) Configure(ctx context.Context, node *api.Node) error {
157 157
 		attachments[na.Network.ID] = na.Addresses[0]
158 158
 	}
159 159
 
160
+	// discover which, if any, attachments have been removed.
161
+	//
162
+	// we aren't responsible directly for creating these networks. that is
163
+	// handled indirectly when a container using that network is created.
164
+	// however, when it comes time to remove the network, none of the relevant
165
+	// tasks may exist anymore. this means we should go ahead and try to remove
166
+	// any network we know to no longer be in use.
167
+
168
+	// removeAttachments maps the network ID to a boolean. This boolean
169
+	// indicates whether the attachment in question is totally removed (true),
170
+	// or has just had its IP changed (false)
171
+	removeAttachments := make(map[string]bool)
172
+
173
+	// the first time we Configure, nodeObj wil be nil, because it will not be
174
+	// set yet. in that case, skip this check.
175
+	if e.nodeObj != nil {
176
+		for _, na := range e.nodeObj.Attachments {
177
+			// same thing as above, check sanity of the attachments so we don't
178
+			// get a panic.
179
+			if na == nil || na.Network == nil || len(na.Addresses) == 0 {
180
+				logrus.WithField("NetworkAttachment", fmt.Sprintf("%#v", na)).
181
+					Warnf("skipping nil or malformed node network attachment entry")
182
+				continue
183
+			}
184
+
185
+			// now, check if the attachment exists and shares the same IP address.
186
+			if ip, ok := attachments[na.Network.ID]; !ok || na.Addresses[0] != ip {
187
+				// if the map entry exists, then the network still exists, and the
188
+				// IP must be what has changed
189
+				removeAttachments[na.Network.ID] = !ok
190
+			}
191
+		}
192
+	}
193
+
160 194
 	if (ingressNA == nil) && (node.Attachment != nil) && (len(node.Attachment.Addresses) > 0) {
161 195
 		ingressNA = node.Attachment
162 196
 		attachments[ingressNA.Network.ID] = ingressNA.Addresses[0]
... ...
@@ -197,6 +242,42 @@ func (e *executor) Configure(ctx context.Context, node *api.Node) error {
197 197
 		return err
198 198
 	}
199 199
 
200
+	var (
201
+		activeEndpointsError *libnetwork.ActiveEndpointsError
202
+		errNoSuchNetwork     libnetwork.ErrNoSuchNetwork
203
+	)
204
+
205
+	// now, finally, remove any network LB attachments that we no longer have.
206
+	for nw, gone := range removeAttachments {
207
+		err := e.backend.DeleteManagedNetwork(nw)
208
+		switch {
209
+		case err == nil:
210
+			continue
211
+		case errors.As(err, &activeEndpointsError):
212
+			// this is the purpose of the boolean in the map. it's literally
213
+			// just to log an appropriate, informative error. i'm unsure if
214
+			// this can ever actually occur, but we need to know if it does.
215
+			if gone {
216
+				log.G(ctx).Warnf("network %s should be removed, but still has active attachments", nw)
217
+			} else {
218
+				log.G(ctx).Warnf(
219
+					"network %s should have its node LB IP changed, but cannot be removed because of active attachments",
220
+					nw,
221
+				)
222
+			}
223
+			continue
224
+		case errors.As(err, &errNoSuchNetwork):
225
+			// NoSuchNetworkError indicates the network is already gone.
226
+			continue
227
+		default:
228
+			log.G(ctx).Errorf("network %s remove failed: %v", nw, err)
229
+		}
230
+	}
231
+
232
+	// now update our copy of the node object, reset the attachment store, and
233
+	// return
234
+	e.nodeObj = node
235
+
200 236
 	return e.backend.GetAttachmentStore().ResetAttachments(attachments)
201 237
 }
202 238