Browse code

Daemon to take care of ingress cleanup on leave & shutdown

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2017/04/01 06:07:55
Showing 4 changed files
... ...
@@ -27,8 +27,8 @@ type Backend interface {
27 27
 	CreateManagedNetwork(clustertypes.NetworkCreateRequest) error
28 28
 	DeleteManagedNetwork(name string) error
29 29
 	FindNetwork(idName string) (libnetwork.Network, error)
30
-	SetupIngress(req clustertypes.NetworkCreateRequest, nodeIP string) error
31
-	ReleaseIngress() error
30
+	SetupIngress(clustertypes.NetworkCreateRequest, string) (<-chan struct{}, error)
31
+	ReleaseIngress() (<-chan struct{}, error)
32 32
 	PullImage(ctx context.Context, image, tag string, metaHeaders map[string][]string, authConfig *types.AuthConfig, outStream io.Writer) error
33 33
 	CreateManagedContainer(config types.ContainerCreateConfig) (container.ContainerCreateCreatedBody, error)
34 34
 	ContainerStart(name string, hostConfig *container.HostConfig, checkpoint string, checkpointDir string) error
... ...
@@ -139,13 +139,15 @@ func (e *executor) Configure(ctx context.Context, node *api.Node) error {
139 139
 		options.IPAM.Config = append(options.IPAM.Config, c)
140 140
 	}
141 141
 
142
-	return e.backend.SetupIngress(clustertypes.NetworkCreateRequest{
142
+	_, err := e.backend.SetupIngress(clustertypes.NetworkCreateRequest{
143 143
 		ID: na.Network.ID,
144 144
 		NetworkCreateRequest: types.NetworkCreateRequest{
145 145
 			Name:          na.Network.Spec.Annotations.Name,
146 146
 			NetworkCreate: options,
147 147
 		},
148 148
 	}, na.Addresses[0])
149
+
150
+	return err
149 151
 }
150 152
 
151 153
 // Controller returns a docker container runner.
... ...
@@ -445,7 +445,25 @@ func (daemon *Daemon) DaemonLeavesCluster() {
445 445
 	// Daemon is in charge of removing the attachable networks with
446 446
 	// connected containers when the node leaves the swarm
447 447
 	daemon.clearAttachableNetworks()
448
+	// We no longer need the cluster provider, stop it now so that
449
+	// the network agent will stop listening to cluster events.
448 450
 	daemon.setClusterProvider(nil)
451
+	// Wait for the networking cluster agent to stop
452
+	daemon.netController.AgentStopWait()
453
+	// Daemon is in charge of removing the ingress network when the
454
+	// node leaves the swarm. Wait for job to be done or timeout.
455
+	// This is also called on graceful daemon shutdown. We need to
456
+	// wait, because the ingress release has to happen before the
457
+	// network controller is stopped.
458
+	if done, err := daemon.ReleaseIngress(); err == nil {
459
+		select {
460
+		case <-done:
461
+		case <-time.After(5 * time.Second):
462
+			logrus.Warnf("timeout while waiting for ingress network removal")
463
+		}
464
+	} else {
465
+		logrus.Warnf("failed to initiate ingress network removal: %v", err)
466
+	}
449 467
 }
450 468
 
451 469
 // setClusterProvider sets a component for querying the current cluster state.
... ...
@@ -832,6 +850,12 @@ func (daemon *Daemon) Shutdown() error {
832 832
 		}
833 833
 	}
834 834
 
835
+	// If we are part of a cluster, clean up cluster's stuff
836
+	if daemon.clusterProvider != nil {
837
+		logrus.Debugf("start clean shutdown of cluster resources...")
838
+		daemon.DaemonLeavesCluster()
839
+	}
840
+
835 841
 	// Shutdown plugins after containers and layerstore. Don't change the order.
836 842
 	daemon.pluginShutdown()
837 843
 
... ...
@@ -101,8 +101,9 @@ func (daemon *Daemon) getAllNetworks() []libnetwork.Network {
101 101
 }
102 102
 
103 103
 type ingressJob struct {
104
-	create *clustertypes.NetworkCreateRequest
105
-	ip     net.IP
104
+	create  *clustertypes.NetworkCreateRequest
105
+	ip      net.IP
106
+	jobDone chan struct{}
106 107
 }
107 108
 
108 109
 var (
... ...
@@ -124,6 +125,7 @@ func (daemon *Daemon) startIngressWorker() {
124 124
 					daemon.releaseIngress(ingressID)
125 125
 					ingressID = ""
126 126
 				}
127
+				close(r.jobDone)
127 128
 			}
128 129
 		}
129 130
 	}()
... ...
@@ -137,19 +139,23 @@ func (daemon *Daemon) enqueueIngressJob(job *ingressJob) {
137 137
 }
138 138
 
139 139
 	// SetupIngress sets up ingress networking.
140
-func (daemon *Daemon) SetupIngress(create clustertypes.NetworkCreateRequest, nodeIP string) error {
140
+// The function returns a channel which will signal the caller when the programming is completed.
141
+func (daemon *Daemon) SetupIngress(create clustertypes.NetworkCreateRequest, nodeIP string) (<-chan struct{}, error) {
141 142
 	ip, _, err := net.ParseCIDR(nodeIP)
142 143
 	if err != nil {
143
-		return err
144
+		return nil, err
144 145
 	}
145
-	daemon.enqueueIngressJob(&ingressJob{&create, ip})
146
-	return nil
146
+	done := make(chan struct{})
147
+	daemon.enqueueIngressJob(&ingressJob{&create, ip, done})
148
+	return done, nil
147 149
 }
148 150
 
149 151
 // ReleaseIngress releases the ingress networking.
150
-func (daemon *Daemon) ReleaseIngress() error {
151
-	daemon.enqueueIngressJob(&ingressJob{nil, nil})
152
-	return nil
152
+// The function returns a channel which will signal the caller when the programming is completed.
153
+func (daemon *Daemon) ReleaseIngress() (<-chan struct{}, error) {
154
+	done := make(chan struct{})
155
+	daemon.enqueueIngressJob(&ingressJob{nil, nil, done})
156
+	return done, nil
153 157
 }
154 158
 
155 159
 func (daemon *Daemon) setupIngress(create *clustertypes.NetworkCreateRequest, ip net.IP, staleID string) {