Browse code

deploy: remove deployer pods on cancellation

Michail Kargakis authored on 2016/06/12 08:53:54
Showing 2 changed files
... ...
@@ -61,17 +61,19 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
61 61
 	switch currentStatus {
62 62
 	case deployapi.DeploymentStatusNew:
63 63
 		// If the deployment has been cancelled, don't create a deployer pod.
64
-		// Transition the deployment to Pending so that re-syncs will check
65
-		// up on the deployer pods and so that the deployment config controller
66
-		// continues to see the deployment as in-flight (which it is until we
67
-		// have deployer pod outcomes).
64
+		// Instead try to delete any deployer pods found and transition the
65
+		// deployment to Pending so that the deployment config controller
66
+		// continues to see the deployment as in-flight. Eventually the deletion
67
+		// of the deployer pod should cause a requeue of this deployment and
68
+		// then it can be transitioned to Failed by this controller.
68 69
 		if deployutil.IsDeploymentCancelled(deployment) {
69 70
 			nextStatus = deployapi.DeploymentStatusPending
70
-			if err := c.cancelDeployerPods(deployment); err != nil {
71
+			if err := c.cleanupDeployerPods(deployment); err != nil {
71 72
 				return err
72 73
 			}
73 74
 			break
74 75
 		}
76
+
75 77
 		// If the pod already exists, it's possible that a previous CreatePod
76 78
 		// succeeded but the deployment state update failed and now we're re-
77 79
 		// entering. Ensure that the pod is the one we created by verifying the
... ...
@@ -126,29 +128,34 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
126 126
 	case deployapi.DeploymentStatusPending, deployapi.DeploymentStatusRunning:
127 127
 		// If the deployer pod has vanished, consider the deployment a failure.
128 128
 		deployerPodName := deployutil.DeployerPodNameForDeployment(deployment.Name)
129
-		if _, err := c.podClient.getPod(deployment.Namespace, deployerPodName); err != nil {
130
-			if kerrors.IsNotFound(err) {
131
-				nextStatus = deployapi.DeploymentStatusFailed
129
+		_, err := c.podClient.getPod(deployment.Namespace, deployerPodName)
130
+		switch {
131
+		case kerrors.IsNotFound(err):
132
+			nextStatus = deployapi.DeploymentStatusFailed
133
+			// If the deployment is cancelled here then we deleted the deployer in a previous
134
+			// resync of the deployment.
135
+			if !deployutil.IsDeploymentCancelled(deployment) {
132 136
 				deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(nextStatus)
133 137
 				deployment.Annotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedDeployerPodNoLongerExists
134 138
 				c.emitDeploymentEvent(deployment, kapi.EventTypeWarning, "Failed", fmt.Sprintf("Deployer pod %q has gone missing", deployerPodName))
135 139
 				glog.V(4).Infof("Failing deployment %q because its deployer pod %q disappeared", deployutil.LabelForDeployment(deployment), deployerPodName)
136
-				break
137
-			} else {
138
-				// We'll try again later on resync. Continue to process cancellations.
139
-				glog.V(2).Infof("Error getting deployer pod %s for deployment %s: %#v", deployerPodName, deployutil.LabelForDeployment(deployment), err)
140 140
 			}
141
-		}
142 141
 
143
-		// If the deployment is cancelled, terminate any deployer/hook pods.
144
-		// NOTE: Do not mark the deployment as Failed just yet.
145
-		// The deployment will be marked as Failed by the deployer pod controller
146
-		// when the deployer pod failure state is picked up.
147
-		// Then, the deployment config controller will scale down the failed deployment
148
-		// and scale back up the last successful completed deployment.
149
-		if deployutil.IsDeploymentCancelled(deployment) {
150
-			if err := c.cancelDeployerPods(deployment); err != nil {
151
-				return err
142
+		case err != nil:
143
+			// We'll try again later on resync. Cancellations are only processed once the deployer pod can be fetched.
144
+			glog.V(4).Infof("Error getting deployer pod %s for deployment %s: %#v", deployerPodName, deployutil.LabelForDeployment(deployment), err)
145
+
146
+		default: /* err == nil */
147
+			// If the deployment has been cancelled, delete any deployer pods
148
+			// found and leave the deployment status untouched here so that the
149
+			// deployment config controller continues to see the deployment
150
+			// as in-flight. Eventually the deletion of the deployer pod should
151
+			// cause a requeue of this deployment and then it can be transitioned
152
+			// to Failed by this controller.
153
+			if deployutil.IsDeploymentCancelled(deployment) {
154
+				if err := c.cleanupDeployerPods(deployment); err != nil {
155
+					return err
156
+				}
152 157
 			}
153 158
 		}
154 159
 	case deployapi.DeploymentStatusFailed:
... ...
@@ -157,6 +164,13 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
157 157
 			deploymentScaled = deployment.Spec.Replicas != 0
158 158
 			deployment.Spec.Replicas = 0
159 159
 		}
160
+		// Try to clean up a cancelled deployment once more in case hook pods
161
+		// were created just after we issued the first cleanup request.
162
+		if deployutil.IsDeploymentCancelled(deployment) {
163
+			if err := c.cleanupDeployerPods(deployment); err != nil {
164
+				return err
165
+			}
166
+		}
160 167
 	case deployapi.DeploymentStatusComplete:
161 168
 		// Check for test deployment and ensure the deployment scale matches
162 169
 		if config, err := c.decodeConfig(deployment); err == nil && config.Spec.Test {
... ...
@@ -164,31 +178,8 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
164 164
 			deployment.Spec.Replicas = 0
165 165
 		}
166 166
 
167
-		// now list any pods in the namespace that have the specified label
168
-		deployerPods, err := c.podClient.getDeployerPodsFor(deployment.Namespace, deployment.Name)
169
-		if err != nil {
170
-			return fmt.Errorf("couldn't fetch deployer pods for %s after successful completion: %v", deployutil.LabelForDeployment(deployment), err)
171
-		}
172
-		if len(deployerPods) > 0 {
173
-			glog.V(4).Infof("Deleting %d deployer pods for deployment %s", len(deployerPods), deployutil.LabelForDeployment(deployment))
174
-		}
175
-		cleanedAll := true
176
-		for _, deployerPod := range deployerPods {
177
-			if err := c.podClient.deletePod(deployerPod.Namespace, deployerPod.Name); err != nil {
178
-				if !kerrors.IsNotFound(err) {
179
-					// if the pod deletion failed, then log the error and continue
180
-					// we will try to delete any remaining deployer pods and return an error later
181
-					utilruntime.HandleError(fmt.Errorf("couldn't delete completed deployer pod %s/%s for deployment %s: %v", deployment.Namespace, deployerPod.Name, deployutil.LabelForDeployment(deployment), err))
182
-					cleanedAll = false
183
-				}
184
-				// Already deleted
185
-			} else {
186
-				glog.V(4).Infof("Deleted completed deployer pod %s/%s for deployment %s", deployment.Namespace, deployerPod.Name, deployutil.LabelForDeployment(deployment))
187
-			}
188
-		}
189
-
190
-		if !cleanedAll {
191
-			return actionableError(fmt.Sprintf("couldn't clean up all deployer pods for %s", deployment.Name))
167
+		if err := c.cleanupDeployerPods(deployment); err != nil {
168
+			return err
192 169
 		}
193 170
 	}
194 171
 
... ...
@@ -264,27 +255,24 @@ func (c *DeploymentController) makeDeployerPod(deployment *kapi.ReplicationContr
264 264
 	return pod, nil
265 265
 }
266 266
 
267
-func (c *DeploymentController) cancelDeployerPods(deployment *kapi.ReplicationController) error {
267
+func (c *DeploymentController) cleanupDeployerPods(deployment *kapi.ReplicationController) error {
268 268
 	deployerPods, err := c.podClient.getDeployerPodsFor(deployment.Namespace, deployment.Name)
269 269
 	if err != nil {
270
-		return fmt.Errorf("couldn't fetch deployer pods for %s while trying to cancel deployment: %v", deployutil.LabelForDeployment(deployment), err)
270
+		return fmt.Errorf("couldn't fetch deployer pods for %q: %v", deployutil.LabelForDeployment(deployment), err)
271 271
 	}
272
-	glog.V(4).Infof("Cancelling %d deployer pods for deployment %s", len(deployerPods), deployutil.LabelForDeployment(deployment))
273
-	zeroDelay := int64(1)
274
-	cleanedAll := len(deployerPods) > 0
272
+
273
+	cleanedAll := true
275 274
 	for _, deployerPod := range deployerPods {
276
-		// Set the ActiveDeadlineSeconds on the pod so it's terminated very soon.
277
-		if deployerPod.Spec.ActiveDeadlineSeconds == nil || *deployerPod.Spec.ActiveDeadlineSeconds != zeroDelay {
278
-			deployerPod.Spec.ActiveDeadlineSeconds = &zeroDelay
279
-			if _, err := c.podClient.updatePod(deployerPod.Namespace, &deployerPod); err != nil {
280
-				cleanedAll = false
281
-				utilruntime.HandleError(fmt.Errorf("couldn't cancel deployer pod %s for deployment %s: %v", deployerPod.Name, deployutil.LabelForDeployment(deployment), err))
282
-			}
283
-			glog.V(4).Infof("Cancelled deployer pod %s for deployment %s", deployerPod.Name, deployutil.LabelForDeployment(deployment))
275
+		if err := c.podClient.deletePod(deployerPod.Namespace, deployerPod.Name); err != nil && !kerrors.IsNotFound(err) {
276
+			// if the pod deletion failed, then log the error and continue
277
+			// we will try to delete any remaining deployer pods and return an error later
278
+			utilruntime.HandleError(fmt.Errorf("couldn't delete completed deployer pod %q for deployment %q: %v", deployerPod.Name, deployutil.LabelForDeployment(deployment), err))
279
+			cleanedAll = false
284 280
 		}
285 281
 	}
286
-	if cleanedAll {
287
-		c.emitDeploymentEvent(deployment, kapi.EventTypeNormal, "Cancelled", "Cancelled all deployer pods")
282
+
283
+	if !cleanedAll {
284
+		return actionableError(fmt.Sprintf("couldn't clean up all deployer pods for %s", deployment.Name))
288 285
 	}
289 286
 	return nil
290 287
 }
... ...
@@ -673,9 +673,9 @@ func TestHandle_cancelNew(t *testing.T) {
673 673
 	}
674 674
 }
675 675
 
676
-func TestHandle_cancelNewWithDeployers(t *testing.T) {
676
+func TestHandle_cleanupNewWithDeployers(t *testing.T) {
677 677
 	var updatedDeployment *kapi.ReplicationController
678
-	updatedDeployer := false
678
+	deletedDeployer := false
679 679
 
680 680
 	deployment, _ := deployutil.MakeDeployment(deploytest.OkDeploymentConfig(1), kapi.Codecs.LegacyCodec(deployapi.SchemeGroupVersion))
681 681
 	deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
... ...
@@ -696,13 +696,13 @@ func TestHandle_cancelNewWithDeployers(t *testing.T) {
696 696
 				t.Fatalf("unexpected call to make container")
697 697
 				return nil, nil
698 698
 			},
699
-			updatePodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
700
-				updatedDeployer = true
701
-				return nil, nil
702
-			},
703 699
 			getDeployerPodsForFunc: func(namespace, name string) ([]kapi.Pod, error) {
704 700
 				return []kapi.Pod{*relatedPod(deployment)}, nil
705 701
 			},
702
+			deletePodFunc: func(namespace, name string) error {
703
+				deletedDeployer = true
704
+				return nil
705
+			},
706 706
 		},
707 707
 		makeContainer: func(strategy *deployapi.DeploymentStrategy) *kapi.Container {
708 708
 			return okContainer()
... ...
@@ -718,16 +718,16 @@ func TestHandle_cancelNewWithDeployers(t *testing.T) {
718 718
 	if e, a := deployapi.DeploymentStatusPending, deployutil.DeploymentStatusFor(updatedDeployment); e != a {
719 719
 		t.Fatalf("expected deployment status %s, got %s", e, a)
720 720
 	}
721
-	if !updatedDeployer {
722
-		t.Fatalf("expected deployer update")
721
+	if !deletedDeployer {
722
+		t.Fatalf("expected deployer delete")
723 723
 	}
724 724
 }
725 725
 
726
-// TestHandle_cancelPendingRunning ensures that deployer pods are terminated
726
+// TestHandle_cleanupPendingRunning ensures that deployer pods are deleted
727 727
 // for deployments in post-New phases.
728
-func TestHandle_cancelPendingRunning(t *testing.T) {
728
+func TestHandle_cleanupPendingRunning(t *testing.T) {
729 729
 	deployerPodCount := 3
730
-	updatedPods := []kapi.Pod{}
730
+	deletedPods := 0
731 731
 
732 732
 	controller := &DeploymentController{
733 733
 		decodeConfig: func(deployment *kapi.ReplicationController) (*deployapi.DeploymentConfig, error) {
... ...
@@ -744,9 +744,9 @@ func TestHandle_cancelPendingRunning(t *testing.T) {
744 744
 			getPodFunc: func(namespace, name string) (*kapi.Pod, error) {
745 745
 				return ttlNonZeroPod(), nil
746 746
 			},
747
-			updatePodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
748
-				updatedPods = append(updatedPods, *pod)
749
-				return pod, nil
747
+			deletePodFunc: func(namespace, name string) error {
748
+				deletedPods++
749
+				return nil
750 750
 			},
751 751
 			getDeployerPodsForFunc: func(namespace, name string) ([]kapi.Pod, error) {
752 752
 				pods := []kapi.Pod{}
... ...
@@ -768,7 +768,7 @@ func TestHandle_cancelPendingRunning(t *testing.T) {
768 768
 	}
769 769
 
770 770
 	for _, status := range cases {
771
-		updatedPods = []kapi.Pod{}
771
+		deletedPods = 0
772 772
 		deployment, _ := deployutil.MakeDeployment(deploytest.OkDeploymentConfig(1), kapi.Codecs.LegacyCodec(deployapi.SchemeGroupVersion))
773 773
 		deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(status)
774 774
 		deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
... ...
@@ -777,13 +777,8 @@ func TestHandle_cancelPendingRunning(t *testing.T) {
777 777
 			t.Fatalf("unexpected error: %v", err)
778 778
 		}
779 779
 
780
-		if e, a := len(updatedPods), deployerPodCount; e != a {
781
-			t.Fatalf("expected %d updated pods, got %d", e, a)
782
-		}
783
-		for _, pod := range updatedPods {
784
-			if e, a := int64(1), *pod.Spec.ActiveDeadlineSeconds; e != a {
785
-				t.Errorf("expected ActiveDeadlineSeconds %d, got %d", e, a)
786
-			}
780
+		if e, a := deletedPods, deployerPodCount; e != a {
781
+			t.Fatalf("expected %d deleted pods, got %d", e, a)
787 782
 		}
788 783
 	}
789 784
 }