Browse code

Making the deployer pod name deterministic - deployer pod name is now the same as the deployment name - if an unrelated pod exists with the same name, the deployment is set to Failed

Abhishek Gupta authored on 2015/05/20 03:19:33
Showing 7 changed files
... ...
@@ -191,6 +191,15 @@ const (
191 191
 	DeploymentCancelledAnnotation = "openshift.io/deployment.cancelled"
192 192
 )
193 193
 
194
+// These constants represent the various reasons for cancelling a deployment
195
+// or for a deployment being placed in a failed state
196
+const (
197
+	DeploymentCancelledByUser                 = "The deployment was cancelled by the user"
198
+	DeploymentCancelledNewerDeploymentExists  = "The deployment was cancelled as a newer deployment was found running"
199
+	DeploymentFailedUnrelatedDeploymentExists = "The deployment failed as an unrelated pod with the same name as this deployment is already running"
200
+	DeploymentFailedDeployerPodNoLongerExists = "The deployment failed as the deployer pod no longer exists"
201
+)
202
+
194 203
 // DeploymentConfig represents a configuration for a single deployment (represented as a
195 204
 // ReplicationController). It also contains details about changes which resulted in the current
196 205
 // state of the DeploymentConfig. Each change to the DeploymentConfig which should result in
... ...
@@ -188,6 +188,15 @@ const (
188 188
 	DeploymentCancelledAnnotation = "openshift.io/deployment.cancelled"
189 189
 )
190 190
 
191
+// These constants represent the various reasons for cancelling a deployment
192
+// or for a deployment being placed in a failed state
193
+const (
194
+	DeploymentCancelledByUser                 = "The deployment was cancelled by the user"
195
+	DeploymentCancelledNewerDeploymentExists  = "The deployment was cancelled as a newer deployment was found running"
196
+	DeploymentFailedUnrelatedDeploymentExists = "The deployment failed as an unrelated pod with the same name as this deployment is already running"
197
+	DeploymentFailedDeployerPodNoLongerExists = "The deployment failed as the deployer pod no longer exists"
198
+)
199
+
191 200
 // DeploymentConfig represents a configuration for a single deployment (represented as a
192 201
 // ReplicationController). It also contains details about changes which resulted in the current
193 202
 // state of the DeploymentConfig. Each change to the DeploymentConfig which should result in
... ...
@@ -155,6 +155,15 @@ const (
155 155
 	DeploymentCancelledAnnotation = "openshift.io/deployment.cancelled"
156 156
 )
157 157
 
158
+// These constants represent the various reasons for cancelling a deployment
159
+// or for a deployment being placed in a failed state
160
+const (
161
+	DeploymentCancelledByUser                 = "The deployment was cancelled by the user"
162
+	DeploymentCancelledNewerDeploymentExists  = "The deployment was cancelled as a newer deployment was found running"
163
+	DeploymentFailedUnrelatedDeploymentExists = "The deployment failed as an unrelated pod with the same name as this deployment is already running"
164
+	DeploymentFailedDeployerPodNoLongerExists = "The deployment failed as the deployer pod no longer exists"
165
+)
166
+
158 167
 // DeploymentConfig represents a configuration for a single deployment (represented as a
159 168
 // ReplicationController). It also contains details about changes which resulted in the current
160 169
 // state of the DeploymentConfig. Each change to the DeploymentConfig which should result in
... ...
@@ -56,9 +56,41 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
56 56
 
57 57
 		deploymentPod, err := c.podClient.createPod(deployment.Namespace, podTemplate)
58 58
 		if err != nil {
59
-			// If the pod already exists, it's possible that a previous CreatePod succeeded but
60
-			// the deployment state update failed and now we're re-entering.
61
-			if !kerrors.IsAlreadyExists(err) {
59
+			if kerrors.IsAlreadyExists(err) {
60
+				// If the pod already exists, it's possible that a previous CreatePod succeeded but
61
+				// the deployment state update failed and now we're re-entering.
62
+				// Ensure that the pod is the one we created by verifying the annotation on it
63
+				existingPod, err := c.podClient.getPod(deployment.Namespace, deployutil.DeployerPodNameForDeployment(deployment))
64
+				if err != nil {
65
+					c.recorder.Eventf(deployment, "failedCreate", "Error getting existing deployer pod for %s: %v", deployutil.LabelForDeployment(deployment), err)
66
+					return fmt.Errorf("couldn't fetch existing deployer pod for %s: %v", deployutil.LabelForDeployment(deployment), err)
67
+				}
68
+				// TODO: Investigate checking the container image of the running pod and
69
+				// comparing with the intended deployer pod image.
70
+				// If we do so, we'll need to ensure that changes to 'unrelated' pods
71
+				// don't result in updates to the deployment
72
+				// So, the image check will have to be done in other areas of the code as well
73
+				if deployutil.DeploymentNameFor(existingPod) == deployment.Name {
74
+					// we'll just set the deploymentPod so that pod name annotation
75
+					// can be set on the deployment below
76
+					deploymentPod = existingPod
77
+				} else {
78
+					c.recorder.Eventf(deployment, "failedCreate", "Error creating deployer pod for %s since another pod with the same name exists", deployutil.LabelForDeployment(deployment))
79
+
80
+					// we seem to have an unrelated pod running with the same name as the deployment
81
+					// set the deployment status to Failed
82
+					failedStatus := string(deployapi.DeploymentStatusFailed)
83
+					deployment.Annotations[deployapi.DeploymentStatusAnnotation] = failedStatus
84
+					deployment.Annotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedUnrelatedDeploymentExists
85
+					if _, err := c.deploymentClient.updateDeployment(deployment.Namespace, deployment); err != nil {
86
+						c.recorder.Eventf(deployment, "failedUpdate", "Error updating deployment %s status to %s", deployutil.LabelForDeployment(deployment), failedStatus)
87
+						glog.Errorf("Error updating deployment %s status to %s", deployutil.LabelForDeployment(deployment), failedStatus)
88
+					} else {
89
+						glog.V(2).Infof("Updated deployment %s status to %s", deployutil.LabelForDeployment(deployment), failedStatus)
90
+					}
91
+					return fatalError(fmt.Sprintf("couldn't create deployer pod for %s since an unrelated pod with the same name exists", deployutil.LabelForDeployment(deployment)))
92
+				}
93
+			} else {
62 94
 				c.recorder.Eventf(deployment, "failedCreate", "Error creating deployer pod for %s: %v", deployutil.LabelForDeployment(deployment), err)
63 95
 				return fmt.Errorf("couldn't create deployer pod for %s: %v", deployutil.LabelForDeployment(deployment), err)
64 96
 			}
... ...
@@ -121,7 +153,7 @@ func (c *DeploymentController) makeDeployerPod(deployment *kapi.ReplicationContr
121 121
 
122 122
 	pod := &kapi.Pod{
123 123
 		ObjectMeta: kapi.ObjectMeta{
124
-			GenerateName: deployutil.DeployerPodNameForDeployment(deployment),
124
+			Name: deployutil.DeployerPodNameForDeployment(deployment),
125 125
 			Annotations: map[string]string{
126 126
 				deployapi.DeploymentAnnotation: deployment.Name,
127 127
 			},
... ...
@@ -154,6 +186,7 @@ type deploymentClient interface {
154 154
 
155 155
 // podClient abstracts access to pods.
156 156
 type podClient interface {
157
+	getPod(namespace, name string) (*kapi.Pod, error)
157 158
 	createPod(namespace string, pod *kapi.Pod) (*kapi.Pod, error)
158 159
 	deletePod(namespace, name string) error
159 160
 }
... ...
@@ -174,10 +207,15 @@ func (i *deploymentClientImpl) updateDeployment(namespace string, deployment *ka
174 174
 
175 175
 // podClientImpl is a pluggable podClient.
176 176
 type podClientImpl struct {
177
+	getPodFunc    func(namespace, name string) (*kapi.Pod, error)
177 178
 	createPodFunc func(namespace string, pod *kapi.Pod) (*kapi.Pod, error)
178 179
 	deletePodFunc func(namespace, name string) error
179 180
 }
180 181
 
182
+func (i *podClientImpl) getPod(namespace, name string) (*kapi.Pod, error) {
183
+	return i.getPodFunc(namespace, name)
184
+}
185
+
181 186
 func (i *podClientImpl) createPod(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
182 187
 	return i.createPodFunc(namespace, pod)
183 188
 }
... ...
@@ -37,7 +37,6 @@ func TestHandle_createPodOk(t *testing.T) {
37 37
 		podClient: &podClientImpl{
38 38
 			createPodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
39 39
 				createdPod = pod
40
-				pod.Name = pod.GenerateName
41 40
 				return pod, nil
42 41
 			},
43 42
 		},
... ...
@@ -189,21 +188,31 @@ func TestHandle_createPodFail(t *testing.T) {
189 189
 	}
190 190
 }
191 191
 
192
-// TestHandle_createPodAlreadyExists ensures that attempts to create a
192
+// TestHandle_deployerPodAlreadyExists ensures that attempts to create a
193 193
 // deployer pod which  was already created don't result in an error
194 194
 // (effectively skipping the handling as redundant).
195
-func TestHandle_createPodAlreadyExists(t *testing.T) {
195
+func TestHandle_deployerPodAlreadyExists(t *testing.T) {
196
+	var updatedDeployment *kapi.ReplicationController
197
+
198
+	config := deploytest.OkDeploymentConfig(1)
199
+	deployment, _ := deployutil.MakeDeployment(config, kapi.Codec)
200
+	deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
201
+	deployerPod := relatedPod(deployment)
202
+
196 203
 	controller := &DeploymentController{
197 204
 		decodeConfig: func(deployment *kapi.ReplicationController) (*deployapi.DeploymentConfig, error) {
198 205
 			return deployutil.DecodeDeploymentConfig(deployment, api.Codec)
199 206
 		},
200 207
 		deploymentClient: &deploymentClientImpl{
201 208
 			updateDeploymentFunc: func(namespace string, deployment *kapi.ReplicationController) (*kapi.ReplicationController, error) {
202
-				t.Fatalf("unexpected deployment update")
203
-				return nil, nil
209
+				updatedDeployment = deployment
210
+				return updatedDeployment, nil
204 211
 			},
205 212
 		},
206 213
 		podClient: &podClientImpl{
214
+			getPodFunc: func(namespace, name string) (*kapi.Pod, error) {
215
+				return deployerPod, nil
216
+			},
207 217
 			createPodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
208 218
 				return nil, kerrors.NewAlreadyExists("Pod", pod.Name)
209 219
 			},
... ...
@@ -214,15 +223,74 @@ func TestHandle_createPodAlreadyExists(t *testing.T) {
214 214
 		recorder: &record.FakeRecorder{},
215 215
 	}
216 216
 
217
-	// Verify no-op
218
-	config := deploytest.OkDeploymentConfig(1)
219
-	deployment, _ := deployutil.MakeDeployment(config, kapi.Codec)
220
-	deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusPending)
221 217
 	err := controller.Handle(deployment)
222 218
 
223 219
 	if err != nil {
224 220
 		t.Fatalf("unexpected error: %v", err)
225 221
 	}
222
+
223
+	if updatedDeployment.Annotations[deployapi.DeploymentPodAnnotation] != deployerPod.Name {
224
+		t.Fatalf("deployment not updated with pod name annotation")
225
+	}
226
+
227
+	if updatedDeployment.Annotations[deployapi.DeploymentStatusAnnotation] != string(deployapi.DeploymentStatusPending) {
228
+		t.Fatalf("deployment status not updated to pending")
229
+	}
230
+}
231
+
232
+// TestHandle_unrelatedPodAlreadyExists ensures that attempts to create a
233
+// deployer pod, when a pod with the same name but missing annotations
234
+// results in an error
235
+func TestHandle_unrelatedPodAlreadyExists(t *testing.T) {
236
+	var updatedDeployment *kapi.ReplicationController
237
+
238
+	config := deploytest.OkDeploymentConfig(1)
239
+	deployment, _ := deployutil.MakeDeployment(config, kapi.Codec)
240
+	deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
241
+	otherPod := unrelatedPod(deployment)
242
+
243
+	controller := &DeploymentController{
244
+		decodeConfig: func(deployment *kapi.ReplicationController) (*deployapi.DeploymentConfig, error) {
245
+			return deployutil.DecodeDeploymentConfig(deployment, api.Codec)
246
+		},
247
+		deploymentClient: &deploymentClientImpl{
248
+			updateDeploymentFunc: func(namespace string, deployment *kapi.ReplicationController) (*kapi.ReplicationController, error) {
249
+				updatedDeployment = deployment
250
+				return updatedDeployment, nil
251
+			},
252
+		},
253
+		podClient: &podClientImpl{
254
+			getPodFunc: func(namespace, name string) (*kapi.Pod, error) {
255
+				return otherPod, nil
256
+			},
257
+			createPodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
258
+				return nil, kerrors.NewAlreadyExists("Pod", pod.Name)
259
+			},
260
+		},
261
+		makeContainer: func(strategy *deployapi.DeploymentStrategy) (*kapi.Container, error) {
262
+			return okContainer(), nil
263
+		},
264
+		recorder: &record.FakeRecorder{},
265
+	}
266
+
267
+	err := controller.Handle(deployment)
268
+
269
+	if err == nil {
270
+		t.Fatalf("expected error")
271
+	}
272
+
273
+	if _, isFatal := err.(fatalError); !isFatal {
274
+		t.Fatalf("expected a fatal error, got a %#v", err)
275
+	}
276
+
277
+	if _, exists := updatedDeployment.Annotations[deployapi.DeploymentPodAnnotation]; exists {
278
+		t.Fatalf("deployment updated with pod name annotation")
279
+	}
280
+
281
+	status := updatedDeployment.Annotations[deployapi.DeploymentStatusAnnotation]
282
+	if status != string(deployapi.DeploymentStatusFailed) {
283
+		t.Fatalf("expected deployment status: Failed. found: %s", status)
284
+	}
226 285
 }
227 286
 
228 287
 // TestHandle_noop ensures that pending, running, and failed states result in
... ...
@@ -426,3 +494,25 @@ func okContainer() *kapi.Container {
426 426
 		},
427 427
 	}
428 428
 }
429
+
430
+func relatedPod(deployment *kapi.ReplicationController) *kapi.Pod {
431
+	return &kapi.Pod{
432
+		ObjectMeta: kapi.ObjectMeta{
433
+			Name: deployment.Name,
434
+			Annotations: map[string]string{
435
+				deployapi.DeploymentAnnotation: deployment.Name,
436
+			},
437
+		},
438
+	}
439
+}
440
+
441
+func unrelatedPod(deployment *kapi.ReplicationController) *kapi.Pod {
442
+	return &kapi.Pod{
443
+		ObjectMeta: kapi.ObjectMeta{
444
+			Name: deployment.Name,
445
+			Annotations: map[string]string{
446
+				"unrelatedKey": "unrelatedValue",
447
+			},
448
+		},
449
+	}
450
+}
... ...
@@ -58,6 +58,9 @@ func (factory *DeploymentControllerFactory) Create() controller.RunnableControll
58 58
 			},
59 59
 		},
60 60
 		podClient: &podClientImpl{
61
+			getPodFunc: func(namespace, name string) (*kapi.Pod, error) {
62
+				return factory.KubeClient.Pods(namespace).Get(name)
63
+			},
61 64
 			createPodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
62 65
 				return factory.KubeClient.Pods(namespace).Create(pod)
63 66
 			},
... ...
@@ -52,7 +52,7 @@ func LatestDeploymentNameForConfig(config *deployapi.DeploymentConfig) string {
52 52
 }
53 53
 
54 54
 func DeployerPodNameForDeployment(deployment *api.ReplicationController) string {
55
-	return fmt.Sprintf("deploy-%s", deployment.Name)
55
+	return deployment.Name
56 56
 }
57 57
 
58 58
 // LabelForDeployment builds a string identifier for a Deployment.