Browse code

Make etcd example more resilient to failure

Michal Fojtik authored on 2015/06/16 01:02:10
Showing 4 changed files
... ...
@@ -19,6 +19,9 @@ RUN ETCD_URL=https://github.com/coreos/etcd/releases/download/${ETCD_RELEASE}/et
19 19
 
20 20
 EXPOSE 2379 2380
21 21
 
22
+# Make the datadir world writeable
23
+RUN mkdir -p /var/lib/etcd && chmod go+rwx /var/lib/etcd
24
+
22 25
 VOLUME ["/var/lib/etcd"]
23 26
 
24 27
 ADD etcd*.sh /usr/local/bin/
... ...
@@ -6,7 +6,7 @@
6 6
 address=$(getent ahosts ${HOSTNAME} | grep RAW | cut -d ' ' -f 1)
7 7
 
8 8
 exec /usr/local/bin/etcd \
9
-  -advertise-client-urls http://${address}:2379 \
10
-  -listen-client-urls http://${address}:2379 \
11
-  -data-dir /var/lib/etcd \
12
-  -name discovery
9
+  --advertise-client-urls http://${address}:2379 \
10
+  --listen-client-urls http://${address}:2379 \
11
+  --data-dir /var/lib/etcd \
12
+  --name discovery
... ...
@@ -7,22 +7,59 @@
7 7
 # size of the cluster in the discovery service and register itself.
8 8
 
9 9
 # If we are not running in a cluster, then just execute the etcd binary
10
-if [[ -z "${ETCD_DISCOVERY-}" ]]; then
10
+if [[ -z "${ETCD_DISCOVERY_TOKEN-}" ]]; then
11 11
   exec /usr/local/bin/etcd "$@"
12 12
 fi
13 13
 
14
+# This variable is used by etcd server
15
+export ETCD_DISCOVERY="${ETCD_DISCOVERY_URL}/v2/keys/discovery/${ETCD_DISCOVERY_TOKEN}"
16
+
17
+# Set the size of this cluster to the pre-defined number of members
18
+# Retry up to five times, because the etcd-discovery service may not be ready yet
19
+for i in {1..5}; do
20
+  echo "Attempt #${i} to update the cluster size in ${ETCD_DISCOVERY_URL} ..."
21
+  etcdctl --peers "${ETCD_DISCOVERY_URL}" set discovery/${ETCD_DISCOVERY_TOKEN}/_config/size ${ETCD_NUM_MEMBERS} && break || sleep 2
22
+done
23
+
24
+# The IP address of this container
14 25
 address=$(getent ahosts ${HOSTNAME} | grep RAW | cut -d ' ' -f 1)
15 26
 
16
-curl -sX PUT ${ETCD_DISCOVERY}/_config/size -d value=${ETCD_NUM_MEMBERS}
27
+# If this container is being restarted after a failure, we have to remove
28
+# this member from the list of members in the discovery service. The new
29
+# container will be added automatically and the data will be replicated.
30
+ETCDCTL_PEERS="${ETCD_DISCOVERY_URL}"
31
+initial_cluster=""
32
+new_member=0
17 33
 
18
-# Adding UNIX timestamp prevents having duplicate member id's
19
-member_id="${HOSTNAME}-$(date +"%s")"
34
+for member_url in $(etcdctl ls discovery/${ETCD_DISCOVERY_TOKEN}/); do
35
+  out=$(etcdctl get ${member_url})
36
+  if ! echo $out | grep -q "${address}"; then
37
+    initial_cluster+="${out},"
38
+    continue
39
+  fi
40
+  etcdctl rm ${member_url}
41
+  member_id=$(echo "${member_url}" | cut -d '/' -f 4)
42
+  new_member=1
43
+  etcdctl --peers http://etcd:2379 member remove ${member_id}
44
+  echo "Waiting for ${member_id} removal to propagate ..."
45
+  sleep 3
46
+done
47
+
48
+# If this member already exists in the cluster, perform recovery using
49
+# 'existing' cluster state.
50
+if [ $new_member != 0 ]; then
51
+  out=$(etcdctl --peers http://etcd:2379 member add ${HOSTNAME} http://${address}:2380 | grep ETCD_INITIAL_CLUSTER)
52
+  echo "Waiting for ${HOSTNAME} to be added into cluster ..." && sleep 5
53
+  eval "export ${out}"
54
+  export ETCD_INITIAL_CLUSTER_STATE="existing"
55
+  unset ETCD_DISCOVERY
56
+fi
20 57
 
21
-echo "Starting member ${member_id} (${address})..."
58
+echo "Starting etcd member ${HOSTNAME} on ${address} ..."
22 59
 exec /usr/local/bin/etcd \
23
-  -initial-advertise-peer-urls http://${address}:2380 \
24
-  -listen-peer-urls http://${address}:2380 \
25
-  -advertise-client-urls http://${address}:2379 \
26
-  -listen-client-urls http://${address}:2379 \
27
-  -data-dir /var/lib/etcd \
28
-  -name ${member_id}
60
+  --initial-advertise-peer-urls http://${address}:2380 \
61
+  --listen-peer-urls http://${address}:2380 \
62
+  --advertise-client-urls http://${address}:2379 \
63
+  --listen-client-urls http://127.0.0.1:2379,http://${address}:2379 \
64
+  --data-dir /var/lib/etcd \
65
+  --name ${HOSTNAME}
... ...
@@ -1,6 +1,6 @@
1 1
 {
2 2
   "kind": "Template",
3
-  "apiVersion": "v1beta3",
3
+  "apiVersion": "v1",
4 4
   "metadata": {
5 5
     "name": "etcd",
6 6
     "creationTimestamp": null,
... ...
@@ -13,7 +13,7 @@
13 13
   "objects": [
14 14
     {
15 15
       "kind": "ImageStream",
16
-      "apiVersion": "v1beta3",
16
+      "apiVersion": "v1",
17 17
       "metadata": {
18 18
         "name": "etcd",
19 19
         "creationTimestamp": null
... ...
@@ -38,7 +38,7 @@
38 38
     },
39 39
     {
40 40
       "kind": "Service",
41
-      "apiVersion": "v1beta3",
41
+      "apiVersion": "v1",
42 42
       "metadata": {
43 43
         "name": "etcd-discovery",
44 44
         "creationTimestamp": null,
... ...
@@ -68,7 +68,7 @@
68 68
     },
69 69
     {
70 70
       "kind": "Service",
71
-      "apiVersion": "v1beta3",
71
+      "apiVersion": "v1",
72 72
       "metadata": {
73 73
         "name": "etcd",
74 74
         "creationTimestamp": null,
... ...
@@ -106,7 +106,7 @@
106 106
     },
107 107
     {
108 108
       "kind": "DeploymentConfig",
109
-      "apiVersion": "v1beta3",
109
+      "apiVersion": "v1",
110 110
       "metadata": {
111 111
         "name": "etcd-discovery",
112 112
         "creationTimestamp": null
... ...
@@ -149,7 +149,6 @@
149 149
                 "resources": {},
150 150
                 "terminationMessagePath": "/dev/termination-log",
151 151
                 "imagePullPolicy": "IfNotPresent",
152
-                "capabilities": {},
153 152
                 "securityContext": {
154 153
                   "capabilities": {},
155 154
                   "privileged": false
... ...
@@ -157,8 +156,7 @@
157 157
               }
158 158
             ],
159 159
             "restartPolicy": "Always",
160
-            "dnsPolicy": "ClusterFirst",
161
-            "serviceAccount": ""
160
+            "dnsPolicy": "ClusterFirst"
162 161
           }
163 162
         }
164 163
       },
... ...
@@ -166,7 +164,7 @@
166 166
     },
167 167
     {
168 168
       "kind": "DeploymentConfig",
169
-      "apiVersion": "v1beta3",
169
+      "apiVersion": "v1",
170 170
       "metadata": {
171 171
         "name": "etcd",
172 172
         "creationTimestamp": null
... ...
@@ -221,14 +219,21 @@
221 221
                     "value": "${ETCD_CLUSTER_TOKEN}"
222 222
                   },
223 223
                   {
224
-                    "name": "ETCD_DISCOVERY",
225
-                    "value": "${ETCD_DISCOVERY}"
224
+                    "name": "ETCD_DISCOVERY_TOKEN",
225
+                    "value": "${ETCD_DISCOVERY_TOKEN}"
226
+                  },
227
+                  {
228
+                    "name": "ETCD_DISCOVERY_URL",
229
+                    "value": "${ETCD_DISCOVERY_URL}"
230
+                  },
231
+                  {
232
+                    "name": "ETCDCTL_PEERS",
233
+                    "value": "http://etcd:2379"
226 234
                   }
227 235
                 ],
228 236
                 "resources": {},
229 237
                 "terminationMessagePath": "/dev/termination-log",
230 238
                 "imagePullPolicy": "IfNotPresent",
231
-                "capabilities": {},
232 239
                 "securityContext": {
233 240
                   "capabilities": {},
234 241
                   "privileged": false
... ...
@@ -236,8 +241,7 @@
236 236
               }
237 237
             ],
238 238
             "restartPolicy": "Always",
239
-            "dnsPolicy": "ClusterFirst",
240
-            "serviceAccount": ""
239
+            "dnsPolicy": "ClusterFirst"
241 240
           }
242 241
         }
243 242
       },
... ...
@@ -256,10 +260,15 @@
256 256
       "value": "3"
257 257
     },
258 258
     {
259
-      "name": "ETCD_DISCOVERY",
260
-      "description": "A token used for etcd discovery",
259
+      "name": "ETCD_DISCOVERY_URL",
260
+      "description": "Discovery URL connects etcd instances together by storing a list of peer addresses, metadata and the initial size of the cluster under a unique address",
261
+      "value": "http://etcd-discovery:2379"
262
+    },
263
+    {
264
+      "name": "ETCD_DISCOVERY_TOKEN",
265
+      "description": "A unique token used by the discovery service",
261 266
       "generate": "expression",
262
-      "from": "http://etcd-discovery:2379/v2/keys/discovery/[a-z0-9]{40}"
267
+      "from": "[a-z0-9]{40}"
263 268
     },
264 269
     {
265 270
       "name": "ETCD_CLUSTER_TOKEN",