Browse code

Fix rabbitmq retry for error checking

I think this retry check has been broken since we introduced "set -e".
Unfortunately, it seems the issue of rabbitmq not starting the first time
still occasionally occurs on CentOS 7 hosts, e.g. [1]:

---
+ rabbit_setuser stackrabbit secretrabbit
+ local user=stackrabbit pass=secretrabbit found= out=
++ sudo rabbitmqctl list_users
Error: unable to connect to node 'rabbit@devstack-centos7-rax-iad-100675': nodedown

DIAGNOSTICS
===========

nodes in question: ['rabbit@devstack-centos7-rax-iad-100675']

hosts, their running nodes and ports:
- devstack-centos7-rax-iad-100675: [{rabbitmqctl29293,39511}]

current node details:
- node name: 'rabbitmqctl29293@devstack-centos7-rax-iad-100675'
- home dir: /var/lib/rabbitmq
- cookie hash: KieJnx1pnllKbHVihGcDqA==
---

Fix up this retry so that it works again while we investigate [2].

[1] http://logs.openstack.org/64/141864/1/check//check-tempest-dsvm-centos7/4308f0c/logs/devstacklog.txt.gz
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1144100

Change-Id: I11fb3728e08adc1e0f7acca63e5a308d24dce78e

Ian Wienand authored on 2014/12/16 07:53:36
Showing 1 changed file
... ...
@@ -176,17 +176,31 @@ function restart_rpc_backend {
176 176
         echo_summary "Starting RabbitMQ"
177 177
         # NOTE(bnemec): Retry initial rabbitmq configuration to deal with
178 178
         # the fact that sometimes it fails to start properly.
179
-        # Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1059028
179
+        # Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1144100
180 180
         local i
181 181
         for i in `seq 10`; do
182
+            local rc=0
183
+
184
+            [[ $i -eq "10" ]] && die $LINENO "Failed to set rabbitmq password"
185
+
182 186
             if is_fedora || is_suse; then
183 187
                 # service is not started by default
184 188
                 restart_service rabbitmq-server
185 189
             fi
186
-            rabbit_setuser "$RABBIT_USERID" "$RABBIT_PASSWORD"
190
+
191
+            rabbit_setuser "$RABBIT_USERID" "$RABBIT_PASSWORD" || rc=$?
192
+            if [ $rc -ne 0 ]; then
193
+                continue
194
+            fi
195
+
187 196
             # change the rabbit password since the default is "guest"
188
-            sudo rabbitmqctl change_password $RABBIT_USERID $RABBIT_PASSWORD && break
189
-            [[ $i -eq "10" ]] && die $LINENO "Failed to set rabbitmq password"
197
+            sudo rabbitmqctl change_password \
198
+                $RABBIT_USERID $RABBIT_PASSWORD || rc=$?
199
+            if [ $rc -ne 0 ]; then
200
+                continue;
201
+            fi
202
+
203
+            break
190 204
         done
191 205
         if is_service_enabled n-cell; then
192 206
             # Add partitioned access for the child cell