Browse code

Merge "Wait for compute service to check in"

Jenkins authored on 2017/09/07 03:16:21
Showing 3 changed files
... ...
@@ -407,6 +407,26 @@ EOF
407 407
     return $rval
408 408
 }
409 409
 
410
+function wait_for_compute {
411
+    local timeout=$1
412
+    local rval=0
413
+    time_start "wait_for_service"
414
+    timeout $timeout bash -x <<EOF || rval=$?
415
+        ID=""
416
+        while [[ "\$ID" == "" ]]; do
417
+            sleep 1
418
+            ID=\$(openstack --os-cloud devstack-admin --os-region "$REGION_NAME" compute service list --host `hostname` --service nova-compute -c ID -f value)
419
+        done
420
+EOF
421
+    time_stop "wait_for_service"
422
+    # Figure out what's happening on platforms where this doesn't work
423
+    if [[ "$rval" != 0 ]]; then
424
+        echo "Didn't find service registered by hostname after $timeout seconds"
425
+        openstack --os-cloud devstack-admin --os-region "$REGION_NAME" compute service list
426
+    fi
427
+    return $rval
428
+}
429
+
410 430
 
411 431
 # ping check
412 432
 # Uses globals ``ENABLED_SERVICES``, ``TOP_DIR``, ``MULTI_HOST``, ``PRIVATE_NETWORK``
... ...
@@ -955,6 +955,28 @@ function start_nova_conductor {
955 955
     done
956 956
 }
957 957
 
958
+function is_nova_ready {
959
+    # NOTE(sdague): with cells v2 all the compute services must be up
960
+    # and checked into the database before discover_hosts is run. This
961
+    # happens in all-in-one installs by accident, because more than 30
962
+    # seconds elapse between here and the script ending. However, in multinode
963
+    # tests this can very often not be the case. So ensure that the
964
+    # compute is up before we move on.
965
+    if is_service_enabled n-cell; then
966
+        # cells v1 can't complete the check below because it munges
967
+        # hostnames with cell information (grumble grumble).
968
+        return
969
+    fi
970
+    # TODO(sdague): honestly, this probably should be a plug point for
971
+    # an external system.
972
+    if [[ "$VIRT_DRIVER" == 'xenserver' ]]; then
973
+        # xenserver encodes information in the hostname of the compute
974
+        # because of the dom0/domU split. Just ignore for now.
975
+        return
976
+    fi
977
+    wait_for_compute 60
978
+}
979
+
958 980
 function start_nova {
959 981
     # this catches the cells v1 case early
960 982
     _set_singleconductor
... ...
@@ -1431,6 +1431,13 @@ fi
1431 1431
 # Sanity checks
1432 1432
 # =============
1433 1433
 
1434
+# Check that computes are all ready
1435
+#
1436
+# TODO(sdague): there should be some generic phase here.
1437
+if is_service_enabled n-cpu; then
1438
+    is_nova_ready
1439
+fi
1440
+
1434 1441
 # Check the status of running services
1435 1442
 service_check
1436 1443