Browse code

Revert "Build retry loop for screen sessions"

This reverts commit cd0fba6c3803bb79b37f09709378cf120a6824c6.

It appears that this isn't actually making things better, and is
possibly increasing failure rates. Revert to go back to the
tried-and-true model of just sleeping a lot.

Change-Id: Ie630f72d91f16160fa12b3892e5c74378d244cea

Sean Dague authored on 2014/06/28 20:32:31
Showing 1 changed file
... ...
@@ -1044,100 +1044,44 @@ function run_process {
1044 1044
     echo $!
1045 1045
 }
1046 1046
 
1047
-function _start_in_screen {
1048
-    local service=$1
1049
-    local cmd=$2
1050
-    local screen_name=${SCREEN_NAME:-stack}
1051
-    local status_dir=${SERVICE_DIR:-${DEST}/status}
1052
-    local service_dir="$status_dir/$screen_name"
1053
-    local pid="$service_dir/$service.pid"
1054
-    local failure="$service_dir/$service.failure"
1055
-
1056
-    if [[ -n ${SCREEN_LOGDIR} ]]; then
1057
-        local logfile=${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
1058
-        local shortlog=${SCREEN_LOGDIR}/screen-${service}.log
1059
-        # this whole dance is done because of slow nodes
1060
-        screen -S $screen_name -p $service -X logfile ${logfile}
1061
-        screen -S $screen_name -p $service -X log on
1062
-        ln -sf ${logfile} ${shortlog}
1063
-    fi
1064
-
1065
-    NL=`echo -ne '\015'`
1066
-    # This fun command does the following:
1067
-    # - the passed server command is backgrounded
1068
-    # - the pid of the background process is saved in the usual place
1069
-    # - the server process is brought back to the foreground
1070
-    # - if the server process exits prematurely the fg command errors
1071
-    #   and a message is written to stdout and the service failure file
1072
-    # The pid saved can be used in screen_stop() as a process group
1073
-    # id to kill off all child processes
1074
-    echo "Running: $cmd & echo \$! >$pid; fg || echo \"$service failed to start\" | tee \"$failure\"$NL"
1075
-    screen -S $screen_name -p $service -X stuff "$cmd & echo \$! >$pid; fg || echo \"$service failed to start\" | tee \"$failure\"$NL"
1076
-}
1077
-
1078
-
1079
-function _is_running_in_screen {
1080
-    local service=$1
1081
-    local screen_name=${SCREEN_NAME:-stack}
1082
-    local status_dir=${SERVICE_DIR:-${DEST}/status}
1083
-    local service_dir="$status_dir/$screen_name"
1084
-    local pid="$service_dir/$service.pid"
1085
-    local failure="$service_dir/$service.failure"
1086
-    if [[ ! -e "$pid" && ! -e "$failure" ]]; then
1087
-        # if we don't have a pid or a failure for why, the command may not
1088
-        # have stuffed in there
1089
-        echo "Warning: neither $pid nor $failure exist, $service didn't seem to start"
1090
-        return 1
1091
-    fi
1092
-    if [[ -n ${SCREEN_LOGDIR} ]]; then
1093
-        # if we should be logging, but we don't have a log file, something is wrong
1094
-        local logfile=${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
1095
-        if [[ ! -e "$logfile" ]]; then
1096
-            echo "Warning: expected logfile $logfile not found, something wrong with starting $service"
1097
-            return 1
1098
-        fi
1099
-    fi
1100
-    return 0
1101
-}
1102
-
1103 1047
 # Helper to launch a service in a named screen
1104 1048
 # screen_it service "command-line"
1105 1049
 function screen_it {
1106
-    local service=$1
1107
-    local cmd=$2
1108
-    local screen_name=${SCREEN_NAME:-stack}
1109
-    local status_dir=${SERVICE_DIR:-${DEST}/status}
1110
-    local service_dir="$status_dir/$screen_name"
1111
-    local use_screen=$(trueorfalse True $USE_SCREEN)
1112
-    local pid="$service_dir/$service.pid"
1050
+    SCREEN_NAME=${SCREEN_NAME:-stack}
1051
+    SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1052
+    USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1113 1053
 
1114 1054
     if is_service_enabled $1; then
1115 1055
         # Append the service to the screen rc file
1116
-        screen_rc "$service" "$cmd"
1117
-
1118
-        if [[ "$use_screen" = "True" ]]; then
1119
-            screen -S $screen_name -X screen -t $service
1120
-
1121
-            # this retry loop brought to you by slow compute nodes, screen raciness,
1122
-            # and frustration in upgrading.
1123
-            local screen_tries=0
1124
-            while [ "$screen_tries" -lt 10 ]; do
1125
-                _start_in_screen "$service" "$cmd"
1126
-                if _is_running_in_screen $service; then
1127
-                    screen_tries=10
1128
-                else
1129
-                    screen_tries=$[screen_tries + 1]
1130
-                    echo "Failed to start service after $screen_tries attempt(s), retrying"
1131
-                    if [[ "$screen_tries" -eq 10 ]]; then
1132
-                        echo "Too many retries, giving up"
1133
-                        exit 1
1134
-                    fi
1135
-                    sleep 1
1136
-                fi
1137
-            done
1056
+        screen_rc "$1" "$2"
1057
+
1058
+        if [[ "$USE_SCREEN" = "True" ]]; then
1059
+            screen -S $SCREEN_NAME -X screen -t $1
1060
+
1061
+            if [[ -n ${SCREEN_LOGDIR} ]]; then
1062
+                screen -S $SCREEN_NAME -p $1 -X logfile ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log
1063
+                screen -S $SCREEN_NAME -p $1 -X log on
1064
+                ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
1065
+            fi
1066
+
1067
+            # sleep to allow bash to be ready to be sent the command - we are
1068
+            # creating a new window in screen and then sending characters, so if
1069
+            # bash isn't running by the time we send the command, nothing happens
1070
+            sleep 3
1071
+
1072
+            NL=`echo -ne '\015'`
1073
+            # This fun command does the following:
1074
+            # - the passed server command is backgrounded
1075
+            # - the pid of the background process is saved in the usual place
1076
+            # - the server process is brought back to the foreground
1077
+            # - if the server process exits prematurely the fg command errors
1078
+            #   and a message is written to stdout and the service failure file
1079
+            # The pid saved can be used in screen_stop() as a process group
1080
+            # id to kill off all child processes
1081
+            screen -S $SCREEN_NAME -p $1 -X stuff "$2 & echo \$! >$SERVICE_DIR/$SCREEN_NAME/$1.pid; fg || echo \"$1 failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/$1.failure\"$NL"
1138 1082
         else
1139 1083
             # Spawn directly without screen
1140
-            run_process "$service" "$cmd" >$pid
1084
+            run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1141 1085
         fi
1142 1086
     fi
1143 1087
 }