Browse code

Revert "Build retry loop for screen sessions"

This reverts commit 0afa912e99dc9bad8b490960beb8f0cf85750dcc.

This possibly made things worse, though it coincides in time with the
trusty add, so it's hard to tell. Revert to see if grenade gets
better.

Change-Id: Ic399957fc9d4a7da28b030cdf895df061b2567c8
Related-Bug: #1331274

Sean Dague authored on 2014/06/28 04:21:41
Showing 1 changed files
... ...
@@ -1058,100 +1058,44 @@ function run_process {
1058 1058
     echo $!
1059 1059
 }
1060 1060
 
1061
-function _start_in_screen {
1062
-    local service=$1
1063
-    local cmd=$2
1064
-    local screen_name=${SCREEN_NAME:-stack}
1065
-    local status_dir=${SERVICE_DIR:-${DEST}/status}
1066
-    local service_dir="$status_dir/$screen_name"
1067
-    local pid="$service_dir/$service.pid"
1068
-    local failure="$service_dir/$service.failure"
1069
-
1070
-    if [[ -n ${SCREEN_LOGDIR} ]]; then
1071
-        local logfile=${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
1072
-        local shortlog=${SCREEN_LOGDIR}/screen-${service}.log
1073
-        # this whole dance is done because of slow nodes
1074
-        screen -S $screen_name -p $service -X logfile ${logfile}
1075
-        screen -S $screen_name -p $service -X log on
1076
-        ln -sf ${logfile} ${shortlog}
1077
-    fi
1078
-
1079
-    NL=`echo -ne '\015'`
1080
-    # This fun command does the following:
1081
-    # - the passed server command is backgrounded
1082
-    # - the pid of the background process is saved in the usual place
1083
-    # - the server process is brought back to the foreground
1084
-    # - if the server process exits prematurely the fg command errors
1085
-    #   and a message is written to stdout and the service failure file
1086
-    # The pid saved can be used in screen_stop() as a process group
1087
-    # id to kill off all child processes
1088
-    echo "Running: $cmd & echo \$! >$pid; fg || echo \"$service failed to start\" | tee \"$failure\"$NL"
1089
-    screen -S $screen_name -p $service -X stuff "$cmd & echo \$! >$pid; fg || echo \"$service failed to start\" | tee \"$failure\"$NL"
1090
-}
1091
-
1092
-
1093
-function _is_running_in_screen {
1094
-    local service=$1
1095
-    local screen_name=${SCREEN_NAME:-stack}
1096
-    local status_dir=${SERVICE_DIR:-${DEST}/status}
1097
-    local service_dir="$status_dir/$screen_name"
1098
-    local pid="$service_dir/$service.pid"
1099
-    local failure="$service_dir/$service.failure"
1100
-    if [[ ! -e "$pid" && ! -e "$failure" ]]; then
1101
-        # if we have neither a pid file nor a failure file, the command may not
1102
-        # have been stuffed into the screen window
1103
-        echo "Warning: neither $pid nor $failure exist, $service didn't seem to start"
1104
-        return 1
1105
-    fi
1106
-    if [[ -n ${SCREEN_LOGDIR} ]]; then
1107
-        # if we should be logging, but we don't have a log file, something is wrong
1108
-        local logfile=${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
1109
-        if [[ ! -e "$logfile" ]]; then
1110
-            echo "Warning: expected logfile $logfile not found, something wrong with starting $service"
1111
-            return 1
1112
-        fi
1113
-    fi
1114
-    return 0
1115
-}
1116
-
1117 1061
 # Helper to launch a service in a named screen
1118 1062
 # screen_it service "command-line"
1119 1063
 function screen_it {
1120
-    local service=$1
1121
-    local cmd=$2
1122
-    local screen_name=${SCREEN_NAME:-stack}
1123
-    local status_dir=${SERVICE_DIR:-${DEST}/status}
1124
-    local service_dir="$status_dir/$screen_name"
1125
-    local use_screen=$(trueorfalse True $USE_SCREEN)
1126
-    local pid="$service_dir/$service.pid"
1064
+    SCREEN_NAME=${SCREEN_NAME:-stack}
1065
+    SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1066
+    USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1127 1067
 
1128 1068
     if is_service_enabled $1; then
1129 1069
         # Append the service to the screen rc file
1130
-        screen_rc "$service" "$cmd"
1131
-
1132
-        if [[ "$use_screen" = "True" ]]; then
1133
-            screen -S $screen_name -X screen -t $service
1134
-
1135
-            # this retry loop brought to you by slow compute nodes, screen raciness,
1136
-            # and frustration in upgrading.
1137
-            local screen_tries=0
1138
-            while [ "$screen_tries" -lt 10 ]; do
1139
-                _start_in_screen "$service" "$cmd"
1140
-                if _is_running_in_screen $service; then
1141
-                    screen_tries=10
1142
-                else
1143
-                    screen_tries=$[screen_tries + 1]
1144
-                    echo "Failed to start service after $screen_tries attempt(s), retrying"
1145
-                    if [[ "$screen_tries" -eq 10 ]]; then
1146
-                        echo "Too many retries, giving up"
1147
-                        exit 1
1148
-                    fi
1149
-                    sleep 1
1150
-                fi
1151
-            done
1070
+        screen_rc "$1" "$2"
1071
+
1072
+        if [[ "$USE_SCREEN" = "True" ]]; then
1073
+            screen -S $SCREEN_NAME -X screen -t $1
1074
+
1075
+            if [[ -n ${SCREEN_LOGDIR} ]]; then
1076
+                screen -S $SCREEN_NAME -p $1 -X logfile ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log
1077
+                screen -S $SCREEN_NAME -p $1 -X log on
1078
+                ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
1079
+            fi
1080
+
1081
+            # sleep to allow bash to be ready to be sent the command - we are
1082
+            # creating a new window in screen and then sending characters, so if
1083
+            # bash isn't running by the time we send the command, nothing happens
1084
+            sleep 3
1085
+
1086
+            NL=`echo -ne '\015'`
1087
+            # This fun command does the following:
1088
+            # - the passed server command is backgrounded
1089
+            # - the pid of the background process is saved in the usual place
1090
+            # - the server process is brought back to the foreground
1091
+            # - if the server process exits prematurely the fg command errors
1092
+            #   and a message is written to stdout and the service failure file
1093
+            # The pid saved can be used in screen_stop() as a process group
1094
+            # id to kill off all child processes
1095
+            screen -S $SCREEN_NAME -p $1 -X stuff "$2 & echo \$! >$SERVICE_DIR/$SCREEN_NAME/$1.pid; fg || echo \"$1 failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/$1.failure\"$NL"
1152 1096
         else
1153 1097
             # Spawn directly without screen
1154
-            run_process "$service" "$cmd" >$pid
1098
+            run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1155 1099
         fi
1156 1100
     fi
1157 1101
 }