|
...
|
...
|
@@ -1058,44 +1058,100 @@ function run_process {
|
|
1058
|
1058
|
echo $!
|
|
1059
|
1059
|
}
|
|
1060
|
1060
|
|
|
|
1061
|
# Stuff a service's command line into its screen window.
# _start_in_screen service "command-line"
#
# Reads:   SCREEN_NAME (default "stack"), SERVICE_DIR (default $DEST/status),
#          SCREEN_LOGDIR, CURRENT_LOG_TIME
# Outputs: echoes the exact string that is stuffed into the window
# Note:    addresses the window with ``-p $service``, so a window with that
#          title is expected to exist in the screen session already.
function _start_in_screen {
    local service=$1
    local cmd=$2
    local screen_name=${SCREEN_NAME:-stack}
    local status_dir=${SERVICE_DIR:-${DEST}/status}
    local service_dir="$status_dir/$screen_name"
    local pid="$service_dir/$service.pid"
    local failure="$service_dir/$service.failure"

    if [[ -n ${SCREEN_LOGDIR:-} ]]; then
        local logfile=${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
        local shortlog=${SCREEN_LOGDIR}/screen-${service}.log
        # this whole dance is done because of slow nodes
        screen -S "$screen_name" -p "$service" -X logfile "$logfile"
        screen -S "$screen_name" -p "$service" -X log on
        # Stable, timestamp-free name pointing at the current log
        ln -sf "$logfile" "$shortlog"
    fi

    # Carriage return: makes the shell in the window execute the stuffed line.
    # Kept local so it does not leak into the caller's environment.
    local nl=$'\015'

    # This fun command does the following:
    # - the passed server command is backgrounded
    # - the pid of the background process is saved in the usual place
    # - the server process is brought back to the foreground
    # - if the server process exits prematurely the fg command errors
    #   and a message is written to stdout and the service failure file
    # The pid saved can be used in screen_stop() as a process group
    # id to kill off all child processes
    #
    # Built once so the logged text and the stuffed text cannot drift apart.
    local stuffed="$cmd & echo \$! >\"$pid\"; fg || echo \"$service failed to start\" | tee \"$failure\"$nl"

    echo "Running: $stuffed"
    screen -S "$screen_name" -p "$service" -X stuff "$stuffed"
}
|
|
|
1091
|
+
|
|
|
1092
|
+
|
|
|
1093
|
# Check for evidence that a service's command reached its screen window.
# _is_running_in_screen service
#
# Reads:   SCREEN_NAME (default "stack"), SERVICE_DIR (default $DEST/status),
#          SCREEN_LOGDIR, CURRENT_LOG_TIME
# Outputs: warnings on stderr describing which marker file is missing
# Returns: 0 if a pid file or a failure file exists for the service and,
#          when SCREEN_LOGDIR is set, the timestamped log file exists too;
#          1 otherwise
function _is_running_in_screen {
    local service=$1
    local screen_name=${SCREEN_NAME:-stack}
    local status_dir=${SERVICE_DIR:-${DEST}/status}
    local service_dir="$status_dir/$screen_name"
    local pid="$service_dir/$service.pid"
    local failure="$service_dir/$service.failure"

    if [[ ! -e "$pid" && ! -e "$failure" ]]; then
        # if we don't have a pid or a failure for why, the command may not
        # have stuffed in there
        echo "Warning: neither $pid nor $failure exist, $service didn't seem to start" >&2
        return 1
    fi

    if [[ -n ${SCREEN_LOGDIR:-} ]]; then
        # if we should be logging, but we don't have a log file, something is wrong
        local logfile=${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
        if [[ ! -e "$logfile" ]]; then
            echo "Warning: expected logfile $logfile not found, something wrong with starting $service" >&2
            return 1
        fi
    fi

    return 0
}
|
|
|
1116
|
+
|
|
1061
|
1117
|
# Helper to launch a service in a named screen
|
|
1062
|
1118
|
# screen_it service "command-line"
|
|
1063
|
1119
|
function screen_it {
    # Arguments: $1 - service name (also used as the screen window title)
    #            $2 - command line to run for the service
    # Reads:     SCREEN_NAME (default "stack"), SERVICE_DIR (default
    #            $DEST/status), USE_SCREEN (default True)
    # Exits the whole script with status 1 if the service cannot be started
    # in screen after ``max_tries`` attempts.
    local service=$1
    local cmd=$2
    local screen_name=${SCREEN_NAME:-stack}
    local status_dir=${SERVICE_DIR:-${DEST}/status}
    local service_dir="$status_dir/$screen_name"
    local pid="$service_dir/$service.pid"
    local max_tries=10
    # Declared separately so ``local`` does not mask trueorfalse's exit status.
    local use_screen
    # $USE_SCREEN is deliberately unquoted: when it is empty no second
    # argument is passed, exactly as before.
    use_screen=$(trueorfalse True $USE_SCREEN)

    if is_service_enabled "$service"; then
        # Append the service to the screen rc file
        screen_rc "$service" "$cmd"

        if [[ "$use_screen" = "True" ]]; then
            # Create the window the command will be stuffed into
            screen -S "$screen_name" -X screen -t "$service"

            # this retry loop brought to you by slow compute nodes, screen raciness,
            # and frustration in upgrading.
            local screen_tries=0
            while (( screen_tries < max_tries )); do
                _start_in_screen "$service" "$cmd"
                if _is_running_in_screen "$service"; then
                    break
                fi

                screen_tries=$(( screen_tries + 1 ))
                if (( screen_tries >= max_tries )); then
                    # Out of attempts: do not claim that a retry will follow
                    echo "Too many retries, giving up"
                    exit 1
                fi
                echo "Failed to start service after $screen_tries attempt(s), retrying"
                sleep 1
            done
        else
            # Spawn directly without screen
            run_process "$service" "$cmd" >"$pid"
        fi
    fi
}
|