Browse code

Run processes without screen

This introduces new run_process() and screen_service() functions and sets the
groundwork to change how DevStack starts services. screen_service() is simply a
direct call to the screen portion of the old screen_it() function and is intended
to run commands that only need to run under screen, such as log file watchers.

run_process() is a replacement for screen_it() (which remains until all of the
services are updated). The usage is similar but requires updates to every current
screen_it() call to remove everything that requires the command to be interpreted
by a shell.

The old run_process() and _run_process() functions are still present as
old_run_process() and _old_run_process() to support the deprecated screen_it()
function. These will all go away in the future once all services have been
confirmed to have been changed over.

There is a similar new set of stop process functions stop_process() and
screen_stop_service(). The old screen_stop() will also remain for the deprecation
period.

As an initial test/demonstration this review also includes the changes for
lib/cinder to demonstrate what is required for every service.

I included the scripts I used to test this; tests/fake-service.sh and
tests/run-process.sh are quite rough around the edges and may bite. They should
mature into productive members of the testing ecosystem someday.

Conflicts:
functions-common
lib/cinder

Change-Id: I03322bf0208353ebd267811735c66f13a516637b
(cherry picked from commit 3159a821c2e0662278746c5311e187bcfd4d8b75)

Dean Troyer authored on 2014/08/28 04:13:58
Showing 4 changed files
... ...
@@ -982,8 +982,9 @@ function zypper_install {
982 982
 # _run_process() is designed to be backgrounded by run_process() to simulate a
983 983
 # fork.  It includes the dirty work of closing extra filehandles and preparing log
984 984
 # files to produce the same logs as screen_it().  The log filename is derived
985
-# from the service name and global-and-now-misnamed SCREEN_LOGDIR
986
-# _run_process service "command-line"
985
+# from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
986
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
987
+# _run_process service "command-line"
987 988
 function _run_process {
988 989
     local service=$1
989 990
     local command="$2"
... ...
@@ -1002,8 +1003,12 @@ function _run_process {
1002 1002
         export PYTHONUNBUFFERED=1
1003 1003
     fi
1004 1004
 
1005
-    exec /bin/bash -c "$command"
1006
-    die "$service exec failure: $command"
1005
+    # Run under ``setsid`` to force the process to become a session and group leader.
1006
+    # The pid saved can be used with pkill -g to get the entire process group.
1007
+    setsid $command & echo $! >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1008
+
1009
+    # Just silently exit this process
1010
+    exit 0
1007 1011
 }
1008 1012
 
1009 1013
 # Helper to remove the ``*.failure`` files under ``$SERVICE_DIR/$SCREEN_NAME``.
... ...
@@ -1030,59 +1035,63 @@ function is_running {
1030 1030
     return $RC
1031 1031
 }
1032 1032
 
1033
-# run_process() launches a child process that closes all file descriptors and
1034
-# then exec's the passed in command.  This is meant to duplicate the semantics
1035
-# of screen_it() without screen.  PIDs are written to
1036
-# $SERVICE_DIR/$SCREEN_NAME/$service.pid
1033
+# Run a single service under screen or directly
1034
+# If the command includes shell metacharacters (;<>*) it must be run using a shell
1037 1035
 # run_process service "command-line"
1038 1036
 function run_process {
1039 1037
     local service=$1
1040 1038
     local command="$2"
1041 1039
 
1042
-    # Spawn the child process
1043
-    _run_process "$service" "$command" &
1044
-    echo $!
1040
+    if is_service_enabled $service; then
1041
+        if [[ "$USE_SCREEN" = "True" ]]; then
1042
+            screen_service "$service" "$command"
1043
+        else
1044
+            # Spawn directly without screen
1045
+            _run_process "$service" "$command" &
1046
+        fi
1047
+    fi
1045 1048
 }
1046 1049
 
1047 1050
 # Helper to launch a service in a named screen
1048
-# screen_it service "command-line"
1049
-function screen_it {
1051
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_NAME``, ``SCREEN_LOGDIR``,
1052
+# ``SERVICE_DIR``, ``USE_SCREEN``
1053
+# screen_service service "command-line"
1054
+# Run a command in a shell in a screen window
1055
+function screen_service {
1056
+    local service=$1
1057
+    local command="$2"
1058
+
1050 1059
     SCREEN_NAME=${SCREEN_NAME:-stack}
1051 1060
     SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1052 1061
     USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1053 1062
 
1054
-    if is_service_enabled $1; then
1063
+    if is_service_enabled $service; then
1055 1064
         # Append the service to the screen rc file
1056
-        screen_rc "$1" "$2"
1065
+        screen_rc "$service" "$command"
1057 1066
 
1058
-        if [[ "$USE_SCREEN" = "True" ]]; then
1059
-            screen -S $SCREEN_NAME -X screen -t $1
1060
-
1061
-            if [[ -n ${SCREEN_LOGDIR} ]]; then
1062
-                screen -S $SCREEN_NAME -p $1 -X logfile ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log
1063
-                screen -S $SCREEN_NAME -p $1 -X log on
1064
-                ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
1065
-            fi
1067
+        screen -S $SCREEN_NAME -X screen -t $service
1066 1068
 
1067
-            # sleep to allow bash to be ready to be send the command - we are
1068
-            # creating a new window in screen and then sends characters, so if
1069
-            # bash isn't running by the time we send the command, nothing happens
1070
-            sleep 3
1071
-
1072
-            NL=`echo -ne '\015'`
1073
-            # This fun command does the following:
1074
-            # - the passed server command is backgrounded
1075
-            # - the pid of the background process is saved in the usual place
1076
-            # - the server process is brought back to the foreground
1077
-            # - if the server process exits prematurely the fg command errors
1078
-            #   and a message is written to stdout and the service failure file
1079
-            # The pid saved can be used in screen_stop() as a process group
1080
-            # id to kill off all child processes
1081
-            screen -S $SCREEN_NAME -p $1 -X stuff "$2 & echo \$! >$SERVICE_DIR/$SCREEN_NAME/$1.pid; fg || echo \"$1 failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/$1.failure\"$NL"
1082
-        else
1083
-            # Spawn directly without screen
1084
-            run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1069
+        if [[ -n ${SCREEN_LOGDIR} ]]; then
1070
+            screen -S $SCREEN_NAME -p $service -X logfile ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
1071
+            screen -S $SCREEN_NAME -p $service -X log on
1072
+            ln -sf ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${service}.log
1085 1073
         fi
1074
+
1075
+        # sleep to allow bash to be ready to be sent the command - we are
1076
+        # creating a new window in screen and then sending characters, so if
1077
+        # bash isn't running by the time we send the command, nothing happens
1078
+        sleep 3
1079
+
1080
+        NL=`echo -ne '\015'`
1081
+        # This fun command does the following:
1082
+        # - the passed server command is backgrounded
1083
+        # - the pid of the background process is saved in the usual place
1084
+        # - the server process is brought back to the foreground
1085
+        # - if the server process exits prematurely the fg command errors
1086
+        #   and a message is written to stdout and the service failure file
1087
+        # The pid saved can be used in screen_stop() as a process group
1088
+        # id to kill off all child processes
1089
+        screen -S $SCREEN_NAME -p $service -X stuff "$command & echo \$! >$SERVICE_DIR/$SCREEN_NAME/${service}.pid; fg || echo \"$service failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/${service}.failure\"$NL"
1086 1090
     fi
1087 1091
 }
1088 1092
 
... ...
@@ -1118,20 +1127,40 @@ function screen_rc {
1118 1118
 # If screen is being used kill the screen window; this will catch processes
1119 1119
 # that did not leave a PID behind
1120 1120
 # screen_stop service
1121
-function screen_stop {
1121
+function screen_stop_service {
1122
+    local service=$1
1123
+
1122 1124
     SCREEN_NAME=${SCREEN_NAME:-stack}
1123 1125
     SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1124 1126
     USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1125 1127
 
1126
-    if is_service_enabled $1; then
1128
+    if is_service_enabled $service; then
1129
+        # Clean up the screen window
1130
+        screen -S $SCREEN_NAME -p $service -X kill
1131
+    fi
1132
+}
1133
+
1134
+# Stop a service process
1135
+# If a PID is available use it, kill the whole process group via TERM
1136
+# If screen is being used kill the screen window; this will catch processes
1137
+# that did not leave a PID behind
1138
+# Uses globals ``SERVICE_DIR``, ``USE_SCREEN``
1139
+# stop_process service
1140
+function stop_process {
1141
+    local service=$1
1142
+
1143
+    SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1144
+    USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1145
+
1146
+    if is_service_enabled $service; then
1127 1147
         # Kill via pid if we have one available
1128
-        if [[ -r $SERVICE_DIR/$SCREEN_NAME/$1.pid ]]; then
1129
-            pkill -TERM -P -$(cat $SERVICE_DIR/$SCREEN_NAME/$1.pid)
1130
-            rm $SERVICE_DIR/$SCREEN_NAME/$1.pid
1148
+        if [[ -r $SERVICE_DIR/$SCREEN_NAME/$service.pid ]]; then
1149
+            pkill -g $(cat $SERVICE_DIR/$SCREEN_NAME/$service.pid)
1150
+            rm $SERVICE_DIR/$SCREEN_NAME/$service.pid
1131 1151
         fi
1132 1152
         if [[ "$USE_SCREEN" = "True" ]]; then
1133 1153
             # Clean up the screen window
1134
-            screen -S $SCREEN_NAME -p $1 -X kill
1154
+            screen_stop_service $service
1135 1155
         fi
1136 1156
     fi
1137 1157
 }
... ...
@@ -1166,6 +1195,80 @@ function service_check {
1166 1166
 }
1167 1167
 
1168 1168
 
1169
+# Deprecated Functions
1170
+# --------------------
1171
+
1172
+# _old_run_process() is designed to be backgrounded by old_run_process() to simulate a
1173
+# fork.  It includes the dirty work of closing extra filehandles and preparing log
1174
+# files to produce the same logs as screen_it().  The log filename is derived
1175
+# from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
1176
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
1177
+# _old_run_process service "command-line"
1178
+function _old_run_process {
1179
+    local service=$1
1180
+    local command="$2"
1181
+
1182
+    # Undo logging redirections and close the extra descriptors
1183
+    exec 1>&3
1184
+    exec 2>&3
1185
+    exec 3>&-
1186
+    exec 6>&-
1187
+
1188
+    if [[ -n ${SCREEN_LOGDIR} ]]; then
1189
+        exec 1>&${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log 2>&1
1190
+        ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
1191
+
1192
+        # TODO(dtroyer): Hack to get stdout from the Python interpreter for the logs.
1193
+        export PYTHONUNBUFFERED=1
1194
+    fi
1195
+
1196
+    exec /bin/bash -c "$command"
1197
+    die "$service exec failure: $command"
1198
+}
1199
+
1200
+# old_run_process() launches a child process that closes all file descriptors and
1201
+# then exec's the passed in command.  This is meant to duplicate the semantics
1202
+# of screen_it() without screen.  PIDs are written to
1203
+# ``$SERVICE_DIR/$SCREEN_NAME/$service.pid`` by the spawned child process.
1204
+# old_run_process service "command-line"
1205
+function old_run_process {
1206
+    local service=$1
1207
+    local command="$2"
1208
+
1209
+    # Spawn the child process
1210
+    _old_run_process "$service" "$command" &
1211
+    echo $!
1212
+}
1213
+
1214
+# Compatibility for existing start_XXXX() functions
1215
+# Uses global ``USE_SCREEN``
1216
+# screen_it service "command-line"
1217
+function screen_it {
1218
+    if is_service_enabled $1; then
1219
+        # Append the service to the screen rc file
1220
+        screen_rc "$1" "$2"
1221
+
1222
+        if [[ "$USE_SCREEN" = "True" ]]; then
1223
+            screen_service "$1" "$2"
1224
+        else
1225
+            # Spawn directly without screen
1226
+            old_run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1227
+        fi
1228
+    fi
1229
+}
1230
+
1231
+# Compatibility for existing stop_XXXX() functions
1232
+# Stop a service in screen
1233
+# If a PID is available use it, kill the whole process group via TERM
1234
+# If screen is being used kill the screen window; this will catch processes
1235
+# that did not leave a PID behind
1236
+# screen_stop service
1237
+function screen_stop {
1238
+    # Clean up the screen window
1239
+    stop_process $1
1240
+}
1241
+
1242
+
1169 1243
 # Python Functions
1170 1244
 # ================
1171 1245
 
... ...
@@ -514,10 +514,15 @@ function start_cinder {
514 514
         sudo tgtadm --mode system --op update --name debug --value on
515 515
     fi
516 516
 
517
-    screen_it c-api "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
518
-    screen_it c-sch "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
519
-    screen_it c-bak "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
520
-    screen_it c-vol "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
517
+    run_process c-api "$CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
518
+    echo "Waiting for Cinder API to start..."
519
+    if ! wait_for_service $SERVICE_TIMEOUT $CINDER_SERVICE_PROTOCOL://$CINDER_SERVICE_HOST:$CINDER_SERVICE_PORT; then
520
+        die $LINENO "c-api did not start"
521
+    fi
522
+
523
+    run_process c-sch "$CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
524
+    run_process c-bak "$CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
525
+    run_process c-vol "$CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
521 526
 
522 527
     # NOTE(jdg): For cinder, startup order matters.  To ensure that repor_capabilities is received
523 528
     # by the scheduler start the cinder-volume service last (or restart it) after the scheduler
524 529
new file mode 100755
... ...
@@ -0,0 +1,19 @@
0
+#!/bin/bash
1
+# fake-service.sh - a fake service for start/stop testing
2
+# $1 - sleep time
3
+
4
+SLEEP_TIME=${1:-3}
5
+
6
+LOG=/tmp/fake-service.log
7
+TIMESTAMP_FORMAT=${TIMESTAMP_FORMAT:-"%F-%H%M%S"}
8
+
9
+# duplicate output
10
+exec 1> >(tee -a ${LOG})
11
+
12
+echo ""
13
+echo "Starting fake-service for ${SLEEP_TIME}"
14
+while true; do
15
+    echo "$(date +${TIMESTAMP_FORMAT}) [$$]"
16
+    sleep ${SLEEP_TIME}
17
+done
18
+
0 19
new file mode 100755
... ...
@@ -0,0 +1,109 @@
0
+#!/bin/bash
1
+# tests/run-process.sh - Test DevStack screen_it() and screen_stop()
2
+#
3
+# run-process.sh start|stop|status
4
+#
5
+# Set USE_SCREEN to change the default
6
+#
7
+# This script emulates the basic exec environment in ``stack.sh`` to test
8
+# the process spawn and kill operations.
9
+
10
+if [[ -z $1 ]]; then
11
+    echo "$0 start|stop"
12
+    exit 1
13
+fi
14
+
15
+TOP_DIR=$(cd $(dirname "$0")/.. && pwd)
16
+source $TOP_DIR/functions
17
+
18
+USE_SCREEN=${USE_SCREEN:-False}
19
+
20
+ENABLED_SERVICES=fake-service
21
+
22
+SERVICE_DIR=/tmp
23
+SCREEN_NAME=test
24
+SCREEN_LOGDIR=${SERVICE_DIR}/${SCREEN_NAME}
25
+
26
+
27
+# Kill background processes on exit
28
+trap clean EXIT
29
+clean() {
30
+    local r=$?
31
+    jobs -p
32
+    kill >/dev/null 2>&1 $(jobs -p)
33
+    exit $r
34
+}
35
+
36
+
37
+# Exit on any errors so that errors don't compound
38
+trap failed ERR
39
+failed() {
40
+    local r=$?
41
+    jobs -p
42
+    kill >/dev/null 2>&1 $(jobs -p)
43
+    set +o xtrace
44
+    [ -n "$LOGFILE" ] && echo "${0##*/} failed: full log in $LOGFILE"
45
+    exit $r
46
+}
47
+
48
+function status {
49
+    if [[ -r $SERVICE_DIR/$SCREEN_NAME/fake-service.pid ]]; then
50
+        pstree -pg $(cat $SERVICE_DIR/$SCREEN_NAME/fake-service.pid)
51
+    fi
52
+    ps -ef | grep fake
53
+}
54
+
55
+function setup_screen {
56
+if [[ ! -d $SERVICE_DIR/$SCREEN_NAME ]]; then
57
+    rm -rf $SERVICE_DIR/$SCREEN_NAME
58
+    mkdir -p $SERVICE_DIR/$SCREEN_NAME
59
+fi
60
+
61
+if [[ "$USE_SCREEN" == "True" ]]; then
62
+    # Create a new named screen to run processes in
63
+    screen -d -m -S $SCREEN_NAME -t shell -s /bin/bash
64
+    sleep 1
65
+
66
+    # Set a reasonable status bar
67
+    if [ -z "$SCREEN_HARDSTATUS" ]; then
68
+        SCREEN_HARDSTATUS='%{= .} %-Lw%{= .}%> %n%f %t*%{= .}%+Lw%< %-=%{g}(%{d}%H/%l%{g})'
69
+    fi
70
+    screen -r $SCREEN_NAME -X hardstatus alwayslastline "$SCREEN_HARDSTATUS"
71
+fi
72
+
73
+# Clear screen rc file
74
+SCREENRC=$TOP_DIR/tests/$SCREEN_NAME-screenrc
75
+if [[ -e $SCREENRC ]]; then
76
+    echo -n > $SCREENRC
77
+fi
78
+}
79
+
80
+# Mimic logging
81
+    # Set up output redirection without log files
82
+    # Copy stdout to fd 3
83
+    exec 3>&1
84
+    if [[ "$VERBOSE" != "True" ]]; then
85
+        # Throw away stdout and stderr
86
+        #exec 1>/dev/null 2>&1
87
+        :
88
+    fi
89
+    # Always send summary fd to original stdout
90
+    exec 6>&3
91
+
92
+
93
+if [[ "$1" == "start" ]]; then
94
+    echo "Start service"
95
+    setup_screen
96
+    screen_it fake-service "$TOP_DIR/tests/fake-service.sh"
97
+    sleep 1
98
+    status
99
+elif [[ "$1" == "stop" ]]; then
100
+    echo "Stop service"
101
+    screen_stop fake-service
102
+    status
103
+elif [[ "$1" == "status" ]]; then
104
+    status
105
+else
106
+    echo "Unknown command"
107
+    exit 1
108
+fi