Browse code

Run processes without screen

This introduces new run_process() and screen_service() functions and sets the
groundwork to change how DevStack starts services. screen_service() is simply a
direct call to the screen portion of the old screen_it() function and is intended
to run commands that only need to run under screen, such as log file watchers.

run_process() is a replacement for screen_it() (which remains until all of the
services are updated). The usage is similar but requires updates to every current
screen_it() call to remove everything that requires the command to be interpreted
by a shell.

The old run_process() and _run_process() functions are still present as
old_run_process() and _old_run_process() to support the deprecated screen_it()
function. These will all go away in the future once all services have been
confirmed to have been changed over.

There is a similar new set of stop process functions stop_process() and
screen_stop_service(). The old screen_stop() will also remain for the deprecation
period.

As an initial test/demonstration, this review also includes the changes for
lib/cinder to demonstrate what is required for every service.

I included the scripts I used to test this; tests/fake-service.sh and
tests/run-process.sh are quite rough around the edges and may bite. They should
mature into productive members of the testing ecosystem someday.

Change-Id: I03322bf0208353ebd267811735c66f13a516637b

Dean Troyer authored on 2014/08/28 04:13:58
Showing 4 changed files
... ...
@@ -1135,8 +1135,8 @@ function zypper_install {
1135 1135
 # fork.  It includes the dirty work of closing extra filehandles and preparing log
1136 1136
 # files to produce the same logs as screen_it().  The log filename is derived
1137 1137
 # from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
1138
-# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``
1139
-# _run_process service "command-line"
1138
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
1139
+# _run_process service "command-line"
1140 1140
 function _run_process {
1141 1141
     local service=$1
1142 1142
     local command="$2"
... ...
@@ -1155,8 +1155,12 @@ function _run_process {
1155 1155
         export PYTHONUNBUFFERED=1
1156 1156
     fi
1157 1157
 
1158
-    exec /bin/bash -c "$command"
1159
-    die "$service exec failure: $command"
1158
+    # Run under ``setsid`` to force the process to become a session and group leader.
1159
+    # The pid saved can be used with pkill -g to get the entire process group.
1160
+    setsid $command & echo $! >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1161
+
1162
+    # Just silently exit this process
1163
+    exit 0
1160 1164
 }
1161 1165
 
1162 1166
 # Helper to remove the ``*.failure`` files under ``$SERVICE_DIR/$SCREEN_NAME``.
... ...
@@ -1184,61 +1188,63 @@ function is_running {
1184 1184
     return $exitcode
1185 1185
 }
1186 1186
 
1187
-# run_process() launches a child process that closes all file descriptors and
1188
-# then exec's the passed in command.  This is meant to duplicate the semantics
1189
-# of screen_it() without screen.  PIDs are written to
1190
-# ``$SERVICE_DIR/$SCREEN_NAME/$service.pid``
1187
+# Run a single service under screen or directly
1188
+# If the command includes shell metacharacters (;<>*) it must be run using a shell
1191 1189
 # run_process service "command-line"
1192 1190
 function run_process {
1193 1191
     local service=$1
1194 1192
     local command="$2"
1195 1193
 
1196
-    # Spawn the child process
1197
-    _run_process "$service" "$command" &
1198
-    echo $!
1194
+    if is_service_enabled $service; then
1195
+        if [[ "$USE_SCREEN" = "True" ]]; then
1196
+            screen_service "$service" "$command"
1197
+        else
1198
+            # Spawn directly without screen
1199
+            _run_process "$service" "$command" &
1200
+        fi
1201
+    fi
1199 1202
 }
1200 1203
 
1201 1204
 # Helper to launch a service in a named screen
1202 1205
 # Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_NAME``, ``SCREEN_LOGDIR``,
1203 1206
 # ``SERVICE_DIR``, ``USE_SCREEN``
1204
-# screen_it service "command-line"
1205
-function screen_it {
1207
+# screen_service service "command-line"
1208
+# Run a command in a shell in a screen window
1209
+function screen_service {
1210
+    local service=$1
1211
+    local command="$2"
1212
+
1206 1213
     SCREEN_NAME=${SCREEN_NAME:-stack}
1207 1214
     SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1208 1215
     USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1209 1216
 
1210
-    if is_service_enabled $1; then
1217
+    if is_service_enabled $service; then
1211 1218
         # Append the service to the screen rc file
1212
-        screen_rc "$1" "$2"
1219
+        screen_rc "$service" "$command"
1213 1220
 
1214
-        if [[ "$USE_SCREEN" = "True" ]]; then
1215
-            screen -S $SCREEN_NAME -X screen -t $1
1216
-
1217
-            if [[ -n ${SCREEN_LOGDIR} ]]; then
1218
-                screen -S $SCREEN_NAME -p $1 -X logfile ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log
1219
-                screen -S $SCREEN_NAME -p $1 -X log on
1220
-                ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
1221
-            fi
1221
+        screen -S $SCREEN_NAME -X screen -t $service
1222 1222
 
1223
-            # sleep to allow bash to be ready to be send the command - we are
1224
-            # creating a new window in screen and then sends characters, so if
1225
-            # bash isn't running by the time we send the command, nothing happens
1226
-            sleep 3
1227
-
1228
-            NL=`echo -ne '\015'`
1229
-            # This fun command does the following:
1230
-            # - the passed server command is backgrounded
1231
-            # - the pid of the background process is saved in the usual place
1232
-            # - the server process is brought back to the foreground
1233
-            # - if the server process exits prematurely the fg command errors
1234
-            #   and a message is written to stdout and the service failure file
1235
-            # The pid saved can be used in screen_stop() as a process group
1236
-            # id to kill off all child processes
1237
-            screen -S $SCREEN_NAME -p $1 -X stuff "$2 & echo \$! >$SERVICE_DIR/$SCREEN_NAME/$1.pid; fg || echo \"$1 failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/$1.failure\"$NL"
1238
-        else
1239
-            # Spawn directly without screen
1240
-            run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1223
+        if [[ -n ${SCREEN_LOGDIR} ]]; then
1224
+            screen -S $SCREEN_NAME -p $service -X logfile ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log
1225
+            screen -S $SCREEN_NAME -p $service -X log on
1226
+            ln -sf ${SCREEN_LOGDIR}/screen-${service}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${service}.log
1241 1227
         fi
1228
+
1229
+        # sleep to allow bash to be ready to be sent the command - we are
1230
+        # creating a new window in screen and then sending characters, so if
1231
+        # bash isn't running by the time we send the command, nothing happens
1232
+        sleep 3
1233
+
1234
+        NL=`echo -ne '\015'`
1235
+        # This fun command does the following:
1236
+        # - the passed server command is backgrounded
1237
+        # - the pid of the background process is saved in the usual place
1238
+        # - the server process is brought back to the foreground
1239
+        # - if the server process exits prematurely the fg command errors
1240
+        #   and a message is written to stdout and the service failure file
1241
+        # The pid saved can be used in screen_stop() as a process group
1242
+        # id to kill off all child processes
1243
+        screen -S $SCREEN_NAME -p $service -X stuff "$command & echo \$! >$SERVICE_DIR/$SCREEN_NAME/${service}.pid; fg || echo \"$service failed to start\" | tee \"$SERVICE_DIR/$SCREEN_NAME/${service}.failure\"$NL"
1242 1244
     fi
1243 1245
 }
1244 1246
 
... ...
@@ -1276,20 +1282,40 @@ function screen_rc {
1276 1276
 # that did not leave a PID behind
1277 1277
 # Uses globals ``SCREEN_NAME``, ``SERVICE_DIR``, ``USE_SCREEN``
1278 1278
 # screen_stop service
1279
-function screen_stop {
1279
+function screen_stop_service {
1280
+    local service=$1
1281
+
1280 1282
     SCREEN_NAME=${SCREEN_NAME:-stack}
1281 1283
     SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1282 1284
     USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1283 1285
 
1284
-    if is_service_enabled $1; then
1286
+    if is_service_enabled $service; then
1287
+        # Clean up the screen window
1288
+        screen -S $SCREEN_NAME -p $service -X kill
1289
+    fi
1290
+}
1291
+
1292
+# Stop a service process
1293
+# If a PID is available use it, kill the whole process group via TERM
1294
+# If screen is being used kill the screen window; this will catch processes
1295
+# that did not leave a PID behind
1296
+# Uses globals ``SERVICE_DIR``, ``USE_SCREEN``
1297
+# stop_process service
1298
+function stop_process {
1299
+    local service=$1
1300
+
1301
+    SERVICE_DIR=${SERVICE_DIR:-${DEST}/status}
1302
+    USE_SCREEN=$(trueorfalse True $USE_SCREEN)
1303
+
1304
+    if is_service_enabled $service; then
1285 1305
         # Kill via pid if we have one available
1286
-        if [[ -r $SERVICE_DIR/$SCREEN_NAME/$1.pid ]]; then
1287
-            pkill -TERM -P -$(cat $SERVICE_DIR/$SCREEN_NAME/$1.pid)
1288
-            rm $SERVICE_DIR/$SCREEN_NAME/$1.pid
1306
+        if [[ -r $SERVICE_DIR/$SCREEN_NAME/$service.pid ]]; then
1307
+            pkill -g $(cat $SERVICE_DIR/$SCREEN_NAME/$service.pid)
1308
+            rm $SERVICE_DIR/$SCREEN_NAME/$service.pid
1289 1309
         fi
1290 1310
         if [[ "$USE_SCREEN" = "True" ]]; then
1291 1311
             # Clean up the screen window
1292
-            screen -S $SCREEN_NAME -p $1 -X kill
1312
+            screen_stop_service $service
1293 1313
         fi
1294 1314
     fi
1295 1315
 }
... ...
@@ -1325,6 +1351,80 @@ function service_check {
1325 1325
 }
1326 1326
 
1327 1327
 
1328
+# Deprecated Functions
1329
+# --------------------
1330
+
1331
+# _old_run_process() is designed to be backgrounded by old_run_process() to simulate a
1332
+# fork.  It includes the dirty work of closing extra filehandles and preparing log
1333
+# files to produce the same logs as screen_it().  The log filename is derived
1334
+# from the service name and global-and-now-misnamed ``SCREEN_LOGDIR``
1335
+# Uses globals ``CURRENT_LOG_TIME``, ``SCREEN_LOGDIR``, ``SCREEN_NAME``, ``SERVICE_DIR``
1336
+# _old_run_process service "command-line"
1337
+function _old_run_process {
1338
+    local service=$1
1339
+    local command="$2"
1340
+
1341
+    # Undo logging redirections and close the extra descriptors
1342
+    exec 1>&3
1343
+    exec 2>&3
1344
+    exec 3>&-
1345
+    exec 6>&-
1346
+
1347
+    if [[ -n ${SCREEN_LOGDIR} ]]; then
1348
+        exec 1>&${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log 2>&1
1349
+        ln -sf ${SCREEN_LOGDIR}/screen-${1}.${CURRENT_LOG_TIME}.log ${SCREEN_LOGDIR}/screen-${1}.log
1350
+
1351
+        # TODO(dtroyer): Hack to get stdout from the Python interpreter for the logs.
1352
+        export PYTHONUNBUFFERED=1
1353
+    fi
1354
+
1355
+    exec /bin/bash -c "$command"
1356
+    die "$service exec failure: $command"
1357
+}
1358
+
1359
+# old_run_process() launches a child process that closes all file descriptors and
1360
+# then exec's the passed in command.  This is meant to duplicate the semantics
1361
+# of screen_it() without screen.  PIDs are written to
1362
+# ``$SERVICE_DIR/$SCREEN_NAME/$service.pid`` by the spawned child process.
1363
+# old_run_process service "command-line"
1364
+function old_run_process {
1365
+    local service=$1
1366
+    local command="$2"
1367
+
1368
+    # Spawn the child process
1369
+    _old_run_process "$service" "$command" &
1370
+    echo $!
1371
+}
1372
+
1373
+# Compatibility for existing start_XXXX() functions
1374
+# Uses global ``USE_SCREEN``
1375
+# screen_it service "command-line"
1376
+function screen_it {
1377
+    if is_service_enabled $1; then
1378
+        # Append the service to the screen rc file
1379
+        screen_rc "$1" "$2"
1380
+
1381
+        if [[ "$USE_SCREEN" = "True" ]]; then
1382
+            screen_service "$1" "$2"
1383
+        else
1384
+            # Spawn directly without screen
1385
+            old_run_process "$1" "$2" >$SERVICE_DIR/$SCREEN_NAME/$1.pid
1386
+        fi
1387
+    fi
1388
+}
1389
+
1390
+# Compatibility for existing stop_XXXX() functions
1391
+# Stop a service in screen
1392
+# If a PID is available use it, kill the whole process group via TERM
1393
+# If screen is being used kill the screen window; this will catch processes
1394
+# that did not leave a PID behind
1395
+# screen_stop service
1396
+function screen_stop {
1397
+    # Clean up the screen window
1398
+    stop_process $1
1399
+}
1400
+
1401
+
1328 1402
 # Python Functions
1329 1403
 # ================
1330 1404
 
... ...
@@ -431,15 +431,15 @@ function start_cinder {
431 431
         sudo tgtadm --mode system --op update --name debug --value on
432 432
     fi
433 433
 
434
-    screen_it c-api "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
434
+    run_process c-api "$CINDER_BIN_DIR/cinder-api --config-file $CINDER_CONF"
435 435
     echo "Waiting for Cinder API to start..."
436 436
     if ! wait_for_service $SERVICE_TIMEOUT $CINDER_SERVICE_PROTOCOL://$CINDER_SERVICE_HOST:$CINDER_SERVICE_PORT; then
437 437
         die $LINENO "c-api did not start"
438 438
     fi
439 439
 
440
-    screen_it c-sch "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
441
-    screen_it c-bak "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
442
-    screen_it c-vol "cd $CINDER_DIR && $CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
440
+    run_process c-sch "$CINDER_BIN_DIR/cinder-scheduler --config-file $CINDER_CONF"
441
+    run_process c-bak "$CINDER_BIN_DIR/cinder-backup --config-file $CINDER_CONF"
442
+    run_process c-vol "$CINDER_BIN_DIR/cinder-volume --config-file $CINDER_CONF"
443 443
 
444 444
     # NOTE(jdg): For cinder, startup order matters.  To ensure that repor_capabilities is received
445 445
     # by the scheduler start the cinder-volume service last (or restart it) after the scheduler
446 446
new file mode 100755
... ...
@@ -0,0 +1,19 @@
0
+#!/bin/bash
1
+# fake-service.sh - a fake service for start/stop testing
2
+# $1 - sleep time
3
+
4
+SLEEP_TIME=${1:-3}
5
+
6
+LOG=/tmp/fake-service.log
7
+TIMESTAMP_FORMAT=${TIMESTAMP_FORMAT:-"%F-%H%M%S"}
8
+
9
+# duplicate output
10
+exec 1> >(tee -a ${LOG})
11
+
12
+echo ""
13
+echo "Starting fake-service for ${SLEEP_TIME}"
14
+while true; do
15
+    echo "$(date +${TIMESTAMP_FORMAT}) [$$]"
16
+    sleep ${SLEEP_TIME}
17
+done
18
+
0 19
new file mode 100755
... ...
@@ -0,0 +1,109 @@
0
+#!/bin/bash
1
+# tests/run-process.sh - Test DevStack screen_it() and screen_stop()
2
+#
3
+# run-process.sh start|stop|status
4
+#
5
+# Set USE_SCREEN to change the default
6
+#
7
+# This script emulates the basic exec environment in ``stack.sh`` to test
8
+# the process spawn and kill operations.
9
+
10
+if [[ -z $1 ]]; then
11
+    echo "$0 start|stop"
12
+    exit 1
13
+fi
14
+
15
+TOP_DIR=$(cd $(dirname "$0")/.. && pwd)
16
+source $TOP_DIR/functions
17
+
18
+USE_SCREEN=${USE_SCREEN:-False}
19
+
20
+ENABLED_SERVICES=fake-service
21
+
22
+SERVICE_DIR=/tmp
23
+SCREEN_NAME=test
24
+SCREEN_LOGDIR=${SERVICE_DIR}/${SCREEN_NAME}
25
+
26
+
27
+# Kill background processes on exit
28
+trap clean EXIT
29
+clean() {
30
+    local r=$?
31
+    jobs -p
32
+    kill >/dev/null 2>&1 $(jobs -p)
33
+    exit $r
34
+}
35
+
36
+
37
+# Exit on any errors so that errors don't compound
38
+trap failed ERR
39
+failed() {
40
+    local r=$?
41
+    jobs -p
42
+    kill >/dev/null 2>&1 $(jobs -p)
43
+    set +o xtrace
44
+    [ -n "$LOGFILE" ] && echo "${0##*/} failed: full log in $LOGFILE"
45
+    exit $r
46
+}
47
+
48
+function status {
49
+    if [[ -r $SERVICE_DIR/$SCREEN_NAME/fake-service.pid ]]; then
50
+        pstree -pg $(cat $SERVICE_DIR/$SCREEN_NAME/fake-service.pid)
51
+    fi
52
+    ps -ef | grep fake
53
+}
54
+
55
+function setup_screen {
56
+if [[ ! -d $SERVICE_DIR/$SCREEN_NAME ]]; then
57
+    rm -rf $SERVICE_DIR/$SCREEN_NAME
58
+    mkdir -p $SERVICE_DIR/$SCREEN_NAME
59
+fi
60
+
61
+if [[ "$USE_SCREEN" == "True" ]]; then
62
+    # Create a new named screen to run processes in
63
+    screen -d -m -S $SCREEN_NAME -t shell -s /bin/bash
64
+    sleep 1
65
+
66
+    # Set a reasonable status bar
67
+    if [ -z "$SCREEN_HARDSTATUS" ]; then
68
+        SCREEN_HARDSTATUS='%{= .} %-Lw%{= .}%> %n%f %t*%{= .}%+Lw%< %-=%{g}(%{d}%H/%l%{g})'
69
+    fi
70
+    screen -r $SCREEN_NAME -X hardstatus alwayslastline "$SCREEN_HARDSTATUS"
71
+fi
72
+
73
+# Clear screen rc file
74
+SCREENRC=$TOP_DIR/tests/$SCREEN_NAME-screenrc
75
+if [[ -e $SCREENRC ]]; then
76
+    echo -n > $SCREENRC
77
+fi
78
+}
79
+
80
+# Mimic logging
81
+    # Set up output redirection without log files
82
+    # Copy stdout to fd 3
83
+    exec 3>&1
84
+    if [[ "$VERBOSE" != "True" ]]; then
85
+        # Throw away stdout and stderr
86
+        #exec 1>/dev/null 2>&1
87
+        :
88
+    fi
89
+    # Always send summary fd to original stdout
90
+    exec 6>&3
91
+
92
+
93
+if [[ "$1" == "start" ]]; then
94
+    echo "Start service"
95
+    setup_screen
96
+    screen_it fake-service "$TOP_DIR/tests/fake-service.sh"
97
+    sleep 1
98
+    status
99
+elif [[ "$1" == "stop" ]]; then
100
+    echo "Stop service"
101
+    screen_stop fake-service
102
+    status
103
+elif [[ "$1" == "status" ]]; then
104
+    status
105
+else
106
+    echo "Unknown command"
107
+    exit 1
108
+fi