Browse code

Add GIT_TIMEOUT variable to watch git operations

During my CI testing of each devstack change I can often see git get
itself stuck and hang indefinitely. I'm not sure if it's transient
network issues, or issues at the remote end (seen with both github.com
and git.openstack.org) but it hits fairly frequently. Retrying the
command usually gets it going again. Searching for "git hanging" and
similar shows it's not entirely uncommon...

This adds a watchdog timeout for remote git operations based on a new
environment variable GIT_TIMEOUT. A command that times out is attempted
up to 3 times in total before giving up; any other git failure is fatal
immediately. The wrapper is applied to the main remote git calls.

Change-Id: I5b0114ca26b7ac2f25993264f761cba9ec8c09e1

Ian Wienand authored on 2014/02/20 11:55:13
Showing 2 changed files
... ...
@@ -498,16 +498,16 @@ function git_clone {
498 498
         if [[ ! -d $GIT_DEST ]]; then
499 499
             [[ "$ERROR_ON_CLONE" = "True" ]] && \
500 500
                 die $LINENO "Cloning not allowed in this configuration"
501
-            git clone $GIT_REMOTE $GIT_DEST
501
+            git_timed clone $GIT_REMOTE $GIT_DEST
502 502
         fi
503 503
         cd $GIT_DEST
504
-        git fetch $GIT_REMOTE $GIT_REF && git checkout FETCH_HEAD
504
+        git_timed fetch $GIT_REMOTE $GIT_REF && git checkout FETCH_HEAD
505 505
     else
506 506
         # do a full clone only if the directory doesn't exist
507 507
         if [[ ! -d $GIT_DEST ]]; then
508 508
             [[ "$ERROR_ON_CLONE" = "True" ]] && \
509 509
                 die $LINENO "Cloning not allowed in this configuration"
510
-            git clone $GIT_REMOTE $GIT_DEST
510
+            git_timed clone $GIT_REMOTE $GIT_DEST
511 511
             cd $GIT_DEST
512 512
             # This checkout syntax works for both branches and tags
513 513
             git checkout $GIT_REF
... ...
@@ -516,7 +516,7 @@ function git_clone {
516 516
             cd $GIT_DEST
517 517
             # set the url to pull from and fetch
518 518
             git remote set-url origin $GIT_REMOTE
519
-            git fetch origin
519
+            git_timed fetch origin
520 520
             # remove the existing ignored files (like pyc) as they cause breakage
521 521
             # (due to the py files having older timestamps than our pyc, so python
522 522
             # thinks the pyc files are correct using them)
... ...
@@ -541,6 +541,37 @@ function git_clone {
541 541
     git show --oneline | head -1
542 542
 }
543 543
 
544
+# git can sometimes get itself infinitely stuck with transient network
545
+# errors or other issues with the remote end.  This wraps git in a
546
+# timeout/retry loop and is intended to watch over non-local git
547
+# processes that might hang.  GIT_TIMEOUT, if set, is passed directly
548
+# to timeout(1); otherwise the default value of 0 maintains the status
549
+# quo of waiting forever.
550
+# usage: git_timed <git-command>
551
+function git_timed() {
552
+    local count=0
553
+    local timeout=0
554
+
555
+    if [[ -n "${GIT_TIMEOUT}" ]]; then
556
+        timeout=${GIT_TIMEOUT}
557
+    fi
558
+
559
+    until timeout -s SIGINT ${timeout} git "$@"; do
560
+        # 124 is timeout(1)'s special return code when it reached the
561
+        # timeout; otherwise assume fatal failure
562
+        if [[ $? -ne 124 ]]; then
563
+            die $LINENO "git call failed: [git $@]"
564
+        fi
565
+
566
+        count=$(($count + 1))
567
+        warn "timeout ${count} for git call: [git $@]"
568
+        if [ $count -eq 3 ]; then
569
+            die $LINENO "Maximum of 3 git retries reached"
570
+        fi
571
+        sleep 5
572
+    done
573
+}
574
+
544 575
 # git update using reference as a branch.
545 576
 # git_update_branch ref
546 577
 function git_update_branch() {
... ...
@@ -571,7 +602,7 @@ function git_update_tag() {
571 571
 
572 572
     git tag -d $GIT_TAG
573 573
     # fetching given tag only
574
-    git fetch origin tag $GIT_TAG
574
+    git_timed fetch origin tag $GIT_TAG
575 575
     git checkout -f $GIT_TAG
576 576
 }
577 577
 
... ...
@@ -69,6 +69,17 @@ fi
69 69
 # (currently only implemented for MySQL backend)
70 70
 DATABASE_QUERY_LOGGING=$(trueorfalse True $DATABASE_QUERY_LOGGING)
71 71
 
72
+# Set a timeout for git operations.  If git is still running when the
73
+# timeout expires, the command is attempted up to 3 times in total.  This is
74
+# in the format for timeout(1):
75
+#
76
+#  DURATION is a floating point number with an optional suffix: 's'
77
+#  for seconds (the default), 'm' for minutes, 'h' for hours or 'd'
78
+#  for days.
79
+#
80
+# Zero disables timeouts
81
+GIT_TIMEOUT=${GIT_TIMEOUT:-0}
82
+
72 83
 # Repositories
73 84
 # ------------
74 85