Browse code

Add a peak memory tracker to dstat

We can see at-a-glance memory usage during the run with dstat but we
have no way to break that down into an overview of where memory is
going.

This adds a peer-service to dstat that records snapshots of the system
during peak memory usage. It checks periodically if there is less
memory available than before and, if so, records the running processes
and vm overview.

The intent is to add logic into the verify-pipeline jobs to use this
report and send statistics on peak memory usage to statsd [1]. We can
then build a picture of memory-usage growth over time. This type of
report would have given better insight into issues such as the one
introduced by Idf3a3a914b54779172776822710b3e52e751b1d1, where
memory usage jumped dramatically after switching to pip versions of
libraries. Tracking details of memory usage is going to be an
important part of future development.

[1] http://graphite.openstack.org/

Change-Id: I4b0a8f382dcaa09331987ab84a68546ec29cbc18

Ian Wienand authored on 2015/04/09 12:51:23
Showing 2 changed files
... ...
@@ -21,11 +21,17 @@ function start_dstat {
21 21
     # A better kind of sysstat, with the top process per time slice
22 22
     DSTAT_OPTS="-tcmndrylpg --top-cpu-adv --top-io-adv"
23 23
     run_process dstat "dstat $DSTAT_OPTS"
24
+
25
+    # To enable peakmem_tracker add:
26
+    #    enable_service peakmem_tracker
27
+    # to your localrc
28
+    run_process peakmem_tracker "$TOP_DIR/tools/peakmem_tracker.sh"
24 29
 }
25 30
 
26 31
 # stop_dstat() - stop the dstat process and its peakmem_tracker peer service
27 32
 function stop_dstat {
28 33
     stop_process dstat
34
+    stop_process peakmem_tracker
29 35
 }
30 36
 
31 37
 # Restore xtrace
32 38
new file mode 100755
... ...
@@ -0,0 +1,96 @@
0
+#!/bin/bash
1
+#
2
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
3
+# not use this file except in compliance with the License. You may obtain
4
+# a copy of the License at
5
+#
6
+#    http://www.apache.org/licenses/LICENSE-2.0
7
+#
8
+# Unless required by applicable law or agreed to in writing, software
9
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
10
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
11
+# License for the specific language governing permissions and limitations
12
+# under the License.
13
+
14
set -o errexit

# Default time to sleep between checks, handed to sleep(1); the -s
# command-line option overrides it.
SLEEP_TIME=20

# MemAvailable is the best estimation and has built-in heuristics
# around reclaimable memory.  However, it is not available until 3.14
# kernel (i.e. Ubuntu LTS Trusty misses it).  In that case, we fall
# back to free+buffers+cache as the available memory.
#
# USE_MEM_AVAILABLE is 1 when /proc/meminfo has a MemAvailable field
# and 0 otherwise; get_mem_available reads it to pick its strategy.
USE_MEM_AVAILABLE=0
if grep -q '^MemAvailable:' /proc/meminfo; then
    USE_MEM_AVAILABLE=1
fi

# get_mem_available [meminfo-file]
#
# Print the amount of available memory on stdout, in the unit used by
# the meminfo file (kB for /proc/meminfo).
#
# Arguments: $1 - meminfo-format file to read (default: /proc/meminfo)
# Globals:   USE_MEM_AVAILABLE (read) - 1 to report the MemAvailable
#            field; anything else, or unset, to report
#            MemFree + Buffers + Cached instead
function get_mem_available {
    local meminfo=${1:-/proc/meminfo}

    # Default the flag explicitly so that an unset USE_MEM_AVAILABLE
    # selects the fallback calculation rather than relying on how an
    # empty string happens to evaluate in an arithmetic test.
    if [[ ${USE_MEM_AVAILABLE:-0} -eq 1 ]]; then
        awk '/^MemAvailable:/ {print $2}' "$meminfo"
    else
        awk '/^MemFree:/ {free=$2}
            /^Buffers:/ {buffers=$2}
            /^Cached:/  {cached=$2}
            END { print free+buffers+cached }' "$meminfo"
    fi
}

# tracker
#
# Check the available memory every SLEEP_TIME, forever.  Whenever we
# see less memory available than at any earlier check, dump a snapshot
# of current usage to stdout; i.e. checking the latest entry in the
# output will give the peak-memory usage.
#
# Globals:  SLEEP_TIME (read) - argument handed to sleep between checks
# Outputs:  one record per new low point, delimited by "[[[" and "]]]"
function tracker {
    local low_point
    local mem_available
    local number='^[0-9]+$'

    # Assign separately from "local" so the status of the command
    # substitution is not hidden behind the status of "local"; a
    # failed sample is deliberately tolerated and treated as "no
    # reading" so that the tracker keeps running.
    low_point=$(get_mem_available) || low_point=""

    while true; do

        mem_available=$(get_mem_available) || mem_available=""

        # An empty or non-numeric sample would compare as 0 and be
        # recorded as a bogus low point that no later reading could
        # beat, so skip it.  If we have no usable low point yet, the
        # first good sample becomes it.
        if [[ $mem_available =~ $number ]] &&
            [[ ! $low_point =~ $number || $mem_available -lt $low_point ]]; then
            low_point=$mem_available
            echo "[[["
            date
            echo "---"
            # the fields of /proc/meminfo differ between kernels
            # (MemAvailable is missing on older ones), so always emit
            # one greppable line with the value we actually compared
            echo "peakmem_tracker low_point: $mem_available"
            echo "---"
            cat /proc/meminfo
            echo "---"
            # would hierarchical view be more useful (-H)?  output is
            # not sorted by usage then, however, and the first
            # question is "what's using up the memory"
            #
            # there are a lot of kernel threads, especially on a 8-cpu
            # system.  do a best-effort removal to improve
            # signal/noise ratio of output.
            #
            # grep exits non-zero when it passes no lines through;
            # do not let that abort the tracker under errexit.
            ps --sort=-pmem -eo pid:10,pmem:6,rss:15,ppid:10,cputime:10,nlwp:8,wchan:25,args:100 |
                grep -v ']$' || true
            echo "]]]"
        fi

        sleep "$SLEEP_TIME"
    done
}

# usage
#
# Print a summary of the command-line options to stderr and exit
# with status 1.
function usage {
    {
        echo "Usage: $0 [-x] [-s N]"
        echo "    -s N    time to sleep between checks (default: 20)"
        echo "    -x      trace execution (set -o xtrace)"
    } 1>&2
    exit 1
}

# main [-x] [-s N]
#
# Parse the command line and then run the tracker.
#   -s N    time to sleep between checks; stored in SLEEP_TIME
#   -x      enable xtrace
# An unknown option, a missing option argument or an unusable -s
# value prints the usage message, which exits the script.
function main {
    # getopts tracks its position in OPTIND; keep it local so this
    # function always parses its own arguments from the first one.
    local OPTIND=1
    local opt
    # a number with an optional fraction and an optional s/m/h/d
    # suffix, i.e. the common forms accepted by sleep
    local interval='^[0-9]+([.][0-9]+)?[smhd]?$'

    while getopts ":s:x" opt; do
        case $opt in
            s)
                # reject a bad interval here, with a message, rather
                # than letting the first sleep in the tracker fail
                if [[ ! $OPTARG =~ $interval ]]; then
                    echo "$0: invalid sleep time: $OPTARG" 1>&2
                    usage
                fi
                SLEEP_TIME=$OPTARG
                ;;
            x)
                set -o xtrace
                ;;
            *)
                usage
                ;;
        esac
    done
    shift $((OPTIND-1))

    tracker
}

main "$@"