-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmonitor_abo.sh
173 lines (150 loc) · 5.49 KB
/
monitor_abo.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/bin/bash
#
# IMPORTANT: put the nodetool authentication options for your environment
# (plus any other option nodetool may need) into NODETOOL_AUTH.
# Leave it empty when no authentication parameters are required.
NODETOOL_AUTH=""
# USE_JSTACK: 1 when jstack is used, 0 otherwise.
# JSTACK_CMD: give the absolute path of jstack here *if* it is not already
# in $PATH (e.g. /tmp/jvm/java-11-openjdk-amd64/bin/jstack ).
USE_JSTACK=1
JSTACK_CMD=jstack
# Without a Ctrl-C the script ends by itself after this many seconds.
MAX_SECONDS=900
# Sampling frequency handed to sjk.
SJK_INTERVAL=1000ms
# Seconds between two iostat, top and mpstat samples.
OS_STATS_INTERVAL=1
#---- End of normal user customization ----
# Epoch seconds at start-up; embedded in the output file names of this run.
RUN_ID=$(date +%s)
# All timestamps are in ISO format. isodate holds a command line that the
# samplers expand unquoted, so it must stay a plain string.
isodate="date --iso-8601=seconds"
export S_TIME_FORMAT=ISO
# Presumes only ONE DSE process is running: the first matching ps line
# provides the PID (column 1) and the owning user (column 2).
PS_LINE=$(ps -e -o pid,user:20,cmd | grep com.datastax.bdp.DseModule | grep -v grep)
read -r DSE_PID DSE_OWNER _ <<< "${PS_LINE}"
WHOAMI=$(whoami)
# Default: jstack is invoked without sudo.
JSTACK_USES_SUDO=0
# Run ctrl_c when the script receives SIGINT (Ctrl-C).
trap ctrl_c INT

#######################################
# Stops the background samplers and ends the script.
# Also called directly at the end of the script once the monitoring
# period is over.
# Globals:   none
# Arguments: none
# Outputs:   one status line on stdout
# Returns:   does not return; exits the script with status 0
#######################################
ctrl_c() {
  echo "CTRL-C pressed. Terminating background activity"
  # The group-wide TERM below is delivered to this shell as well. Ignore it
  # here so the script is not killed half-way through its own shutdown;
  # processes that are already running keep their own TERM disposition.
  trap '' TERM
  # A negative PID addresses the whole process group, which also reaches the
  # commands started by the background subshells (nodetool, sudo, sleep...).
  if ! kill -TERM -- "-$$" 2> /dev/null; then
    # No process group with this shell's PID exists (the script is not a
    # group leader): fall back to signalling the direct background jobs.
    local job_pids
    job_pids=$(jobs -p)
    if [[ -n "${job_pids}" ]]; then
      # shellcheck disable=SC2086 -- one PID per word is intended
      kill -TERM ${job_pids} 2> /dev/null
    fi
  fi
  # do_end is an optional hook: this file does not define it, so call it
  # only when some other code has provided it.
  if declare -F do_end > /dev/null; then
    do_end
  fi
  # Leave explicitly; otherwise a Ctrl-C would resume the countdown loop.
  exit 0
}
#######################################
# Appends a timestamped `nodetool tpstats` report to
# tpstats-<hostname>-<RUN_ID>.out, pausing 6 seconds after each report.
# Loops until the process is killed.
# Globals:   RUN_ID, isodate, NODETOOL_AUTH (all read)
#######################################
do_tpstats() {
  local outfile
  outfile="tpstats-$(hostname)-${RUN_ID}.out"
  while true; do
    printf '\n'
    # isodate holds a command line, so it is expanded unquoted on purpose.
    ${isodate}
    printf '%s\n' '=========='
    # NODETOOL_AUTH may carry several options: word splitting is wanted.
    nodetool ${NODETOOL_AUTH} tpstats
    sleep 6
  done >> "${outfile}"
}
#######################################
# Waits 2 seconds, then appends a timestamped `nodetool proxyhistograms`
# report to proxyhistograms-<hostname>-<RUN_ID>.out every 60 seconds.
# Loops until the process is killed.
# Globals:   RUN_ID, isodate, NODETOOL_AUTH (all read)
#######################################
do_proxyhistograms() {
  local outfile
  # Initial delay before the first sample.
  sleep 2
  outfile="proxyhistograms-$(hostname)-${RUN_ID}.out"
  while true; do
    printf '\n'
    # isodate holds a command line, so it is expanded unquoted on purpose.
    ${isodate}
    printf '%s\n' '=========='
    # NODETOOL_AUTH may carry several options: word splitting is wanted.
    nodetool ${NODETOOL_AUTH} proxyhistograms
    sleep 60
  done >> "${outfile}"
}
#######################################
# Waits 1 second, then appends a timestamped `nodetool tablehistograms`
# report to tablehistograms-<hostname>-<RUN_ID>.out every 60 seconds.
# Everything the loop writes to stderr is discarded.
# Loops until the process is killed.
# Globals:   RUN_ID, isodate, NODETOOL_AUTH (all read)
#######################################
do_tablehistograms() {
  local outfile
  # Initial delay before the first sample.
  sleep 1
  outfile="tablehistograms-$(hostname)-${RUN_ID}.out"
  while true; do
    printf '\n'
    # isodate holds a command line, so it is expanded unquoted on purpose.
    ${isodate}
    printf '%s\n' '=========='
    # NODETOOL_AUTH may carry several options: word splitting is wanted.
    nodetool ${NODETOOL_AUTH} tablehistograms
    sleep 60
  done >> "${outfile}" 2> /dev/null
}
#######################################
# Waits 5 seconds, then appends a timestamped `nodetool tablestats`
# report to tablestats-<hostname>-<RUN_ID>.out every 60 seconds.
# Loops until the process is killed.
# Globals:   RUN_ID, isodate, NODETOOL_AUTH (all read)
#######################################
do_tablestats() {
  local outfile
  # Initial delay before the first sample.
  sleep 5
  outfile="tablestats-$(hostname)-${RUN_ID}.out"
  while true; do
    printf '\n'
    # isodate holds a command line, so it is expanded unquoted on purpose.
    ${isodate}
    printf '%s\n' '=========='
    # NODETOOL_AUTH may carry several options: word splitting is wanted.
    nodetool ${NODETOOL_AUTH} tablestats
    sleep 60
  done >> "${outfile}"
}
#######################################
# Waits 3 seconds, then appends a timestamped `nodetool netstats`
# report to dse_netstats-<hostname>-<RUN_ID>.out every 15 seconds.
# Loops until the process is killed.
# Globals:   RUN_ID, isodate, NODETOOL_AUTH (all read)
#######################################
do_dse_netstats() {
  local outfile
  # Initial delay before the first sample.
  sleep 3
  outfile="dse_netstats-$(hostname)-${RUN_ID}.out"
  while true; do
    printf '\n'
    # isodate holds a command line, so it is expanded unquoted on purpose.
    ${isodate}
    printf '%s\n' '=========='
    # NODETOOL_AUTH may carry several options: word splitting is wanted.
    nodetool ${NODETOOL_AUTH} netstats
    sleep 15
  done >> "${outfile}"
}
#######################################
# Appends the first 20 lines of a `top` batch report, preceded by a
# timestamp, to os_top_cpu-<hostname>-<RUN_ID>.out, then sleeps
# OS_STATS_INTERVAL seconds. Loops until the process is killed.
# Globals:   RUN_ID, isodate, OS_STATS_INTERVAL (all read)
#######################################
do_top_cpu_procs() {
  local outfile
  outfile="os_top_cpu-$(hostname)-${RUN_ID}.out"
  while true; do
    echo
    # isodate holds a command line, so it is expanded unquoted on purpose.
    ${isodate}
    echo '=========='
    # -n 1 makes top print a single frame and exit. Without it, batch-mode
    # top keeps running after head has taken its 20 lines, and the pipeline
    # only ends when top fails on a later write, which stretches every
    # sample well beyond OS_STATS_INTERVAL.
    top -b -n 1 | head -n 20
    sleep "${OS_STATS_INTERVAL}"
    echo
    echo '=========='
  done >> "${outfile}"
}
#these are more frequent
#######################################
# Runs `nodetool sjk ttop` ordered by CPU with a report interval of
# SJK_INTERVAL, appending its output to ttop-<hostname>-<RUN_ID>.out.
# Globals:   RUN_ID, SJK_INTERVAL, NODETOOL_AUTH (all read)
#######################################
do_ttop() {
  local outfile
  outfile="ttop-$(hostname)-${RUN_ID}.out"
  # NODETOOL_AUTH may carry several options: word splitting is wanted.
  nodetool ${NODETOOL_AUTH} sjk ttop -o CPU -ri "${SJK_INTERVAL}" >> "${outfile}"
}
#######################################
# Prints an operator hint, then runs `nodetool sjk stcap` for MAX_SECONDS
# seconds at SJK_INTERVAL, with stcap-<hostname>-<RUN_ID>.out as its
# -o argument. nodetool's own stdout is discarded.
# Globals:   RUN_ID, MAX_SECONDS, SJK_INTERVAL, NODETOOL_AUTH (all read)
#######################################
do_stcap() {
  local dump_file
  printf '%s\n' "INFO - stcap used to gather stack. Give it an extra 40-50s after the end of the monitoring period before doing Ctrl-C"
  dump_file="stcap-$(hostname)-${RUN_ID}.out"
  # NODETOOL_AUTH may carry several options: word splitting is wanted.
  nodetool ${NODETOOL_AUTH} sjk stcap -o "${dump_file}" -t "${MAX_SECONDS}s" -i "${SJK_INTERVAL}" > /dev/null
}
#######################################
# Appends a `jstack -l` dump of the DSE process to
# jstack-<hostname>-<RUN_ID>.out once per second until killed.
# The dump is taken through `sudo -u <DSE_OWNER>` when JSTACK_USES_SUDO
# is 1, and directly otherwise.
# Globals:   RUN_ID, DSE_OWNER, DSE_PID, JSTACK_CMD, JSTACK_USES_SUDO (read)
# Outputs:   a warning on stdout when the current user is not the DSE
#            owner; an error on stderr when no DSE PID is known
# Returns:   1 when DSE_PID is empty; otherwise does not return
#######################################
do_jstack() {
  local outfile myself
  outfile="jstack-$(hostname)-${RUN_ID}.out"
  myself=$(whoami)
  # Without a target PID every iteration would only run jstack with no
  # process to attach to, so give up instead of looping forever.
  if [[ -z "${DSE_PID}" ]]; then
    echo "ERROR - no DSE process id is known, jstack sampling is skipped" >&2
    return 1
  fi
  # Quoted comparison: an empty DSE_OWNER must not break the test. The
  # message reports the same user name that is being compared.
  if [[ "${myself}" != "${DSE_OWNER}" ]]; then
    echo "WARNING - Current user '${myself}' is not the owner of the DSE process '${DSE_OWNER}', jstack may not work properly"
  fi
  while true; do
    if [[ "${JSTACK_USES_SUDO}" -eq 1 ]]; then
      sudo -u "${DSE_OWNER}" "${JSTACK_CMD}" -l "${DSE_PID}" >> "${outfile}"
    else
      "${JSTACK_CMD}" -l "${DSE_PID}" >> "${outfile}"
    fi
    sleep 1
  done
}
#######################################
# Takes a one-off snapshot of the host and of the node.
# Host facts (hostname, IP, CPUs, memory, process list, mount points,
# block devices) are written to stdout under short headings; six
# nodetool reports each go to their own
# nodetool-<report>-<hostname>-<RUN_ID>.out file.
# Globals:   RUN_ID, NODETOOL_AUTH (read)
#######################################
do_begin() {
  local report
  echo "hostname ---"
  hostname
  echo "IP ---"
  hostname -i
  echo "CPUs ---"
  lscpu
  echo "Memory ----"
  free
  echo "processes"
  ps -efl
  echo "mountpoints ---"
  df -h
  echo "..."
  lsblk --output NAME,KNAME,TYPE,MAJ:MIN,FSTYPE,SIZE,RA,MOUNTPOINT,LABEL
  # Each of these reports is stored in a file of its own.
  for report in status describecluster compactionstats compactionhistory gossipinfo info; do
    # NODETOOL_AUTH may carry several options: word splitting is wanted.
    nodetool ${NODETOOL_AUTH} "${report}" > "nodetool-${report}-$(hostname)-${RUN_ID}.out"
  done
}
# the "main" code -----
# Pre-flight checks, done only when USE_JSTACK is 1:
#  1. JSTACK_CMD must be usable: found through PATH when it is the bare
#     name "jstack", otherwise an existing executable file.
#  2. jstack is run through sudo (JSTACK_USES_SUDO=1) when the current user
#     is not the owner of the DSE process.
# The script exits with status 1 when jstack cannot be found.
if [[ "${USE_JSTACK}" -eq 1 ]]; then
  if [[ "${JSTACK_CMD}" == "jstack" ]]; then
    # Bare command name: the shell has to be able to resolve it.
    if ! command -v "${JSTACK_CMD}" > /dev/null 2>&1; then
      echo "ERROR - the path for the command > ${JSTACK_CMD} < is not in PATH"
      echo " either set JSTACK_CMD to the absolute path for jstack or set USE_JSTACK=0"
      exit 1
    fi
  else # relative/absolute path
    if [[ -x "${JSTACK_CMD}" ]]; then
      echo "INFO - file $JSTACK_CMD exist and executable"
    else
      echo "ERROR - file $JSTACK_CMD does not exist or is non executable"
      echo " either set JSTACK_CMD to the absolute path for jstack or set USE_JSTACK=0"
      exit 1
    fi
  fi
  # Decide whether sudo is needed. Only the need is determined here; whether
  # sudo actually works for this user is not verified.
  if [[ -z "${DSE_OWNER}" ]]; then
    # No DSE process was detected, so there is no owner to compare with.
    echo "WARNING - no running DSE process was found, jstack has no process to sample"
  elif [[ "${WHOAMI}" != "${DSE_OWNER}" ]]; then
    echo "jstack will need sudo as current user $WHOAMI is not the same as the DSE process owner $DSE_OWNER"
    JSTACK_USES_SUDO=1
  fi
fi
# Take the one-off host/cluster snapshot first; everything do_begin prints
# is appended to common-<hostname>-<RUN_ID>.out.
echo "gather-begin"
do_begin >> common-`hostname`-$RUN_ID.out
# Start every periodic sampler as a background job of this shell.
echo "gather loop actions"
do_tpstats &
do_proxyhistograms &
do_tablehistograms &
do_tablestats &
do_dse_netstats &
do_top_cpu_procs &
# Thread stacks come from jstack when USE_JSTACK is 1, from sjk stcap otherwise.
if [ $USE_JSTACK -eq 1 ] ; then
do_jstack &
else
do_stcap &
fi
# OS-level samplers, one sample every OS_STATS_INTERVAL seconds.
# NOTE(review): unlike the other outputs, these two file names carry no
# RUN_ID, so successive runs append to the same files - confirm this is intended.
iostat -x -c -d -t $OS_STATS_INTERVAL >> iostat-`hostname`.out &
mpstat -P ALL -I SCPU -u $OS_STATS_INTERVAL >> mpstat-`hostname`.out &
do_ttop &
echo "launched commands, press Ctrl-C to exit, or wait " $MAX_SECONDS " seconds for the script to complete automatically"
# Show the background jobs that were just started.
echo "children list "
jobs
# Let the samplers run for MAX_SECONDS seconds. The remaining time is
# rewritten in place on one terminal line (carriage return, no newline).
for (( remaining = MAX_SECONDS; remaining > 0; remaining-- )); do
  printf '%s \r' "${remaining}"
  sleep 1
done
echo "end of script"
# Time is up: reuse the Ctrl-C handler to stop the background samplers.
ctrl_c