diff --git a/apache/ganglia_apache.pl b/apache/ganglia_apache.pl new file mode 100644 index 0000000..486a5da --- /dev/null +++ b/apache/ganglia_apache.pl @@ -0,0 +1,143 @@ +#!/usr/bin/perl +# +# script found at http://ganglia.sourceforge.net/gmetric/view.php?id=18 +# +# Feeds ganglia with web server error rate information. +# +# Can be called by Apache by setting up a special logger: +# +# LogFormat "%>s" status_only +# CustomLog "|/path/to/apache-logs-to-ganglia.pl -d 10" status_only +# +# +# Author: Nicolas Marchildon (nicolas@marchildon.net) +# Date: Date: 2002/11/26 04:15:19 +# Revision: Revision: 1.3 +# +# Modified by Ben Hartshorne +# $Header: /var/lib/cvs/ops/ganglia/ganglia_apache.pl,v 1.1 2006/07/11 17:29:27 ben Exp $ + +use Getopt::Long; + +# Defaults +$DELAY = 20; +$METRIC = 'Apache'; +$GMETRIC = "/usr/bin/gmetric"; +$GMETRIC_ARGS="-c /etc/gmond.conf"; + +# Parse command line +GetOptions( { d => \$DELAY, delay => \$DELAY, + m => \$METRIC, metric => \$METRIC + }, + 'd|delay=i', + 'p|port=i', + 'h|host=s', + 'm|metric=s'); + +# Validate command line +if ( length($DELAY) == 0 + || length($METRIC) == 0) { + print STDERR <) { + parse_line $_; + } + alarm 0; + }; + if ($@ and $@ !~ /alarm clock restart/) { die } + report; +} + diff --git a/disk/disk_gmetric.sh b/disk/disk_gmetric.sh new file mode 100644 index 0000000..c3daea5 --- /dev/null +++ b/disk/disk_gmetric.sh @@ -0,0 +1,255 @@ +#!/bin/bash + +### $Header: /var/lib/cvs/ops/ganglia/disk_gmetric.sh,v 1.5 2007/11/30 17:29:27 ben Exp $ + +### this script reports disk metrics to ganglia. +### It should be called from cron every n minutes. +### It will report blocks per second on each disk, +### and will automatically adjust for whatever +### timeframe it is called + +### Copyright Simply Hired, Inc. 
2006 +### License to use, modify, and distribute under the GPL +### http://www.gnu.org/licenses/gpl.txt + +VERSION=1.5 + +GMETRIC="/usr/bin/gmetric" +GMETRIC_ARGS="-c /etc/gmond.conf" +STATEFILE="/var/lib/ganglia/metrics/io.stats" +date=`date +%s` +iostat="/usr/bin/iostat" + +ERROR_CREATE="/tmp/disk_gmetric_create_statefile_failed" +ERROR_IOSTAT="/tmp/disk_gmetric_no_iostat" +ERROR_DEVNAMES="/tmp/disk_gmetric_bad_devname" +ERROR_DEVNAMES2="/tmp/disk_gmetric_bad_devname_didnt_fix" +ERROR_GMETRIC="/tmp/disk_gmetric_no_gmetric" +ERROR_TIMEDIFF="/tmp/disk_gmetric_timediff" +ERROR_NOTROOT="/tmp/disk_gmetric_notroot" + +if [ $UID -ne 0 ] +then + if [ -e $ERROR_NOTROOT ] ; then exit 1; fi + echo "Error: this script must be run as root." + touch $ERROR_NOTROOT + exit 1 +fi +rm -f $ERROR_NOTROOT + +if [ "x$1" == "x-h" ] +then + echo "Usage: disk_gmetric.sh [--clean]" + echo " --clean delete all tmp files" + exit 0 +fi + +if [ "x$1" == "x--clean" ] +then + # remove every error flag file defined above, plus the statefile + rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAMES $ERROR_DEVNAMES2 $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT $STATEFILE + retval=$? + if [ $retval -ne 0 ] + then + echo "failed to clean up." + exit 1 + else + echo "All cleaned up." + exit 0 + fi +fi + +# save and turn off STDERR for the statefile tests +exec 3>&2 +exec 2>/dev/null + +# if the GMETRIC program isn't installed, complain +if [ ! -e $GMETRIC ] +then + if [ -e $ERROR_GMETRIC ] ; then exit 1; fi + echo "" + echo "Error: GMETRIC doesn't seem to be installed." + echo "$GMETRIC doesn't exist." + echo "" + touch $ERROR_GMETRIC + exit 1 +fi + +# if the iostat program isn't installed, complain +if [ ! -e $iostat ] +then + if [ -e $ERROR_IOSTAT ] + then + exit 1 + fi + echo "" + echo "Error: iostat doesn't seem to be installed." + echo "$iostat doesn't exist." + echo "" + touch $ERROR_IOSTAT + exit 1 +fi + +# if the statefile doesn't exist, we either haven't +# run yet or there's something bigger wrong. +if [ !
-d `dirname $STATEFILE` ] + then + mkdir -p `dirname $STATEFILE` + fi + echo "$date" > $STATEFILE + # skip the three header lines of the iostat report + $iostat -d | tail -n +4 >> $STATEFILE + if [ ! -e $STATEFILE ] + then + # if it didn't exist and we couldn't create + # it, we should just scream bloody murder and die. + # only scream once though... + if [ -e $ERROR_CREATE ] + then + exit 1 + fi + echo "" + echo "ERROR: couldn't create $STATEFILE" + echo "" + touch $ERROR_CREATE + exit 1 + fi + echo "Created statefile. Exitting." + exit 0 +fi + +# restore stderr +exec 2>&3 +exec 3>&- + +# this script uses iostat (part of the sysstat package) +# to retrieve disk metrics +stats=(`$iostat -d | tail -n +4`) +old_stats=(`cat $STATEFILE`) +old_date=${old_stats[0]} + +read=0 +write=0 +old_read=0 +old_write=0 +read_sum=0 +write_sum=0 + +### function get_rw sets the variables $devname, $read and $write +### to the name, total number of read blocks and write blocks +### for a device (words 0, 4 and 5 of its 6-word record in $stats). +### Which device is specified as an argument to the function. +### The function returns 1 if an invalid device number +### was specified. +function get_rw() { + base=$(($1 * 6 )) + if [ "k${stats[$base]}" == "k" ] + then + # we're done looping + return 1; + else + devname=${stats[$base]} + read=${stats[$(($base + 4))]} + write=${stats[$(($base + 5))]} + return 0 + fi +} + +### function get_old_rw does the same for $old_stats; the extra +### offset of 1 skips the date stored as the first word of the statefile. +function get_old_rw() { + base=$(($1 * 6 )) + base=$((base + 1)) + if [ "k${old_stats[$base]}" == "k" ] + then + # we're done looping + return 1; + else + old_devname=${old_stats[$base]} + old_read=${old_stats[$(($base + 4))]} + old_write=${old_stats[$(($base + 5))]} + return 0 + fi +} + +time_diff=$(($date - $old_date)) + + +devnum=0 +get_rw $devnum +get_old_rw $devnum +res=$? +while [ $res -eq 0 ] +do + # if devname and old_devname aren't the same, + # this whole function is invalid. + if [ $devname != $old_devname ] + then + if [ -e $ERROR_DEVNAMES ] + then + if [ -e $ERROR_DEVNAMES2 ] ; then exit 1; fi + echo "Sorry, my attempt at fixing the problem failed."
+ echo "It's now up to you, dear human." + touch $ERROR_DEVNAMES2 + exit 1 + fi + echo "something is broken." + echo "devnames are not the same." + echo "devname=$devname old_devname=$old_devname" + echo "I'm backing up the current statefile ($STATEFILE) " + echo "and will recreate it next time to see if that fixes this." + mydate=`date +%Y%m%d%H%M%S` + mv -fv $STATEFILE{,.${mydate}} + touch $ERROR_DEVNAMES + exit 1 + fi + # names match again: clear both mismatch flags so a later mismatch is reported + rm -f $ERROR_DEVNAMES $ERROR_DEVNAMES2 + #devname, read, write, old_devname, old_read, old_write + # are all set. calculate stat/sec and report. + read_diff=$(($read - $old_read)) + write_diff=$(($write - $old_write)) + # if read_diff or write_diff are less than 0, the counter has wrapped + # and we should reset ourselves + if [ `expr $read_diff \< 0` -eq 1 -o `expr $write_diff \< 0` -eq 1 ] + then + #just write out the new stats and exit; there's nothing we can do + echo "$date" > $STATEFILE + $iostat -d | tail -n +4 >> $STATEFILE + exit 1 + fi + # if the system gets backed up and multiple invocations are launched + # at the same time, the time difference between them is 0 and the + # metric is meaningless. + if [ $time_diff -eq 0 ] + then + if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi + echo "something is broken." + echo "time_diff is 0." + touch $ERROR_TIMEDIFF + exit 1 + fi + rm -f $ERROR_TIMEDIFF + rps=`echo "scale=3;$read_diff / $time_diff" | bc` + wps=`echo "scale=3;$write_diff / $time_diff" | bc` + + read_sum=`echo "scale=3;$read_sum + $rps" | bc` + write_sum=`echo "scale=3;$write_sum + $wps" | bc` + + # report what we have calculated + $GMETRIC $GMETRIC_ARGS --name="${devname}_reads" --value="$rps" --type="float" --units="blocks/sec" + $GMETRIC $GMETRIC_ARGS --name="${devname}_writes" --value="$wps" --type="float" --units="blocks/sec" + +# echo "$devname $rps $wps $read_sum $write_sum " >> /tmp/foo.txt + + devnum=$((devnum + 1)) + get_rw $devnum + get_old_rw $devnum + res=$?
+done + +$GMETRIC $GMETRIC_ARGS --name="disk_reads" --value="$read_sum" --type="float" --units="blocks/sec" +$GMETRIC $GMETRIC_ARGS --name="disk_writes" --value="$write_sum" --type="float" --units="blocks/sec" + +echo "$date" > $STATEFILE +$iostat -d | tail -n +4 >> $STATEFILE + +rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAMES2 $ERROR_DEVNAMES $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT + diff --git a/disk/disk_wait_gmetric.sh b/disk/disk_wait_gmetric.sh new file mode 100644 index 0000000..fac5d91 --- /dev/null +++ b/disk/disk_wait_gmetric.sh @@ -0,0 +1,180 @@ +#!/bin/bash + +### $Header: /var/lib/cvs/ops/ganglia/disk_wait_gmetric.sh,v 1.3 2006/07/11 17:29:27 ben Exp $ + +### this script reports disk metrics to ganglia. +### It should be called from cron every n minutes. +### It will report the await and %util figures of each disk, +### as measured by iostat over a fixed sampling window, +### independent of how often it is called + +### Copyright Simply Hired, Inc. 2006 +### License to use, modify, and distribute under the GPL +### http://www.gnu.org/licenses/gpl.txt + +VERSION=1.0 + +GMETRIC="/usr/bin/gmetric" +GMETRIC_ARGS="-c /etc/gmond.conf" +STATEFILE="/var/lib/ganglia/metrics/io_wait.stats" +date=`date +%s` +iostat="/usr/bin/iostat" + +ERROR_CREATE="/tmp/disk_wait_gmetric_create_statefile_failed" +ERROR_IOSTAT="/tmp/disk_wait_gmetric_no_iostat" +ERROR_DEVNAMES="/tmp/disk_wait_gmetric_bad_devname" +ERROR_DEVNAMES2="/tmp/disk_wait_gmetric_bad_devname_didnt_fix" +ERROR_GMETRIC="/tmp/disk_wait_gmetric_no_gmetric" +ERROR_TIMEDIFF="/tmp/disk_wait_gmetric_timediff" +ERROR_NOTROOT="/tmp/disk_wait_gmetric_notroot" + +if [ $UID -ne 0 ] +then + if [ -e $ERROR_NOTROOT ] ; then exit 1; fi + echo "Error: this script must be run as root."
+ touch $ERROR_NOTROOT + exit 1 +fi +rm -f $ERROR_NOTROOT + +if [ "x$1" == "x-h" ] +then + echo "Usage: disk_wait_gmetric.sh [--clean]" + echo " --clean delete all tmp files" + exit 0 +fi + +if [ "x$1" == "x--clean" ] +then + # remove every error flag file plus the statefile + rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAMES $ERROR_DEVNAMES2 $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT $STATEFILE + retval=$? + if [ $retval -ne 0 ] + then + echo "failed to clean up." + exit 1 + else + echo "All cleaned up." + exit 0 + fi +fi + +# save and turn off STDERR for the statefile tests +exec 3>&2 +exec 2>/dev/null + +# if the GMETRIC program isn't installed, complain +if [ ! -e $GMETRIC ] +then + if [ -e $ERROR_GMETRIC ] ; then exit 1; fi + echo "" + echo "Error: GMETRIC doesn't seem to be installed." + echo "$GMETRIC doesn't exist." + echo "" + touch $ERROR_GMETRIC + exit 1 +fi + +# if the iostat program isn't installed, complain +if [ ! -e $iostat ] +then + if [ -e $ERROR_IOSTAT ] + then + exit 1 + fi + echo "" + echo "Error: iostat (from the package sysstat) doesn't seem to be installed." + echo "$iostat doesn't exist." + echo "" + touch $ERROR_IOSTAT + exit 1 +fi + +# if the statefile doesn't exist, we either haven't +# run yet or there's something bigger wrong. +if [ ! -e $STATEFILE ] +then + if [ ! -d `dirname $STATEFILE` ] + then + mkdir -p `dirname $STATEFILE` + fi + # iostat -x 1 2 gives a summary and a report for the last second + # we're only interested in the second half. We count the number of + # lines, strip the first, divide by 2, and strip the header. + # this gives us just the interesting part. + tot_lines=`$iostat -x 1 2 | grep -v "^$" | wc -l` + rel_lines=`expr \( $tot_lines - 1 \) / 2 - 1` + echo $tot_lines > $STATEFILE + echo $rel_lines >> $STATEFILE + + if [ ! -e $STATEFILE ] + then + # if it didn't exist and we couldn't create + # it, we should just scream bloody murder and die. + # only scream once though...
+ if [ -e $ERROR_CREATE ] + then + exit 1 + fi + echo "" + echo "ERROR: couldn't create $STATEFILE" + echo "" + touch $ERROR_CREATE + exit 1 + fi + echo "Created statefile. Exitting." + exit 0 +fi + +# restore stderr +exec 2>&3 +exec 3>&- + +# this script uses iostat (part of the sysstat package) +# to retrieve disk metrics. The statefile holds two numbers: +# the total iostat line count and the number of relevant trailing lines. +tot_lines=`$iostat -x 1 2 | grep -v "^$" | wc -l` +old_stats=(`cat $STATEFILE`) +old_tot_lines=${old_stats[0]} + +# a different line count means the saved rel_lines can no longer be trusted +if [ $tot_lines -ne $old_tot_lines ] +then + echo "something is broken." + echo "the number of lines of iostat output has changed" + echo "current tot_lines=$tot_lines old_tot_lines=$old_tot_lines" + echo "I'm backing up the current statefile ($STATEFILE) " + echo "and will recreate it next time to see if that fixes this." + mydate=`date +%Y%m%d%H%M%S` + mv -fv $STATEFILE{,.${mydate}} + touch $ERROR_DEVNAMES + exit 1 +fi + +rel_lines=${old_stats[1]} +#stats=(`$iostat -x 30 2 | grep -v "^$" | tail -$rel_lines`) +# sample for 5 seconds and keep only the last $rel_lines non-empty lines +stats=(`$iostat -x 5 2 | grep -v "^$" | tail -$rel_lines`) +# the default gmond already reports this one...
+#iowait=${stats[3]} + +# only submit cpu_waitio when iowait has actually been collected; +# with the assignment above commented out it is empty and nothing is sent +if [ -n "$iowait" ] +then + $GMETRIC $GMETRIC_ARGS --name="cpu_waitio" --value="$iowait" --type="float" --units="%" +fi + +# walk the per-device rows: 14 words per row, first device name at word 19 +res=0 +index=19 +while [ $res -eq 0 ] +do + devname=${stats[$index]} + await=${stats[$(($index + 11))]} + util=${stats[$(($index + 13))]} + + $GMETRIC $GMETRIC_ARGS --name="${devname}_await" --value="$await" --type="float" --units="millisec" + $GMETRIC $GMETRIC_ARGS --name="${devname}_util" --value="$util" --type="float" --units="%" + + index=$(($index + 14)) + #if we're done, cut out of the loop + if [ "k${stats[$index]}" == "k" ] + then + res=1 + fi +done + +#cleanup +rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAMES2 $ERROR_DEVNAMES $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT + diff --git a/memcached/mcd_gmetric.sh b/memcached/mcd_gmetric.sh new file mode 100755 index 0000000..6f8a9bc --- /dev/null +++ b/memcached/mcd_gmetric.sh @@ -0,0 +1,234 @@ +#!/bin/bash + +### $Id: mcd_gmetric.sh 16661 2006-11-07 00:56:33Z ben $ + +### This script queries a memcached server running +### on localhost and reports a few statistics to +### ganglia. +### It reports +### *mcd_curr_items - the number of objects stored +### *mcd_curr_bytes - current bytes used +### *mcd_curr_conns - current number of connections +### *mcd_hit_perc - hits / gets for current time duration +### (current hit percentage) +### For more description on any of these metrics, +### see the protocols.txt file in the MCD docs. + +### Copyright Simply Hired, Inc.
2006 +### License to use, modify, and distribute under the GPL +### http://www.gnu.org/licenses/gpl.txt + +VERSION=1.1 + +GMETRIC="/usr/bin/gmetric" +GMETRIC_ARGS="-c /etc/gmond.conf" +STATEFILE="/var/lib/ganglia/metrics/mcd.stats" +ERROR_NOTROOT="/tmp/mcd_gmetric_notroot" +ERROR_CANT_CONNECT="/tmp/mcd_gmetric_cant_connect" +ERROR_CREATE="/tmp/mcd_gmetric_create_statefile_failed" +ERROR_GETS_EMPTY="/tmp/mcd_gets_empty" +# flag files tested further down; left undefined, "[ -e ]" is always true +# and the script would exit without ever printing its error message +ERROR_GMETRIC="/tmp/mcd_gmetric_no_gmetric" +ERROR_TIMEDIFF="/tmp/mcd_gmetric_timediff" + +MCD_CONF="/etc/sysconfig/memcached" +MCD_DEFAULT_PORT="11211" + +date=`date +%s` + +if [ $UID -ne 0 ] +then + if [ -e $ERROR_NOTROOT ] ; then exit 1; fi + echo "Error: this script must be run as root." + touch $ERROR_NOTROOT + exit 1 +fi +rm -f $ERROR_NOTROOT + +while [ -n "$1" ] +do + case "x$1" in + "x-h" | "x--help" ) + echo "Usage: mcd_gmetric.sh [--clean] [--config ]" + echo " --clean delete all tmp files" + echo " --config the location of the mcd config file to read" + echo " (default ${MCD_CONF})" + exit 0 + ;; + "x--clean" ) + rm -f $STATEFILE $ERROR_NOTROOT $ERROR_CANT_CONNECT $ERROR_CREATE $ERROR_GETS_EMPTY $ERROR_GMETRIC $ERROR_TIMEDIFF + retval=$? + if [ $retval -ne 0 ] + then + echo "failed to clean up." + exit 1 + else + echo "All cleaned up." + exit 0 + fi + ;; + "x--config" ) + shift + mcd_config=$1 + if [ ! -n "$mcd_config" ] + then + echo "mcd configuration filename required" + exit 1 + fi + if [ ! -e "$mcd_config" ] + then + echo "mcd configuration file does not exist" + exit 1 + fi + if [ ! -r "$mcd_config" ] + then + echo "mcd configuration file cannot be read" + exit 1 + fi + # the config file is expected to set PORT + source ${mcd_config} + MCD_PORT=${PORT} + ;; + *) + echo "unrecognized option." + exit 1 + ;; + esac + shift +done + +# set default MCD port if none specified +MCD_PORT=${MCD_PORT:-$MCD_DEFAULT_PORT} + +# if the GMETRIC program isn't installed, complain +if [ ! -e $GMETRIC ] +then + if [ -e $ERROR_GMETRIC ] ; then exit 1; fi + echo "" + echo "Error: GMETRIC doesn't seem to be installed." + echo "$GMETRIC doesn't exist."
+ echo "" + touch $ERROR_GMETRIC + exit 1 +fi + +# get current statistics +exec 3>&2 #save STDERR on fd 3 and turn it off +exec 2>/dev/null +stats_array=(`echo "stats" | nc localhost $MCD_PORT`) +retval=$? +exec 2>&3 #turn on STDERR again from the saved fd 3 +exec 3>&- + +if [ $retval -ne 0 ] +then + if [ -e $ERROR_CANT_CONNECT ] ; then exit 1 ; fi + echo "I can't connect to mcd." + echo "Bummer. " + touch $ERROR_CANT_CONNECT + exit 1 +fi +# connected fine: clear the flag so the next outage is reported again +rm -f $ERROR_CANT_CONNECT + +# the indexes below are word positions in the "stats" reply; the tr set is +# quoted so the shell cannot glob-expand it against a file in the cwd +mcd_curr_items=`echo ${stats_array[23]}|tr -c -d '0-9'` #this tr thing is because there's a trailing ^M on the string from netcat that breaks bc. +mcd_curr_bytes=`echo ${stats_array[29]}|tr -c -d '0-9'` +mcd_curr_conns=`echo ${stats_array[32]}|tr -c -d '0-9'` +mcd_total_gets=`echo ${stats_array[41]}|tr -c -d '0-9'` +mcd_total_sets=`echo ${stats_array[44]}|tr -c -d '0-9'` +mcd_total_hits=`echo ${stats_array[47]}|tr -c -d '0-9'` + +if [ -z "$mcd_total_gets" ] +then +# this actually happens rather often for some reason, so I'm just going to fail silently. +# if [ -e $ERROR_GETS_EMPTY ] ; then exit 1 ; fi +# echo "" +# echo "ERROR: mcd_total_gets empty." +# echo "" + exit 1 +fi +rm -f $ERROR_GETS_EMPTY + + +# save and turn off STDERR for the statefile tests +exec 3>&2 +exec 2>/dev/null + +# if the statefile doesn't exist, we either haven't +# run yet or there's something bigger wrong. +if [ ! -e $STATEFILE ] +then + if [ ! -d `dirname $STATEFILE` ] + then + mkdir -p `dirname $STATEFILE` + fi + echo "$date $mcd_curr_items $mcd_curr_bytes $mcd_curr_conns $mcd_total_gets $mcd_total_sets $mcd_total_hits" > $STATEFILE + if [ ! -e $STATEFILE ] + then + # if it didn't exist and we couldn't create + # it, we should just scream bloody murder and die. + # only scream once though... + if [ -e $ERROR_CREATE ] + then + exit 1 + fi + echo "" + echo "ERROR: couldn't create $STATEFILE" + echo "" + touch $ERROR_CREATE + exit 1 + fi + echo "Created statefile. Exitting."
+ exit 0 +fi + +# restore stderr +exec 2>&3 +exec 3>&- + +# statefile layout (one line): date items bytes conns gets sets hits +old_stats_array=(`cat $STATEFILE`) +old_date=${old_stats_array[0]} +old_mcd_curr_items=${old_stats_array[1]} +old_mcd_curr_bytes=${old_stats_array[2]} +old_mcd_curr_conns=${old_stats_array[3]} +old_mcd_total_gets=${old_stats_array[4]} +old_mcd_total_sets=${old_stats_array[5]} +old_mcd_total_hits=${old_stats_array[6]} + +# save the current sample for the next run before doing any arithmetic +echo "$date $mcd_curr_items $mcd_curr_bytes $mcd_curr_conns $mcd_total_gets $mcd_total_sets $mcd_total_hits" > $STATEFILE + +time_diff=$(($date - $old_date)) +mcd_total_gets_diff=$(($mcd_total_gets - $old_mcd_total_gets)) +mcd_total_sets_diff=$(($mcd_total_sets - $old_mcd_total_sets)) +mcd_total_hits_diff=$(($mcd_total_hits - $old_mcd_total_hits)) + +# two samples in the same second cannot be turned into a rate +if [ $time_diff -eq 0 ] +then + if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi + echo "something is broken." + echo "time_diff is 0." + touch $ERROR_TIMEDIFF + exit 1 +fi + +# none of these numbers should be less than 1, but if they are, just send back 1. +# (the clamp also keeps the hit percentage below from dividing by zero) +if [ $mcd_total_gets_diff -le 1 ] ; then mcd_total_gets_diff=1 ; fi +if [ $mcd_total_sets_diff -le 1 ] ; then mcd_total_sets_diff=1 ; fi +if [ $mcd_total_hits_diff -le 1 ] ; then mcd_total_hits_diff=1 ; fi + +mcd_gets_per_sec=`echo "scale=3;${mcd_total_gets_diff}/${time_diff}"|bc` +mcd_sets_per_sec=`echo "scale=3;${mcd_total_sets_diff}/${time_diff}"|bc` +mcd_hits_per_sec=`echo "scale=3;${mcd_total_hits_diff}/${time_diff}"|bc` +mcd_hit_perc=`echo "scale=3; ${mcd_total_hits_diff} * 100 / ${mcd_total_gets_diff}" | bc` + +# if we're running on a non-standard port, it might be the case that +# we've got multiple memcached's being watched. Make the metric name +# differentiate between them.
+if [ $MCD_PORT -ne $MCD_DEFAULT_PORT ] +then + metric_name_uniquifier="${MCD_PORT}_" +fi + +# metric_name_uniquifier stays empty on the default port, so the names below are then plain mcd_* +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}seconds_measured" --value=${time_diff} --type=uint32 --units="secs" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}items_cached" --value=${mcd_curr_items} --type=uint32 --units="items" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}bytes_used" --value=${mcd_curr_bytes} --type=uint32 --units="bytes" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}conns" --value=${mcd_curr_conns} --type=uint32 --units="connections" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}gets" --value=${mcd_gets_per_sec} --type=float --units="gps" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}sets" --value=${mcd_sets_per_sec} --type=float --units="sps" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}cache_hits" --value=${mcd_hits_per_sec} --type=float --units="hps" +$GMETRIC $GMETRIC_ARGS --name="mcd_${metric_name_uniquifier}cache_hit%" --value=${mcd_hit_perc} --type=float --units="%" + diff --git a/mysql/mysql_gmetric.sh b/mysql/mysql_gmetric.sh new file mode 100644 index 0000000..1a10846 --- /dev/null +++ b/mysql/mysql_gmetric.sh @@ -0,0 +1,196 @@ +#!/bin/bash + +### $Header: /var/lib/cvs/ops/ganglia/mysql_gmetric.sh,v 1.3 2006/07/11 17:51:13 ben Exp $ + +### this script is a replacement for mysql_metrics.sh +### instead of just returning a single metric, this +### script gets all three and submits them so you +### only hit mysqladmin once per minute instead of +### 3 times + +### Copyright Simply Hired, Inc.
2006 +### License to use, modify, and distribute under the GPL +### http://www.gnu.org/licenses/gpl.txt + +VERSION=1.5 + +GMETRIC="/usr/bin/gmetric" +GMETRIC_ARGS="-c /etc/gmond.conf" +STATEFILE="/var/lib/ganglia/metrics/mysql.stats" +MYSQL_SOCKFILE="/var/lib/mysql/mysql.sock" +ERROR_NOTROOT="/tmp/mysql_gmetric_notroot" +ERROR_NOSOCKFILE="/tmp/mysql_gmetric_nosockfile" +ERROR_CANT_CONNECT="/tmp/mysql_gmetric_cant_connect" +ERROR_CREATE="/tmp/mysql_gmetric_create_statefile_failed" +# flag files tested further down; left undefined, "[ -e ]" is always true +# and the script would exit without ever printing its error message +ERROR_GMETRIC="/tmp/mysql_gmetric_no_gmetric" +ERROR_TIMEDIFF="/tmp/mysql_gmetric_timediff" + +# this script requires a user with usage and 'replication client' privs. if you +# don't check any slaves, you can leave out repl privs it will silently fail +# the slave test and not report the metric. +# usage means 'no privs' so having it on *.* doesn't expose anything. *.* is +# required for replication client. +### grant USAGE on *.* to 'ganglia'@'localhost' identified by 'xxxxx'; +### grant REPLICATION CLIENT on *.* to 'ganglia'@'localhost' identified by 'xxxxx'; +MYSQL_USER="ganglia" +MYSQL_PASS="xxxxx" + +date=`date +%s` + +if [ $UID -ne 0 ] +then + if [ -e $ERROR_NOTROOT ] ; then exit 1; fi + echo "Error: this script must be run as root." + touch $ERROR_NOTROOT + exit 1 +fi +rm -f $ERROR_NOTROOT + +if [ "x$1" == "x-h" ] +then + echo "Usage: submit_mysql_gmetric.sh [--clean]" + echo " --clean delete all tmp files" + exit 0 +fi + +if [ "x$1" == "x--clean" ] +then + rm -f $STATEFILE $ERROR_NOTROOT $ERROR_NOSOCKFILE $ERROR_CANT_CONNECT $ERROR_CREATE $ERROR_GMETRIC $ERROR_TIMEDIFF + retval=$? + if [ $retval -ne 0 ] + then + echo "failed to clean up." + exit 1 + else + echo "All cleaned up." + exit 0 + fi +fi + +# if the GMETRIC program isn't installed, complain +if [ ! -e $GMETRIC ] +then + if [ -e $ERROR_GMETRIC ] ; then exit 1; fi + echo "" + echo "Error: GMETRIC doesn't seem to be installed." + echo "$GMETRIC doesn't exist." + echo "" + touch $ERROR_GMETRIC + exit 1 +fi + +# if the sockfile doesn't exist, mysql probably isn't running. +if [ !
-e $MYSQL_SOCKFILE ] +then + if [ -e $ERROR_NOSOCKFILE ] ; then exit 1 ; fi + echo "Mysql sock file ($MYSQL_SOCKFILE) doesn't exist." + echo "This usually implies that mysql isn't running." + echo "I'm going to stop reporting until the sock file comes back." + touch $ERROR_NOSOCKFILE + exit 1 +fi + +# if we passed the sockfile test, but $ERROR_NOSOCKFILE exists, it was probably just started. +if [ -e $ERROR_NOSOCKFILE ] +then + echo "The sock file has returned. I'm starting up again." + rm $ERROR_NOSOCKFILE +fi + +# query mysql with STDERR silenced; only the mysqladmin exit status is checked +exec 3>&2 +exec 2>/dev/null +string=`mysqladmin --connect_timeout=15 -u $MYSQL_USER -p${MYSQL_PASS} status` +retval=$? +slavestr=`mysql --connect_timeout=15 -u $MYSQL_USER -p${MYSQL_PASS} -e "show slave status\G" | grep "Seconds_Behind_Master"` +exec 2>&3 +exec 3>&- + +if [ $retval -ne 0 ] +then + if [ -e $ERROR_CANT_CONNECT ] ; then exit 1 ; fi + echo "Even though the sock file exists, I can't connect to mysql." + echo "Bummer. " + touch $ERROR_CANT_CONNECT + exit 1 +fi +# connected fine: clear the flag so the next outage is reported again +rm -f $ERROR_CANT_CONNECT + + +threads=`echo $string | sed 's/.*Threads: \([0-9]*\) .*/\1/'` +queries=`echo $string | sed -e "s/.*Questions: \([0-9]*\) .*/\1/"` +slow_q=`echo $string | sed -e "s/.*Slow queries: \([0-9]*\) .*/\1/"` +# slave_sec might be empty if this db host is not a slave +slave_sec=`echo $slavestr | sed -e "s/.*Seconds_Behind_Master: \([0-9]*\).*/\1/"` + +# save and turn off STDERR for the statefile tests +exec 3>&2 +exec 2>/dev/null + +# if the statefile doesn't exist, we either haven't +# run yet or there's something bigger wrong. +if [ ! -e $STATEFILE ] +then + if [ ! -d `dirname $STATEFILE` ] + then + mkdir -p `dirname $STATEFILE` + fi + # the statefile is a single line: date, query count, slow query count + echo "$date $queries $slow_q" > $STATEFILE + if [ ! -e $STATEFILE ] + then + # if it didn't exist and we couldn't create + # it, we should just scream bloody murder and die. + # only scream once though...
+ if [ -e $ERROR_CREATE ] + then + exit 1 + fi + echo "" + echo "ERROR: couldn't create $STATEFILE" + echo "" + touch $ERROR_CREATE + exit 1 + fi + echo "Created statefile. Exitting." + exit 0 +fi + +# restore stderr +exec 2>&3 +exec 3>&- + +# statefile layout (one line): date queries slow_q +old_stats=(`cat $STATEFILE`) +old_date=${old_stats[0]} +old_queries=${old_stats[1]} +old_slow_q=${old_stats[2]} + +# save the current sample for the next run before doing any arithmetic +echo "$date $queries $slow_q" > $STATEFILE + +time_diff=$(($date - $old_date)) +queries_diff=$(($queries - $old_queries)) +slow_q_diff=$((slow_q - $old_slow_q)) + +# two samples in the same second cannot be turned into a rate +if [ $time_diff -eq 0 ] +then + if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi + echo "something is broken." + echo "time_diff is 0." + touch $ERROR_TIMEDIFF + exit 1 +fi + +# a counter that went backwards is reported as 0 rather than as a negative rate +if [ $queries_diff -le 0 ] ; then queries_diff=0 ; fi +if [ $slow_q_diff -le 0 ] ; then slow_q_diff=0 ; fi + + +#queries per second +qps=`echo "scale=3;$queries_diff / $time_diff" | bc` +sqps=`echo "scale=3;$slow_q_diff / $time_diff" | bc` + +$GMETRIC $GMETRIC_ARGS --name="mysql_threads" --value=${threads} --type=int16 +$GMETRIC $GMETRIC_ARGS --name="mysql_queries" --value=${qps} --type=float --units="qps" +$GMETRIC $GMETRIC_ARGS --name="mysql_slow_queries" --value=${sqps} --type=float --units="qps" + +# if slave sec exists, i.e. this mysqld host is a slave. +# If it's not, don't submit the metric +if [ -n "${slave_sec}" ] +then + $GMETRIC $GMETRIC_ARGS --name="mysql_slave" --value="${slave_sec}" --type="int16" --units="sec" +fi diff --git a/network/network_gmetric.sh b/network/network_gmetric.sh new file mode 100644 index 0000000..d86d1ce --- /dev/null +++ b/network/network_gmetric.sh @@ -0,0 +1,272 @@ +#!/bin/bash + +### $Header: /var/lib/cvs/ops/ganglia/network_gmetric.sh,v 1.3 2006/07/11 17:29:27 ben Exp $ + +### this script reports network metrics to ganglia. +### It should be called from cron every n minutes. +### It will report network usage per interface +### and will automatically adjust for whatever +### timeframe it is called + +### Copyright Simply Hired, Inc.
2006 +### License to use, modify, and distribute under the GPL +### http://www.gnu.org/licenses/gpl.txt + +VERSION=1.3 + +GMETRIC="/usr/bin/gmetric" +GMETRIC_ARGS="-c /etc/gmond.conf" +STATEFILE="/var/lib/ganglia/metrics/net.stats" +date=`date +%s` +procfile="/proc/net/dev" + +ERROR_CREATE="/tmp/network_gmetric_create_statefile_failed" +ERROR_IOSTAT="/tmp/network_gmetric_no_procfile" +ERROR_DEVNAMES="/tmp/network_gmetric_bad_devname" +ERROR_DEVNAMES2="/tmp/network_gmetric_bad_devname_didnt_fix" +ERROR_GMETRIC="/tmp/network_gmetric_no_gmetric" +ERROR_TIMEDIFF="/tmp/network_gmetric_timediff" +ERROR_NOTROOT="/tmp/network_gmetric_notroot" + +if [ $UID -ne 0 ] +then + if [ -e $ERROR_NOTROOT ] ; then exit 1; fi + echo "Error: this script must be run as root." + touch $ERROR_NOTROOT + exit 1 +fi +rm -f $ERROR_NOTROOT + +if [ "x$1" == "x-h" ] +then + echo "Usage: network_gmetric.sh [--clean]" + echo " --clean delete all tmp files" + exit 0 +fi + +if [ "x$1" == "x--clean" ] +then + # remove every error flag file defined above, plus the statefile + rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAMES $ERROR_DEVNAMES2 $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT $STATEFILE + retval=$? + if [ $retval -ne 0 ] + then + echo "failed to clean up." + exit 1 + else + echo "All cleaned up." + exit 0 + fi +fi + +# save and turn off STDERR for the statefile tests +exec 3>&2 +exec 2>/dev/null + +# if the GMETRIC program isn't installed, complain +if [ ! -e $GMETRIC ] +then + if [ -e $ERROR_GMETRIC ] ; then exit 1; fi + echo "" + echo "Error: GMETRIC doesn't seem to be installed." + echo "$GMETRIC doesn't exist." + echo "" + touch $ERROR_GMETRIC + exit 1 +fi + +# if the /proc/net/dev file doesn't exist (eh?!) complain +if [ ! -e $procfile ] +then + if [ -e $ERROR_IOSTAT ] + then + exit 1 + fi + echo "" + echo "Error: $procfile doesn't seem to exist." + echo "" + touch $ERROR_IOSTAT + exit 1 +fi + +# if the statefile doesn't exist, we either haven't +# run yet or there's something bigger wrong. +if [ !
-d `dirname $STATEFILE` ] + then + mkdir -p `dirname $STATEFILE` + fi + echo "$date" > $STATEFILE + cat $procfile | sed -e "s/:/ /" | grep "eth" >> $STATEFILE + if [ ! -e $STATEFILE ] + then + # if it didn't exist and we couldn't create + # it, we should just scream bloody murder and die. + # only scream once though... + if [ -e $ERROR_CREATE ] + then + exit 1 + fi + echo "" + echo "ERROR: couldn't create $STATEFILE" + echo "" + touch $ERROR_CREATE + exit 1 + fi + echo "Created statefile. Exitting." + exit 0 +fi + +# restore stderr +exec 2>&3 +exec 3>&- + +# this script gets its stats directly from /proc; only lines +# containing "eth" are kept, with the first ":" turned into a space +stats=(`cat $procfile | sed -e "s/:/ /" | grep "eth"`) +old_stats=(`cat $STATEFILE`) +old_date=${old_stats[0]} + +read=0 +write=0 +old_read=0 +old_write=0 +read_sum=0 +write_sum=0 + +### function get_rw sets the variables $devname, $read and $write +### from the 17-word record of one interface in $stats: word 0 is +### the name, word 1 becomes $read and word 9 becomes $write. +### Which interface is specified as an argument to the function. +### The function returns 1 if an invalid device number +### was specified. +function get_rw() { + base=$1 + let "base *= 17" + if [ "k${stats[$base]}" == "k" ] + then + # we're done looping + return 1; + else + devname=${stats[$base]} + read=${stats[$(($base + 1))]} + write=${stats[$(($base + 9))]} + return 0 + fi +} + +### function get_old_rw does the same for $old_stats; the extra +### offset of 1 skips the date stored as the first word of the statefile. +function get_old_rw() { + base=$1 + let "base *= 17" + let "base += 1" + if [ "k${old_stats[$base]}" == "k" ] + then + # we're done looping + return 1; + else + old_devname=${old_stats[$base]} + old_read=${old_stats[$(($base + 1))]} + old_write=${old_stats[$(($base + 9))]} + return 0 + fi +} + +time_diff=$(($date - $old_date)) + +devnum=0 +get_rw $devnum +get_old_rw $devnum +res=$? +while [ $res -eq 0 ] +do + # if devname and old_devname aren't the same, + # this whole function is invalid. + if [ $devname != $old_devname ] + then + if [ -e $ERROR_DEVNAMES ] + then + if [ -e $ERROR_DEVNAMES2 ] ; then exit 1; fi + echo "Sorry, my attempt at fixing the problem failed."
+ echo "It's now up to you, dear human." + touch $ERROR_DEVNAMES2 + exit 1 + fi + echo "something is broken." + echo "devnames are not the same." + echo "devname=$devname old_devname=$old_devname" + echo "I'm backing up the current statefile ($STATEFILE) " + echo "and will recreate it next time to see if that fixes this." + mydate=`date +%Y%m%d%H%M%S` + mv -fv $STATEFILE{,.${mydate}} + touch $ERROR_DEVNAMES + exit 1 + fi + # names match again: clear both mismatch flags so a later mismatch is reported + rm -f $ERROR_DEVNAMES $ERROR_DEVNAMES2 + #devname, read, write, old_devname, old_read, old_write + # are all set. calculate stat/sec and report. + if [ $read -lt $old_read ] + then + # counter wrapped - add 2^32 + let "read += 4294967296" + fi + if [ $write -lt $old_write ] + then + # counter wrapped - add 2^32 (same constant as for read) + let "write += 4294967296" + fi + read_diff=$(($read - $old_read)) + write_diff=$(($write - $old_write)) + if [ $time_diff -eq 0 ] + then + if [ -e $ERROR_TIMEDIFF ] ; then exit 1 ; fi + echo "something is broken." + echo "time_diff is 0." + touch $ERROR_TIMEDIFF + exit 1 + fi + rm -f $ERROR_TIMEDIFF + rps=`echo "scale=3;$read_diff / $time_diff" | bc` + wps=`echo "scale=3;$write_diff / $time_diff" | bc` + + read_sum=`echo "scale=3;$read_sum + $rps" | bc` + write_sum=`echo "scale=3;$write_sum + $wps" | bc` + + # log current values +# echo `date +%Y.%m.%d.%H:%M:%S` "network_gmetric values: ${devname}: old_read: $old_read old_write: $old_write read: $read write: $write RPS: $rps WPS: $wps" >> /var/log/gmetric.log + + # report what we have calculated + # only send in metric if it's greater than 0 + if [ `expr $rps \> 0` -eq 1 ]; + then + $GMETRIC $GMETRIC_ARGS --name="${devname}_rx" --value="$rps" --type="float" --units="bytes/sec" + fi + if [ `expr $wps \> 0` -eq 1 ]; + then + $GMETRIC $GMETRIC_ARGS --name="${devname}_tx" --value="$wps" --type="float" --units="bytes/sec" + fi + +# echo "$devname $rps $wps $read_sum $write_sum " >> /tmp/foo.txt + + devnum=$((devnum + 1)) + get_rw $devnum + get_old_rw $devnum + res=$?
+done + +# log current values +#echo `date +%Y.%m.%d.%H:%M:%S` "network_gmetric values: sum: RPS: $read_sum WPS: $write_sum" >> /var/log/gmetric.log + +# only send in metric if it's greater than 0 +if [ `expr $read_sum \> 0` -eq 1 ]; +then + $GMETRIC $GMETRIC_ARGS --name="network_rx" --value="$read_sum" --type="float" --units="bytes/sec" +fi +if [ `expr $write_sum \> 0` -eq 1 ]; +then + $GMETRIC $GMETRIC_ARGS --name="network_tx" --value="$write_sum" --type="float" --units="bytes/sec" +fi + +# save the current counters for the next run +echo "$date" > $STATEFILE +cat $procfile | sed -e "s/:/ /" | grep "eth" >> $STATEFILE + +# a complete run succeeded: clear every error flag +rm -f $ERROR_CREATE $ERROR_IOSTAT $ERROR_DEVNAMES2 $ERROR_DEVNAMES $ERROR_GMETRIC $ERROR_TIMEDIFF $ERROR_NOTROOT +