#!/bin/bash

# This Nagios script was written against version 3.3 & 3.4 of Gluster.  Older
# versions will most likely not work at all with this monitoring script.
#
# Gluster currently requires elevated permissions to do anything.  In order to
# accommodate this, you need to allow your Nagios user some additional
# permissions via sudo.  The line you want to add will look something like the
# following in /etc/sudoers (or something equivalent):
#
# Defaults:nagios !requiretty
# nagios ALL=(root) NOPASSWD:/usr/sbin/gluster volume status [[\:graph\:]]* detail,/usr/sbin/gluster volume heal [[\:graph\:]]* info
#
# That should give us all the access we need to check the status of any
# currently defined peers and volumes.

# Inspired by a script of Mark Nipper
#
# 2013, Mark Ruys, mark.ruys@peercode.nl
#
# Modified by Steve Thomas
#

# Fixed search path so only system binaries are picked up.
PATH=/sbin:/bin:/usr/sbin:/usr/bin

# PROGNAME is shown in the usage text; PROGPATH is the directory holding this
# script (dirname yields "." when the script is invoked without a slash).
PROGNAME=$(basename -- "$0")
PROGPATH=$(dirname -- "$0")
REVISION="1.0.0"

# utils.sh is expected one directory above this script.  The rest of the
# script relies on $ECHO and the STATE_* variables, which are not defined in
# this file and are presumably provided by utils.sh, so stop right away when
# it cannot be loaded (3 is the Nagios UNKNOWN return code).
if ! . "$PROGPATH/../utils.sh"; then
  echo "UNKNOWN: cannot load $PROGPATH/../utils.sh"
  exit 3
fi

# parse command line
usage () {
  # Print the command-line synopsis (preceded by a blank line) on stdout,
  # then terminate the script with the exit code held in $STATE_UNKNOWN.
  printf '%s\n' \
    "" \
    "USAGE: " \
    "  $PROGNAME -v VOLUME -n BRICKS [-w GB -c GB]" \
    "     -n BRICKS: number of bricks" \
    "     -w and -c values in GB"
  exit $STATE_UNKNOWN
}

# Parse the command line.
#   -v VOLUME  volume to check (mandatory)
#   -n BRICKS  expected number of online bricks (mandatory)
#   -w GB      free-space warning threshold (optional)
#   -c GB      free-space critical threshold (optional)
# Anything unknown or malformed ends in usage().
while getopts "v:n:w:c:" opt; do
  case $opt in
    v) VOLUME=${OPTARG} ;;
    n) BRICKS=${OPTARG} ;;
    w) WARN=${OPTARG} ;;
    c) CRIT=${OPTARG} ;;
    *) usage ;;
  esac
done

if [[ -z "${VOLUME}" || -z "${BRICKS}" ]]; then
  usage
fi

# BRICKS is compared with -lt further down; a non-integer value would make
# that test error out and silently skip the brick-count check.
if ! [[ "${BRICKS}" =~ ^[0-9]+$ ]]; then
  usage
fi

# The thresholds are handed to bc verbatim, so accept plain decimal numbers
# only.
number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
for threshold in "${WARN}" "${CRIT}"; do
  if [[ -n "${threshold}" ]] && ! [[ "${threshold}" =~ $number_re ]]; then
    usage
  fi
done

Exit () {
	# Report a result and terminate the script.
	#   $1  state name; printed as the message prefix and used to look up
	#       the exit code in the variable STATE_<name>
	#   $2  message text, passed to $ECHO after the "<name>: " prefix
	local state_var="STATE_$1"
	$ECHO "$1: $2"
	exit ${!state_var}
}

# Make sure every external tool can be resolved; the first one that cannot
# ends the check with UNKNOWN.
for tool in basename bc awk sudo pidof gluster; do
	type -p "$tool" > /dev/null || Exit UNKNOWN "$tool not found\n"
done

# The management daemon (glusterd) has to be running ...
pidof glusterd > /dev/null \
	|| Exit CRITICAL "glusterd management daemon not running\n"

# ... and so does at least one brick daemon (glusterfsd).
pidof glusterfsd > /dev/null \
	|| Exit CRITICAL "glusterfsd brick daemon not running\n"

# get volume heal status
#
# Add up every "Number of entries: N" line printed by the heal report and
# queue a warning when the total is non-zero.
heal=0
while read -r entries; do
	# Only plain integers are counted.  A non-numeric value (gluster may
	# print a placeholder such as "-" for a brick it cannot query -- TODO
	# confirm) is skipped instead of breaking the numeric comparison.
	[[ "$entries" =~ ^[0-9]+$ ]] || continue
	# 10# forces base 10 so a leading zero is not read as octal.
	heal=$(( heal + 10#$entries ))
done < <(sudo gluster volume heal "${VOLUME}" info | awk '/^Number of entries: /{print $4}')
if (( heal > 0 )); then
	errors+=("$heal unsynched entries")
fi

# get volume status
#
# Walk through the "detail" status report line by line and collect:
#   bricksfound  number of bricks whose "Online" line reads "Y"
#   freegb       smallest "Disk Space Free" value seen, expressed in GB
#   errors       one "<brick> offline" entry per brick that is not online
bricksfound=0
freegb=9999999	# start above any real value so the first brick always wins
number_re='^[0-9]+([.][0-9]+)?$'
# read -a splits every line on whitespace without performing pathname
# expansion, so tokens containing glob characters survive untouched and no
# glob-related shell option is needed.
while read -r -a field; do
	case ${field[0]} in
	Brick)
		# Everything after the second token is remembered as the brick
		# name for a possible "offline" message.
		brick=${field[*]:2}
		;;
	Disk)
		key=${field[*]:0:3}
		if [[ "${key}" == "Disk Space Free" ]]; then
			# The value is a number immediately followed by a
			# two-letter unit, e.g. "22.1GB".
			freeunit=${field[*]:4}
			unit=${freeunit: -2}
			free=${freeunit%"$unit"}
			if ! [[ "$free" =~ $number_re ]]; then
				Exit UNKNOWN "Unknown disk space size $freeunit\n"
			fi
			# Normalise to GB.  Assumes gluster reports binary
			# (1024-based) units -- TODO confirm.
			case $unit in
			KB) free=$(bc <<< "scale=6; ${free} / 1048576") ;;
			MB) free=$(bc <<< "scale=3; ${free} / 1024") ;;
			GB) ;;
			TB) free=$(bc <<< "${free} * 1024") ;;
			PB) free=$(bc <<< "${free} * 1048576") ;;
			*) Exit UNKNOWN "Unknown disk space size $freeunit\n" ;;
			esac
			# bc prints 1 when the comparison holds; it is used
			# because the values may be fractional.
			if (( $(bc <<< "${free} < ${freegb}") == 1 )); then
				freegb=$free
			fi
		fi
		;;
	Online)
		online=${field[*]:2}
		if [[ "${online}" == "Y" ]]; then
			bricksfound=$((bricksfound + 1))
		else
			errors+=("$brick offline")
		fi
		;;
	esac
done < <(sudo gluster volume status "${VOLUME}" detail)

# No online brick at all is critical; fewer online bricks than expected is
# queued as a warning.  The queued text carries no line break: all queued
# warnings are joined into a single output line at the end of the script.
if [ "$bricksfound" -eq 0 ]; then
	Exit CRITICAL "No bricks found\n"
elif [ "$bricksfound" -lt "$BRICKS" ]; then
	errors+=("found $bricksfound bricks, expected $BRICKS")
fi

# Apply the free-space thresholds (GB).  They are evaluated only when both
# the warning and the critical value were supplied.  bc does the comparisons
# (printing 1 when true) because the values may be fractional.
if [[ -n "$CRIT" && -n "$WARN" ]]; then
	if (( $(bc <<< "${CRIT} > ${WARN}") == 1 )); then
		# The alarm fires as free space shrinks, so the critical limit
		# has to be the lower of the two.
		Exit UNKNOWN "Critical threshold must not exceed warning threshold\n"
	elif (( $(bc <<< "${freegb} < ${CRIT}") == 1 )); then
		Exit CRITICAL "Free space ${freegb}GB\n"
	elif (( $(bc <<< "${freegb} < ${WARN}") == 1 )); then
		# Queued without a line break; all queued warnings are joined
		# into a single output line at the end of the script.
		errors+=("Free space ${freegb}GB")
	fi
fi

# exit with warning if errors
#
# All queued messages are reported on ONE line, separated by "; ".  A literal
# "\n" at the end of an individual entry is stripped first so it cannot break
# the line in the middle, and a single "\n" is appended to the joined text.
if (( ${#errors[@]} > 0 )); then
	eol='\n'	# the two characters backslash + n, not a real newline
	sep='; '
	msg=$(printf "${sep}%s" "${errors[@]%"$eol"}")
	msg=${msg:${#sep}}	# drop the separator in front of the first entry

	Exit WARNING "${msg}\n"
fi

# exit with no errors
Exit OK "${bricksfound} bricks; free space ${freegb}GB\n"