====== [SCRIPT] Nagios check plugin for oracle ======
====== Description ======
Script used as plugin for Nagios to remotely check Oracle.
This script is a complete rewrite of [[http://exchange.nagios.org/directory/Plugins/Databases/Oracle/Check-Remote-Oracle-through-Ssh-(No-need-to-install-Oracle-client)/details|this one]].
====== Instructions ======
===== Install =====
Download script code and copy to your nagios plugins home, for example:
/usr/local/nagios/libexec/check_remote_oracle
Then you'll have to add it to nagios and enable checks through it.
===== Configuration =====
Required variables (not passed through CLI):
^ Variable ^ Default value ^ Description ^
| ORACLEUSER
| NAGIOS
| User for connecting to OracleDB (no the //HOST// running OracleDB) |
| ORACLEUSERPASSWORD
| NAGIOSPASSWORD
| Password for the previous user |
You can create the NAGIOS user issuing the following commands:
CREATE USER "NAGIOS"
PROFILE "DEFAULT"
IDENTIFIED BY "NAGIOSPASSWORD" DEFAULT TABLESPACE "USERS"
TEMPORARY TABLESPACE "TEMP"
ACCOUNT UNLOCK ;
grant CONNECT, SELECT_CATALOG_ROLE to "NAGIOS" ;
Control specific grants:
* SGA:
GRANT SELECT ON SYS.V_$SGASTAT TO VOXELADMIN ;
For a better understand of the usage, this script will do something like:
ssh ora01.ciberterminal.net
sqlplus 'NAGIOSUSER/NAGIOSPASSWORD@ORACLE_SID'
SELECT 1+1 FROM DUAL;
**Note**: This user is not user in the //--dgstats// check, look the check instructions here down.
===== Testing =====
You can test it by executing it:
bash check_remote_oracle
I will show the instructions:
$ bash check_remote_oracle
Usage:
./check_remote_oracle -H [-U Oracle OS User] --tns
./check_remote_oracle -H [-U Oracle OS User] --db
./check_remote_oracle -H [-U Oracle OS User] --login
./check_remote_oracle -H [-U Oracle OS User] --sessions
./check_remote_oracle -H [-U Oracle OS User] --cache
./check_remote_oracle -H [-U Oracle OS User] --parsing
./check_remote_oracle -H [-U Oracle OS User] --tablespace
./check_remote_oracle -H [-U Oracle OS User] --undo
./check_remote_oracle -H [-U Oracle OS User] --diskgroup
./check_remote_oracle -H [-U Oracle OS User] --asmfs
./check_remote_oracle -H [-U Oracle OS User] --dgstats
./check_remote_oracle -H [-U Oracle OS User] --redogen
./check_remote_oracle --help
./check_remote_oracle --version
For example, checking the statistics of the dataguard:
$ bash check_remote_oracle -H scan01.ciberterminal.net --sessions DBSID 150 100
OK - DBSID Concurrent Active Sessions Count: 4 |Sessions=4;100;150;0;20
===== Check Descriptions =====
==== --tns ====
Check remote TNS server, that is: ''tnsping'' it (not much useful...)
==== --db ====
Check remote database (search /bin/ps for PMON process)
==== --login ====
Attempt a dummy login and alert if not ORA-01017: invalid username/password
==== --sessions ====
Check remote database concurrent active sessions:
select count(SES.SID)
from V$SESSION SES, V$SQLAREA SQL, V$SESSION_WAIT WA, V$PROCESS P
where SES.STATUS='ACTIVE'
AND SES.SQL_ID=SQL.SQL_ID
AND SES.SID=WA.SID
and SES.paddr=p.addr
and UPPER(SES.USERNAME) not in ( 'SYS','SYSMAN','MDSYS','SYSTEM','NAGIOS' )
/
==== --cache ====
Check remote database for library and buffer cache hit ratios:
SELECT (1-(pr.value/(dbg.value+cg.value)))*100
from v$sysstat pr, v$sysstat dbg, v$sysstat cg
where pr.name='physical reads'
and dbg.name='db block gets'
and cg.name='consistent gets'
/
==== --parsing ====
Check remote database for Soft/Hard parse ratios:
SELECT VALUE FROM V$SYSSTAT WHERE NAME = 'parse count (total)' ;
SELECT VALUE FROM V$SYSSTAT WHERE NAME = 'parse count (hard)' ;
SELECT VALUE FROM V$SYSSTAT WHERE NAME = 'parse count (failures)' ;
SELECT VALUE FROM V$SYSSTAT WHERE NAME = 'parse count (describe)' ;
==== --tablespace ====
Check remote database for **** capacity in given ORACLE_SID :
SELECT MAX_MB,
REAL_FREE_MB FREE_MB
FROM (
SELECT MAXUSAGE.TABLESPACE_NAME,
MAXUSAGE.MAX_MB,
CASE WHEN MAXUSAGE.ACTUAL_DATAFILE_MB < MAXUSAGE.MAX_MB THEN
MAX_MB-(ACTUAL_DATAFILE_MB-FREE_MB)
ELSE
FREE_MB
END REAL_FREE_MB
FROM
(
select TABLESPACE_NAME,
SUM(case when MAXBYTES > 0 then MAXBYTES else BYTES END)/1024/1024 MAX_MB,
SUM(BYTES)/1024/1024 ACTUAL_DATAFILE_MB
FROM DBA_DATA_FILES
GROUP BY TABLESPACE_NAME
) MAXUSAGE,
(
select TABLESPACE_NAME, SUM(BYTES)/1024/1024 FREE_MB
FROM dba_free_space
GROUP BY TABLESPACE_NAME
) FREEUSAGE
WHERE MAXUSAGE.TABLESPACE_NAME=FREEUSAGE.TABLESPACE_NAME
AND MAXUSAGE.TABLESPACE_NAME = ''
)
;
==== --undo ====
Check remote database for **UNDO** usage :
select TOTALSPACE.TOTAL TOTAL_SPACE,
TOTALSPACE.TOTAL-NVL(USEDSPACE.USED,0.0) FREE_SPACE
FROM (
SELECT TABLESPACE_NAME, SUM(BYTES)/1024/1024 TOTAL
FROM DBA_DATA_FILES WHERE TABLESPACE_NAME = (SELECT value FROM V\\\$parameter WHERE name = 'undo_tablespace')
GROUP BY TABLESPACE_NAME
) TOTALSPACE
LEFT OUTER JOIN
(
SELECT TABLESPACE_NAME, SUM(BYTES)/1024/1024 USED
FROM DBA_UNDO_EXTENTS
WHERE (STATUS='UNEXPIRED' OR STATUS='ACTIVE')
AND TABLESPACE_NAME = (SELECT value FROM V\\\$parameter WHERE name = 'undo_tablespace')
GROUP BY TABLESPACE_NAME
) USEDSPACE
ON TOTALSPACE.TABLESPACE_NAME=USEDSPACE.TABLESPACE_NAME
;
==== --diskgroup ====
Check remote database for **** capacity in ORACLE_ASM_SID (Tipically +ASM1/2...)
SELECT TOTAL_MB, NVL(FREE_MB,0.0) FREE_MB
FROM V$ASM_DISKGROUP
WHERE NAME='' ;
==== --dgstats ====
Dataguard statistics (Apply & Transport Lag).
This check is done **ON THE STANDBY** (the master does not have information on **V$DATAGUARD_STATS**.
So ORACLE_SID and HOSTNAME must be the STANDBY ones.
SELECT * FROM V$DATAGUARD_STATS WHERE NAME LIKE '%lag' ;
==== --redogen ====
Daily REDO generation statistics.
Something like that:
SELECT A.DAY, Round(A.COUNT#*B.AVG#/1024/1024) Daily_Avg_Mb
FROM
(
SELECT To_Char(First_Time,'YYYY-MM-DD') DAY, COUNT(1) COUNT#, MIN(RECID) MIN#, MAX(RECID) MAX#
FROM v$log_history
GROUP BY To_Char(First_Time,'YYYY-MM-DD')
ORDER BY 1 DESC
) A,
(
SELECT Avg(BYTES) AVG#
FROM v$log
) B
ORDER BY DAY
/
====== Script code ======
#! /bin/bash
#
# latigid010@yahoo.com
# 01/06/2000
#
# enricm@gmail.com
# 16/04/2012
#
# dodger@ciberterminal.net
# 03/10/2013: Complete rewrite of this f*****shit
#
# This Nagios plugin was created to check Oracle status on a remote site through SSH
#
########################################################################
#
# CONSTANTS
#
########################################################################
# EXIT STATUS
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4
# /EXIT STATUS
REVISION="1.1"
# colors
LIGHTGREEN="\033[1;32m"
LIGHTRED="\033[1;31m"
WHITE="\033[0;37m"
RESET="\033[0;00m"
########################################################################
#
# / CONSTANTS
#
########################################################################
########################################################################
#
# FUNCTIONS
#
########################################################################
# printf "%s${LIGHTRED}USAGE:${RESET}
# $0 ORACLE_SID BACKUP_METHOD
#
# Where ORACLE_SID allowed: ${LIGHTGREEN}${KNOWNSIDS[*]}${RESET}
# And BACKUP_METHOD allowed: ${LIGHTGREEN}${KNOWNTYPE[*]}${RESET}\n"
# # VERY INITIAL CHECKS
support() {
printf "%sSend email to dodger@ciberterminal.net if you have questions\nregarding use of this software. To submit patches or suggest improvements,\nsend email to nagiosplug-dodger@ciberterminal.net.\nPlease include version information with all correspondence (when possible,\nuse output from the --version option of the plugin itself).\n" | sed -e 's/\n/ /g'
}
print_revision() {
echo "$1 v$2 (nagios-plugins 1.4.15)"
printf "%sThe nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute\ncopies of the plugins under the terms of the GNU General Public License.\nFor more information about these matters, see the file named COPYING.\n" | sed -e 's/\n/ /g'
}
print_usage() {
echo "Usage:
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --tns
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --db
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --login
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --sessions
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --cache
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --parsing
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --tablespace
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --undo
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --diskgroup
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --asmfs
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --dgstats
${PROGPATH}/${PROGNAME} -H [-U Oracle OS User] --redogen
${PROGPATH}/${PROGNAME} --help
${PROGPATH}/${PROGNAME} --version
"
}
print_help() {
print_revision $PROGNAME $REVISION
echo ""
print_usage
echo "
Check Oracle status
--tns SID/IP Address
Check remote TNS server
--db SID
Check remote database (search /bin/ps for PMON process)
--sessions SID
Check remote database concurrent active sessions
--login SID
Attempt a dummy login and alert if not ORA-01017: invalid username/password
--cache
Check remote database for library and buffer cache hit ratios
--parsing
Check remote database for Soft/Hard parse ratios
--tablespace
Check remote database for tablespace capacity in ORACLE_SID
--undo
Check remote database for UNDO tablespace capacity in ORACLE_SID
--diskgroup
Check remote database for diskgroup capacity in ORACLE_ASM_SID (Tipically +ASM1/2...)
--dgstats
Dataguard statistics (Apply & Transport Lag). This check is done ON THE STANDBY (the master does not have information on V\$DATAGUARD_STATS. So ORACLE_SID and HOSTNAME must be the STANDBY ones.
--redogen
Daily check for yesterday's redo generation
--help
Print this help screen
--version
Print version and license information
If the plugin doesn't work, check that the ORACLE_HOME environment
variable is set, that ORACLE_HOME/bin is in your PATH, and the
tnsnames.ora file is locatable and is properly configured on your Oracle server.
If you want to use a default Oracle home, add in your oratab file:
*:/opt/app/oracle/product/7.3.4:N
"
}
usage()
{
# Information options
case "${1^^}" in
"--HELP"|"-H")
print_help
exit $STATE_OK
;;
"--VERSION"|"-V")
print_revision $PROGNAME $REVISION
exit $STATE_OK
;;
*)
print_usage
exit $STATE_OK
;;
esac
}
return_values()
{
local let VALUE=$1
if [ ${VALUE} -gt ${CRITICAL} ] ; then
MSG="CRITICAL"
RETURNCODE=${STATE_CRITICAL}
elif [ ${VALUE} -gt ${WARNING} ] ; then
MSG="WARNING"
RETURNCODE=${STATE_WARNING}
elif [ ${VALUE} -le ${WARNING} ] ; then
MSG="OK"
RETURNCODE=${STATE_OK}
else
MSG="UNKNOWN"
RETURNCODE=${STATE_UNKNOWN}
fi
echo ${MSG}
return ${RETURNCODE}
}
get_remote_oraclehome()
{
# Hunt down a reasonable ORACLE_HOME
ORATABLIST="$(${SSH} "locate oratab" | egrep "/oratab$" | tr '\n' ' ')"
for ORATAB in ${ORATABLIST} ; do
ORACLE_HOME=$(${SSH} "cat ${ORATAB}" | egrep "^(${ORACLE_SID}|\*):" | awk -F\: '{print $2}')
[[ "${ORACLE_HOME}" ]] && [ $($SSH "ls -d ${ORACLE_HOME}" |wc -l) -eq 1 ] && return 0
done
return 1
}
check_tns()
{
local AUX="${ORAENV} tnsping ${ORACLE_SID}"
local TNSCHECK="$(${SSH} ${AUX})"
# echo ${TNSCHECK}
if [[ "${TNSCHECK}" =~ .*OK.*\(([0-9]{1,})\ .* ]] ; then
return ${STATE_OK}
else
return ${STATE_CRITICAL}
fi
}
check_db()
{
local PMONCHECK="$(${SSH} "ps -ef" | egrep -v grep | grep -c "ora_pmon_${ORACLE_SID}")"
if [ ${PMONCHECK} -ge 1 ] ; then
# echo "${ORACLE_SID} OK - ${PMONCHECK} PMON process(es) running"
return ${STATE_OK}
else
# echo "${ORACLE_SID} Database is DOWN"
return ${STATE_CRITICAL}
fi
}
check_login()
{
local AUX="${ORAENV} echo 'select 1+1 from dual ;' | ${SQLPLUS}"
local LOGINCHECK="$(${SSH} ${AUX} | egrep -c '^[[:space:]]{0,}2$')"
if [ ${LOGINCHECK} -ge 1 ] ; then
# echo "${ORACLE_SID} OK - dummy login connected"
return ${STATE_OK}
else
# echo "${ORACLE_SID} - dummy login fail"
return ${STATE_CRITICAL}
fi
}
# THIS FUNCTION IS PERSONALIZED FOR VOXEL
# TSDG variable must be "SCHEMA1:SCHEMA2:SCHEMA-n"
check_asessions()
{
local let RETURNCODE=${STATE_UNKNOWN}
local let MAXINDEX=0
local let i=0
local let x=0
local let TOTALSESSIONS=0
declare -a RESSULTARRAY
case ${ORACLE_SID^^} in
"DIVAPRO" )
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 99999
select count(*) from V\\\$SESSION where USERNAME='DIVAPP' and PROGRAM LIKE 'w3wp.exe' ;
select count(*) from V\\\$SESSION where USERNAME='DIVAPP' and PROGRAM NOT LIKE 'w3wp.exe' ;
"
;;
"BAVELPRO" )
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 99999
select count(*) from V\\\$SESSION where USERNAME='VOXEL';
select count(*) from V\\\$SESSION where USERNAME='MANAGER_DATA_SYNCH' ;
select count(*) from V\\\$SESSION where USERNAME='MANAGER_WEB' ;
select count(*) from V\\\$SESSION where USERNAME='MIRINDA_USER' ;
"
;;
* )
echo "CRITICAL - ${ORACLE_SID} Error getting asessions check ORACLE_SID"
return ${STATE_CRITICAL}
;;
esac
local AUX="${ORAENV} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local RESSULT="$(${SSH} "${AUX}" | awk '{print $1}')"
if [[ "${RESSULT}" =~ .*(ORA\-[0-9]{1,}).* ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting sessions: ${BASH_REMATCH[1]}"
return ${STATE_CRITICAL}
fi
for i in ${RESSULT} ; do
let TOTALSESSIONS+=$i
RESSULTARRAY[$x]=$i
[ $i -gt ${RESSULTARRAY[$MAXINDEX]} ] && MAXINDEX=$x
let x++
done
MSG="$(return_values ${RESSULTARRAY[${MAXINDEX}]})"
RETURNCODE=$?
case ${ORACLE_SID^^} in
"DIVAPRO" )
echo "${MSG} - ${ORACLE_SID} All Oracle Opened Sessions Count: ${TOTALSESSIONS} |Sessions=${TOTALSESSIONS} DEVOLUIVA_WEB=${RESSULTARRAY[0]} DEVOLUIVA_BACKEND=${RESSULTARRAY[1]}"
return ${RETURNCODE}
;;
"BAVELPRO" )
AUX="Sessions=${TOTALSESSIONS}"
AUX="${AUX} VOXEL=${RESSULTARRAY[0]}"
AUX="${AUX} MANAGER_DATA_SYNCH=${RESSULTARRAY[1]}"
AUX="${AUX} MANAGER_WEB=${RESSULTARRAY[2]}"
AUX="${AUX} MIRINDA_USER=${RESSULTARRAY[3]}"
echo "${MSG} - ${ORACLE_SID} All Oracle Opened Sessions Count: ${TOTALSESSIONS} |${AUX}"
return ${RETURNCODE}
;;
* )
echo "CRITICAL - ${ORACLE_SID} Error getting asessions check ORACLE_SID"
return ${STATE_CRITICAL}
;;
esac
}
#This a statistics-only control, so it always return OK unless it can't connect to Oracle
check_redogen()
{
local let RETURNCODE=${STATE_UNKNOWN}
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 9999999999
SELECT Round(A.COUNT#*B.AVG#/1024/1024) YESTERDAY_REDOGEN_MB
FROM (
SELECT To_Char(First_Time,'YYYY-MM-DD') DAY, COUNT(1) COUNT#, MIN(RECID) MIN#, MAX(RECID) MAX#
FROM v\\\$log_history
WHERE To_Char(First_Time,'YYYY-MM-DD')=To_Char(SYSDATE-1,'YYYY-MM-DD')
GROUP BY To_Char(First_Time,'YYYY-MM-DD')
ORDER BY 1 DESC
) A,
(
SELECT Avg(BYTES) AVG#
FROM v\\\$log
) B
/"
local AUX="${ORAENV} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local RESSULT="$(${SSH} "${AUX}" | awk '{print $1}')"
if [[ "${RESSULT}" =~ .*(ORA\-[0-9]{1,}).* ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting redo generation: ${BASH_REMATCH[1]}"
return ${STATE_CRITICAL}
fi
echo "OK - ${ORACLE_SID} Redo generation: ${RESSULT} |RedoGen=${RESSULT}"
return 0
}
check_cache()
{
local let RETURNCODE=${STATE_UNKNOWN}
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 9999999.99
SELECT (1-(pr.value/(dbg.value+cg.value)))*100
from v\\\$sysstat pr, v\\\$sysstat dbg, v\\\$sysstat cg
where pr.name='physical reads'
and dbg.name='db block gets'
and cg.name='consistent gets'
/"
local AUX="${ORAENV} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local LIBHITS="$(${SSH} "${AUX}" | awk '{print $1}')"
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 9999999.99
select sum(lc.pins)/(sum(lc.pins)+sum(lc.reloads))*100
from v\\\$librarycache lc
/"
local AUX="${ORAENV} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local CACHEHITS="$(${SSH} "${AUX}" | awk '{print $1}')"
if [[ "${CACHEHITS}" =~ .*(ORA\-[0-9]{1,}).* || "${LIBHITS}" =~ .*(ORA\-[0-9]{1,}).* ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting Cache: ${BASH_REMATCH[1]}"
return ${STATE_CRITICAL}
fi
if ! [[ "${CACHEHITS}" =~ [0-9]{1,2}\.[0-9]{0,2} || "${LIBHITS}" =~ [0-9]{1,2}\.[0-9]{0,2} ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting Cache, values returned: ${CACHEHITS} , ${LIBHITS}"
return ${STATE_CRITICAL}
fi
if [[ ${CACHEHITS/.*} -le ${CRITICAL} || ${LIBHITS/.*} -le ${CRITICAL} ]] ; then
MSG="CRITICAL"
RETURNCODE=${STATE_CRITICAL}
elif [[ ${CACHEHITS/.*} -le ${WARNING} || ${LIBHITS/.*} -le ${WARNING} ]] ; then
MSG="WARNING"
RETURNCODE=${STATE_WARNING}
elif [[ ${CACHEHITS/.*} -gt ${WARNING} && ${LIBHITS/.*} -gt ${WARNING} ]] ; then
MSG="OK"
RETURNCODE=${STATE_OK}
else
MSG="UNKNOWN"
RETURNCODE=${STATE_UNKNOWN}
fi
echo "${MSG} - ${ORACLE_SID} - Cache Hit Rates: ${CACHEHITS}% Lib -- ${LIBHITS}% Buff|lib=${CACHEHITS}%;${CRITICAL};${WARNING};0;100 buffer=${LIBHITS}%;${CRITICAL};${WARNING};0;100"
return ${RETURNCODE}
}
check_parsing()
{
local let RETURNCODE=${STATE_UNKNOWN}
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 99999999999999
SELECT VALUE FROM V\\\$SYSSTAT WHERE NAME = 'parse count (total)' ;
SELECT VALUE FROM V\\\$SYSSTAT WHERE NAME = 'parse count (hard)' ;
SELECT VALUE FROM V\\\$SYSSTAT WHERE NAME = 'parse count (failures)' ;
SELECT VALUE FROM V\\\$SYSSTAT WHERE NAME = 'parse count (describe)' ;"
local AUX="${ORAENV} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local RESSULT="$(${SSH} "${AUX}" | tr '\n' ' ')"
if [[ "${RESSULT}" =~ .*(ORA\-[0-9]{1,}).* || "${RESSULT}" =~ ^(([[:space:]]|)[0-9]{1,}){4}$ ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting parsing data: ${RESSULT}"
return ${STATE_CRITICAL}
fi
# PARSING INDEXES like possitional parameter
# TOTAL=1
# HARD=2
# FAIL=3
# DESCRIBE=4
SOFTPARSE="$(echo "${RESSULT}" | awk '{printf "%2.2f",($1-$2-$3-$4)/$1*100}')"
HARDPARSE="$(echo "${RESSULT}" | awk '{printf "%2.2f",$2/$1*100}')"
FAILEDPARSE="$(echo "${RESSULT}" | awk '{printf "%2.2f",$3/$1*100}')"
if [[ ${SOFTPARSE/.*} -le ${CRITICAL} ]] ; then
MSG="CRITICAL"
RETURNCODE=${STATE_CRITICAL}
elif [[ ${SOFTPARSE/.*} -le ${WARNING} ]] ; then
MSG="WARNING"
RETURNCODE=${STATE_WARNING}
elif [[ ${SOFTPARSE/.*} -gt ${WARNING} ]] ; then
MSG="OK"
RETURNCODE=${STATE_OK}
else
MSG="UNKNOWN"
RETURNCODE=${STATE_UNKNOWN}
fi
echo "${MSG} - ${ORACLE_SID} - Parse Ratio %: ${SOFTPARSE}% Soft -- ${HARDPARSE}% Hard -- ${FAILEDPARSE}% Failed|Soft=${SOFTPARSE}%;${CRITICAL};${WARNING};0;100 Hard=${HARDPARSE}%;${CRITICAL};${WARNING};0;100 Failed=${FAILEDPARSE}%;${CRITICAL};${WARNING};0;100"
return ${RETURNCODE}
}
generic_space_check()
{
local CHECK="$1"
local OBJECTNAME="$2"
case ${CHECK,,} in
"undo" )
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 9999999.99
select TOTALSPACE.TOTAL TOTAL_SPACE,
TOTALSPACE.TOTAL-NVL(USEDSPACE.USED,0.0) FREE_SPACE
FROM (
SELECT TABLESPACE_NAME, SUM(BYTES)/1024/1024 TOTAL
FROM DBA_DATA_FILES WHERE TABLESPACE_NAME = (SELECT value FROM V\\\$parameter WHERE name = 'undo_tablespace')
GROUP BY TABLESPACE_NAME
) TOTALSPACE
LEFT OUTER JOIN
(
SELECT TABLESPACE_NAME, SUM(BYTES)/1024/1024 USED
FROM DBA_UNDO_EXTENTS
WHERE (STATUS='UNEXPIRED' OR STATUS='ACTIVE')
AND TABLESPACE_NAME = (SELECT value FROM V\\\$parameter WHERE name = 'undo_tablespace')
GROUP BY TABLESPACE_NAME
) USEDSPACE
ON TOTALSPACE.TABLESPACE_NAME=USEDSPACE.TABLESPACE_NAME
;"
OBJECTNAME=UNDO
;;
"tablespace" )
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
SET NUMF 99999999.99
SELECT MAX_MB,
REAL_FREE_MB FREE_MB
FROM (
SELECT MAXUSAGE.TABLESPACE_NAME,
MAXUSAGE.MAX_MB,
CASE WHEN MAXUSAGE.ACTUAL_DATAFILE_MB < MAXUSAGE.MAX_MB THEN
MAX_MB-(ACTUAL_DATAFILE_MB-FREE_MB)
ELSE
FREE_MB
END REAL_FREE_MB
FROM
(
select TABLESPACE_NAME,
SUM(case when MAXBYTES > 0 then MAXBYTES else BYTES END)/1024/1024 MAX_MB,
SUM(BYTES)/1024/1024 ACTUAL_DATAFILE_MB
FROM DBA_DATA_FILES
GROUP BY TABLESPACE_NAME
) MAXUSAGE,
(
select TABLESPACE_NAME, SUM(BYTES)/1024/1024 FREE_MB
FROM dba_free_space
GROUP BY TABLESPACE_NAME
) FREEUSAGE
WHERE MAXUSAGE.TABLESPACE_NAME=FREEUSAGE.TABLESPACE_NAME
AND MAXUSAGE.TABLESPACE_NAME = '${OBJECTNAME}'
)
;"
;;
"diskgroup" )
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
SET NUMF 9999999999
SELECT TOTAL_MB, NVL(FREE_MB,0.0) FREE_MB
FROM V\\\$ASM_DISKGROUP
WHERE NAME='${OBJECTNAME}' ;"
;;
* )
echo "CRITICAL - Function generic_space_check received wrong parameter : ${CHECK} "
return ${STATE_CRITICAL}
;;
esac
local AUX="${ORAENV} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local RESSULT="$(${SSH} "${AUX}")"
if [[ "${RESSULT}" =~ .*(ORA\-[0-9]{1,}).* || "${RESSULT}" =~ ^(([[:space:]]|)[0-9]{1,}){2}$ || ! "${RESSULT}" ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting ${CHECK^} usage: ${RESSULT}"
return ${STATE_CRITICAL}
fi
local TOTAL="$(echo ${RESSULT} | awk '{print $1}')"
local FREE="$(echo ${RESSULT} | awk '{print $2}')"
local USED="$(echo "${RESSULT}" | awk '{printf "%2.2f",$1-$2}')"
local PERCENT="$(echo "${RESSULT}" | awk '{printf "%2.2f",100-($2/$1*100)}')"
MSG="$(return_values ${PERCENT/.*})"
RETURNCODE=$?
echo "${MSG} - ${ORACLE_SID} ${CHECK^} ${OBJECTNAME} : ${PERCENT}%, Used : ${USED} of ${TOTAL} MB |${OBJECTNAME}=${PERCENT};${WARNING};${CRITICAL};0;20"
return ${RETURNCODE}
}
asmfs_space_check()
{
local OBJECTNAME="$1"
local AUX="df -P ${OBJECTNAME} | egrep ${OBJECTNAME}"
local RESSULT="$(${SSH} "${AUX}" 2> /dev/null)"
if [[ ! "${RESSULT}" =~ ^\/.*%.*${OBJECTNAME}$ ]] ; then
echo "CRITICAL - Error getting ${COMMAND} space for ${OBJECTNAME}"
return ${STATE_CRITICAL}
fi
local TOTAL="$(echo ${RESSULT} | awk '{print $2*1024}')"
local FREE="$(echo ${RESSULT} | awk '{print $4*1024}')"
local USED="$(echo "${RESSULT}" | awk '{printf $3*1024}')"
local PERCENT="$(echo "${RESSULT}" | awk '{printf $5}')"
local WARNINGVALUE="$(echo ${TOTAL} ${WARNING} | awk '{printf "%.2f",$1*$2/100}')"
local CRITICALVALUE="$(echo ${TOTAL} ${CRITICAL} | awk '{printf "%.2f",$1*$2/100}')"
MSG="$(return_values ${PERCENT//%/})"
RETURNCODE=$?
echo "${MSG} - ${CHECK^} ${OBJECTNAME} : ${PERCENT}, Used : $((${USED}/1024/1024/1024)) GB of $((${TOTAL}/1024/1024/1024)) GB |size=${TOTAL}B used=${USED}B;${WARNINGVALUE};${CRITICALVALUE};0;${TOTAL}"
return ${RETURNCODE}
}
# This check needs SYS/ AS SYSDBA priviledge as connect to the standby (supposed to be in mount state)
dataguard_stats_new()
{
local ORAENVSID="${ORAENV} export ORACLE_SID=${ORACLE_SID} ;"
local let RETURNCODE=${STATE_UNKNOWN}
local SQLPLUS="sqlplus -s '/ as sysdba'"
local TRANSPORTLAG=""
local APPLYLAG=""
local QUERY="SET HEAD OFF
SET PAGES 0
SET FEED OFF
set numf 99999999999999
SELECT VALUE FROM V\\\$DATAGUARD_STATS ;"
local AUX="${ORAENVSID} printf \"%s\n${QUERY}\" | ${SQLPLUS}"
local RESSULT="$(${SSH} "${AUX}")"
if [[ "${RESSULT}" =~ .*(ORA\-[0-9]{1,}).* || "${RESSULT}" =~ ^(([[:space:]]|)[0-9]{1,}){4}$ ]] ; then
echo "CRITICAL - ${ORACLE_SID} Error getting dataguard data: ${RESSULT}"
return ${STATE_CRITICAL}
fi
TRANSPORTLAG="$(echo "${RESSULT}" | head -1)"
APPLYLAG="$(echo "${RESSULT}" | head -2| tail -1)"
# echo "Transport: ${TRANSPORTLAG}"
# echo "Apply: ${APPLYLAG}"
TRANSPORTLAGSECONDS=$(echo "(((${TRANSPORTLAG:1:2}*24)+${TRANSPORTLAG:4:2})*60+${TRANSPORTLAG:7:2})*60+${TRANSPORTLAG:10:2}" | bc)
APPLYLAGSECONDS=$(echo "(((${APPLYLAG:1:2}*24)+${APPLYLAG:4:2})*60+${APPLYLAG:7:2})*60+${APPLYLAG:10:2}" | bc)
if [ ${TRANSPORTLAGSECONDS} -gt ${APPLYLAGSECONDS} ] ; then
MSG="$(return_values ${TRANSPORTLAGSECONDS})"
RETURNCODE=$?
else
MSG="$(return_values ${APPLYLAGSECONDS})"
RETURNCODE=$?
fi
echo "${MSG} - Dataguard stats for ${ORACLE_SID} - TransportLag: ${TRANSPORTLAG} ApplyLag: ${APPLYLAG}|transport=${TRANSPORTLAGSECONDS} apply=${APPLYLAGSECONDS};${WARNING};${CRITICAL};0;86400"
return ${RETURNCODE}
}
########################################################################
#
# / FUNCTIONS
#
########################################################################
########################################################################
#
# VARIABLES
#
########################################################################
PROGNAME=$(basename $0)
PROGPATH=$(dirname $0)
[ $# -le 1 ] && usage $*
REMOTE_SERVER=$2 && shift 2
if [[ ! ${REMOTE_SERVER} ]]
then
echo "No remote server specified!!!"
exit ${STATE_UNKNOWN}
fi
# Checking for non-standard user to connect
if [[ "${1}" = "-U" ]]
then
ORACLE_OSUSER=$2 && shift 2
else
ORACLE_OSUSER="oracle"
fi
SSH="ssh ${ORACLE_OSUSER}@${REMOTE_SERVER} -xq"
ORACLEUSER="NAGIOS"
ORACLEUSERPASSWORD="NAGIOSPASSWORD"
########################################################################
#
# / VARIABLES
#
########################################################################
########################################################################
#
# MAIN
#
########################################################################
# CLEANING THE COMMAND
COMMAND="${1:2}"
ORACLE_SID="$2"
[[ ! "${COMMAND}" || ! "${ORACLE_SID}" ]] && print_usage && exit ${STATE_UNKNOWN}
if [ $# -eq 4 ] ; then
CRITICAL=$3
WARNING=$4
elif [ $# -eq 5 ] ; then
TSDG=$3
CRITICAL=$4
WARNING=$5
fi
if [[ "${WARNING}" && ${CRITICAL} ]] ; then
if [ ${WARNING} -gt ${CRITICAL} ] ; then
echo "UNKNOWN - Warning level is less than Critical"
exit $STATE_UNKNOWN
fi
fi
# if the check is not for asmfs, then a ORACLE_HOME is necessary
if [[ ! "${COMMAND}" = "asmfs" ]] ; then
get_remote_oraclehome
RES=$?
if [ ${RES} -ne ${STATE_OK} ] ; then
echo "CRITICAL - No ORACLE_HOME found"
exit $STATE_UNKNOWN
fi
fi
# SQLPLUS FOR CONNECTIONS
SQLPLUS="sqlplus -s '${ORACLEUSER}/${ORACLEUSERPASSWORD}@${ORACLE_SID}'"
ORAENV="ORACLE_HOME=${ORACLE_HOME};PATH=$PATH:$ORACLE_HOME/bin;LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME/lib;export ORACLE_HOME PATH LD_LIBRARY_PATH;"
case "${COMMAND}" in
"tns")
check_tns
RES=$?
if [ ${RES} -eq ${STATE_OK} ] ; then
echo "OK - reply time ${BASH_REMATCH[1]} from ${ORACLE_SID}"
else
echo "CRITICAL - No TNS Listener on ${ORACLE_SID}"
fi
;;
"db")
check_db
RES=$?
if [ ${RES} -eq ${STATE_OK} ] ; then
echo "OK - ${ORACLE_SID} Database running"
else
echo "CRITICAL - ${ORACLE_SID} Database is DOWN"
fi
;;
"login")
check_login
RES=$?
if [ ${RES} -eq ${STATE_OK} ] ; then
echo "OK - ${ORACLE_SID} dummy login connected"
else
echo "CRITICAL - ${ORACLE_SID} dummy login fail"
fi
;;
"sessions")
check_sessions
RES=$?
;;
"cache")
check_cache
RES=$?
;;
"parsing")
check_parsing
RES=$?
;;
"undo")
generic_space_check undo
RES=$?
;;
"tablespace")
generic_space_check tablespace "${TSDG}"
RES=$?
;;
"diskgroup")
generic_space_check diskgroup "${TSDG}"
RES=$?
;;
"asmfs")
ASMFS_DISK=${ORACLE_SID}
asmfs_space_check ${ASMFS_DISK}
RES=$?
;;
"dgstats")
# dataguard_stats
dataguard_stats_new
RES=$?
;;
"redogen")
check_redogen
RES=$?
;;
*)
exit $STATE_UNKNOWN
;;
esac
# EXIT STATUS
# STATE_OK=0
# STATE_WARNING=1
# STATE_CRITICAL=2
# STATE_UNKNOWN=3
# STATE_DEPENDENT=4
# /EXIT STATUS
#WIP
exit ${RES}