#!/bin/ksh
#       This is OSW Scripts page version.
#       Refer questions to "gs-w osw scripts"
#
# monitor_processes - jdjohnsn 4/22/2005 
# version 1.2
# monitor_processes - jdjohnsn 4/22/2005 
# Runs on a schedule set in crontab and sends email to a list of users whenever
# any of a number of processes goes down.

# modified 4/24/2006 to add a check for the number of DCP std files in the
# /usr/opt/nwis/data/sentry_depot/stddata directory. These can build up for
# reasons other than one of the processes failing, like when wallops
# retransmits data for several weeks all at once. Other WSCs may have to
# change the variable $NODE_ID if it varies from that defined in 
# /usr/opt/decodes/setup/satin.cfg

# modified 8/1/2006 to add email instructions on what to do when the process
# is found not running. 

# modified 8/8/06 to address PR 15629 
#        - corrected file name of nwstransferd
#        - added NWIS host name to email subject line

# modified 9/6/2006 - discovered that if the program is run under the userid
# of one of those processes it is looking for, it will return the grep command
# itself, and it will think that it is returning the process, thus will never
# see that the process really is NOT running. Added the "grep -v grep" to
# remove that possibility, regardless of what userid the program is run under.


###### load the NWIS environment
# $NWISDB is never assigned in this script, so it is presumably exported by
# this profile -- TODO confirm.

. /usr/opt/etc/nwis.profile

###### short HOST name: drop everything from the first '.' onward

HOST=$(hostname)
HOST=${HOST%%.*}

###### database name: third ':'-separated field of $NWISDB

nwdb=$(echo $NWISDB | cut -d: -f3)

# Check to see if maintenance is running, and if so, don't run.
# The notice goes to a single address rather than the entire $TO list.
#
# "grep -v grep" drops the grep command itself from the ps listing. Without
# it, a run under the root userid can match its own 'grep "schema"' process,
# decide maintenance is running, and exit without checking anything.

schema_run=`ps -fu root | grep "schema" | grep -v grep`
if [ -n "$schema_run" ]
then
   echo "monitor_process says maint is running" |\
   /usr/bin/mailx -s "monitor_process says maint is running" user@usgs.gov
   exit
fi

# comma-separated list of email ids that receive the warning mails
TO="user1@usgs.gov,user2@usgs.gov,user3@usgs.gov"

################ check for number of stddata files > 5
#
# Warn when more than 5 files whose names start with the node id are sitting
# in /usr/opt/nwis/data/sentry_depot/stddata.
#
# get node id from satin.cfg file: the text after '=' on the line that
# starts with "node"

NODE_ID=`grep '^node' /usr/opt/decodes/setup/satin.cfg |cut -d'=' -f2`

# get number of files
# - quoting 'ls' keeps any alias named ls from being applied
# - $NODE_ID is deliberately left unquoted in the path so blanks around the
#   value read from satin.cfg are dropped before the glob is formed
# - 2>/dev/null: when nothing matches, the unexpanded pattern reaches ls and
#   it complains on stderr; that is the normal "no backlog" case, not an error
# - tr strips the blank padding some wc implementations put before the count,
#   so the number is clean in both the test and the mail subject

no_fil=`'ls' -1 /usr/opt/nwis/data/sentry_depot/stddata/$NODE_ID* 2>/dev/null |wc -l |tr -d ' '`

if [ "$no_fil" -gt 5 ]
then
   echo "WARNING: STDDATA $NODE_ID* FILES > 5 " | \
   /usr/bin/mailx -s "**** WARNING - NO OF STDDATA FILES IS $no_fil " "$TO"
fi

###############

# Snapshot of the processes to check. Each *_run variable holds the matching
# "ps -f" lines for one process and is empty when nothing matched.

# _proc_lines USER PATTERN
#   Print the lines of "ps -fu USER" that match PATTERN, leaving out any line
#   that contains "grep" (i.e. the grep command itself).
_proc_lines()
{
   ps -fu "$1" | grep "$2" | grep -v grep
}

satin_run=$(_proc_lines satin "/usr/opt/decodes/libexec/satin")
init_run=$(_proc_lines satin "src_lrgs")
sentry_run=$(_proc_lines satin "/usr/opt/nwis/util/sentry $nwdb")
sencomp_run=$(_proc_lines satin "/usr/opt/nwis/util/sencomp $nwdb")
transfer_run=$(_proc_lines nwisweb "/usr/opt/nwisweb/transfer/bin/nwtransferd")
repserv_run=$(_proc_lines nwis "/usr/opt/ingres/bin/repserv")

# satin service: mail $TO when no matching process line was captured.
# "$satin_run" must be quoted: the ps line is several words, and unquoted it
# turns the test into a malformed expression whenever the process IS running.
# The parentheses in the message need no backslash inside double quotes; a
# backslash there would be printed literally in the mail.
if [ -z "$satin_run" ]
then
   echo "WARNING, SATIN NOT RUNNING ON HOST $HOST \n
   Run satinstatus to check status.\n 
   Probably will need to run, as user satin, 'stopsatin' (it may reply that \n
   satin is already stopped), 'startsatin' and then 'initsatin -a' \n
   to reinitialize all processes." |\
   /usr/bin/mailx -s "*** SATIN WARNING: satin -service PROCESS NOT FOUND ON HOST $HOST" "$TO"
fi

# src_lrgs process: mail $TO when no matching process line was captured.
# "$init_run" must be quoted: the ps line is several words, and unquoted it
# turns the test into a malformed expression whenever the process IS running.
if [ -z "$init_run" ]
then
   echo "WARNING, SATIN NOT INITIALIZED ON HOST $HOST\n
   The connection to any lrgs has been lost. Run satinstatus to check status.\n
   Probably will need to run, as user satin, 'stopsatin', 'startsatin' and then\n
   'initsatin -a' to reinitialize all processes. " |\
   /usr/bin/mailx -s "*** SATIN INIT WARNING: src_lrgs PROCESS NOT FOUND ON HOST $HOST" "$TO"
fi

# sentry process: mail $TO when no matching process line was captured.
# "$sentry_run" must be quoted: the ps line is several words, and unquoted it
# turns the test into a malformed expression whenever the process IS running.
# NOTE(review): the instructions hard-code 'nwisca 01' while the subject line
# uses $nwdb -- confirm whether the message should follow $nwdb on other hosts.
if [ -z "$sentry_run" ]
then
   echo "WARNING, SENTRY NOT RUNNING ON HOST $HOST\n
         Run 'sentrystatus' to check both sentry and sencomp.\n 
         If sencomp IS running and sentry is not, first, as user satin,\n 
         run 'stopsentry nwisca 01', and then restart both with\n 
         'startsentry nwisca 01'. If neither are running, simply run \n 
         the startsentry command" |\
   /usr/bin/mailx -s "*** SENTRY WARNING: sentry $nwdb PROCESS NOT FOUND ON HOST $HOST" "$TO"
fi

# sencomp process: mail $TO when no matching process line was captured.
# "$sencomp_run" must be quoted: the ps line is several words, and unquoted it
# turns the test into a malformed expression whenever the process IS running.
# NOTE(review): the instructions hard-code 'nwisca 01' while the subject line
# uses $nwdb -- confirm whether the message should follow $nwdb on other hosts.
if [ -z "$sencomp_run" ]
then
   echo "WARNING, SENCOMP NOT RUNNING ON HOST $HOST\n
         Run 'sentrystatus' to check both sentry and sencomp.\n
         If sentry IS running and sencomp is not, first, as user satin,\n
         run 'stopsentry nwisca 01', and then restart both with\n
         'startsentry nwisca 01'. If neither are running, simply run \n
         the startsentry command" |\
   /usr/bin/mailx -s "*** SENCOMP WARNING: sencomp $nwdb PROCESS NOT FOUND ON HOST $HOST" "$TO"
fi

# nwisweb transfer daemon: mail $TO when no matching process line was captured.
# "$transfer_run" must be quoted: the ps line is several words, and unquoted
# it turns the test into a malformed expression whenever the process IS running.
if [ -z "$transfer_run" ]
then
   echo "WARNING, NWISWEB TRANSFER PROCESS NOT RUNNING ON HOST $HOST \n
         As user nwisweb, run the command \n
         /etc/init.d/nwtransfer start" |\
   /usr/bin/mailx -s "*** NWISWEB TRANSFER WARNING: nwtransferd.pl PROCESS NOT FOUND ON HOST $HOST" "$TO"
fi

# replicator server (repserv): mail $TO when no matching process line was
# captured.
# "$repserv_run" must be quoted: the ps line is several words, and unquoted it
# turns the test into a malformed expression whenever the process IS running.
if [ -z "$repserv_run" ]
then
   echo "WARNING, NWISRT REPLICATOR PROCESS NOT RUNNING ON HOST $HOST \n
         As root user, run the command \n
         /etc/init.d/replicator start"|\
   /usr/bin/mailx -s "*** NWIS-RT WARNING: repserv PROCESS NOT FOUND ON HOST $HOST" "$TO"
fi

